o
    %ݫi59                     @   sn   d Z ddlmZ ddlZddlmZmZ G dd deeZG dd deZG d	d
 d
eZ	G dd deZ
dS )aM   Specifies the inference interfaces for text-processing modules.

Authors:
 * Aku Rouhe 2021
 * Peter Plantinga 2021
 * Loren Lugosch 2020
 * Mirco Ravanelli 2020
 * Titouan Parcollet 2021
 * Abdel Heba 2021
 * Andreas Nautsch 2022, 2023
 * Pooneh Mousavi 2023
 * Sylvain de Langen 2023
 * Adel Moumen 2023
 * Pradnya Kandarkar 2023
    )chainN)EncodeDecodePipelineMixin
Pretrainedc                       sv   e Zd ZdZdgZdgZ fddZedd Zedd	 Z	d
d Z
dd Zdd Zdd Zdd ZdddZ  ZS )GraphemeToPhonemea  
    A pretrained model implementation for Grapheme-to-Phoneme (G2P) models
    that take raw natural language text as an input and

    Arguments
    ---------
    *args : tuple
    **kwargs : dict
        Arguments are forwarded to ``Pretrained`` parent class.

    Example
    -------
    >>> text = ("English is tough. It can be understood "
    ...         "through thorough thought though")
    >>> from speechbrain.inference.text import GraphemeToPhoneme
    >>> tmpdir = getfixture('tmpdir')
    >>> g2p = GraphemeToPhoneme.from_hparams('path/to/model', savedir=tmpdir) # doctest: +SKIP
    >>> phonemes = g2p.g2p(text) # doctest: +SKIP
    txtphonemesc                    s&   t  j|i | |   |   d S N)super__init__create_pipelinesload_dependenciesselfargskwargs	__class__ N/home/ubuntu/.local/lib/python3.10/site-packages/speechbrain/inference/text.pyr
   3   s   zGraphemeToPhoneme.__init__c                 C      | j jS )zReturns the available phonemes)hparamsr   r   r   r   r   r   8      zGraphemeToPhoneme.phonemesc                 C   r   )z6Returns the language for which this model is available)r   languager   r   r   r   r   =   r   zGraphemeToPhoneme.languagec                    s   t |t}|r
|g}| d|i}| | | t| jdr) fdd| jjD  | jjdi  }| 	|}|d }| 
|}|rF|d }|S )	as  Performs the Grapheme-to-Phoneme conversion

        Arguments
        ---------
        text: str or list[str]
            a single string to be encoded to phonemes - or a
            sequence of strings

        Returns
        -------
        result: list
            if a single example was provided, the return value is a
            single list of phonemes
        r   model_input_keysc                    s   i | ]}| | qS r   r   ).0kmodel_inputsr   r   
<dictcomp>Z   s    
z)GraphemeToPhoneme.g2p.<locals>.<dictcomp>r   r   Nr   )
isinstancestrencode_input_update_graphemeshasattrr   r   modsmodeldecode_output_remove_eos)r   textsingleencoded_inputsmodel_outputsdecoded_outputr   r   r   r   g2pB   s"   




zGraphemeToPhoneme.g2pc                 C   s   dd |D S )a  Removes the EOS character from the end of the sequence,
        if encountered

        Arguments
        ---------
        phonemes : list
            a list of phomemic transcriptions

        Returns
        -------
        result : list
            phonemes, without <eos>
        c                 S   s,   g | ]}|r|d  dkr|dd  n|qS )z<eos>Nr   )r   itemr   r   r   
<listcomp>t   s    z1GraphemeToPhoneme._remove_eos.<locals>.<listcomp>r   )r   r   r   r   r   r(   f   s   zGraphemeToPhoneme._remove_eosc                 C   sF   t | jd}|r|dkrd| }||v r!|| |d< d S d S d S d S )Ngrapheme_sequence_moderawgrapheme_encoded_grapheme_encoded)getattrr   )r   r   r2   grapheme_encoded_keyr   r   r   r#   y   s   
z#GraphemeToPhoneme._update_graphemesc                 C   s*   t | jdd}|r|  |  dS dS )z%Loads any relevant model dependenciesdeps_pretrainerN)r6   r   collect_filesload_collected)r   r8   r   r   r   r      s
   z#GraphemeToPhoneme.load_dependenciesc                 C   s
   |  |S )at  A convenience callable wrapper - same as G2P

        Arguments
        ---------
        text: str or list[str]
            a single string to be encoded to phonemes - or a
            sequence of strings

        Returns
        -------
        result: list
            if a single example was provided, the return value is a
            single list of phonemes
        )r.   )r   r)   r   r   r   __call__   s   
zGraphemeToPhoneme.__call__Nc                 C   s   |  ||S )z#Runs enhancement on the noisy input)enhance_batch)r   noisylengthsr   r   r   forward   s   zGraphemeToPhoneme.forwardr   )__name__
__module____qualname____doc__INPUT_STATIC_KEYSOUTPUT_KEYSr
   propertyr   r   r.   r(   r#   r   r;   r?   __classcell__r   r   r   r   r      s    

$	r   c                       s>   e Zd ZdZdgZ fddZdd Zdd Zd	d
 Z  Z	S )ResponseGeneratora  A ready-to-use Response Generator  model

    The class can be used to generate and continue dialogue given the user input.
    The given YAML must contain the fields specified in the *_NEEDED[] lists.
    It needs to be used with custom.py to load the expanded  model with added tokens like bos,eos, and speaker's tokens.

    Arguments
    ---------
    *args : tuple
    **kwargs : dict
        Arguments are forwarded to ``Pretrained`` parent class.
    r&   c                    sB   t  j|i | | jj| _| jj| _d| jj d | _g | _d S )N      )r	   r
   r   r&   	tokenizermax_historyhistory_windowhistoryr   r   r   r   r
      s
   


zResponseGenerator.__init__c                 C   sd   | j | |  }| |}| jjj|dd|d jd df ddd}|d }| j | |S )a5  
        Complete a dialogue given the user's input.
        Arguments
        ---------
        turn: str
            User input which is the last turn of the dialogue.

        Returns
        -------
        response
            Generated response for the user input based on the dialogue history.
        Nr   rJ   T)skip_special_tokensclean_up_tokenization_spaces)rN   appendprepare_inputgenerater&   rK   batch_decodeshape)r   turninputshypspredicted_wordsresponser   r   r   generate_response   s   
z#ResponseGenerator.generate_responsec                 C      t z?Users should modify this function according to their own tasks.NotImplementedErrorr   r   r   r   rR         zResponseGenerator.prepare_inputc                 C   r\   r]   r^   r   r   r   r   rS      r`   zResponseGenerator.generate)
r@   rA   rB   rC   MODULES_NEEDEDr
   r[   rR   rS   rG   r   r   r   r   rH      s    rH   c                       0   e Zd ZdZ fddZdd Zdd Z  ZS )GPTResponseGeneratoraa  A ready-to-use Response Generator  model

    The class can be used to generate and continue dialogue given the user input.
    The given YAML must contain the fields specified in the *_NEEDED[] lists.
    It needs to be used with custom.py to load the expanded GPT model with added tokens like bos,eos, and speaker's tokens.

    Arguments
    ---------
    *args : tuple
    **kwargs : dict
        Arguments are forwarded to ``Pretrained`` parent class.

    Example
    -------
    >>> from speechbrain.inference.text import GPTResponseGenerator

    >>> tmpdir = getfixture("tmpdir")
    >>> res_gen_model = GPTResponseGenerator.from_hparams(source="speechbrain/MultiWOZ-GPT-Response_Generation",
    ... pymodule_file="custom.py")  # doctest: +SKIP
    >>> response = res_gen_model.generate_response("I want to book a table for dinner")  # doctest: +SKIP
    c                    s8   t  j|i | | jj| jj\| _| _| _	| _
d S r   )r	   r
   r&   rK   convert_tokens_to_idsr   special_tokensboseossystemuserr   r   r   r   r
      s   zGPTResponseGenerator.__init__c                 C   sB   |\}}| j j|| jjjd }| j| | | d}|S )a  
        Complete a dialogue given the user's input.

        Arguments
        ---------
        inputs: tuple
            history_bos which is the tokenized history+input values with appropriate speaker token appended before each turn and history_token_type which determines
            the type of each token based on who is uttered that token (either User or System).

        Returns
        -------
        response
            Generated hypothesis for the user input based on the dialogue history.
        pad_idxbeam)r   padding_maskr&   rK   unk_token_idrS   detach)r   rW   history_boshistory_token_typerm   rX   r   r   r   rS      s   
zGPTResponseGenerator.generatec                    s    fdd j D } fddt|D }| j d }ttt| }tt j	g|t j
gf} fddt|D }ttt j
gg| j d   j
gg  }|d|dfS )am  Convert user input and previous histories to the format acceptable for  GPT model.
            It appends all previous history and input and truncates it based on max_history value.
            It then tokenizes the input and generates additional input that determines the type of each token (System or User).

        Returns
        -------
        history_bos: torch.Tensor
            Tokenized history+input values with appropriate speaker token appended before each turn.
        history_token_type: torch.LongTensor
            Type of each token based on who is uttered that token (either User or System)
        c                    s   g | ]	} j j|qS r   )r&   rK   encoder   rV   r   r   r   r1   %  s    z6GPTResponseGenerator.prepare_input.<locals>.<listcomp>c                    s.   g | ]\}}|d  dkr j n jg| qS rI   r   )ri   rh   r   iencoded_turnr   r   r   r1   +  s    Nc                    s2   g | ]\}}|d  dkr j n jgt| qS rt   )ri   rh   lenru   r   r   r   r1   ;  s     r   )rN   	enumeraterM   torch
LongTensorlistr   cattensorrf   rh   	unsqueeze)r   history_tokens_listshistory_input_listshistory_idsrp   history_token_type_listsrq   r   r   r   rR     s4   


z"GPTResponseGenerator.prepare_inputr@   rA   rB   rC   r
   rS   rR   rG   r   r   r   r   rc      s
    rc   c                       rb   )Llama2ResponseGeneratoram  A ready-to-use Response Generator  model

    The class can be used to generate and continue dialogue given the user input.
    The given YAML must contain the fields specified in the *_NEEDED[] lists.
    It needs to be used with custom.py to load the expanded Llama2 model with added tokens like bos,eos, and speaker's tokens.

    Arguments
    ---------
    *args : tuple
    **kwargs : dict
        Arguments are forwarded to ``Pretrained`` parent class.

    Example
    -------
    >>> from speechbrain.inference.text import Llama2ResponseGenerator

    >>> tmpdir = getfixture("tmpdir")
    >>> res_gen_model = Llama2ResponseGenerator.from_hparams(source="speechbrain/MultiWOZ-Llama2-Response_Generation",
    ... pymodule_file="custom.py")  # doctest: +SKIP
    >>> response = res_gen_model.generate_response("I want to book a table for dinner")  # doctest: +SKIP
    c                    s"   ddi}t  j|d|i| d S )Ndevicecudarun_opts)r	   r
   )r   r   r   r   r   r   r   r
   d  s   z Llama2ResponseGenerator.__init__c                 C   sF   |d  | jjj}| jj|| jjd }| j| | d}|S )aJ  
        Complete a dialogue given the user's input.
        Arguments
        ---------
        inputs: prompt_bos
            prompted inputs to be passed to llama2 model for generation.

        Returns
        -------
        response
            Generated hypothesis for the user input based on the dialogue history.
        r   rj   rl   )	tor&   r   r   rm   rK   pad_token_idrS   ro   )r   rW   
prompt_bosrm   rX   r   r   r   rS   i  s   z Llama2ResponseGenerator.generatec                    s|   dd }t t|t j} fdd|D }| j d }tt t| }tt	 j
jg|f}|djddS )a  Convert user input and previous histories to the format acceptable for  Llama2 model.
            It appends all previous history and input and truncates it based on max_history value.
            It then tokenizes the input and add prompts.

        Returns
        -------
        prompt_bos: torch.Tensor
            Tokenized history+input values with appropriate prompt.
        c                 S   s$   | \}}|d dkrd| d S |S )a|  add [INST] and [/INST] prompt to the start and end ogf item.

            Arguments
            ---------
            idx_and_item: tuple
                id and its corresponding text. If the id is even, it is user turn and [ INST] is added.

            Returns
            -------
            prompt_bos: torch.LongTensor
                prompted text for one item.
            rI   r   z[INST] z [/INST]r   )idx_and_itemindexr0   r   r   r   generate_prompt  s   z>Llama2ResponseGenerator.prepare_input.<locals>.generate_promptc                    s   g | ]} j |qS r   )rK   rr   rs   r   r   r   r1     s    z9Llama2ResponseGenerator.prepare_input.<locals>.<listcomp>Nr   )dim)r|   mapry   rN   rM   rz   r{   r   r}   r~   rK   bos_token_idr   )r   r   promptsprompt_tokens_lists
prompt_idsr   r   r   r   rR     s   z%Llama2ResponseGenerator.prepare_inputr   r   r   r   r   r   M  s
    r   )rC   	itertoolsr   rz    speechbrain.inference.interfacesr   r   r   rH   rc   r   r   r   r   r   <module>   s     ;s