o
    i8                     @   s   d Z ddlZddlmZmZmZ ddlZddlmZ ddl	m  m
Z ddlmZ ddlmZ ddlmZ ddlmZ G dd	 d	eeejZG d
d dejZG dd dejZdS )z@Default Recurrent Neural Network Languge Model in `lm_train.py`.    N)AnyListTuple)LMInterface)	to_device)BatchScorerInterface)	strtoboolc                   @   s|   e Zd ZdZedd Zdd Zdd Zdd	 Zd
d Z	dd Z
dd Zdejdee dejdeejee f fddZdS )DefaultRNNLMa  Default RNNLM for `LMInterface` Implementation.

    Note:
        PyTorch seems to have memory leak when one GPU compute this after data parallel.
        If parallel GPUs compute this, it seems to be fine.
        See also https://github.com/espnet/espnet/issues/1075

    c                 C   s   | j dtddddgdd | j ddtd	d
d | j ddtddd | j ddtdd | j dtddd | j dtddd | j dtddd | S )z.Add arguments to command line argument parser.z--typelstm?gruzWhich type of RNN to use)typedefaultnargschoiceshelpz--layerz-l   zNumber of hidden layers)r   r   r   z--unitz-ui  zNumber of hidden unitsz--embed-unitNznNumber of hidden units in embedding layer, if it is not specified, it keeps the same number with hidden units.)r   r   r   z--dropout-rate      ?zdropout probabilityz--emb-dropout-rate        zemb dropout probabilityz--tie-weightsFzTie input and output embeddings)add_argumentstrintfloatr   )parser r   Z/home/ubuntu/.local/lib/python3.10/site-packages/espnet/nets/pytorch_backend/lm/default.pyadd_arguments   sH   

zDefaultRNNLM.add_argumentsc              
   C   sb   t j|  t|dd}t|dd}t|dd}t|dd}tt||j|j||j|||| _	dS )zInitialize class.

        Args:
            n_vocab (int): The size of the vocabulary
            args (argparse.Namespace): configurations. see py:method:`add_arguments`

        dropout_rater   
embed_unitNemb_dropout_ratetie_weightsF)
nnModule__init__getattrClassifierWithStateRNNLMlayerunitr   model)selfn_vocabargsr   r   r   r    r   r   r   r#   C   s"   
zDefaultRNNLM.__init__c                 C   s
   | j  S )zDump state dict.)r)   
state_dict)r*   r   r   r   r-   b   s   
zDefaultRNNLM.state_dictc                 C   s   | j | dS )zLoad state dict.N)r)   load_state_dict)r*   dr   r   r   r.   f   s   zDefaultRNNLM.load_state_dictc                 C   s   d}d}t d }d}|j\}}t|D ]>}	| ||dd|	f |dd|	f \}}
t j|dd|	f dk|
jd}||
 | 7 }|t |
| 7 }|t	|7 }q|| ||
|jfS )a)  Compute LM loss value from buffer sequences.

        Args:
            x (torch.Tensor): Input ids. (batch, len)
            t (torch.Tensor): Target ids. (batch, len)

        Returns:
            tuple[torch.Tensor, torch.Tensor, torch.Tensor]: Tuple of
                loss to backward (scalar),
                negative log-likelihood of t: -log p(t) (scalar) and
                the number of elements in x (scalar)

        Notes:
            The last two return values are used
            in perplexity: p(t)^{-n} = exp(-log p(t) / n)

        r   N)dtype)torchtensorlongshaperanger)   sumr0   meanr   todevice)r*   xtlosslogpcountstate
batch_sizesequence_lengthi
loss_batch	non_zerosr   r   r   forwardj   s   
* zDefaultRNNLM.forwardc                 C   s*   | j ||d d\}}|d|fS )a  Score new token.

        Args:
            y (torch.Tensor): 1D torch.int64 prefix tokens.
            state: Scorer state for prefix tokens
            x (torch.Tensor): 2D encoder feature that generates ys.

        Returns:
            tuple[torch.Tensor, Any]: Tuple of
                torch.float32 scores for next token (n_vocab)
                and next state for ys

        r   )r)   predict	unsqueezesqueeze)r*   yr?   r:   	new_statescoresr   r   r   score   s   zDefaultRNNLM.scorec                 C   s   | j |S )zScore eos.

        Args:
            state: Scorer state for prefix tokens

        Returns:
            float: final score

        )r)   final)r*   r?   r   r   r   final_score   s   
zDefaultRNNLM.final_scoreysstatesxsreturnc                    s   t || jjj| jjjdkrd nd d du rdnfdd D | j|dddf \}| fd	d
tD fS )a  Score new token batch.

        Args:
            ys (torch.Tensor): torch.int64 prefix tokens (n_batch, ylen).
            states (List[Any]): Scorer states for prefix tokens.
            xs (torch.Tensor):
                The encoder feature that generates ys (n_batch, xlen, n_feat).

        Returns:
            tuple[torch.Tensor, List[Any]]: Tuple of
                batchfied scores for next token with shape of `(n_batch, n_vocab)`
                and next state list for ys.

        r
   ch)rV   r   Nc                    s(   i | ]   fd dt D qS )c                    s,   g | ] t  fd dtD qS )c                    s   g | ]
}|    qS r   r   ).0b)rB   krQ   r   r   
<listcomp>       zBDefaultRNNLM.batch_score.<locals>.<dictcomp>.<listcomp>.<listcomp>)r1   stackr5   rW   )rY   n_batchrQ   )rB   r   rZ      s    z7DefaultRNNLM.batch_score.<locals>.<dictcomp>.<listcomp>r5   r]   )r^   n_layersrQ   rY   r   
<dictcomp>   s    z,DefaultRNNLM.batch_score.<locals>.<dictcomp>rF   c                    s"   g | ]  fd dD qS )c                    s(   i | ]   fd dt D qS )c                    s   g | ]
} |   qS r   r   )rW   rB   )rX   rY   rQ   r   r   rZ      r[   zBDefaultRNNLM.batch_score.<locals>.<listcomp>.<dictcomp>.<listcomp>r_   r]   )rX   r`   rQ   ra   r   rb      s   ( z7DefaultRNNLM.batch_score.<locals>.<listcomp>.<dictcomp>r   r]   )keysr`   rQ   )rX   r   rZ          z,DefaultRNNLM.batch_score.<locals>.<listcomp>)lenr)   	predictorr`   typrG   r5   )r*   rP   rQ   rR   r=   r   )rc   r^   r`   rQ   r   batch_score   s    
zDefaultRNNLM.batch_scoreN)__name__
__module____qualname____doc__staticmethodr   r#   r-   r.   rE   rM   rO   r1   Tensorr   r   r   rh   r   r   r   r   r	      s&    	
( r	   c                       sP   e Zd ZdZejdddf fdd	Zdd Zd	d
 Zdd Z	dddZ
  ZS )r%   zA wrapper for pytorch RNNLM.none)	reductionrF   c                    sN   t |ttfstdt| tt|   || _d| _	d| _
|| _|| _dS )zInitialize class.

        :param torch.nn.Module predictor : The RNNLM
        :param function lossfun : The loss function to use
        :param int/str label_key :

        z'label_key must be int or str, but is %sN)
isinstancer   r   	TypeErrorr   superr%   r#   lossfunrJ   r<   	label_keyrf   )r*   rf   rt   ru   	__class__r   r   r#      s   

zClassifierWithState.__init__c                 O   s  t | jtrBt| | j  krt|k s n d| j }t||| j }| jdkr1|dd }n.|d| j || jd d  }nt | jtr_| j|vrVd| j }t||| j }|| j= d| _d| _| j|g|R i |\}| _| 	| j|| _|| jfS )a  Compute the loss value for an input and label pair.

        Notes:
            It also computes accuracy and stores it to the attribute.
            When ``label_key`` is ``int``, the corresponding element in ``args``
            is treated as ground truth labels. And when it is ``str``, the
            element in ``kwargs`` is used.
            The all elements of ``args`` and ``kwargs`` except the groundtruth
            labels are features.
            It feeds features to the predictor and compare the result
            with ground truth labels.

        :param torch.Tensor state : the LM state
        :param list[torch.Tensor] args : Input minibatch
        :param dict[torch.Tensor] kwargs : Input minibatch
        :return loss value
        :rtype torch.Tensor

        zLabel key %d is out of boundsrF   N   zLabel key "%s" is not found)
rq   ru   r   re   
ValueErrorr   rJ   r<   rf   rt   )r*   r?   r,   kwargsmsgr;   r   r   r   rE      s&   "


"



zClassifierWithState.forwardc                 C   sB   t | jdr| jjr| ||S | ||\}}|tj|ddfS )a  Predict log probabilities for given state and input x using the predictor.

        :param torch.Tensor state : The current state
        :param torch.Tensor x : The input
        :return a tuple (new state, log prob vector)
        :rtype (torch.Tensor, torch.Tensor)
        
normalizedrx   )dim)hasattrrf   r|   Flog_softmax)r*   r?   r:   zr   r   r   rG     s   zClassifierWithState.predictc           	      C   s   | j jjdkr| ||S g }g }t|D ]#}|du rdn|| }| ||| d\}}|| || q|t|fS )z(Predict new tokens from buffered inputs.r&   Nr   )	rf   rw   ri   rG   r5   rH   appendr1   cat)	r*   r?   r:   nrK   	new_log_yrB   state_ilog_yr   r   r   buff_predict&  s   
z ClassifierWithState.buff_predictNc                 C   s4   t | jdr|dur| j|| S | j|S dS )zPredict final log probabilities for given state using the predictor.

        :param state: The state
        :return The final log probabilities
        :rtype torch.Tensor
        rN   Nr   )r~   rf   rN   )r*   r?   indexr   r   r   rN   5  s
   zClassifierWithState.final)N)ri   rj   rk   rl   r!   CrossEntropyLossr#   rE   rG   r   rN   __classcell__r   r   rv   r   r%      s    *r%   c                       s<   e Zd ZdZ					d fdd	Zd	d
 Zdd Z  ZS )r&   zA pytorch RNNLM.Nr
   r   r   Fc	           
         s`  t t|   |du r}t||| _|dkrd| _nt|| _|dkr?tt	|gfddt
|d D  | _ntt|gfddt
|d D  | _t fddt
|d D | _t|| _|| _| _|| _td	| td
  td| |r|ksJ d| jj| j_|  D ]	}	|	jdd qdS )zInitialize class.

        :param int n_vocab: The size of the vocabulary
        :param int n_layers: The number of layers to create
        :param int n_units: The number of units per layer
        :param str typ: The RNN type
        Nr   r
   c                       g | ]}t   qS r   )r!   LSTMCellrW   _n_unitsr   r   rZ   i      z"RNNLM.__init__.<locals>.<listcomp>rx   c                    r   r   )r!   GRUCellr   r   r   r   rZ   n  r   c                    s   g | ]}t  qS r   )r!   Dropoutr   )r   r   r   rZ   r  s    zTie weights set to {}zDropout set to {}zEmb Dropout set to {}z>Tie Weights: True need embedding and final dimensions to matchgg?)rs   r&   r#   r!   	Embeddingembed
embed_dropr   
ModuleListr   r5   rnnr   dropoutLinearlor`   r   rg   logginginfoformatweight
parametersdatauniform_)
r*   r+   r`   r   n_embedrg   r   r   r    paramrv   )r   r   r   r#   I  sH   
zRNNLM.__init__c                 C   s(   t |  }t|| jj|j|jdS )zInitialize state.)r9   r0   )nextr   r1   zerosr   r8   r9   r0   )r*   	batchsizepr   r   r   
zero_state  s   zRNNLM.zero_statec                    s  |du r, fddt  jD }d|i} jdkr, fddt  jD }||d}dg j } jdur@  }n } jdkrdg j } jd  jd ||d d |d	 d f\|d< |d< t d
 jD ]#} j|  j| ||d
  |d | |d	 | f\||< ||< qs||d}n7 jd  jd ||d d |d< t d
 jD ]} j|  j| ||d
  |d | ||< qd|i}  jd |d }||fS )zForward neural networks.Nc              	      "   g | ]}t  d qS r   r   r   sizerW   r   r*   r:   r   r   rZ     s   " z!RNNLM.forward.<locals>.<listcomp>rV   r
   c              	      r   r   r   r   r   r   r   rZ     rd   rT   r   rU   rx   rF   )r5   r`   rg   r   r   r   r   r   )r*   r?   r:   rV   rU   embr   rJ   r   r   r   rE     s8   




"*&0zRNNLM.forward)Nr
   r   r   F)ri   rj   rk   rl   r#   r   rE   r   r   r   rv   r   r&   F  s    >r&   )rl   r   typingr   r   r   r1   torch.nnr!   torch.nn.functional
functionalr   espnet.nets.lm_interfacer   #espnet.nets.pytorch_backend.e2e_asrr   espnet.nets.scorer_interfacer   espnet.utils.cli_utilsr   r"   r	   r%   r&   r   r   r   r   <module>   s     In