o
    iGG                     @   s  d dl Z d dlZd dlZd dlZd dlmZ d dlmZ	 d dl
Zd dlZd dlmZmZmZ d dlmZ d dlmZ d dlmZ d dlm  m  mZ d dlmZmZmZmZm Z  d dl!m"Z" d dl#m$Z$ d d	l%m&Z& d d
l'm(Z( d dl)m*Z* d dl+m,Z, d dl-m.Z. d dl/m0Z0 d dl1m2Z2m3Z3 G dd de"ej4Z5G dd dej4Z6G dd dej4Z7G dd dej8j9Z:G dd de,Z;dd Z<dS )    N)linkreportertraining)convert)softmax_cross_entropy)
extensions)MakeSymlinkToBestModelParallelSentenceIteratorcompute_perplexitycount_tokensread_tokens)LMInterface)dynamic_import_optimizer)ChainerSchedulerdynamic_import_scheduler)set_deterministic_chainer)BaseEvaluator)ShufflingEnabler)TensorboardLogger)check_early_stopset_early_stopc                   @   s   e Zd ZdZedd ZdS )DefaultRNNLMzDefault RNNLM wrapper to compute reduce framewise loss values.

    Args:
        n_vocab (int): The size of the vocabulary
        args (argparse.Namespace): configurations. see `add_arguments`
    c                 C   sF   | j dtddddgdd | j ddtd	d
d | j ddtddd | S )Nz--typelstm?gruzWhich type of RNN to use)typedefaultnargschoiceshelpz--layerz-l   zNumber of hidden layers)r   r   r    z--unitz-ui  zNumber of hidden units)add_argumentstrint)parser r&   P/home/ubuntu/.local/lib/python3.10/site-packages/espnet/lm/chainer_backend/lm.pyadd_arguments6   s   

zDefaultRNNLM.add_argumentsN)__name__
__module____qualname____doc__staticmethodr(   r&   r&   r&   r'   r   .   s    r   c                       s@   e Zd ZdZejdf fdd	Zdd Zdd Zd	d
 Z  Z	S )ClassifierWithStatezA wrapper for a chainer RNNLM

    :param link.Chain predictor : The RNNLM
    :param function lossfun: The loss function to use
    :param int/str label_key:
    c                    sx   t |ttfstdt| tt|   || _d | _	d | _
|| _|   || _W d    d S 1 s5w   Y  d S )Nz'label_key must be int or str, but is %s)
isinstancer$   r#   	TypeErrorr   superr.   __init__lossfunyloss	label_key
init_scope	predictor)selfr9   r4   r7   	__class__r&   r'   r3   Q   s   
"zClassifierWithState.__init__c                 O   s  t | jtrBt| | j  krt|k s n d| j }t||| j }| jdkr1|dd }n.|d| j || jd d  }nt | jtr_| j|vrVd| j }t||| j }|| j= d| _d| _| j|g|R i |\}| _| 	| j|| _|| jfS )a  Computes the loss value for an input and label pair.

            It also computes accuracy and stores it to the attribute.
            When ``label_key`` is ``int``, the corresponding element in ``args``
            is treated as ground truth labels. And when it is ``str``, the
            element in ``kwargs`` is used.
            The all elements of ``args`` and ``kwargs`` except the groundtruth
            labels are features.
            It feeds features to the predictor and compare the result
            with ground truth labels.

        :param state : The LM state
        :param list[chainer.Variable] args : Input minibatch
        :param dict[chainer.Variable] kwargs : Input minibatch
        :return loss value
        :rtype chainer.Variable
        zLabel key %d is out of boundsr/   N   zLabel key "%s" is not found)
r0   r7   r$   len
ValueErrorr#   r5   r6   r9   r4   )r:   stateargskwargsmsgtr&   r&   r'   __call__c   s&   "


"



zClassifierWithState.__call__c                 C   s@   t | jdr| jjr| ||S | ||\}}|t|jfS )zPredict log probabilities for given state and input x using the predictor

        :param state : the state
        :param x : the input
        :return a tuple (state, log prob vector)
        :rtype cupy/numpy array
        
normalized)hasattrr9   rF   Flog_softmaxdata)r:   r@   xzr&   r&   r'   predict   s   zClassifierWithState.predictc                 C   s   t | jdr| j|S dS )zPredict final log probabilities for given state using the predictor

        :param state : the state
        :return log probability vector
        :rtype cupy/numpy array

        finalg        )rG   r9   rN   )r:   r@   r&   r&   r'   rN      s   zClassifierWithState.final)
r)   r*   r+   r,   r   r3   rE   rM   rN   __classcell__r&   r&   r;   r'   r.   I   s    
)r.   c                       s*   e Zd ZdZd fdd	Zdd Z  ZS )RNNLMzA chainer RNNLM

    :param int n_vocab: The size of the vocabulary
    :param int n_layers: The number of layers to create
    :param int n_units: The number of units per layer
    :param str type: The RNN type
    r   c                    s   t t|   |  7 t| | _|dkr%tj fddt	|D  ntj fddt	|D  | _
t || _W d    n1 sEw   Y  |  D ]}tjdd|jj|jd< qN|| _ | _|| _d S )Nr   c                       g | ]}t   qS r&   )LStatelessLSTM.0_n_unitsr&   r'   
<listcomp>       z"RNNLM.__init__.<locals>.<listcomp>c                    rQ   r&   )rR   StatelessGRUrT   rW   r&   r'   rY      rZ   gg?.)r2   rP   r3   r8   DLEmbedIDembedchainer	ChainListrangernnrR   LinearloparamsnprandomuniformrJ   shapen_layersrX   typ)r:   n_vocabrj   rX   rk   paramr;   rW   r'   r3      s$   
	
zRNNLM.__init__c           	   	   C   sv  |d u r| j dkrd g| j d g| j d}ndd g| j i}d g| j }| |}| j dkr~d g| j }| jd |d d |d d t|\|d< |d< tjd| jD ] }| j| |d | |d | t||d  \||< ||< qW||d}n|d d d u r| j	}t
jj| j t
|j|jd | jf|jd|d d< W d    n1 sw   Y  | jd |d d t||d< tjd| jD ]Y}|d | d u r| j	}t
jj| j% t
|j||d  jd | jf||d  jd|d |< W d    n	1 sw   Y  | j| |d | t||d  ||< qd|i}| t|d }||fS )	Nr   )chro   r   rn   r=   )dtyper/   )rk   rj   r^   rb   rH   dropoutsixmovesra   xpr_   backendscudaget_device_from_id
_device_idVariablezerosri   rX   rp   rd   )	r:   r@   rK   ro   embrn   nrt   r5   r&   r&   r'   rE      sH   


4$"",zRNNLM.__call__)r   )r)   r*   r+   r,   r3   rE   rO   r&   r&   r;   r'   rP      s    rP   c                       (   e Zd ZdZ fddZdd Z  ZS )BPTTUpdaterzAn updater for a chainer LM

    :param chainer.dataset.Iterator train_iter : The train iterator
    :param optimizer:
    :param schedulers:
    :param int device : The device id
    :param int accum_grad :
    c                    s,   t t| j|||d t||| _|| _d S Ndevice)r2   r~   r3   r   	scheduler
accum_grad)r:   
train_iter	optimizer
schedulersr   r   r;   r&   r'   r3      s   
zBPTTUpdater.__init__c                 C   sT  |  d}| d}d}d}|j  t| jD ]r}| }tj|| j	dd\}}t
jj|}	d}
d }|j\}}tj|D ]4}||t
|d d |f t
|d d |f \}}|	|d d |f }|
|| 7 }
|t|7 }q?|
|| j  }
|t|
j7 }|
  |
  qtd|i|j td|i|j |  | j| j d S )Nmainr   r   r/   r   paddingr6   count)get_iteratorget_optimizertarget
cleargradsra   r   __next__r   concat_examplesr   r_   ru   rv   get_array_moduleri   rr   rs   ry   count_nonzeror$   floatrJ   backwardunchain_backwardr   reportupdater   step	iteration)r:   r   r   r   sum_lossrV   batchrK   rD   rt   r6   r@   
batch_sizesequence_lengthi
loss_batch	non_zerosr&   r&   r'   update_core   s6   



*
zBPTTUpdater.update_core)r)   r*   r+   r,   r3   r   rO   r&   r&   r;   r'   r~      s    	r~   c                       r}   )LMEvaluatorzA custom evaluator for a chainer LM

    :param chainer.dataset.Iterator val_iter : The validation iterator
    :param eval_model : The model to evaluate
    :param int device : The device id to use
    c                    s   t t| j|||d d S r   )r2   r   r3   )r:   val_iter
eval_modelr   r;   r&   r'   r3   0  s   zLMEvaluator.__init__c                 C   s  |  d}| d}d}d}t|D ]O}tj|| jdd\}}tjj	|}d }	t
jt|d D ].}
||	|d d |
f |d d |
f \}	}||d d |
f }||j| 7 }|t|7 }q3qi }t| tdt|| i| W d    |S 1 sw   Y  |S )Nr   r   r   r   r6   )r   
get_targetcopyr   r   r   r_   ru   rv   r   rr   rs   ra   r>   r   rJ   r$   r   report_scoper   r   )r:   r   r   r6   r   r   rK   rD   rt   r@   r   r   r   observationr&   r&   r'   evaluate3  s*   

(
zLMEvaluator.evaluate)r)   r*   r+   r,   r3   r   rO   r&   r&   r;   r'   r   (  s    r   c              	      sr   j dkr	tdtdtj  t  tjjst	d tjj
s't	d  jd } jd }t j j}t j j}t||\}}t||\}}tdt j  td	tt|  td
t|  td|| d   tdtt|  tdt|  td|| d    jdkp jdk}	t| j j|||	 d}
t| j j||dd}tt|
j j }td|  tdt j|   t j j j j}t|} j dkrt	d  j dkrd}tj!|"  |#  nd} j$d }t%|d }td|  |&t'j(t) dddd*d W d    n	1 s3w   Y  t+ j, j-}|.| } j/d u rNg }n
 fd!d" j/D }|0| |1tj23 j4 t5|
||| j}t6j7| jd#f j$d$}|8t9|||d% |8t:j;t< j=d&fd' |j8t:>g d( j=d&fd) |8t:j? j=d* |8t:j@d+d, |8t:A|d- |8tBd.d/ |	r|j8tC|
g jdkr؈ jn jd#fd)  jDrtd0 jD  tjEF jD| tG| dd1  jHd ur. jHd2kr.zdd3lImJ} W n tKy   tLd4  w | jH}|j8tM| j=d&fd) |N  tO| j  jPrtd5 tjEF j$d6 | t jP j}t||\}}td7tt|  td8t|  td9|| d   t| j j||dd}t9|||d%}tQd:d | }W d    n	1 sw   Y  td;ttRStT|d<   d S d S )=zPTrain with the given args

    :param Namespace args: The program arguments
    r   z/chainer backend does not support --model-modulezchainer version = zcuda is not availablezcudnn is not availablez<unk>z<eos>z	#vocab = z"#sentences in the training data = z#tokens in the training data = z'oov rate in the training data = %.2f %%d   z$#sentences in the validation data = z!#tokens in the validation data = z)oov rate in the validation data = %.2f %%r/   r   )
max_lengthsoseosshuffleF)r   r   r   repeatz#iterations per epoch = %dz#total iterations = r=   z6currently, multi-gpu is not supported. use single gpu.z/model.jsonwbzwriting a model config file to    T)indentensure_ascii	sort_keysutf_8Nc                    s   g | ]\}}t || qS r&   r   )rU   kvrA   r&   r'   rY     s    ztrain.<locals>.<listcomp>epoch)outr   r   )postprocesstrigger)r   r   
perplexityval_perplexityelapsed_time)r   )update_intervalzsnapshot.ep.{.updater.epoch})filenamezrnnlm.model.{.updater.epoch}zvalidation/main/losszrnnlm.modelzresumed from %s)is_lm )SummaryWriterzPlease install tensorboardxztest the best modelz/rnnlm.model.bestz#sentences in the test data = z#tokens in the test data = z#oov rate in the test data = %.2f %%trainztest perplexity: z	main/loss)Umodel_moduleNotImplementedErrorlogginginfor_   __version__r   rv   	availablewarningcudnn_enabledchar_list_dictr   train_labelvalid_labelr   r#   rl   r>   	sortagradr	   	batchsizemaxlenr$   batch_indicesr   r   rP   layerunitr   r.   ngpurw   useto_gpuoutdiropenwritejsondumpsvarsencoder   optbackend	from_argsr   setupadd_hookr   GradientClippinggradclipr~   r   Trainerextendr   r   	LogReportr
   report_interval_itersPrintReportProgressBarsnapshotsnapshot_objectr   r   resumeserializersload_npzr   tensorboard_dirtensorboardXr   	Exceptionerrorr   runr   
test_labelusing_configrf   expr   )rA   unkr   r   valn_train_tokensn_train_oovsn_val_tokens
n_val_oovsuse_sortagradr   r   epoch_itersrb   modelgpu_id
model_conff	opt_classr   r   updatertrainerr   writertestn_test_tokensn_test_oovs	test_iter	evaluatorresultr&   r   r'   r   H  s   









	



$r   )=r   r   r   r_   chainer.functions	functionsrH   chainer.linkslinksrR   numpyrf   rr   r   r   r   chainer.datasetr   chainer.functions.lossr   chainer.trainingr   2espnet.nets.chainer_backend.deterministic_embed_idnetschainer_backenddeterministic_embed_idr\   espnet.lm.lm_utilsr   r	   r
   r   r   espnet.nets.lm_interfacer   espnet.optimizer.factoryr   espnet.scheduler.chainerr   espnet.scheduler.schedulerr    espnet.utils.deterministic_utilsr   espnet.utils.training.evaluatorr   espnet.utils.training.iteratorsr   (espnet.utils.training.tensorboard_loggerr   !espnet.utils.training.train_utilsr   r   Chainr   r.   rP   updatersStandardUpdaterr~   r   r   r&   r&   r&   r'   <module>   s:   	`F9 