"""LM training in pytorch."""

import copy
import json
import logging

import numpy as np
import torch
import torch.nn as nn

from chainer import Chain, reporter, training
from chainer.dataset import convert
from chainer.training import extensions
from torch.nn.parallel import data_parallel

from espnet.asr.asr_utils import (
    snapshot_object,
    torch_load,
    torch_resume,
    torch_snapshot,
)
from espnet.lm.lm_utils import (
    MakeSymlinkToBestModel,
    ParallelSentenceIterator,
    count_tokens,
    load_dataset,
    read_tokens,
)
from espnet.nets.lm_interface import LMInterface, dynamic_import_lm
from espnet.optimizer.factory import dynamic_import_optimizer
from espnet.scheduler.pytorch import PyTorchScheduler
from espnet.scheduler.scheduler import dynamic_import_scheduler
from espnet.utils.deterministic_utils import set_deterministic_pytorch
from espnet.utils.training.evaluator import BaseEvaluator
from espnet.utils.training.iterators import ShufflingEnabler
from espnet.utils.training.tensorboard_logger import TensorboardLogger
from espnet.utils.training.train_utils import check_early_stop, set_early_stop


def compute_perplexity(result):
    """Compute and add the perplexity to the LogReport.

    :param dict result: The current observations
    """
    # Rewrite the LogReport observations in place, adding perplexity values.
    result["perplexity"] = np.exp(result["main/nll"] / result["main/count"])
    if "validation/main/nll" in result:
        result["val_perplexity"] = np.exp(
            result["validation/main/nll"] / result["validation/main/count"]
        )
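
# Worked example (illustrative numbers, not from the original source): with an
# accumulated negative log-likelihood of 460.5 nats over 100 tokens,
#
#     result = {"main/nll": 460.5, "main/count": 100}
#     compute_perplexity(result)
#     # result["perplexity"] == np.exp(460.5 / 100) ~= 100.0
#
# i.e. perplexity is exp(total nll / total token count).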


class Reporter(Chain):
    """Dummy module to use chainer's trainer."""

    def report(self, loss):
        """Report nothing."""
        pass


def concat_examples(batch, device=None, padding=None):
    """Concat examples in minibatch.

    :param np.ndarray batch: The batch to concatenate
    :param int device: The device to send to
    :param Tuple[int,int] padding: The padding to use
    :return: (inputs, targets)
    :rtype (torch.Tensor, torch.Tensor)
    """
    x, t = convert.concat_examples(batch, padding=padding)
    x = torch.from_numpy(x)
    t = torch.from_numpy(t)
    if device is not None and device >= 0:
        x = x.cuda(device)
        t = t.cuda(device)
    return x, t
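
# Usage sketch (illustrative; the arrays below are made up): pad a minibatch of
# (input, target) token-id pairs and keep it on CPU. The updater and evaluator
# below call this with padding=(0, -100) so that padded target positions can be
# ignored by the model's loss computation.
#
#     batch = [(np.array([1, 2, 3]), np.array([2, 3, 4])),
#              (np.array([1, 5]), np.array([5, 4]))]
#     x, t = concat_examples(batch, device=-1, padding=(0, -100))
#     # x.shape == t.shape == (2, 3) and t[1, 2] == -100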


class BPTTUpdater(training.StandardUpdater):
    """An updater for a pytorch LM."""

    def __init__(
        self,
        train_iter,
        model,
        optimizer,
        schedulers,
        device,
        gradclip=None,
        use_apex=False,
        accum_grad=1,
    ):
        """Initialize class.

        Args:
            train_iter (chainer.dataset.Iterator): The train iterator
            model (LMInterface) : The model to update
            optimizer (torch.optim.Optimizer): The optimizer for training
            schedulers (espnet.scheduler.scheduler.SchedulerInterface):
                The schedulers of `optimizer`
            device (int): The device id
            gradclip (float): The gradient clipping value to use
            use_apex (bool): The flag to use Apex in backprop.
            accum_grad (int): The number of gradient accumulation.

        """
        super(BPTTUpdater, self).__init__(train_iter, optimizer)
        self.model = model
        self.device = device
        self.gradclip = gradclip
        self.use_apex = use_apex
        self.scheduler = PyTorchScheduler(schedulers, optimizer)
        self.accum_grad = accum_grad
zBPTTUpdater.__init__c              	   C   s  |  d}| d}| j  dddd}t| jD ]}| }t|| jd dd\}}| jd dkr=| ||\}}	}
nt	| j||f| j\}}	}
|
 | j }| jruddlm} |||}|  W d	   n1 sow   Y  n|  |d
  t|7  < |d  t|	 7  < |d  t|
 7  < q| D ]\}}t||i|j q| jd	urtj| j | j |  | jj| jd d	S )zUpdate the model.maing        r   )r'   nllcountr   ir3   r-   ampNr'   rE   rF   )n_iter)get_iteratorget_optimizerr:   	zero_gradranger>   __next__r.   r3   r   meanr<   apexrK   
scale_lossbackwardfloatsumintitemsr   r(   targetr;   nnutilsclip_grad_norm_
parametersstepr=   	iteration)r&   r?   r@   accum_r2   r4   r5   r'   rE   rF   rK   scaled_losskvr"   r"   r#   update_corey   s6   




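
# Wiring sketch (illustrative; it simply mirrors how train() below builds the
# trainer):
#
#     updater = BPTTUpdater(train_iter, model, optimizer, schedulers, gpu_id,
#                           gradclip=args.gradclip, use_apex=use_apex,
#                           accum_grad=args.accum_grad)
#     trainer = training.Trainer(updater, (args.epoch, "epoch"), out=args.outdir)
#
# Each trainer iteration consumes accum_grad minibatches before a single
# optimizer step, so the effective batch size is
# batchsize * max(ngpu, 1) * accum_grad.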
zBPTTUpdater.update_core)NFr7   )r)   r*   r+   r,   r9   rf   __classcell__r"   r"   rB   r#   r6   T   s    	"r6   c                       s(   e Zd ZdZ fddZdd Z  ZS )LMEvaluatorz$A custom evaluator for a pytorch LM.c                    s&   t t| j||dd || _|| _dS )a  Initialize class.

        :param chainer.dataset.Iterator val_iter : The validation iterator
        :param LMInterface eval_model : The model to evaluate
        :param chainer.Reporter reporter : The observations reporter
        :param int device : The device id to use

        rI   r3   N)r8   rh   r9   r:   r3   )r&   val_iter
eval_modelr   r3   rB   r"   r#   r9      s   	
zLMEvaluator.__init__c                 C   sV  |  d}d}d}d}| j  t R t|D ]D}t|| jd dd\}}| jd dkr9| ||\}}	}
nt| j||f| j\}}	}
|t	|
 7 }|t	|	
 7 }|t|

 7 }qW d   n1 siw   Y  | j  i }t|' td|i| jj td|i| jj td	|i| jj W d   |S 1 sw   Y  |S )
zEvaluate the model.rD   r   rG   rH   rI   Nr'   rE   rF   )rM   r:   evalr/   no_gradcopyr.   r3   r   rV   rW   rX   trainr   report_scoper(   )r&   rj   r'   rE   rF   r2   r4   r5   lncobservationr"   r"   r#   evaluate   s4   




zLMEvaluator.evaluate)r)   r*   r+   r,   r9   ru   rg   r"   r"   rB   r#   rh      s    rh   c           $         s<  t  j j}t|tsJ dtdtj  t	  tj
 s&td  jd } jd }t j j j\}}}t j j j\}}}	tdt j  tdtt|  tdt|  td	|	| d
   tdtt|  tdt|  td|| d
    jdkp jdk}
 jt jd }| j  jkrtd j d| j  d t|| j|||
 d}t|| j||dd}tt|j j }td|  tdt j|    jdv rt t j}ntj!}| j j"|d} jdkr|"d t#t$ j}ndg} j%d }t&|d }td|  |'t(j)t* ddd d!+d" W d#   n	1 sLw   Y  td$,t-d%d& |. D t-d'd& |. D t-d(d& |. D d) t-d*d& |. D   t/ j0 j}|1|.  } j2d#u rg }n
 fd+d, j2D } jd-v rzdd.l3m4} W n t5y } zt6d/ j d0 |d#}~ww |j7|| jd1\}}d }nd}t8 t9|d2 t9|d3 t9|d4fd5d6 t:||||| j;| jd7}t<j=| jd8f j%d9}|>t?|||d: |>t@jAtB jCd;fd< |j>t@Dg d= jCd;fd> |>t@jE jCd? |>tFd@dA |>tG|dB |>tHdCdD |
rr|j>tI|g jdkrk jn jd8fd>  jJrtdE jJ  tK jJ| tL| d dF  jMd#ur jMdGkrddHlNmO} | jM}|j>tP| jCd;fd> |Q  tR| j  jSrtdI tT j%dJ | tU jS j}tV||\}} tdKtt|  tdLt|  tdM| | d
   t|| j||dd}!t?|!||d:}"|" }#tB|# tdN|#dO   d#S d#S )PzTrain with the given args.

    :param Namespace args: The program arguments
    :param type model_class: LMInterface class for training
    z"model should implement LMInterfaceztorch version = zcuda is not availablez<unk>z<eos>z	#vocab = z"#sentences in the training data = z#tokens in the training data = z'oov rate in the training data = %.2f %%d   z$#sentences in the validation data = z!#tokens in the validation data = z)oov rate in the validation data = %.2f %%rI   r   r7   z'batch size is automatically increased (z -> ))
max_lengthsoseosshuffleF)rx   ry   rz   repeatz#iterations per epoch = %dz#total iterations = )float16float32float64)dtyper1   z/model.jsonwbzwriting a model config file to    T)indentensure_ascii	sort_keysutf_8Nz6num. model params: {:,} (num. trained: {:,} ({:.1f}%))c                 s       | ]}|  V  qd S Nnumel.0pr"   r"   r#   	<genexpr>&      ztrain.<locals>.<genexpr>c                 s       | ]
}|j r| V  qd S r   requires_gradr   r   r"   r"   r#   r   '      c                 s   r   r   r   r   r"   r"   r#   r   (  r   g      Y@c                 s   r   r   r   r   r"   r"   r#   r   *  r   c                    s   g | ]\}}t || qS r"   r   )r   rd   re   )argsr"   r#   
<listcomp>4  s    ztrain.<locals>.<listcomp>)O0O1O2O3rJ   z+You need to install apex for --train-dtype z*. See https://github.com/NVIDIA/apex#linux)	opt_levelr   rZ   	serializec                    s
     | S r   )r   )s)r   r"   r#   <lambda>I  s   
 ztrain.<locals>.<lambda>)r;   r<   r>   epoch)outri   r`   )postprocesstrigger)r   r`   z	main/lossr   r   elapsed_time)r   )update_intervalzsnapshot.ep.{.updater.epoch})filenamezrnnlm.model.{.updater.epoch}zvalidation/main/losszrnnlm.modelzresumed from %s)is_lm )SummaryWriterztest the best modelz/rnnlm.model.bestz#sentences in the test data = z#tokens in the test data = z#oov rate in the test data = %.2f %%ztest perplexity: r   )Wr   model_modulebackend
issubclassr   logginginfor/   __version__r   r1   is_availablewarningchar_list_dictr   valid_labeldump_hdf5_pathtrain_labelstrn_vocablen	sortagrad	batchsizemaxngpur>   r   maxlenrX   batch_indicesr   train_dtypegetattrr~   tolistrP   outdiropenwritejsondumpsvarsencodeformatrW   r^   r   opt	from_argsrA   rS   rK   ImportErrorerror
initializer%   setattrr6   r;   r   Trainerextendrh   r   	LogReportr$   report_interval_itersPrintReportProgressBarr   r   r   r   resumer
   r   tensorboard_dirtorch.utils.tensorboardr   r   runr   
test_labelr	   r   r   )$r   model_classunkrz   valn_val_tokens
n_val_oovsro   n_train_tokensn_train_oovsuse_sortagrad
batch_sizer?   rj   epoch_itersr   r:   gpu_id
model_conff	opt_classr@   rA   rK   er<   updatertrainerr   writertestn_test_tokensn_test_oovs	test_iter	evaluatorr!   r"   )r   r   r#   ro      sH  
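
# Entry-point sketch (an assumption for illustration; this block is not part of
# the original module): in ESPnet this function is normally reached from the LM
# training command-line script, which fills an argparse.Namespace (model_module,
# backend, char_list_dict, n_vocab, outdir, ...) and dispatches on the backend:
#
#     from espnet.lm.pytorch_backend.lm import train
#     train(args)  # args: the fully populated argparse.Namespace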