o
    i M                     @   s0  d Z ddlZddlZddlZddlZddlZddlZddlm	Z	 ddl
mZ ddlmZmZmZmZmZmZmZmZmZ ddlmZmZmZ ddlmZ ddlmZ dd	lmZm Z  dd
l!m"Z" ddl#m$Z$ ddl%m&Z& ddl'm(Z( ddl)m*Z* ddl+m,Z, ddl-m.Z.m/Z/ G dd de0Z1dd Z2dd Z3dS )z;Training/decoding definition for the text translation task.    N)training)
extensions)	CompareValueTriggeradadelta_eps_decayadam_lr_decayadd_results_to_jsonrestore_snapshotsnapshot_object
torch_loadtorch_resumetorch_snapshot)CustomEvaluatorCustomUpdaterload_trained_model)MTInterface)pad_list)ChainerDataLoaderTransformDataset)set_deterministic_pytorch)dynamic_import)LoadInputsAndTargets)make_batchset)ShufflingEnabler)TensorboardLogger)check_early_stopset_early_stopc                   @   s*   e Zd ZdZdd ZedfddZdS )CustomConverterz#Custom batch converter for Pytorch.c                 C   s   d| _ d| _dS )z#Construct a CustomConverter object.r   N)	ignore_idpad)self r!   P/home/ubuntu/.local/lib/python3.10/site-packages/espnet/mt/pytorch_backend/mt.py__init__2   s   
zCustomConverter.__init__cpuc                 C   s   t |dksJ |d \}}tdd |D }tdd |D | j|}t||}tdd |D | j|}|||fS )zTransform a batch and send it to a device.

        Args:
            batch (list): The batch to transform.
            device (torch.device): The device to send to.

        Returns:
            tuple(torch.Tensor, torch.Tensor, torch.Tensor)

           r   c                 S   s   g | ]}|j d  qS )r   )shape.0xr!   r!   r"   
<listcomp>K   s    z,CustomConverter.__call__.<locals>.<listcomp>c                 S      g | ]	}t | qS r!   torch
from_numpylongr'   r!   r!   r"   r*   N       c                 S   r+   r!   r,   )r(   yr!   r!   r"   r*   P   r0   )	lennparrayr   r   tor-   r.   r   )r    batchdevicexsysilensxs_padys_padr!   r!   r"   __call__;   s   
zCustomConverter.__call__N)__name__
__module____qualname____doc__r#   r-   r7   r=   r!   r!   r!   r"   r   /   s    	r   c                    sZ
  t |  tj std t| jd}t	|d }W d   n1 s&w   Y  t
| }t||d  d d d d }t||d  d d d d }td	t|  td
t|  t| j}|||| }t|tsuJ tj| jst| j | jd }t|d#}td|  |tj||t| fddddd W d   n1 sw   Y  tt|  D ]}	td|	 d tt| |	   q|j| jdkr| jdkrtd| j| j| j f  |  j| j9  _t | jdkrdnd}
| j!dv rt"t| j!}ntj#}|j$|
|d}td%t&dd |' D t&dd |' D t&dd |' D d t&d d |' D   | j(d!kr`tj)j*|' d"| j+| j,d#}n4| j(d$krttj)j-|' | j.| j,d%}n | j(d&krdd'l/m0} ||' | j1| j2| j3}nt4d(| j( | j!d)v rzdd*l5m6} W n t7y } zt8d+| j! d, |d}~ww | j(d&kr|j9||j:| j!d-\}|_:n|j9||| j!d-\}}d}nd}t;|d. t;|d/fd0d1 t<  t| j=d}t	|d }W d   n	1 sw   Y  t| jd}t	|d }W d   n	1 s*w   Y  | j>d2kp9| j>dk}t?|| j| j@| jA| jB| jdkrM| jnd|| jC| jD| jE| jF| jGdddd3}t?|| j| j@| jA| jB| jdkrr| jnd| jC| jD| jE| jF| jGdddd4}tHd5dd6tHd5dd6tItJ| fd7d1d| jK| d8d1 d9}tItJ| fd:d1ddd;d1 | jKd<}tL|| jMd=|i||
| jd| jN|d>	}tOjP|| jQd?f| jd@}|r|jRtS|g| j>d2kr| j>n| jQd?fdA | jTrtdB| jT  tU| jT| | jVdkr|jRtW|d=|i|
| j| jVdCfdA n|RtW|d=|i|
| j | jXdkrmtt
|Y d| jX dDd1 ddE}tZ|dFrP|j[j\}|j[j]}n|j\}|j]}|||| jdG  |
dddH}|jR|dIdA nd}|Rt^j_dJdKgd?dLdM |Rt^j_dNdOgd?dPdM |Rt^j_dQdRgd?dSdM |Rt^j_dTdUgd?dVdM |jRt`|dWtOjabdKdA |jRt`|dXtOjacdOdA | jVdkr|jRtddYdZ| jVdCfdA n|jRtd dIdA | j(d!kr5| jed[kr|jRtf|| jd\ tgd]thdOd^d1 dA |jRti| jjthdOd_d1 dA n| jed`kr4|jRtf|| jda tgd]thdKdbd1 dA |jRti| jjthdKdcd1 dA nY| j(d$kr| jed[kre|jRtf|| jd\ tgd]thdOddd1 dA |jRtk| jlthdOded1 dA n)| jed`kr|jRtf|| jda tgd]thdKdfd1 dA |jRtk| jlthdKdgd1 dA |Rt^jm| jndCfdA g dh}| j(d!kr|jRt^odidjd1 | jndCfdA |pdi n| j(dkv r|jRt^odldmd1 | jndCfdA |pdl | jqr|pdT |pdU |jRt^r|| jndCfdA |Rt^js| jndn tt||  | judur!| judokr!ddplvmw} |jRtx|| ju|| jndCfdA |y  tz|| jQ dS )qz\Train with the given args.

    Args:
        args (namespace): The program arguments.

    zcuda is not availablerbuttsNr   outputr%   r&   z#input dims : z#output dims: z/model.jsonwbzwriting a model config file to    FTindentensure_ascii	sort_keysutf_8zARGS: z: z0batch size is automatically increased (%d -> %d)cudar$   )float16float32float64)r7   dtypez6num. model params: {:,} (num. trained: {:,} ({:.1f}%))c                 s       | ]}|  V  qd S Nnumelr(   pr!   r!   r"   	<genexpr>       ztrain.<locals>.<genexpr>c                 s       | ]
}|j r| V  qd S rR   requires_gradrT   rU   r!   r!   r"   rW          c                 s   rY   rR   rZ   rU   r!   r!   r"   rW      r\   g      Y@c                 s   rQ   rR   rS   rU   r!   r!   r"   rW      rX   adadeltagffffff?)rhoepsweight_decayadam)lrr`   noam)get_std_optzunknown optimizer: )O0O1O2O3)ampz+You need to install apex for --train-dtype z*. See https://github.com/NVIDIA/apex#linux)	opt_leveltarget	serializec                    s
     | S rR   )rl   )s)reporterr!   r"   <lambda>      
 ztrain.<locals>.<lambda>r   )
min_batch_sizeshortest_firstcount
batch_binsbatch_frames_inbatch_frames_outbatch_frames_inoutmtiaxisoaxis)	rq   rs   rt   ru   rv   rw   rx   ry   rz   rx   )modeload_outputc                        | gS rR   r!   data)	converterload_trr!   r"   ro          c                 S      | d S Nr   r!   r)   r!   r!   r"   ro         )dataset
batch_sizenum_workersshuffle
collate_fnc                    r}   rR   r!   r~   )r   load_cvr!   r"   ro     r   c                 S   r   r   r!   r   r!   r!   r"   ro   
  r   )r   r   r   r   r   main)use_apexepoch)out)triggerzresumed from %s	iterationc                 S   s   t | d d d d d S )Nr%   rD   r   r&   )intr   r!   r!   r"   ro   7  s    )keyreversemodulez/att_ws)r   	transformr7   ikeyry   )r%   r   	main/lossvalidation/main/losszloss.png)	file_namemain/accvalidation/main/acczacc.pngmain/pplvalidation/main/pplzppl.pngz	main/bleuzvalidation/main/bleuzbleu.pngzmodel.loss.bestzmodel.acc.bestz"snapshot.iter.{.updater.iteration})filenameaccz/model.acc.best)load_fnc                 S      | |kS rR   r!   
best_valuecurrent_valuer!   r!   r"   ro     r   c                 S   r   rR   r!   r   r!   r!   r"   ro     r   lossz/model.loss.bestc                 S      | |k S rR   r!   r   r!   r!   r"   ro     r   c                 S   r   rR   r!   r   r!   r!   r"   ro     r   c                 S   r   rR   r!   r   r!   r!   r"   ro     r   c                 S   r   rR   r!   r   r!   r!   r"   ro     r   c                 S   r   rR   r!   r   r!   r!   r"   ro     r   c                 S   r   rR   r!   r   r!   r!   r"   ro     r   )	r   r   r   r   r   r   r   r   elapsed_timer_   c                 S      | j djd d S )Nr   r   r_   updaterget_optimizerparam_groupstrainerr!   r!   r"   ro         )ra   rc   rb   c                 S   r   )Nr   r   rb   r   r   r!   r!   r"   ro     r   )update_interval )SummaryWriter){r   r-   rL   is_availableloggingwarningopen
valid_jsonjsonloadlistkeysr   infostrr   model_module
isinstancer   ospathexistsoutdirmakedirswritedumpsvarsencodesortedrn   ngpur   r7   train_dtypegetattrrN   r5   formatsum
parametersoptoptimAdadeltar_   r`   Adamrb   1espnet.nets.pytorch_backend.transformer.optimizerrd   adimtransformer_warmup_stepstransformer_lrNotImplementedErrorapexri   ImportErrorerror
initialize	optimizersetattrr   
train_json	sortagradr   	maxlen_in
maxlen_outminibatchesbatch_countrt   ru   rv   rw   r   r   r   n_iter_processesr   	grad_clip
accum_gradr   Trainerepochsextendr   resumer   save_interval_itersr   num_save_attentionitemshasattrr   calculate_all_attentionsattention_plot_classr   
PlotReportr	   triggersMinValueTriggerMaxValueTriggerr   	criterionr   r
   r   r   	eps_decayr   lr_decay	LogReportreport_interval_itersobserve_valueappendreport_bleuPrintReportProgressBarr   tensorboard_dirtorch.utils.tensorboardr   r   runr   )argsfr   rC   idimodimmodel_classmodel
model_confr   r7   rP   r   rd   ri   er   r   use_sortagradtrainvalid
train_iter
valid_iterr   r   r   
att_vis_fn
plot_classatt_reporterreport_keysr   r!   )r   r   r   rn   r"   r  W   s  

  

$



	







						
	


r  c              	      s  t |  t| j\}}t|tsJ | |_| jdkr.tt| j}t	
dt|  |  t| jd}t|d W d   n1 sFw   Y  i }|jrZdd  D n	dd  D | jd	krt A t dD ]1\}}t	
d
| |t  | d d d  g}||| |j}	t| |	|j||< qtW d   n1 sw   Y  n{ddd}
t fddD  ttt  fddd}fdd|D t C |
| jdD ]3}dd |D }fdd|D }||| |j}	t|	D ]\}}|| }t| ||j||< qqW d   n	1 s,w   Y  t| jd}| tj!d|idddd"d W d   dS 1 sUw   Y  dS )z]Decode with the given args.

    Args:
        args (namespace): The program arguments.

    r%   zgpu id: rB   rC   Nc                 S   sF   i | ]\}}|d  d d d dkr|d  d d d dkr||qS rD   r   r&   r%   r!   r(   kvr!   r!   r"   
<dictcomp>  
    0ztrans.<locals>.<dictcomp>c                 S   sF   i | ]\}}|d  d d d dkr|d  d d d dkr||qS r  r!   r  r!   r!   r"   r    r  r   z(%d/%d) decoding rD   tokenidc                 S   s   t |g|  }tj|d|iS )N	fillvalue)iter	itertoolszip_longest)niterabler  kargsr!   r!   r"   grouper,  s   ztrans.<locals>.grouperc                    s$   g | ]} | d  d d d qS )rD   r%   r&   r   r!   )r(   r   jsr!   r"   r*   2  s   $ ztrans.<locals>.<listcomp>c                    s
    |   S rR   r!   )i)	feat_lensr!   r"   ro   3  rp   ztrans.<locals>.<lambda>)r   c                    s   g | ]} | qS r!   r!   )r(   r'  )r   r!   r"   r*   4      c                 S   s   g | ]}|r|qS r!   r!   r(   namer!   r!   r"   r*   8  r)  c                    s6   g | ]}t jtt | d  d d  t jdqS )rD   r%   r  )rP   )r3   fromitermapr   splitint64r*  r%  r!   r"   r*   9  s    rE   rF   FTrG   rK   rR   )#r   r   r
  r   r   
trans_argsr   r   ranger   r   r   rL   r   
trans_jsonr   r   multilingualr   	batchsizer-   no_grad	enumerater   r2   r.  	translate	char_listr   r   translate_batchresult_labelr   r   r   )r  r
  
train_argsgpu_idr  new_jsidxr+  feat
nbest_hypsr$  sorted_indexnamesfeatsr'  	nbest_hypr!   )r(  r&  r   r"   trans  s~   






$rE  )4rA   r  r   r   r   numpyr3   r-   chainerr   chainer.trainingr   espnet.asr.asr_utilsr   r   r   r   r   r	   r
   r   r   espnet.asr.pytorch_backend.asrr   r   r   espnet.nets.mt_interfacer   #espnet.nets.pytorch_backend.e2e_asrr   espnet.utils.datasetr   r    espnet.utils.deterministic_utilsr   espnet.utils.dynamic_importr   espnet.utils.io_utilsr   espnet.utils.training.batchfyr   espnet.utils.training.iteratorsr   (espnet.utils.training.tensorboard_loggerr   !espnet.utils.training.train_utilsr   r   objectr   r  rE  r!   r!   r!   r"   <module>   s6   ,(   '