o
    i=Y                     @   sH  d Z ddlZddlZddlZddlZddlZddlZddlm	Z	 ddl
mZ ddlmZmZmZmZmZmZmZmZmZ ddlmZ ddlmZmZ ddlmZmZ dd	lmZ dd
l m!Z! ddl"m#Z#m$Z$ ddl%m&Z& ddl'm(Z( ddl)m*Z* ddl+m,Z, ddl-m.Z. ddl/m0Z0 ddl1m2Z2m3Z3 G dd deZdd Z4dd Z5dS )z=Training/decoding definition for the speech translation task.    N)training)
extensions)	CompareValueTriggeradadelta_eps_decayadam_lr_decayadd_results_to_jsonrestore_snapshotsnapshot_object
torch_loadtorch_resumetorch_snapshot)CustomConverter)CustomEvaluatorCustomUpdater)load_trained_modelload_trained_modules)pad_list)STInterface)ChainerDataLoaderTransformDataset)set_deterministic_pytorch)dynamic_import)LoadInputsAndTargets)make_batchset)ShufflingEnabler)TensorboardLogger)check_early_stopset_early_stopc                       s<   e Zd ZdZdejdf fdd	ZedfddZ  Z	S )	r   zCustom batch converter for Pytorch.

    Args:
        subsampling_factor (int): The subsampling factor.
        dtype (torch.dtype): Data type to convert.
        use_source_text (bool): use source transcription.

       Fc                    s   t  j||d || _dS )z#Construct a CustomConverter object.)subsampling_factordtypeN)super__init__use_source_text)selfr   r    r#   	__class__ P/home/ubuntu/.local/lib/python3.10/site-packages/espnet/st/pytorch_backend/st.pyr"   4   s   
zCustomConverter.__init__cpuc           
      C   s   t |dksJ |d \}}}tdd |D }t||}tdd |D dj|| jd}tdd |D | j|}| j	rQtdd |D | j|}	nd	}	||||	fS )
zTransform a batch and send it to a device.

        Args:
            batch (list): The batch to transform.
            device (torch.device): The device to send to.

        Returns:
            tuple(torch.Tensor, torch.Tensor, torch.Tensor)

        r   r   c                 S   s   g | ]}|j d  qS )r   )shape.0xr'   r'   r(   
<listcomp>K   s    z,CustomConverter.__call__.<locals>.<listcomp>c                 S   s   g | ]	}t | qS r'   )torch
from_numpyfloatr+   r'   r'   r(   r.   N   s    r    c                 S   "   g | ]}t tj|tjd qS r2   r/   r0   nparrayint64r,   yr'   r'   r(   r.   S      " c                 S   r3   r4   r5   r9   r'   r'   r(   r.   Y   r;   N)
lenr6   r7   r/   r0   tor   r    	ignore_idr#   )
r$   batchdevicexsysys_srcilensxs_padys_pad
ys_pad_srcr'   r'   r(   __call__;   s,   zCustomConverter.__call__)
__name__
__module____qualname____doc__r/   float32r"   r@   rH   __classcell__r'   r'   r%   r(   r   *   s
    
r   c           "         s  t |  tj std t| jd}t	|d }W d   n1 s&w   Y  t
| }t||d  d d d d }t||d  d	 d d d }td
t|  tdt|  | jdusm| jdurvt||| td}nt| j}|||| }t|tsJ | }tj| jst| j | jd }	t|	d#}td|	  |tj||t| fddddd W d   n1 sw   Y  t t|  D ]}
td|
 d tt| |
   q|j!| j"dkr| j#dkrtd| j#| j#| j" f  |  j#| j"9  _#t$| j"dkrdnd}| j%dv r)t&t| j%}ntj'}|j(||d}td)t*dd |+ D t*d d |+ D t*d!d |+ D d" t*d#d |+ D   | j,d$krztj-j.|+ d%| j/| j0d&}n4| j,d'krtj-j1|+ | j2| j0d(}n | j,d)krdd*l3m4} ||+ | j5| j6| j7}nt8d+| j, | j%d,v rzdd-l9m:} W n t;y } zt<d.| j% d/ |d}~ww | j,d)kr|j=||j>| j%d0\}|_>n|j=||| j%d0\}}d}nd}t?|d1 t?|d2fd3d4 t@|jAd || jBdkp| jCdkd5 t| jDd}t	|d }W d   n	1 s6w   Y  t| jd}t	|d }W d   n	1 sTw   Y  | jEdkpc| jEdk}tF|| j#| jG| jH| jI| j"dkrw| j"nd|| jJ| jK| jL| jM| jNddd6}tF|| j#| jG| jH| jI| j"dkr| j"nd| jJ| jK| jL| jM| jNddd7}tOd8d| jPd9did:tOd8d| jPd9did:tQtR| fd;d4d| jS| d<d4 d=}tQtR| fd>d4ddd?d4 | jSd@}tT|| jUdA|i||| j"| jV| jW|dB	}tXjY|| jZdCf| jdD}|r"|j[t\|g| jEdkr| jEn| jZdCfdE | j]r4tdF| j]  t^| j]| | j_dkrN|j[t`|dA|i|| j"| j_dGfdE n|[t`|dA|i|| j" | jadkrt t
|b d| ja dHd4 ddI}tc|dJr|jdje}|jdjf}n|je}|jf}|||| jdK  ||dL}|j[|dMdE nd}| jBdkr| jgdkr| jhdkrt t
|b d| jh dNd4 ddI}tc|dJr|jdji}|jdjj}n|ji}|jj}|||| jdO  ||dL}|j[|dMdE nd}|[tkjlg dPdCdQdR |[tkjlg dSdCdTdR |[tkjldUdVgdCdWdR |j[tm|dXtXjnodYdE |j[tm|dZtXjnpd[dE | j_dkrI|j[tqd\d]| j_dGfdE n|j[tq dMdE | j,d$kr| jrd^kr|j[ts|| jd_ ttd`tud[dad4 dE |j[tv| jwtud[dbd4 dE n| jrdckr|j[ts|| jdd ttd`tudYded4 dE |j[tv| jwtudYdfd4 dE nY| j,d'kr| jrd^kr|j[ts|| jd_ ttd`tud[dgd4 dE |j[tx| jytud[dhd4 dE n)| jrdckr|j[ts|| jdd ttd`tudYdid4 dE |j[tx| jytudYdjd4 dE |[tkjz| j{dGfdE g dk} | jBdkr$| |dl | |dm | dng7 } | j,d$krE|j[tk}dodpd4 | j{dGfdE | |do n| j,dqv r`|j[tk}drdsd4 | j{dGfdE | |dr | jBdkr| jgdkrv| |dt | |du | jgdk r| j~r| |dv | jr| |dw | jr| |dU | |dV |j[tk| | j{dGfdE |[tkj| j{dx t||  | jdur| jdykrddzlm}! |j[t|!| j||d{| j{dGfdE |  t|| jZ dS )|z\Train with the given args.

    Args:
        args (namespace): The program arguments.

    zcuda is not availablerbuttsNr   inputr*   outputz#input dims : z#output dims: )	interfacez/model.jsonwbzwriting a model config file to    FTindentensure_ascii	sort_keysutf_8zARGS: z: r   z0batch size is automatically increased (%d -> %d)cudar)   )float16rM   float64)r@   r    z6num. model params: {:,} (num. trained: {:,} ({:.1f}%))c                 s       | ]}|  V  qd S Nnumelr,   pr'   r'   r(   	<genexpr>       ztrain.<locals>.<genexpr>c                 s       | ]
}|j r| V  qd S r`   requires_gradrb   rc   r'   r'   r(   re          c                 s   rg   r`   rh   rc   r'   r'   r(   re      rj   g      Y@c                 s   r_   r`   ra   rc   r'   r'   r(   re      rf   adadeltagffffff?)rhoepsweight_decayadam)lrrn   noam)get_std_optzunknown optimizer: )O0O1O2O3)ampz+You need to install apex for --train-dtype z*. See https://github.com/NVIDIA/apex#linux)	opt_leveltarget	serializec                    s
     | S r`   )rz   )s)reporterr'   r(   <lambda>      
 ztrain.<locals>.<lambda>)r   r    r#   )	min_batch_sizeshortest_firstcount
batch_binsbatch_frames_inbatch_frames_outbatch_frames_inoutiaxisoaxis)r   r   r   r   r   r   r   r   asrtrain)modeload_outputpreprocess_confpreprocess_argsc                        | gS r`   r'   data)	converterload_trr'   r(   r}         c                 S      | d S Nr   r'   r-   r'   r'   r(   r}         )dataset
batch_sizenum_workersshuffle
collate_fnc                    r   r`   r'   r   )r   load_cvr'   r(   r}   "  r   c                 S   r   r   r'   r   r'   r'   r(   r}   %  r   )r   r   r   r   r   main)use_apexepoch)out)triggerzresumed from %s	iterationc                 S   s   t | d d d d d S )Nr   rQ   r   r*   intr   r'   r'   r(   r}   Q      )keyreversemodulez/att_ws)r   	transformr@   r   )r   r   c                 S   s   t | d d d d d S )Nr   rS   r   r*   r   r   r'   r'   r(   r}   l  r   z	/ctc_prob)	main/lossvalidation/main/lossmain/loss_asrvalidation/main/loss_asrzmain/loss_mtzvalidation/main/loss_mtmain/loss_stvalidation/main/loss_stzloss.png)	file_name)main/accvalidation/main/accmain/acc_asrvalidation/main/acc_asrzmain/acc_mtzvalidation/main/acc_mtzacc.pngz	main/bleuzvalidation/main/bleuzbleu.pngzmodel.loss.bestr   zmodel.acc.bestr   z"snapshot.iter.{.updater.iteration})filenameaccz/model.acc.best)load_fnc                 S      | |kS r`   r'   
best_valuecurrent_valuer'   r'   r(   r}     r   c                 S   r   r`   r'   r   r'   r'   r(   r}     r   lossz/model.loss.bestc                 S      | |k S r`   r'   r   r'   r'   r(   r}     r   c                 S   r   r`   r'   r   r'   r'   r(   r}     r   c                 S   r   r`   r'   r   r'   r'   r(   r}     r   c                 S   r   r`   r'   r   r'   r'   r(   r}     r   c                 S   r   r`   r'   r   r'   r'   r(   r}     r   c                 S   r   r`   r'   r   r'   r'   r(   r}     r   )
r   r   r   r   r   r   r   r   r   r   r   r   elapsed_timerm   c                 S      | j djd d S )Nr   r   rm   updaterget_optimizerparam_groupstrainerr'   r'   r(   r}         )ro   rq   rp   c                 S   r   )Nr   r   rp   r   r   r'   r'   r(   r}   %  r   zmain/cer_ctczvalidation/main/cer_ctczvalidation/main/cerzvalidation/main/wer)update_interval )SummaryWriter)att_reporterctc_reporter)r   r/   r\   is_availableloggingwarningopen
valid_jsonjsonloadlistkeysr   infostrenc_initdec_initr   r   r   model_module
isinstanceget_total_subsampling_factorospathexistsoutdirmakedirswritedumpsvarsencodesortedr|   ngpur   r@   train_dtypegetattrrM   r=   formatsum
parametersoptoptimAdadeltarm   rn   Adamrp   1espnet.nets.pytorch_backend.transformer.optimizerrr   adimtransformer_warmup_stepstransformer_lrNotImplementedErrorapexrw   ImportErrorerror
initialize	optimizersetattrr   	subsample
asr_weight	mt_weight
train_json	sortagradr   	maxlen_in
maxlen_outminibatchesbatch_countr   r   r   r   r   r   r   r   n_iter_processesr   	grad_clip
grad_noise
accum_gradr   Trainerepochsextendr   resumer   save_interval_itersr   num_save_attentionitemshasattrr   calculate_all_attentionsattention_plot_classmtlalphanum_save_ctccalculate_all_ctc_probsctc_plot_classr   
PlotReportr	   triggersMinValueTriggerMaxValueTriggerr   	criterionr   r
   r   r   	eps_decayr   lr_decay	LogReportreport_interval_itersappendobserve_value
report_cer
report_werreport_bleuPrintReportProgressBarr   tensorboard_dirtorch.utils.tensorboardr   r   runr   )"argsfr   rP   idimodimmodelmodel_classtotal_subsampling_factor
model_confr   r@   r    r   rr   rw   er   r   use_sortagradr   valid
train_iter
valid_iterr   r   r   
att_vis_fn
plot_classr   
ctc_vis_fnr   report_keysr   r'   )r   r   r   r|   r(   r   b   s  

  

$


	
	$
	



					


	
	






	r   c              	      s  t |  t| j\}}t|tsJ | |_| jdkr.tt| j}t	
dt|  |  t| jd}t|d W d   n1 sFw   Y  i }tddd| jdu rY|jn| jddid	}| jd
krt C t dD ]3\}}t	
d| |t  || fg}	||	d
 d
 }
||
| |j}t| ||j||< qrW d   n1 sw   Y  nddd}t | jdkrfddD  ttt  fddd}fdd|D t I || jdD ]9}dd |D }fdd|D }	||	d
 }||| |j}t|D ]\}}|| }t| ||j||< qqW d   n	1 s7w   Y  t| jd}|tj d|idddd!d W d   dS 1 s`w   Y  dS )z]Decode with the given args.

    Args:
        args (namespace): The program arguments.

    r   zgpu id: rO   rP   Nr   Fr   )r   r   sort_in_input_lengthr   r   r   z(%d/%d) decoding c                 S   s   t |g|  }tj|d|iS )N	fillvalue)iter	itertoolszip_longest)niterabler<  kargsr'   r'   r(   grouper  s   ztrans.<locals>.grouperc                    s$   g | ]} | d  d d d qS )rQ   r   r*   r'   )r,   r   jsr'   r(   r.     s   $ ztrans.<locals>.<listcomp>c                    s
    |   S r`   r'   )i)	feat_lensr'   r(   r}     r~   ztrans.<locals>.<lambda>)r   c                    s   g | ]} | qS r'   r'   )r,   rF  )r   r'   r(   r.         c                 S   s   g | ]}|r|qS r'   r'   r,   namer'   r'   r(   r.     rH  c                    s   g | ]}| | fqS r'   r'   rI  rD  r'   r(   r.     s    rU   rV   TrW   r[   r`   )"r   r   r.  r   r   
trans_argsr   r   ranger   r   r   r\   r   
trans_jsonr   r   r   r   	batchsizer/   no_grad	enumerater   r<   	translate	char_listr   r   translate_batchresult_labelr   r   r   )r*  r.  
train_argsgpu_idr+  new_jsload_inputs_and_targetsidxrJ  r?   feat
nbest_hypsrC  sorted_indexnamesfeatsrF  	nbest_hypr'   )rG  rE  r   r(   transP  s   








$r`  )6rL   r>  r   r   r   numpyr6   r/   chainerr   chainer.trainingr   espnet.asr.asr_utilsr   r   r   r   r   r	   r
   r   r   espnet.asr.pytorch_backend.asrr   ASRCustomConverterr   r   #espnet.asr.pytorch_backend.asr_initr   r   #espnet.nets.pytorch_backend.e2e_asrr   espnet.nets.st_interfacer   espnet.utils.datasetr   r    espnet.utils.deterministic_utilsr   espnet.utils.dynamic_importr   espnet.utils.io_utilsr   espnet.utils.training.batchfyr   espnet.utils.training.iteratorsr   (espnet.utils.training.tensorboard_loggerr   !espnet.utils.training.train_utilsr   r   r   r`  r'   r'   r'   r(   <module>   s:   ,8   q