o
    i&V                     @   sr  d Z ddlZddlZddlZddlmZ ddlZddlZddl	m
Z
 ddlmZ ddlm  m  mZ ddlm  m  m  mZ ddlmZ ddlmZmZmZmZmZmZmZm Z  ddl!m"Z"m#Z#m$Z$ dd	l%m&Z& dd
l'm(Z( ddl)m*Z*m+Z+ ddl,m-Z- ddl.m/Z/ ddl0m1Z1 ddl2m3Z3 ddl4m5Z5 ddl6m7Z7 ddl8m9Z9m:Z: G dd de;Z<dd Z=dd Z>dS )z
This script is used for multi-speaker speech recognition.

Copyright 2017 Johns Hopkins University (Shinji Watanabe)
 Apache 2.0  (http://www.apache.org/licenses/LICENSE-2.0)
    N)zip_longest)training)
extensions)add_results_to_json)CompareValueTriggeradadelta_eps_decayget_model_confrestore_snapshotsnapshot_object
torch_loadtorch_resumetorch_snapshot)CustomEvaluatorCustomUpdaterload_trained_model)ASRInterface)pad_list)ChainerDataLoaderTransformDataset)set_deterministic_pytorch)dynamic_import)LoadInputsAndTargets)make_batchset)ShufflingEnabler)TensorboardLogger)check_early_stopset_early_stopc                   @   s4   e Zd ZdZdejdfddZedfddZd	S )
CustomConverterzCustom batch converter for Pytorch.

    Args:
        subsampling_factor (int): The subsampling factor.
        dtype (torch.dtype): Data type to convert.

          c                 C   s   || _ d| _|| _|| _dS )zInitialize the converter.N)subsampling_factor	ignore_iddtype	num_spkrs)selfr!   r#   r$    r&   V/home/ubuntu/.local/lib/python3.10/site-packages/espnet/asr/pytorch_backend/asr_mix.py__init__<   s   
zCustomConverter.__init__cpuc                    s  t |dksJ |d d |d  j d }} jdkr' fdd|D }tdd |D }|d jjdkr_tdd |D dj| jd	}td
d |D dj| jd	}||d}ntdd |D dj| jd	}t	
||}t|d tjsg }	tt |D ]}
|	dd ||
 D 7 }	qt|	 j}	|	 jd|	ddd|}	ntdd |D  j|}	|||	fS )a8  Transform a batch and send it to a device.

        Args:
            batch (list(tuple(str, dict[str, dict[str, Any]]))): The batch to transform.
            device (torch.device): The device to send to.

        Returns:
            tuple(torch.Tensor, torch.Tensor, torch.Tensor): Transformed batch.

        r   r   Nc                    s$   g | ]}|d d  j d d f qS N)r!   .0xr%   r&   r'   
<listcomp>T      $ z,CustomConverter.__call__.<locals>.<listcomp>c                 S   s   g | ]}|j d  qS )r   )shaper+   r&   r&   r'   r/   W       cc                 S      g | ]
}t |j qS r&   )torch
from_numpyrealfloatr+   r&   r&   r'   r/   ]       )r#   c                 S   r4   r&   )r5   r6   imagr8   r+   r&   r&   r'   r/   `   r9   )r7   r:   c                 S      g | ]	}t | qS r&   )r5   r6   r8   r+   r&   r&   r'   r/   h       c                 S   r;   r&   r5   r6   longr,   yr&   r&   r'   r/   p   r<   r    c                 S   r;   r&   r=   r?   r&   r&   r'   r/   y   r<   )lenr$   r!   nparrayr#   kindr   tor5   r6   
isinstancendarrayranger"   viewsize	transpose)r%   batchdevicexsysilensxs_pad_realxs_pad_imagxs_padys_padir&   r.   r'   __call__C   sH   "

zCustomConverter.__call__N)	__name__
__module____qualname____doc__r5   float32r(   rM   rV   r&   r&   r&   r'   r   3   s    r   c           #         s	  t |  tj std t| jd}t	|d }W d   n1 s&w   Y  t
| }t||d  d d d d }t||d  d	 d d d }td
t|  tdt|  | jdkrpd}td n| jdkr}d}td nd}td t| j}|||| }t|tsJ |jd }	| jdurt| j| j}
ttt| j|
j|
jt|
dd}t	| j| ||_t j!"| j#st $| j# | j#d }t|d#}td|  |%tj&||t'| fdddd(d W d   n	1 sw   Y  t)t'|  D ]}td| d tt'| |   q|j*| j+dkrJ| j,dkrJtd | j,| j,| j+ f  |  j,| j+9  _,t-| j+dkrTd!nd"}| j.d#v rdtt| j.}ntj/}|j0||d$}td%1t2d&d' |3 D t2d(d' |3 D t2d)d' |3 D d* t2d+d' |3 D   | j4d,krtj5j6|3 d-| j7| j8d.}n2| j4d/krtj5j9|3 | j8d0}n | j4d1krdd2l:m;} ||3 | j<| j=| j>}nt?d3| j4 | j.d4v r1zdd5l@mA} W n tBy } ztCd6| j. d7 |d}~ww | j4d1kr#|jD||jE| j.d8\}|_En|jD||| j.d8\}}d}nd}tF|d9 tF|d:fd;d< tG|	|| jHd= t| jId}t	|d }W d   n	1 sdw   Y  t| jd}t	|d }W d   n	1 sw   Y  | jJdkp| jJdk}tK|| j,| jL| jM| jN| j+dkr| j+nd|| jO| jP| jQ| jR| jSddd>}tK|| j,| jL| jM| jN| j+dkr| j+nd| jO| jP| jQ| jR| jSddd?}tTd@d| jUdAdidBtTd@d| jUdAdidBdCtVtW| fdDd<d| jXddEd< dFi}dCtVtW| fdGd<dddHd< | jXdIi}tY|| jZ|||| j+| j[| j\|dJ	}t]j^|| j_dKf| j#dL}|rQ|j`ta|g| jJdkrJ| jJn| j_dKfdM | jbrctdN| jb  tc| jb| |`td|||| j+ | jedkr| jdkrt)t
|f d| je dOd< ddP}tg|dQr|jhji}|jhjj}n|ji}|jj}|||| j#dR  |dS} |j`| dTdM nd} |`tkjlg dUdKdVdW |`tkjldXdYgdKdZdW |`tkjld[d\gdKd]dW |j`tm|d^t]jnod_dM |dkr|j`tm|d`t]jnpdYdM |j`tq dTdM | j4d,krg| jrdakr>|dkr>|j`ts|| j#db ttdctudYddd< dM |j`tv| jwtudYded< dM n)| jrdfkrg|j`ts|| j#dg ttdctud_dhd< dM |j`tv| jwtud_did< dM |`tkjx| jydjfdM g dk}!| j4d,kr|j`tkzdldmd< | jydjfdM |!{dl | j|r|!{dn | j}r|!{do |j`tk~|!| jydjfdM |`tkj| jydp t||  | jdur| jdqkrddrlm}" |j`t|"| j| | jydjfdM |  t|| j_ dS )sz\Train with the given args.

    Args:
        args (namespace): The program arguments.

    zcuda is not availablerbuttsNr   inputr1   r    outputz#input dims : z#output dims: g      ?ctczPure CTC modeg        attzPure attention modemtlzMultitask learning mode
embed_unitz/model.jsonwbzwriting a model config file to    FTindentensure_ascii	sort_keysutf_8zARGS: z: r   z0batch size is automatically increased (%d -> %d)cudar)   )float16r[   float64)rM   r#   z6num. model params: {:,} (num. trained: {:,} ({:.1f}%))c                 s       | ]}|  V  qd S r*   numelr,   pr&   r&   r'   	<genexpr>       ztrain.<locals>.<genexpr>c                 s       | ]
}|j r| V  qd S r*   requires_gradrp   rq   r&   r&   r'   rs          c                 s   ru   r*   rv   rq   r&   r&   r'   rs      rx   g      Y@c                 s   rn   r*   ro   rq   r&   r&   r'   rs      rt   adadeltagffffff?)rhoepsweight_decayadam)r|   noam)get_std_optzunknown optimizer: )O0O1O2O3)ampz+You need to install apex for --train-dtype z*. See https://github.com/NVIDIA/apex#linux)	opt_leveltarget	serializec                    s
     | S r*   )r   )s)reporterr&   r'   <lambda>	     
 ztrain.<locals>.<lambda>)r!   r#   r$   )	min_batch_sizeshortest_firstcount
batch_binsbatch_frames_inbatch_frames_outbatch_frames_inoutiaxisoaxis)r   r   r   r   r   r   r   r   asrtrain)modeload_outputpreprocess_confpreprocess_argsmainc                        | gS r*   r&   data)	converterload_trr&   r'   r   J      c                 S      | d S Nr   r&   r-   r&   r&   r'   r   N      )dataset
batch_sizenum_workersshuffle
collate_fnc                    r   r*   r&   r   )r   load_cvr&   r'   r   S  r   c                 S   r   r   r&   r   r&   r&   r'   r   V  r   )r   r   r   r   r   )use_apexepoch)out)triggerzresumed from %sc                 S   s   t | d d d d d S )Nr   r^   r   r1   )intr   r&   r&   r'   r   {  r9   )keyreversemodulez/att_ws)r   	transformrM   )r   r   )	main/lossvalidation/main/lossmain/loss_ctcvalidation/main/loss_ctcmain/loss_attvalidation/main/loss_attzloss.png)	file_namemain/accvalidation/main/acczacc.pngmain/cer_ctcvalidation/main/cer_ctczcer.pngzmodel.loss.bestr   zmodel.acc.bestaccz/model.acc.best)load_fnc                 S      | |kS r*   r&   
best_valuecurrent_valuer&   r&   r'   r     r   c                 S   r   r*   r&   r   r&   r&   r'   r     r   lossz/model.loss.bestc                 S      | |k S r*   r&   r   r&   r&   r'   r     r   c                 S   r   r*   r&   r   r&   r&   r'   r     r   	iteration)r   r   r   r   r   r   r   r   r   r   r   r   elapsed_timer{   c                 S   s   | j djd d S )Nr   r   r{   )updaterget_optimizerparam_groups)trainerr&   r&   r'   r     s    zvalidation/main/cerzvalidation/main/wer)update_interval )SummaryWriter)r   r5   rk   is_availableloggingwarningopen
valid_jsonjsonloadlistkeysr   infostrmtlalphar   model_modulerF   r   	subsamplernnlmr   
rnnlm_conf
lm_pytorchClassifierWithStateRNNLMrA   	char_listlayerunitgetattrospathexistsoutdirmakedirswritedumpsvarsencodesortedr   ngpur   rM   train_dtyper[   rE   formatsum
parametersoptoptimAdadeltar{   r|   Adam1espnet.nets.pytorch_backend.transformer.optimizerr   adimtransformer_warmup_stepstransformer_lrNotImplementedErrorapexr   ImportErrorerror
initialize	optimizersetattrr   r$   
train_json	sortagradr   	maxlen_in
maxlen_outminibatchesbatch_countr   r   r   r   r   r   r   r   n_iter_processesr   	grad_clip
grad_noise
accum_gradr   Trainerepochsextendr   resumer   r   num_save_attentionitemshasattrr   calculate_all_attentionsattention_plot_classr   
PlotReportr
   triggersMinValueTriggerMaxValueTriggerr   	criterionr	   r   r   r   	eps_decay	LogReportreport_interval_itersobserve_valueappend
report_cer
report_werPrintReportProgressBarr   tensorboard_dirtorch.utils.tensorboardr   r   runr   )#argsfr   r]   idimodimmtl_modemodel_classmodelr!   
rnnlm_argsr   
model_confr   rM   r#   r  r   r   er   r  use_sortagradr   valid
train_iter
valid_iterr   r   r   
att_vis_fn
plot_classatt_reporterreport_keysr   r&   )r   r   r   r   r'   r      st  

  







&








			
	


r   c                    s  t |  t| j\}}t|tsJ | |_| jrJt| j| j}t	|dddkr+t
dttt|j|j|jt	|dd}t| j| |  nd}| jrt| j| j}|j}dd t|jD }ttt||j|j}t| j| |  |durtt|j|j||}ntt|j||}| jdkrtt| j}t d	t!|  |"  |r|"  t#| j$d
}	t%&|	d W d   n1 sw   Y  i }
t'ddd| j(du r|j(n| j(ddid}| j)dkrBt*+ D t, dD ]4\}}t d| |t,  || fg}||d d }|-|| |j|}t.| ||j|
|< qW d   n	1 s<w   Y  nd#dd}t, | j)dkrsfddD  t/tt  fddd}fdd|D t*+ Q || j)dD ]A}dd |D }fdd|D }||d }|j0|| |j|d}t|D ]\}fdd|D }t.| ||j|
|< qqW d   n	1 sw   Y  t#| j1d}	|	2t%j3d|
iddd d!4d" W d   dS 1 sw   Y  dS )$z]Decode with the given args.

    Args:
        args (namespace): The program arguments.

    r   defaultz?use '--api v2' option to decode with non-default language modelrc   Nc                 S   s   i | ]\}}||qS r&   r&   )r,   rU   r-   r&   r&   r'   
<dictcomp>6  r2   zrecog.<locals>.<dictcomp>r   zgpu id: r\   r]   r   Fr   )r   r   sort_in_input_lengthr   r   r   z(%d/%d) decoding c                 S   s   t |g|  }t|d|iS )N	fillvalue)iterr   )niterabler?  kargsr&   r&   r'   groupern  s   zrecog.<locals>.grouperc                    s$   g | ]} | d  d d d qS )r^   r   r1   r&   )r,   r   jsr&   r'   r/   u  r0   zrecog.<locals>.<listcomp>c                    s
    |   S r*   r&   rU   )	feat_lensr&   r'   r   v  r   zrecog.<locals>.<lambda>)r   c                    s   g | ]} | qS r&   r&   )r,   rU   )r   r&   r'   r/   w      c                 S   s   g | ]}|r|qS r&   r&   r,   namer&   r&   r'   r/   {  rI  c                    s   g | ]}| | fqS r&   r&   rJ  rE  r&   r'   r/   |  s    )r   c                    s   g | ]}|  qS r&   r&   )r,   hyprG  r&   r'   r/     rI  rd   re   Trf   rj   r*   )5r   r   r0  rF   r   
recog_argsr   r   r   r   
ValueErrorr   r   r   rA   r   r   r   r   eval
word_rnnlmword_rnnlm_confchar_list_dict	enumerateextlm_pytorchMultiLevelLM	predictorLookAheadWordLMr   r   rH   r   r   r   rk   r   
recog_jsonr   r   r   r   	batchsizer5   no_gradr   	recognizer   r   recognize_batchresult_labelr   r   r   )r*  r0  
train_argsr1  r   	word_dict	char_dictrP  gpu_idr+  new_jsload_inputs_and_targetsidxrK  rL   feat
nbest_hypsrD  sorted_indexnamesfeats	nbest_hypr&   )rH  rU   rF  r   r'   recog  s   









$rk  )?rZ   r   r   r   	itertoolsr   numpyrB   r5   chainerr   chainer.trainingr   espnet.lm.pytorch_backend.extlmlmpytorch_backendextlmrT  &espnet.nets.pytorch_backend.lm.defaultnetsr<  r   espnet.asr.asr_mix_utilsr   espnet.asr.asr_utilsr   r   r   r	   r
   r   r   r   espnet.asr.pytorch_backend.asrr   r   r   espnet.nets.asr_interfacer   'espnet.nets.pytorch_backend.e2e_asr_mixr   espnet.utils.datasetr   r    espnet.utils.deterministic_utilsr   espnet.utils.dynamic_importr   espnet.utils.io_utilsr   espnet.utils.training.batchfyr   espnet.utils.training.iteratorsr   (espnet.utils.training.tensorboard_loggerr   !espnet.utils.training.train_utilsr   r   objectr   r   rk  r&   r&   r&   r'   <module>   s<   (
L   