o
    i                     @   s  d Z ddlZddlZddlZddlZddlZddlZddlZddl	Z	ddl
mZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ dd	lmZ dd
lmZ ddlm  m   m!Z" ddl#m$  m   m  m%Z& ddl'm(Z(m)Z)m*Z*m+Z+m,Z,m-Z-m.Z.m/Z/m0Z0m1Z1m2Z2 ddl3m4Z4m5Z5m6Z6 ddl7m8Z8 ddl9m:Z: ddl;m<Z< ddl=m>Z> ddl?m@Z@ ddlAmBZB ddlCmDZD ddlEmFZF ddlGmHZHmIZImJZJ ddlKmLZL ddlMmNZN ddlOmPZP ddlQmRZR ddlSmTZT ddlUmVZV ddlWmXZX ddlYmZZZm[Z[ dd Z\G d d! d!Z]G d"d# d#eTZ^G d$d% d%eZ_G d&d' d'e`ZaG d(d) d)e`Zbd*d+ Zcd,d- Zdd.d/ Zed0d1 ZfdS )2z=Training/decoding definition for the speech recognition task.    Nreportertraining)
extensions)StandardUpdater)parse)DistributedDataParallel)data_parallel)DistributedSampler)CompareValueTriggeradadelta_eps_decayadd_results_to_jsonformat_mulenc_argsget_model_confplot_spectrogramrestore_snapshotsnapshot_object
torch_loadtorch_resumetorch_snapshot)freeze_modulesload_trained_modelload_trained_modules)ASRInterface)BeamSearchTransducer)pad_list)SegmentStreamingE2E)WindowStreamingE2E)IStft)Transformation)file_writer_helper)ChainerDataLoader	TransformTransformDataset)set_deterministic_pytorch)dynamic_import)LoadInputsAndTargets)make_batchset)BaseEvaluator)ShufflingEnabler)TensorboardLogger)check_early_stopset_early_stopc                    s8   t | r
|  S t| trt fdd| D S | S )Nc                 3   s    | ]}t | V  qd S N)_recursive_to.0xdevice R/home/ubuntu/.local/lib/python3.10/site-packages/espnet/asr/pytorch_backend/asr.py	<genexpr>D   s    z _recursive_to.<locals>.<genexpr>)torch	is_tensorto
isinstancetuple)xsr4   r5   r3   r6   r/   @   s
   


r/   c                   @   s*   e Zd ZdZd	ddZdd Zdd ZdS )
DistributedDictSummaryaL  Distributed version of DictSummary.

    This implementation is based on an official implementation below.
    https://github.com/chainer/chainer/blob/v6.7.0/chainer/reporter.py

    To gather stats information from all processes and calculate exact mean values,
    this class is running AllReduce operation in compute_mean().
    Nc                 C   s   t  | _d | _|| _d S r.   )reporter_moduleDictSummary_local_summary_summary_names_device)selfr4   r5   r5   r6   __init__R   s   

zDistributedDictSummary.__init__c                 C   s$   | j d u rt| | _ | j|S r.   )rB   	frozensetkeysrA   add)rD   dr5   r5   r6   rH   W   s   
zDistributedDictSummary.addc                 C   s   dd | j D }| jj D ]\}}|| d  |j7  < || d  |j7  < qg }g }t| j D ]}||| d  ||| d  q2tj	|| j
d}tj	|| j
d}tj|dd}tj|dd}	|  |	  || }
i }tt| j D ]\}}|| jjvrqz|
|  ||< qz|S )Nc                 S   s   i | ]}|d dgqS )        r   r5   r1   namer5   r5   r6   
<dictcomp>c       z7DistributedDictSummary.compute_mean.<locals>.<dictcomp>r      r3   T)async_op)rB   rA   
_summariesitems_x_nsortedappendr8   tensorrC   dist
all_reducewait	enumerateitem)rD   
raw_valuesrL   summarysum_list
count_list
sum_tensorcount_tensor
sum_handlecount_handlemean_tensorresult_dictidxr5   r5   r6   compute_mean^   s,   z#DistributedDictSummary.compute_meanr.   )__name__
__module____qualname____doc__rE   rH   rh   r5   r5   r5   r6   r>   H   s
    
	r>   c                       s*   e Zd ZdZd fdd	Zdd Z  ZS )	CustomEvaluatora  Custom Evaluator for Pytorch.

    Args:
        model (torch.nn.Module): The model to evaluate.
        iterator (chainer.dataset.Iterator) : The train iterator.

        target (link | dict[str, link]) :Link object or a dictionary of
            links to evaluate. If this is just a link object, the link is
            registered by the name ``'main'``.

        device (torch.device): The device used.
        ngpu (int): The number of GPUs.
        use_ddp (bool): The flag to use DDP.

    NFc                    sP   t t| || || _|| _|d ur|| _n|jdkr d| _nd| _|| _d S )Ncpur   rO   )superrm   rE   modelr4   ngputypeuse_ddp)rD   rp   iteratortargetr4   rq   rs   	__class__r5   r6   rE      s   

zCustomEvaluator.__init__c              
   C   s  | j d }| jr| |  t|dr|  |}nt|}| jr't| j}nt	 }| j
  t G |D ]<}t|| j}i }t|  | jdksO| jrU| j
|  n
t| j
|t| j W d   n1 siw   Y  || q7W d   n1 s~w   Y  | j
  | S )z*Main evaluate routine for CustomEvaluator.mainresetr   N)
_iterators	eval_hookhasattrry   copyrs   r>   r4   r?   r@   rp   evalr8   no_gradr/   report_scoperq   r
   rangerH   trainrh   )rD   rt   itr^   batchr2   observationr5   r5   r6   evaluate   s4   







zCustomEvaluator.evaluate)NF)ri   rj   rk   rl   rE   r   __classcell__r5   r5   rv   r6   rm      s    rm   c                       s:   e Zd ZdZ				d
 fdd	Zdd Zdd	 Z  ZS )CustomUpdatera  Custom Updater for Pytorch.

    Args:
        model (torch.nn.Module): The model to update.
        grad_clip_threshold (float): The gradient clipping value to use.
        train_iter (chainer.dataset.Iterator): The training iterator.
        optimizer (torch.optim.optimizer): The training optimizer.

        device (torch.device): The device to use.
        ngpu (int): The number of gpus to use.
        use_apex (bool): The flag to use Apex in backprop.
        use_ddp (bool): The flag to use DDP for multi-GPU training.

    FrO   c                    sR   t t| || || _|| _|| _|| _|| _d| _|| _	d| _
|	| _|
| _d S Nr   )ro   r   rE   rp   grad_clip_thresholdr4   rq   
accum_gradforward_count
grad_noise	iterationuse_apexrs   )rD   rp   r   
train_iter	optimizerr4   rq   r   r   r   rs   rv   r5   r6   rE      s   
zCustomUpdater.__init__c                 C   s  |  d}| d}|j}| }t|| j}|j|k}| jdks$| jr/| j| 	 | j
 }nt| j|t| j	 | j
 }| jrlddlm} t|drO|jn|}	|||	}
|
  W d   n1 sfw   Y  n|  | jrddlm} || j| jddd	d
 |  jd7  _|s| j| j
krdS d| _tjj| j | j}| jrt | t!"d#| t$%|rt!&d n|'  |(  dS )z)Main update routine of the CustomUpdater.rx   r   ampr   N)add_gradient_noised         ?g?)durationetascale_factorrO   zgrad norm={}z&grad norm is nan. Do not update model.))get_iteratorget_optimizerepochnextr/   r4   rq   rs   rp   meanr   r
   r   r   apexr   r|   r   
scale_lossbackwardr   espnet.asr.asr_utilsr   r   r   r8   nnutilsclip_grad_norm_
parametersr   rX   rY   logginginfoformatmathisnanwarningstep	zero_grad)rD   r   r   r   r   r2   is_new_epochlossr   optscaled_lossr   	grad_normr5   r5   r6   update_core   sJ   





zCustomUpdater.update_corec                 C   s(   |    | jdkr|  jd7  _d S d S )Nr   rO   )r   r   r   rD   r5   r5   r6   update3  s   
zCustomUpdater.update)FrO   FF)ri   rj   rk   rl   rE   r   r   r   r5   r5   rv   r6   r      s    Ar   c                   @   s2   e Zd ZdZdejfddZedfddZdS )	CustomConverterzCustom batch converter for Pytorch.

    Args:
        subsampling_factor (int): The subsampling factor.
        dtype (torch.dtype): Data type to convert.

    rO   c                 C   s   || _ d| _|| _dS )z#Construct a CustomConverter object.N)subsampling_factor	ignore_iddtype)rD   r   r   r5   r5   r6   rE   D  s   
zCustomConverter.__init__rn   c           
         s   t |dksJ |d \}} jdkr fdd|D }tdd |D }|d jjdkrTtdd |D dj| jd}td	d |D dj| jd}||d
}ntdd |D dj| jd}t	||}tdd |D  j
|}	|||	fS )zTransform a batch and send it to a device.

        Args:
            batch (list): The batch to transform.
            device (torch.device): The device to send to.

        Returns:
            tuple(torch.Tensor, torch.Tensor, torch.Tensor)

        rO   r   c                    s$   g | ]}|d d  j d d f qS r.   )r   r0   r   r5   r6   
<listcomp>[     $ z,CustomConverter.__call__.<locals>.<listcomp>c                 S      g | ]}|j d  qS r   shaper0   r5   r5   r6   r   ^  rN   cc                 S      g | ]
}t |j qS r5   )r8   
from_numpyrealfloatr0   r5   r5   r6   r   d      r   c                 S   r   r5   )r8   r   imagr   r0   r5   r5   r6   r   g  r   )r   r   c                 S      g | ]	}t | qS r5   r8   r   r   r0   r5   r5   r6   r   o      c              	   S   s:   g | ]}t t|trt|d  dd n| qS )r   Nr8   r   r;   r<   nparraylongr1   yr5   r5   r6   r   v  s    ")lenr   r   r   r   kindr   r:   r8   r   r   )
rD   r   r4   r=   ysilensxs_pad_realxs_pad_imagxs_padys_padr5   r   r6   __call__J  s<   


zCustomConverter.__call__N	ri   rj   rk   rl   r8   float32rE   r4   r   r5   r5   r5   r6   r   ;  s    r   c                   @   s6   e Zd ZdZddgejfddZedfddZdS )	CustomConverterMulEnczCustom batch converter for Pytorch in multi-encoder case.

    Args:
        subsampling_factors (list): List of subsampling factors for each encoder.
        dtype (torch.dtype): Data type to convert.

    rO   c                 C   s    || _ d| _|| _t|| _dS )zInitialize the converter.r   N)subsampling_factorsr   r   r   num_encs)rD   r   r   r5   r5   r6   rE     s   zCustomConverterMulEnc.__init__rn   c                    s   t |dksJ |d dj |d d }tjjkr-fddtjD fddtjD  fddtjD } fd	dtjD td
d |D j }||fS )a
  Transform a batch and send it to a device.

        Args:
            batch (list): The batch to transform.
            device (torch.device): The device to send to.

        Returns:
            tuple( list(torch.Tensor), list(torch.Tensor), torch.Tensor)

        rO   r   Nr   c                    s$   g | ]  fd d  D qS )c                    s(   g | ]}|d d j   d d f qS r.   )r   r0   )irD   r5   r6   r     s   ( =CustomConverterMulEnc.__call__.<locals>.<listcomp>.<listcomp>r5   )r1   )rD   xs_listr   r6   r     s    z2CustomConverterMulEnc.__call__.<locals>.<listcomp>c                    s$   g | ]}t d d  | D qS )c                 S   r   r   r   r0   r5   r5   r6   r     rN   r   )r   r   r1   r   )r   r5   r6   r     s    c                    s0   g | ]}t d d | D dj jdqS )c                 S   r   r5   r   r0   r5   r5   r6   r     r   r   r   r   )r   r:   r   r   )r4   rD   r   r5   r6   r     s    c                    s    g | ]}t |  qS r5   )r8   r   r:   r   )r4   
ilens_listr5   r6   r     s    c                 S   s2   g | ]}t t|trt|d  n| qS r   r   r   r5   r5   r6   r     s    $)	r   r   r   sumr   r   r   r   r:   )rD   r   r4   r   xs_list_padr   r5   )r4   r   rD   r   r6   r     s2   

zCustomConverterMulEnc.__call__Nr   r5   r5   r5   r6   r     s    r   c                 C   s   | j  p|dkS r   )rs   )args	worldsizerank	localrankr5   r5   r6   is_writable_process  s   r   c           :         s  | j rOtjdd}|dusJ t|}|| jksJ tjdd}|dus(J t|}tjdd}|dus9J t|}tjdd||d |dkrNt	  nd	}d}d}t
|  | jd	krbt| } tj sltd
 t| jd}t|d W d   n1 sw   Y  t fddt| jD }td  d d d d }t| jD ]}td|d	 ||  qtdt|  d| jv rt| dddkst| dddkrd}nd}td n"| jdkrd}td n| jdkrd}td  nd!}td" | jdus| jdur'| jd	kr't |d || }	nt!| j}
|
| jd	kr7|d n||| }	t"|	t#sDJ |	$ }td#tt%d$d% |	& D   | j'durt(| j'| j)}t*+t*,t-| j.|j/|j0}t1| j'| ||	_'t2| |||rtj34| j5st6| j5 | j5d& }t|d'-}td(|  |7tj8| jd	kr|d n||t9| fd)dd*d+:d, W d   n	1 sw   Y  t;t9|  D ]}td-| d. tt9| |   q|	j<| j r| jd	krt=d/n,| jd	kr0| j>dkr&td0| j>| j>| j f  |  j>| j9  _>| jd	kr0t=d/| j r=t?d1|  nt?| jdkrGd2nd3 | j@d4v rWtt| j@}ntjA}|	jB |d5}	| jCrntD|	| jC\}	}n|	& }td6t%d7d% |	& D t%d8d% |	& D t%d9d% |	& D d: t%d;d% |	& D   | jEd<krtjFjG|d=| jH| jId>}nH| jEd?krtjFjJ|| jId@}n8| jEdAkrddBlKmL} d|v r| jMdkr||| jM| jN| jO}ntPdC||| jQ| jR| jS}nt=dD| jE | j@dEv r^zddFlTmU} W n tVy& } ztWdG| j@ dH |d}~ww | jEdAkr;|jX|	|jY| j@dI\}	|_Yn|jX|	|| j@dI\}	}d*}ddJlZm[} |\|dK |]  tdL nd}t^|dM t^|dNfdOdP | jd	krt_|	j`d |dQ}ntadRd |	jbD |dS}t| jcd}t|d }W d   n	1 sw   Y  t| jd}t|d W d   n	1 sw   Y  | jddkp| jddk}| j rd	}n| jd	kr| jnd	}te|| j>| jf| jg| jh||| ji| jj| jk| jl| jmdddT}te| j>| jf| jg| jh|| ji| jj| jk| jl| jmdddU}tndVd*| jodWd*idX}tndVd*| jodWdidX}tp|tq||} tp|tq||}!dd| }"| j rUtr| tr|!d}"ts| d	| jt|"tsjudY}#ts|!d	dtsju| jtdZ}$| j rxtv|	|gd[}	tw|	| jxd\|#i| | j| jy| jz|| j d]
}%t{j||%| j}d^f| j5d_}&| j rt{j~d`dafdbdc}'|&|' |r|&jt|#g| jddkr| jdn| j}d^fda | jrtdd| j  t| j|& | jdkr|&jt|	d\|$i | j| j | jdefda n|&t|	d\|$i | j| j  t2| |||rdf| jv pdg| jv p|dhv }(| jdkrd|(rdt;t d| j didP d*dj})t|	dkrH|	jj}*|	jj}+n|	j}*|	j}+|+|*|)| j5dl || |dm},|&j|,d`da nd},|dnv r| jdkrt;t d| j dodP d*dj})t|	dkr|	jj}-|	jj}+n|	j}-|	j}+|+|-|)| j5dp || |dm}.|&j|.d`da nd}.| jd	krdqd t|	jD drd t|	jD  }/dsd t|	jD dtd t|	jD  }0t|	dur&g dv}1| jrdwdxgng }2| jrdydzgng }3| jrd{d|gng }4| jrd}d~gng }5|1|2 |3 |4 |5 }6|&tj|6d^dd n|&tjg d| jd	kr5g n|/ d^dd |&tjddgd^dd |&tjddg| jd	krYg n|/ d^dd |&jt|	dt{jdda |dvr|&jt|	dt{jdda | jdkr|&jtdd| jdefda |&jt d`da | jEd<kr| jdkr|dkr|&jt|	| j5d t1dtdddP da |&jt| jtdddP da n?| jdkr|&jt|	| j5d t1dtdddP da |&jt| jtdddP da n| jdkr|&jt| jtdddP da t2| |||r|&tj| jdefda t|	dur5d^deg|6 dg }7ng d| jd	kr@g n|0|/  }7| jEd<kr`|&jtdddP | jdefda |7d | jri|7d | jrr|7d |&jt|7| jdefda |&tj| jd t|&|  t2| |||r| jdur| jdkrddlm}8 |&jt|8| j|,|.d| jdefda | j rdt{j~d`da fdd}9|&|9 |&  t2| |||rt|&| j} dS dS )z\Train with the given args.

    Args:
        args (namespace): The program arguments.

    
WORLD_SIZENRANK
LOCAL_RANKncclzenv://)backendinit_methodr   
world_sizer   rO   zcuda is not availablerbuttsc                    s,   g | ]}t  d   d | d d qS )r   inputr   r   intr   )r   
valid_jsonr5   r6   r     s     ztrain.<locals>.<listcomp>outputr   r   zstream{}: input dims : {}z#output dims: 
transduceretypeFcustomr   custom_transducerzPure transducer moder   ctczPure CTC moderJ   attzPure attention modemtlzMultitask learning mode  Total parameter of the model = c                 s       | ]}|  V  qd S r.   numelr1   pr5   r5   r6   r7   '      ztrain.<locals>.<genexpr>z/model.jsonwbzwriting a model config file to    Tindentensure_ascii	sort_keysutf_8zARGS: z: z7Data parallel is not supported for multi-encoder setup.z0batch size is automatically increased (%d -> %d)zcuda:cudarn   )float16r   float64)r4   r   z6num. model params: {:,} (num. trained: {:,} ({:.1f}%))c                 s   r  r.   r  r  r5   r5   r6   r7   q  r	  c                 s       | ]
}|j r| V  qd S r.   requires_gradr  r  r5   r5   r6   r7   r      c                 s   r  r.   r  r  r5   r5   r6   r7   s  r  g      Y@c                 s   r  r.   r  r  r5   r5   r6   r7   u  r	  adadeltagffffff?)rhoepsweight_decayadam)r  noam)get_std_optz4noam-adim option should be set to use Noam schedulerzunknown optimizer: )O0O1O2O3r   z+You need to install apex for --train-dtype z*. See https://github.com/NVIDIA/apex#linux)	opt_level)CTCloss_fnzregister ctc as float functionru   	serializec                    s
     | S r.   )r&  )sr   r5   r6   <lambda>     
 ztrain.<locals>.<lambda>)r   r   c                 S   s   g | ]}|d  qS r   r5   r   r5   r5   r6   r         r   )	min_batch_sizeshortest_firstcount
batch_binsbatch_frames_inbatch_frames_outbatch_frames_inoutiaxisoaxis)r+  r-  r.  r/  r0  r1  r2  r3  asrr   )modeload_outputpreprocess_confpreprocess_args)dataset
batch_sizenum_workersshufflesampler
collate_fn)r9  r:  r<  r=  r>  r;  )
device_idsrx   )r   rs   r   )out)rO   r   )triggerc                    s      | jj  | jj d S r.   )	set_epochupdaterr   trainer)train_samplerval_samplerr5   r6    set_epoch_to_distributed_sampler+  s   z/train.<locals>.set_epoch_to_distributed_samplerzresumed from %sr   transformer	conformer)r  r  r   c                 S   s   t | d d d d d S )NrO   r   r   r   r   r2   r5   r5   r6   r(  a  r   )keyreversemodulez/att_ws)	converter	transformr4   r   )r   r  c                 S   s   t | d d d d d S )NrO   r   r   r   r   rK  r5   r5   r6   r(  |  r   z	/ctc_probc                 S      g | ]	}d  |d qS )zmain/loss_ctc{}rO   r   r   r5   r5   r6   r         c                 S   rQ  )zvalidation/main/loss_ctc{}rO   rR  r   r5   r5   r6   r     s    c                 S   rQ  )zmain/cer_ctc{}rO   rR  r   r5   r5   r6   r     rS  c                 S   rQ  )zvalidation/main/cer_ctc{}rO   rR  r   r5   r5   r6   r     rS  is_transducer)	main/lossvalidation/main/losszmain/loss_transzvalidation/main/loss_transmain/loss_ctcvalidation/main/loss_ctczmain/loss_aux_transzvalidation/main/loss_aux_transzmain/loss_symm_kl_divz validation/main/loss_symm_kl_divzmain/loss_lmzvalidation/main/loss_lmzloss.png)	file_name)rU  rV  rW  rX  main/loss_attvalidation/main/loss_attmain/accvalidation/main/acczacc.pngmain/cer_ctcvalidation/main/cer_ctczcer.pngzmodel.loss.bestrV  )r   r   r   zmodel.acc.bestz"snapshot.iter.{.updater.iteration})filenameaccz/model.acc.best)load_fnc                 S      | |kS r.   r5   
best_valuecurrent_valuer5   r5   r6   r(        c                 S   rc  r.   r5   rd  r5   r5   r6   r(    rg  r   z/model.loss.bestc                 S      | |k S r.   r5   rd  r5   r5   r6   r(  "  rg  c                 S   rh  r.   r5   rd  r5   r5   r6   r(  )  rg  loss_eps_decay_onlyc                 S   rh  r.   r5   rd  r5   r5   r6   r(  5  rg  elapsed_time)r   r   rU  rW  rZ  rV  rX  r[  r\  r]  r^  r_  rj  r  c                 S   s   | j djd d S )Nrx   r   r  )rC  r   param_groupsrD  r5   r5   r6   r(  _  s
    zvalidation/main/cerzvalidation/main/wer)update_interval )SummaryWriter)att_reporterctc_reporterc                    s.   t jd d}tj|d t jj d d S )NrO   r3   )src)r8   zerosrX   	broadcastr  synchronize)rE  notification)r4   src_rankr5   r6   barrier_extension_per_epoch  s   z*train.<locals>.barrier_extension_per_epoch)rs   osenvirongetr   rq   rX   init_process_groupr   disabler%   r   r   r8   r  is_availabler   openr   jsonloadlistrG   r   r   r   strmodel_modulegetattrmtlalphaenc_initdec_initr   r&   r;   r   get_total_subsampling_factorr   r   rnnlmr   
rnnlm_conf
lm_pytorchClassifierWithStateRNNLMr   	char_listlayerunitr   r   pathexistsoutdirmakedirswritedumpsvarsencoderU   r   NotImplementedErrorr:  r4   train_dtyper   r:   freeze_modsr   r   optimAdadeltar  r  Adam1espnet.nets.pytorch_backend.transformer.optimizerr  	noam_adimoptimizer_warmup_stepsnoam_lr
ValueErroradimtransformer_warmup_stepstransformer_lrr   r   ImportErrorerror
initializer   espnet.nets.pytorch_backend.ctcr$  register_float_functioninitsetattrr   	subsampler   subsample_list
train_json	sortagradr(   	maxlen_in
maxlen_outminibatchesbatch_countr.  r/  r0  r1  r'   r7  r$   r#   r   r"   n_iter_processesget_first_elementDDPr   	grad_clipr   r   r   Trainerepochsmake_extensionextendr*   resumer   save_interval_itersrm   num_save_attentionrR   r|   rN  calculate_all_attentionsattention_plot_classnum_save_ctccalculate_all_ctc_probsctc_plot_classuse_ctc_lossuse_aux_transducer_lossuse_symm_kl_div_lossuse_lm_lossr   
PlotReportr   triggersMinValueTriggerMaxValueTriggerr   	criterionr   r   r   	eps_decay	LogReportreport_interval_itersobserve_valuerV   
report_cer
report_werPrintReportProgressBarr-   tensorboard_dirtorch.utils.tensorboardrn  r+   runr,   ):r   r   r   r   f	idim_listodimr   mtl_moderp   model_classtotal_subsampling_factor
rnnlm_argsr  
model_confrL  r   model_paramsr   r  r   er   r$  rO  r  use_sortagradr+  r   validload_trload_cvtrain_dsval_dsr<  r   
valid_iterrC  rE  rH  is_attn_plotdata
att_vis_fn
plot_classro  
ctc_vis_fnrp  report_keys_loss_ctcreport_keys_cer_ctc
trans_keysctc_keysaux_trans_keyssymm_kl_div_keyslm_keystransducer_keysreport_keysrn  rw  r5   )r4   r   rv  rF  r   rG  r   r6   r     s@  


 


$

	&







	
		
	




			
	


	

r   c                    s	  t |  t| jdd\}t|tsJ | |_| jdur'tdd | jD }ntj	j
h}| jrmtd ttjtdk rLdjv rLtj	j|v rLtd	| jd
kr^ttjtdk r^tdtt| j}tjj|||d}| jrydjv rytdtdttdd | D   | jrt| j| j}t|dddkrtdt !t "t#j$|j%|j&t|dd}t'| j| | j(rtt| j}tjj|||d}|)  nd}| j*r't| j*| j+}|j,}dd t-j$D }t !t "t#||j%|j&t|dd}t'| j*| |)  |durt !t./|j0|j0||}nt !t.1|j0||}| j2dkrHt3t4| j2}	tdt|	  |5  |rH|5  t6| j7d}
t89|
d W d   n	1 saw   Y  i }t:ddd| j;du ruj;n| j;ddid}t<|d rt<|d!r|j=}n|j>}|j?j@}tAdVi d"|d#|d$| jBd%|d&| jCd'| jDd(| jEd)| jFd*| jGd+| jHd,| jId-| jJd.| jKd/| jLd0| jMd1| j}| jNd2krHtO \ t-P dD ]K\}}td3| |t#P  || fg}|| | jQdkr d2 d2 n fd4dt4|jQD  | jd5kro| jQdkrotd6| jR tS|| |d7}t4d2 jTd2 | jRD ]}td8||| jR  |U ||| jR   qBtd9 |V  td: |W }n| jd;kr| jQdkrtd<| jX g }t4| jMD ]}|Yg d=d> qtZ|| |d7}t[\|j]}t4d2 jTd2 |D ]f}|U |||  }|durd?^fd@d|d2 dA ddB D }|_dCdD` }|_|jadD}|_|jbd?}t| t4| jMD ]}|| dA c|| dA  || dE  || dE 7  < qqnt<|d r |d |}n	|d | j$|}te| |j$||< qW d   n	1 sAw   Y  nPdWdFdG}t3P | jNdkryfdHdD tft4t#fdIdJdK}fdLd|D tO  || jNdD ] }dMd |D }fdNd|D }| jQdkr||d2 n||}| jd5kr| jQdkrt| jd;krc| jQdkrc| jNdkrt|d2  g }t4| jMD ]}|Yg d=d> qtZ|| |d7}t[\|j]}t4d2 jTd2 |D ]f}|U |||  }|dur]d?^fdOd|d2 dA ddB D }|_dCdD` }|_|jadD}|_|jbd?}t| t4| jMD ]}|| dA c|| dA  || dE  || dE 7  < q>q|g}n
|jg|| j$|dP}t-|D ]\}}|| }te| |j$||< qqqW d   n	1 sw   Y  t6| jhdQ}
|
it8jjd|idRddSdTkdU W d   dS 1 sw   Y  dS )Xz]Decode with the given args.

    Args:
        args (namespace): The program arguments.

    Fr   Nc                 S   s   g | ]}t tj|qS r5   )r  r8   r   )r1   qr5   r5   r6   r         zrecog.<locals>.<listcomp>z'Use a quantized ASR model for decoding.z1.4.0lstmz;Quantized LSTM in ESPnet is only supported with torch 1.4+.r  z1.5.0zufloat16 dtype for dynamic quantization is not supported with torch version < 1.5.0. Switching to qint8 dtype instead.r   rI  z1streaming mode for transformer is not implementedr  c                 s   r  r.   r  r  r5   r5   r6   r7     r	  zrecog.<locals>.<genexpr>r  defaultz?use '--api v2' option to decode with non-default language model
embed_unitc                 S   s   i | ]\}}||qS r5   r5   )r1   r   r2   r5   r5   r6   rM     rN   zrecog.<locals>.<dictcomp>rO   gpu id: r   r   r4  r   )r5  r6  sort_in_input_lengthr7  r8  rT  decdecoderjoint_network	beam_sizelm	lm_weightsearch_typemax_sym_expu_maxnstepprefix_alphaexpansion_gammaexpansion_beta
score_normsoftmax_temperaturenbestquantizationr   z(%d/%d) decoding c                    s   g | ]} | d  qS r   r5   )r1   rg   )featr5   r6   r   9  r  windowz5Using streaming recognizer with window size %d frames)e2e
recog_argsr  zFeeding frames %d - %dz!Running offline attention decoderz"Offline attention decoder finishedsegmentz2Using streaming recognizer with threshold value %drJ   )yseqscorerm  c                    &   g | ]}t |d kr jt | qS r   r   r  r0   
train_argsr5   r6   r   X  
    r  r   u   ▁ r  c                 S      t |g|  }tj|d|iS N	fillvalueiter	itertoolszip_longestniterabler!  kargsr5   r5   r6   groupers     zrecog.<locals>.grouperc                    $   g | ]} | d  d d d qS r   r   r   r5   r1   rL  jsr5   r6   r   z  r   c                    
    |   S r.   r5   r   	feat_lensr5   r6   r(  {  r)  zrecog.<locals>.<lambda>rL  c                       g | ]} | qS r5   r5   r   rG   r5   r6   r   |  r*  c                 S   s   g | ]}|r|qS r5   r5   rK   r5   r5   r6   r     r*  c                       g | ]}| | fqS r5   r5   rK   r/  r5   r6   r     r  c                    r  r  r  r0   r  r5   r6   r     r  )r  r
  r  Tr  r  r5   r.   )lr%   r   rp   r;   r   r  quantize_configsetr8   r   Linearquantize_asr_modelr   r   V__version__r   LSTMr  quantize_dtyper  r  quantize_dynamicstreaming_moder  r  r  r   r   r  r   r  r  r  r  r   r  r  r  r   quantize_lm_modelr~   
word_rnnlmword_rnnlm_confchar_list_dictr[   extlm_pytorchMultiLevelLM	predictorLookAheadWordLMrq   r  r   r  r~  
recog_jsonr  r  r'   r7  r|   r   r  transducer_tasksr  r   r  r  r  r  r  r	  r
  r  r  r  r  r  	batchsizer   rG   r   streaming_windowr   r   accept_inputdecode_with_attention_offlineretrieve_recognitionstreaming_min_blank_durrV   r   r   prodr  joinreplacestripspaceblankr  	recognizer   rU   recognize_batchresult_labelr  r  r  )r   rp   q_configr   r  r  	word_dict	char_dictrC  gpu_idr  new_jsload_inputs_and_targetstrans_decoderr  beam_search_transducerrg   rL   r   se2er   
nbest_hypsr'  rhypstextr*  sorted_indexnamesfeats	nbest_hypr5   )r  r3  r0  rG   r  r6   recog  s  







	






 

B


 
3$rl  c           #         s  t |  t| j| j\}}}| jdksJ d| jtd| j  t|j	}||||}t
|ts5J t| j| | |_| jdkrWtt| j}tdt|  |  t| jd}t|d W d   n1 sow   Y  tdd	d	dd
}| jdkrd| _| jdurt| j| jd}	nd}	| jdu r|jn| j}
|
durtd|
  t|
}nd}d}| j}| jr$|
durt|
@}t|}d|v sJ ||d D ]&}|d dkrt |d |d |!ddd}td|
| |d } nqW d   n	1 sw   Y  |du r$t | j"| j| j#d}td| t$ fddD  t%tt&  fddd}fdd|D d7dd}d}t'j()| j*s_t'+| j* || jdD ]}fd d|D }||d }|dur||d	d!}n|}t,-  |.|\}}}W d   n	1 sw   Y  t/|D ]O\}}|| d||  }|| d||  }|| }| j*dur|| j0k rddl1}|2d" ddl3m4}  |d7 }d}!| j5d#d$ | 6d%dd | 7d&|! t8| |dd|!f j9| j:d'|d	d	d( | 6d%dd) | 7d*|! t8| |dd|!f j9| j:d+|d	d	d( | 6d%dd, | 7d-|! t8| |dd|!f |dd|!f  j9|| j:d+d	d	d. | 6d%dd% | 7d/ t8| |j9| j:d+|d0 | ;t'j(<| j*|d1  | =  |	dur|dur||}n|}| j>rt&|| t&|k r|dt&||  }n&t&|t&|krdt&|| t&| fgd2g|j?d   }"t@jA||"d3d4}| jd5v r| j:|f|	|< n||	|< || j0kr|	du rtd6  nqqfdS )8zbDumping enhanced speech and mask.

    Args:
        args (namespace): The program arguments.
    rO   z+number of encoder should be 1 ({} is given)zreading model parameters from r  r   r   Nr4  F)r5  r6  r  r7  r   )filetypezUse preprocessing: processrr   stft
win_lengthn_shiftr  hann)rp  rq  r  z4stft is found in {}. Setting istft config from it
{}z2Setting istft config from the command line args
{}c                    r,  r-  r5   r.  r/  r5   r6   r   !  r   zenhance.<locals>.<listcomp>c                    r1  r.   r5   r   r2  r5   r6   r(  "  r)  zenhance.<locals>.<lambda>r4  c                    r5  r5   r5   r   r6  r5   r6   r   #  r*  c                 S   r  r   r"  r&  r5   r5   r6   r*  %  r+  zenhance.<locals>.grouperc                    r7  r5   r5   rK   r/  r5   r6   r   .  r  )r   Agg)   
   )figsizer  zMask [ref={}ch]linear)fsr5  frame_shiftbottomlabelbottom   zNoisy speech [ref={}ch]db   zMasked speech [ref={}ch])ry  rx  r5  rz  r{  zEnhanced speech)rx  r5  ry  z.png)r   r   constant)r5  )soundz
sound.hdf5zBreaking the process.r.   )Br%   r   rp   r  r   r   r   r   r&   r  r;   r   r   r  rq   r  r   r  r  r~  rJ  r  r  r'   rL  enh_wspecifierr!   enh_filetyper7  r    istft_n_shiftapply_istftr   rz  istft_win_lengthistft_windowrG   rU   r   rx  r  r  	image_dirr  r8   r   enhancer[   
num_images
matplotlibusematplotlib.pyplotpyplotfiguresubplottitler   Trx  savefigrS  clfkeep_lengthndimr   pad)#r   idimr  r  r  rp   r^  r  r`  
enh_writerr7  rP  istftry  confr  rh  r*  r  ri  r   	org_featsrj  enhancedmaskr   rg   rL   enhmasr  r  pltref_chpadwidthr5   )r3  r0  rG   r6   r    sB  
















 





r  )grl   r}   r$  r  r   r   rx  numpyr   r8   torch.distributeddistributedrX   chainerr   r?   r   chainer.trainingr   chainer.training.updaterr   packaging.versionr   r<  torch.nn.parallelr	   r  r
   torch.utils.data.distributedr   espnet.lm.pytorch_backend.extlmr  pytorch_backendextlmrF  &espnet.nets.pytorch_backend.lm.defaultnetsr  r  r   r   r   r   r   r   r   r   r   r   r   r   #espnet.asr.pytorch_backend.asr_initr   r   r   espnet.nets.asr_interfacer   "espnet.nets.beam_search_transducerr   #espnet.nets.pytorch_backend.e2e_asrr   -espnet.nets.pytorch_backend.streaming.segmentr   ,espnet.nets.pytorch_backend.streaming.windowr   espnet.transform.spectrogramr   espnet.transform.transformationr    espnet.utils.cli_writersr!   espnet.utils.datasetr"   r#   r$    espnet.utils.deterministic_utilsr%   espnet.utils.dynamic_importr&   espnet.utils.io_utilsr'   espnet.utils.training.batchfyr(   espnet.utils.training.evaluatorr)   espnet.utils.training.iteratorsr*   (espnet.utils.training.tensorboard_loggerr+   !espnet.utils.training.train_utilsr,   r-   r/   r>   rm   r   objectr   r   r   r   rl  r  r5   r5   r5   r6   <module>   sp   4;EsGD     T  "