o
    ig                     @   sn  d Z ddlZddlZddlZddlZddlZddlZddlZddlZddl	Z
ddlZddlmZ ddlmZ ddlmZmZmZmZmZ ddlmZ ddlmZ ddlmZ dd	lmZmZ dd
lmZ ddl m!Z! ddl"m#Z# ddl$m%Z% ddl&m'Z' ddl(m)Z) ddl*m+Z+ ddl,m-Z-m.Z. G dd de'Z/G dd dej0Z1G dd de2Z3dd Z4e5 dd Z6dS )z&E2E-TTS training / decoding functions.    N)training)
extensions)get_model_confsnapshot_object
torch_loadtorch_resumetorch_snapshot)load_trained_modules)pad_list)TTSInterface)ChainerDataLoaderTransformDataset)set_deterministic_pytorch)dynamic_import)LoadInputsAndTargets)make_batchset)BaseEvaluator)ShufflingEnabler)TensorboardLogger)check_early_stopset_early_stopc                       s(   e Zd ZdZ fddZdd Z  ZS )CustomEvaluatorzCustom evaluator.c                    s"   t t| || || _|| _dS )a3  Initilize module.

        Args:
            model (torch.nn.Module): Pytorch model instance.
            iterator (chainer.dataset.Iterator): Iterator for validation.
            target (chainer.Chain): Dummy chain instance.
            device (torch.device): The device to be used in evaluation.

        N)superr   __init__modeldevice)selfr   iteratortargetr   	__class__ R/home/ubuntu/.local/lib/python3.10/site-packages/espnet/tts/pytorch_backend/tts.pyr   /   s   

zCustomEvaluator.__init__c              
      s<   j d } jr   t|dr|  |}nt|}tj } j	  t
 a |D ]V}t|trBt fdd|D }n|}| D ]}||  j||< qHi }tj| t|tri j|  n jdi | W d   n1 s{w   Y  || q/W d   n1 sw   Y   j  | S )z"Evaluate over validation iterator.mainresetc                 3       | ]	}|  jV  qd S Ntor   .0arrr   r!   r"   	<genexpr>Q       z+CustomEvaluator.evaluate.<locals>.<genexpr>Nr!   )
_iterators	eval_hookhasattrr$   copychainerreporterDictSummaryr   evaltorchno_grad
isinstancetuplekeysr(   r   report_scopeaddtraincompute_mean)r   r   itsummarybatchxkeyobservationr!   r,   r"   evaluate>   s8   









zCustomEvaluator.evaluate)__name__
__module____qualname____doc__r   rF   __classcell__r!   r!   r   r"   r   ,   s    r   c                       s2   e Zd ZdZd	 fdd	Zdd Zdd Z  ZS )
CustomUpdaterzCustom updater.   c                    s@   t t| || || _|| _|| _tjjj	| _
|| _d| _dS )a  Initilize module.

        Args:
            model (torch.nn.Module) model: Pytorch model instance.
            grad_clip (float) grad_clip : The gradient clipping value.
            iterator (chainer.dataset.Iterator): Iterator for training.
            optimizer (torch.optim.Optimizer) : Pytorch optimizer instance.
            device (torch.device): The device to be used in training.

        r   N)r   rL   r   r   	grad_clipr   r7   nnutilsclip_grad_norm_clip_grad_norm
accum_gradforward_count)r   r   rN   r   	optimizerr   rS   r   r!   r"   r   f   s   
zCustomUpdater.__init__c                    s    d} d}| }t|trt fdd|D }n|}| D ]}||  j||< q%t|trB j| 	  j
 }n jd	i |	  j
 }|    jd7  _ j j
krbdS d _  j  j}td| t|rtd n|  |  dS )
zUpdate model one step.r#   c                 3   r%   r&   r'   r)   r,   r!   r"   r-      r.   z,CustomUpdater.update_core.<locals>.<genexpr>rM   Nr   zgrad norm={}z&grad norm is nan. Do not update model.r!   )get_iteratorget_optimizernextr9   r:   r;   r(   r   r   meanrS   backwardrT   rR   
parametersrN   loggingdebugformatmathisnanwarningstep	zero_grad)r   
train_iterrU   rB   rC   rD   loss	grad_normr!   r,   r"   update_corez   s,   




zCustomUpdater.update_corec                 C   s(   |    | jdkr|  jd7  _dS dS )zRun update function.r   rM   N)rg   rT   	iterationr,   r!   r!   r"   update   s   
zCustomUpdater.update)rM   )rG   rH   rI   rJ   r   rg   ri   rK   r!   r!   r   r"   rL   c   s
    &rL   c                   @   s*   e Zd ZdZdd ZedfddZdS )CustomConverterzCustom converter.c                 C   s   dS )zInitilize module.Nr!   r,   r!   r!   r"   r      s   zCustomConverter.__init__cpuc                 C   sB  t |dksJ |d \}}}}ttdd |D  |}ttdd |D  |}tdd |D d|}tdd |D d|}||	d|	d}	t
|D ]\}
}d|	|
|d d	f< q^||||	|d
}|d	urtt| }|||d< |d	urtdd |D d}|||d< |S )a   Convert a given batch.

        Args:
            batch (list): List of ndarrays.
            device (torch.device): The device to be send.

        Returns:
            dict: Dict of converted tensors.

        Examples:
            >>> batch = [([np.arange(5), np.arange(3)],
                          [np.random.randn(8, 2), np.random.randn(4, 2)],
                          None, None)]
            >>> conveter = CustomConverter()
            >>> conveter(batch, torch.device("cpu"))
            {'xs': tensor([[0, 1, 2, 3, 4],
                           [0, 1, 2, 0, 0]]),
             'ilens': tensor([5, 3]),
             'ys': tensor([[[-0.4197, -1.1157],
                            [-1.5837, -0.4299],
                            [-2.0491,  0.9215],
                            [-2.4326,  0.8891],
                            [ 1.2323,  1.7388],
                            [-0.3228,  0.6656],
                            [-0.6025,  1.3693],
                            [-1.0778,  1.3447]],
                           [[ 0.1768, -0.3119],
                            [ 0.4386,  2.5354],
                            [-1.2181, -0.5918],
                            [-0.6858, -0.8843],
                            [ 0.0000,  0.0000],
                            [ 0.0000,  0.0000],
                            [ 0.0000,  0.0000],
                            [ 0.0000,  0.0000]]]),
             'labels': tensor([[0., 0., 0., 0., 0., 0., 0., 1.],
                               [0., 0., 0., 1., 1., 1., 1., 1.]]),
             'olens': tensor([8, 4])}

        rM   r   c                 S      g | ]}|j d  qS r   shaper*   rC   r!   r!   r"   
<listcomp>       z,CustomConverter.__call__.<locals>.<listcomp>c                 S   rl   rm   rn   r*   yr!   r!   r"   rq      rr   c                 S      g | ]	}t | qS r!   )r7   
from_numpylongrp   r!   r!   r"   rq          c                 S   ru   r!   r7   rv   floatrs   r!   r!   r"   rq      rx         ?N)xsilensyslabelsolensspembsc                 S   ru   r!   ry   )r*   extrar!   r!   r"   rq      rx   extras)lenr7   rv   nparrayrw   r(   r
   	new_zerossize	enumeraterz   )r   rB   r   r|   r~   r   r   r}   r   r   il	new_batchr!   r!   r"   __call__   s,   )$$	zCustomConverter.__call__N)rG   rH   rI   rJ   r   r7   r   r   r!   r!   r!   r"   rj      s    rj   c           (         s	  t |  tj std t| jd}t	|d }W d   n1 s&w   Y  t
| }t||d  d d d d }t||d  d	 d d d }td
t|  tdt|  | jrxt||d  d	 d d d | _nd| _| jrt||d  d	 d d d | _nd| _tj| jst| j | jd }t|d#}td|  |tj||t| fddddd W d   n1 sw   Y  tt|  D ]}td| d tt| |   q| jdus| jdurt||| t }nt!| j"}	|	||| }t#|t sJ t| |j$| j%dkrNtj&j'|t
t(| j%d}| j)dkrNtd| j)| j)| j% f  |  j)| j%9  _)t*| j%dkrXdnd}
|+|
}| j,rt-|drsdd | j,D }n| j,}|. D ]\}t/fdd|D rt d d|_0qzt1d d! |2 }n|2 }td"3t4d#d |2 D t4d$d |2 D t4d%d |2 D d& t4d'd |2 D   | j5d(krtj6j7|| j8| j9| j:d)}n| j5d*krdd+l;m<} ||| j=| j>| j?}nt@d,| j5 tA|d- tA|d.fd/d! t| jBd}t	|d }W d   n	1 s2w   Y  t| jd}t	|d }W d   n	1 sPw   Y  | jCd0kp_| jCdk}|rfd	| _DtE|| j)| jF| jG| jH| jD| j%dkr{| j%nd|| jI| jJ| jK| jL| jMdddd1}tE|| j)| jF| jG| jH| jD| j%dkr| j%nd| jI| jJ| jK| jL| jMdddd2}tNd3| j| j| jOd4di| jPd5tNd3| j| j| jOd4di| jPd5tQ  d6tRtS| fd7d!d| jT| d8d! d9i}d6tRtS| fd:d!ddd;d! | jTd<i}tU|| jV|||
| jW}tXjY|| jZd=f| jd>}| j[r,td?| j[  t\| j[| | j]d=f}| j^d=f}| j_d@f}|j`ta|||
|dA |j`tb |dA |j`tc|dBtXjdjedC|dAdA | jfdkrtt
|g d| jf dDd! dE}t-|dr|jhji}|jhjj}|jhjk}n	|ji}|jj}|jk}|dkrtlm|}t(tn|D ]!}|| d d	 d d d } | | || d d	 d d d< q|||| jdF  |
ddG}!|j`|!|dA nd}!t-|dr|jhjo}"n|jo}"g }#|"D ]}dH| dI| g}$|j`tpjq|$d=|dJ dK|dA |#|$7 }#q|j`tpjq|#d=dLdK|dA |`tpjr|dA g dM|# }%|j`tps|%|dA |j`tpt |dA tu||  | jvdurb| jvdNkrbddOlwmx}& |&| jv}'|j`ty|'|!|dA |r{|j`tz|g| jCd0krt| jCn| jZd=fdA |{  t||| jZ dS )PzTrain E2E-TTS model.zcuda is not availablerbuttsNr   outputro   rM   inputz#input dims : z#output dims: z/model.jsonwbzwriting a model config file to   FT)indentensure_ascii	sort_keysutf_8zARGS: : )
device_idsz0batch size is automatically increased (%d -> %d)cudark   modulec                 S   s   g | ]}d | qS )zmodule.r!   rp   r!   r!   r"   rq   D  s    ztrain.<locals>.<listcomp>c                 3   s    | ]}  |V  qd S r&   )
startswith)r*   rD   )modr!   r"   r-   I  s    ztrain.<locals>.<genexpr>z is frozen not to be updated.c                 S   s   | j S r&   )requires_gradrC   r!   r!   r"   <lambda>M  s    ztrain.<locals>.<lambda>z6num. model params: {:,} (num. trained: {:,} ({:.1f}%))c                 s       | ]}|  V  qd S r&   numelr*   pr!   r!   r"   r-   S      c                 s       | ]
}|j r| V  qd S r&   r   r   r   r!   r!   r"   r-   T      c                 s   r   r&   r   r   r!   r!   r"   r-   U  r   g      Y@c                 s   r   r&   r   r   r!   r!   r"   r-   W  r   adam)epsweight_decaynoam)get_std_optzunknown optimizer: r   	serializec                    s
     | S r&   )r   )s)r4   r!   r"   r   k  s   
 )batch_sort_keymin_batch_sizeshortest_firstcount
batch_binsbatch_frames_inbatch_frames_outbatch_frames_inoutswap_ioiaxisoaxis)
r   r   r   r   r   r   r   r   r   r   ttsr>   )modeuse_speaker_embeddinguse_second_targetpreprocess_confpreprocess_argskeep_all_data_on_memr#   c                        | gS r&   r!   data)	converterload_trr!   r"   r         c                 S      | d S Nr   r!   r   r!   r!   r"   r         )dataset
batch_sizenum_workersshuffle
collate_fnc                    r   r&   r!   r   )r   load_cvr!   r"   r     r   c                 S   r   r   r!   r   r!   r!   r"   r     r   )r   r   r   r   r   epoch)outzresumed from %srh   )triggerzmodel.loss.bestzvalidation/main/lossc                 S   s   t | d d d d d S )NrM   r   r   ro   )intr   r!   r!   r"   r     s    )rD   z/att_ws)r   	transformr   reversezmain/zvalidation/main/z.png)	file_namezall_loss.png)r   rh   elapsed_time )SummaryWriter)}r   r7   r   is_availabler\   ra   open
valid_jsonjsonloadlistr;   r   infostrr   spk_embed_dimr   spc_dimospathexistsoutdirmakedirswritedumpsvarsencodesortedenc_initdec_initr	   r   r   model_moduler9   r4   ngpurO   DataParallelranger   r   r(   freeze_modsr1   named_parametersanyr   filterr[   r^   sumoptoptimAdamlrr   r   1espnet.nets.pytorch_backend.transformer.optimizerr   adimtransformer_warmup_stepstransformer_lrNotImplementedErrorsetattr
train_json	sortagradr   r   	maxlen_in
maxlen_outminibatchesbatch_countr   r   r   r   r   r   r   rj   r   r   num_iter_processesrL   rN   rS   r   Trainerepochsresumer   eval_interval_epochssave_interval_epochsreport_interval_itersextendr   r   r   triggersMinValueTriggernum_save_attentionitemsr   calculate_all_attentionsattention_plot_classreduction_factorr2   deepcopyr   base_plot_keysr   
PlotReport	LogReportPrintReportProgressBarr   tensorboard_dirtorch.utils.tensorboardr   r   r   runr   )(argsfr   r   idimodim
model_confrD   r   model_classr   r   parammodel_paramsrU   r   r  use_sortagradtrain_batchsetvalid_batchsetrd   
valid_iterupdatertrainereval_intervalsave_intervalreport_intervalr   
att_vis_fn
plot_classr  idxilenatt_reporterr  	plot_keysplot_keyreport_keysr   writerr!   )r   r   r   r   r4   r"   r>      s  

  $$
$


		





$	


r>   c                 C   s  t |  t| j| j\}}}tt|  D ]}td| d t	t| |   qt
|j}||||}t|ts<J t| td| j  t| j| |  t| jdkr\dnd}||}t| jd}t|d }	W d	   n1 s|w   Y  tj| j}
t|
dkrtj|
st|
 td
dd|j| j d	u r|j n| j ddid}d"dd}dd }dd }t!"dj#| jd}| j$rt!"dj#| j%ddd}| j&rt!"dj#| j%ddd}t'|	 D ]\}}||	| fg}||}t(|d d |}d	}|jrt)|d d |}t** }|j+|| |d\}}}tdt,|-dt** |    |-d|-d| j. krQt/d|  ||}td|d t|	 ||-d|-d|f  |0 1 ||< | j$r||}|0 1 ||< | j&rt23|4dd||< |d	ur||0 1 tj| jd |   |d	ur||0 1 tj| jd!|   q|5  | j$r|5  | j&r|5  d	S d	S )#zDecode with E2E-TTS model.zargs: r   zreading model parameters from r   r   rk   r   r   Nr   Fr>   )r   
load_inputsort_in_input_lengthr   r   r      r      c              	   S   s  dd l }|d dd lm} | j}t|dkr6|j||d ||  |d |	d |
ddg ntt|dkrU|j||d |j| dd	 |d
 |	d nUt|dkr|j|d |d  |d |d  f|d t| D ]0\}}t|dD ]&\}	}
||d |d ||d  |	  |j|
dd	 |d
 |	d q}qtntd|  tjtj|stjtj|dd || |  d S )Nr   AggrM   )figsizedpiFrameProbability   auto)aspectInputOutputr   z!Support only from 1D to 4D array.T)exist_ok)
matplotlibusematplotlib.pyplotpyplotro   r   figureplotxlabelylabelylimimshowr   subplotr  tight_layoutr   r   r   dirnamer   savefigclose)r   fignamerD  rE  rN  pltro   idx1r|   idx2rC   r!   r!   r"   _plot_and_savea  s>   




*"

zdecode.<locals>._plot_and_savec                 S   sh   | d u rdS t | jdkrt| jddd  S t | jdkr0t| jddd jdd S td)Nr{   rH  r   dimr   r   +att_ws should be 2 or 4 dimensional tensor.)r   ro   rz   maxrY   
ValueErroratt_wsr!   r!   r"   _calculate_focus_rete  s    z%decode.<locals>._calculate_focus_retec                    s   t  jdkrn,t  jdkr0tjdd  D dd  jddd jdd}| } |  ntdt fd	dt	 jd
 D }|
dd
 S )NrH  r   c                 S   s   g | ]}|qS r!   r!   )r*   att_wr!   r!   r"   rq     s    z<decode.<locals>._convert_att_to_duration.<locals>.<listcomp>r   rb  r   rd  c                    s    g | ]}  d | qS )r   )argmaxeqr   )r*   r   rg  r!   r"   rq     s     rM   )r   ro   r7   catre  rY   rk  rf  stackr   viewrz   )rh  diagonal_scoresdiagonal_head_idx	durationsr!   rg  r"   _convert_att_to_duration  s   
z(decode.<locals>._convert_att_to_durationzark,scp:{o}.ark,{o}.scp)ofeatsrr  focus_ratesrM   )spembz$inference speed = %.1f frames / sec.z*output length reaches maximum length (%s).z+(%d/%d) %s (size: %d->%d, focus rate: %.3f)z/probs/%s_prob.pngz/att_ws/%s_att_ws.png)r@  rB  )6r   r   r   r(  r   r   r;   r\   r   r   r   r   r9   r   r   r6   r7   r   r   r(   r   r   r   r   r   rZ  r   r   r   r   r   r   r   kaldiioWriteHelperr^   save_durationsreplacesave_focus_ratesr   
LongTensorFloatTensortime	inferencer   r   maxlenratiora   rk   numpyr   r   reshaper\  )r$  r&  r'  
train_argsrD   r)  r   r   r%  jsr   load_inputs_and_targetsra  ri  rs  feat_writer
dur_writer	fr_writerr7  utt_idrB   r   rC   rw  
start_timeoutsprobsrh  
focus_ratedsr!   r!   r"   decode2  s   $





)&



r  )7rJ   r2   r   r\   r_   r   r  r3   rx  r  r   r7   r   chainer.trainingr   espnet.asr.asr_utilsr   r   r   r   r   #espnet.asr.pytorch_backend.asr_initr	   &espnet.nets.pytorch_backend.nets_utilsr
   espnet.nets.tts_interfacer   espnet.utils.datasetr   r    espnet.utils.deterministic_utilsr   espnet.utils.dynamic_importr   espnet.utils.io_utilsr   espnet.utils.training.batchfyr   espnet.utils.training.evaluatorr   espnet.utils.training.iteratorsr   (espnet.utils.training.tensorboard_loggerr   !espnet.utils.training.train_utilsr   r   r   StandardUpdaterrL   objectrj   r>   r8   r  r!   r!   r!   r"   <module>   sD   7DW  6