o
    iC                     @   s   d Z ddlZddlZddlZddlZddlZddlZddlZddl	m
Z
 ddlmZ ddlmZ dddZd	d
 ZedkrHeejdd  dS dS )z4End-to-end speech translation model training script.    N)__version__)	strtobool)BATCH_COUNT_CHOICESTc                 C   s  | du rt jdt jt jd} | jdddd | jddd	d | jd
ddd | jddtdd | jddg ddd | jddtddgdd | jdt|dd | jddtdd | jd|d d! | jd"dtd#d | jd$td%d& | jd'd(d)d*d+d, | jd-d.td/d0d1 | jd2d3d4td5d | jd6dtd*d7d8 | jd9d:td;d | jd<d4td=d | jd>tdd?d1 | jd@tddAd1 | jdBtddCd1 | jdDdEtg dFdGd | jdHdItdJd | jdKdItdLd | jdMdItdNd | jdOdItdPd | jdQdRdSdTdU | jdVdRdSdWdU | jdXddSdYdU | jdZtdd[d1 | jd\td]d^d1 | jd_dItd`d | jdadItdbd | jdcdItddd | jdetddfd1 | jdgtddhd1 | jdidItdjd | jdkdltdmd | jdndotdpd | jdqd4td*drd8 | jdsdtt	dud | jdvdwdxd4tdyd | jdzd4td{d | jd|d4td}d | jd~d4tdd | jdd4tdd | jdddtddd | jdddtddd | jdd4tdd | jdtdd*dd | jddtg ddd | jddtdd | jddtdd | jddtdd | jddtdd | jddtdd | jddItdd | jddtddgdd | jddtdd | jdddtdd | jddtd*dd8 | jddtd*dd8 | jddtdd | jddtdd | jddtdd | jdt
dRdd1 | jddRt
d*dd8 | jddtd*dd8 | jddddƄ dd | jddtd*dd8 | jddddƄ dd | jddRt
dd | jddRt
dd | jdtddd1 | jdt
ddd1 | jdt
dd)d1 | jdt
dRd)d1 | jdtddd1 | jdtddd1 | jdtdId)d1 | jdtdd)d1 | S )zGet default arguments.NzFTrain a speech translation (ST) model on one CPU, one or multiple GPUs)descriptionconfig_file_parser_classformatter_classz--configTzconfig file path)is_config_filehelpz	--config2zCsecond config file path that overwrites the settings in `--config`.z	--config3zRthird config file path that overwrites the settings in `--config` and `--config2`.z--ngpuz5Number of GPUs. If not given, use all visible devices)defaulttyper	   z--train-dtypefloat32)float16r   float64O0O1O2O3zData type for training (only pytorch backend). O0,O1,.. flags require apex. See https://nvidia.github.io/apex/amp.html#opt-levels)r
   choicesr	   z	--backendchainerpytorchzBackend library)r
   r   r   r	   z--outdirzOutput directory)r   requiredr	   z--debugmode   	Debugmodez--dict
Dictionary)r   r	   z--seedzRandom seedz
--debugdirzOutput directory for debugging)r   r	   z--resumez-r ?z!Resume the training from snapshot)r
   nargsr	   z--minibatchesz-Nz-1z&Process only N minibatches (for debug))r   r
   r	   z	--verbosez-Vr   zVerbose optionz--tensorboard-dirzTensorboard log dir path)r
   r   r   r	   z--report-interval-itersd   zReport interval iterationsz--save-interval-itersz!Save snapshot interval iterationsz--train-jsonz#Filename of train label data (json)z--valid-jsonz(Filename of validation label data (json)z--model-modulezBmodel defined module (default: espnet.nets.xxx_backend.e2e_st:E2E)z
--ctc_typebuiltin)r   gtnctccudnnctcz-Type of CTC implementation to calculate loss.z
--mtlalphag        zjMultitask learning coefficient, alpha:                                 alpha*ctc_loss + (1-alpha)*att_lossz--asr-weightzMultitask learning coefficient for ASR task, weight:  asr_weight*(alpha*ctc_loss + (1-alpha)*att_loss) + (1-asr_weight-mt_weight)*st_lossz--mt-weightzMultitask learning coefficient for MT task, weight:                                 mt_weight*mt_loss + (1-mt_weight-asr_weight)*st_lossz--lsm-weightzLabel smoothing weightz--report-cerF
store_truezCompute CER on development set)r
   actionr	   z--report-werzCompute WER on development setz--report-bleuzCompute BLEU on development setz--nbestzOutput N-best hypothesesz--beam-size   z	Beam sizez	--penaltyzIncertion penaltyz--maxlenratiozInput length ratio to obtain max output length.
                        If maxlenratio=0.0 (default), it uses a end-detect function
                        to automatically find maximum hypothesis lengthsz--minlenratioz.Input length ratio to obtain min output lengthz--rnnlmzRNNLM model file to readz--rnnlm-confzRNNLM model config file to readz--lm-weightzRNNLM weight.z--sym-spacez<space>zSpace symbolz--sym-blank<blank>zBlank symbolz--sortagradzFHow many epochs to use sortagrad for. 0 = deactivated, -1 = all epochsz--batch-countautozKHow to count batch_size. The default (auto) will find how to count by args.z--batch-sizez--batch-seqsz-bz*Maximum seqs in a minibatch (0 to disable)z--batch-binsz*Maximum bins in a minibatch (0 to disable)z--batch-frames-inz2Maximum input frames in a minibatch (0 to disable)z--batch-frames-outz3Maximum output frames in a minibatch (0 to disable)z--batch-frames-inoutz9Maximum input+output frames in a minibatch (0 to disable)z--maxlen-inz--batch-seq-maxlen-ini   MLzPWhen --batch-count=seq, batch size is reduced if the input sequence length > ML.)r
   r   metavarr	   z--maxlen-outz--batch-seq-maxlen-out   zPWhen --batch-count=seq, batch size is reduced if the output sequence length > MLz--n-iter-processeszNumber of processes of iteratorz--preprocess-confz-The configuration file for the pre-processing)r   r
   r   r	   z--optadadelta)r)   adamnoam	Optimizerz--accum-gradzNumber of gradient accumurationz--epsg:0yE>zEpsilon constant for optimizerz--eps-decayg{Gz?zDecaying ratio of epsilonz--lrgMbP?zLearning rate for optimizerz
--lr-decayg      ?zDecaying ratio of learning ratez--weight-decayzWeight decay ratioz--criterionacclossz"Criterion to perform epsilon decayz--thresholdg-C6?zThreshold to stop iterationz--epochsz-e   zMaximum number of epochsz--early-stop-criterionzvalidation/main/accz=Value to monitor to trigger an early stopping of the trainingz
--patience   zINumber of epochs to wait without improvement before stopping the trainingz--grad-clip   zGradient norm threshold to clipz--num-save-attentionz*Number of samples of attention to be savedz--num-save-ctcz0Number of samples of CTC probability to be savedz--grad-noisezFThe flag to switch to use noise injection to gradients during trainingz--context-residualzHThe flag to switch to use context vector residual in the decoder networkz
--enc-initz,Pre-trained ASR model to initialize encoder.z--enc-init-modszenc.enc.c                        fdd  dD S )Nc                       g | ]
} d krt |qS r   str.0mods G/home/ubuntu/.local/lib/python3.10/site-packages/espnet/bin/st_train.py
<listcomp>|      0get_parser.<locals>.<lambda>.<locals>.<listcomp>,splitr:   r<   r:   r=   <lambda>|      zget_parser.<locals>.<lambda>z<List of encoder modules to initialize, separated by a comma.z
--dec-initz6Pre-trained ASR, MT or LM model to initialize decoder.z--dec-init-modsz
att., dec.c                    r2   )Nc                    r3   r4   r5   r7   r:   r<   r=   r>     r?   r@   rA   rB   r:   r<   r:   r=   rD     rE   z<List of decoder modules to initialize, separated by a comma.z--multilingualzPrepend target language ID to the source sentence.  Both source/target language IDs must be prepend in the pre-processing stage.z--replace-sosz}Replace <sos> in the decoder with a target language ID                               (the first token in the target sequence)z--stats-filez,The stats file for the feature normalizationz--apply-uttmvnz2Apply utterance level mean variance normalization.z--uttmvn-norm-meansz--uttmvn-norm-varsz
--fbank-fsi>  z5The sample frequency used for the mel-fbank creation.z--n-melsP   z!The number of mel-frequency bins.z--fbank-fminz--fbank-fmax)configargparseArgumentParserYAMLConfigFileParserArgumentDefaultsHelpFormatteraddadd_argumentintr6   floatr   r   )parserr   r<   r<   r=   
get_parser   s  		
rP   c              	   C   s  t  }|| \}}|jdkr|jdkrtd|j d|jdkr0|jdv r0td|j ddd	lm} |j	d
u rCd|j d }n|j	}||}|
| || }||_	d|j	v r_d|_d|j	v rgd|_t|_|jdkrxtjtjdd ntjtjdd td |jd
u rtjd}|d
urt|d}n.td ztjddgtjtjd}	W n tjtfy   d}Y nw t|	j dd }||_n|jdkrt d |j}t!d|  t!dtjdd  t!d |j"  t#"|j" t$j#"|j" |j%d
ur;t&|j%d!}
|
' }W d
   n	1 s w   Y  d"d# |D }|(dd$ |)d% ||_*nd
|_*t!d&|j  |jdkrXdd'l+m,} || d
S td())zRun the main training function.r   r   z/chainer backend does not support --train-dtype z.Use --dtype float32.r   )r   r   r   r   r   z--train-dtype z" does not support the CPU backend.)dynamic_importNzespnet.nets.z_backend.e2e_st:E2Echainer_backendpytorch_backendr   z>%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s)levelformatzSkip DEBUG/INFO messagesCUDA_VISIBLE_DEVICESrA   z CUDA_VISIBLE_DEVICES is not set.z
nvidia-smiz-L)stdoutstderr
r   zsThere are some bugs with multi-GPU processing in PyTorch 1.2+ (see https://github.com/pytorch/pytorch/issues/21108)zngpu: zpython path = 
PYTHONPATHz(None)zrandom seed = %drbc                 S   s    g | ]}| d dd qS )zutf-8 r   )decoderC   )r8   entryr<   r<   r=   r>     s     zmain.<locals>.<listcomp>r$   z<eos>z
backend = )trainzOnly pytorch are supported.)-rP   parse_known_argsbackendtrain_dtypeNotImplementedErrorngpu
ValueErrorespnet.utils.dynamic_importrQ   model_moduleadd_arguments
parse_argsr   versionverboseloggingbasicConfigINFOWARNwarningosenvirongetlenrC   
subprocessrunPIPECalledProcessErrorFileNotFoundErrorrX   r]   debuginfoseedrandomnpdictopen	readlinesinsertappend	char_listespnet.st.pytorch_backend.str_   )cmd_argsrO   args_rQ   rg   model_classcvdrd   pf
dictionaryr   r_   r<   r<   r=   main  s   












r   __main__r   )NT)__doc__rl   rq   r}   ru   sysrG   numpyr~   espnetr   espnet.utils.cli_utilsr   espnet.utils.training.batchfyr   rP   r   __name__argvr<   r<   r<   r=   <module>   s&   
   $h