o
    iK:                     @   s   d Z ddlZddlZddlZddlZddlZddlZddlZddl	m
Z
 ddlmZ ddlmZ dddZd	d
 ZedkrHeejdd  dS dS )z1Neural machine translation model training script.    N)__version__)	strtobool)BATCH_COUNT_CHOICESTc                 C   s  | du rt jdt jt jd} | jdddd | jddd	d | jd
ddd | jddtdd | jddg ddd | jddtddgdd | jdt|dd | jddtdd | jd|d d! | jd"dtd#d | jd$td%d& | jd'd(d)d*d+d, | jd-d.td/d0d1 | jd2d3d4td5d | jd6dtd*d7d8 | jd9d:td;d | jd<d4td=d | jd>tdd?d1 | jd@tddAd1 | jdBtddCd1 | jdDdEtdFd | jdGddHdIdJ | jdKtddLd1 | jdMtdNdOd1 | jdPdEtdQd | jdRdEtdSd | jdTdEtdUd | jdVtddWd1 | jdXtddYd1 | jdZdEtd[d | jd\d]td^d | jd_d`tdad | jdbd4td*dcd8 | jdddet	dfd | jdgdhdid4tdjd | jdkd4tdld | jdmd4tdnd | jdod4tdpd | jdqd4tdrd | jdsdtd:tdudvdw | jdxdyd:tdudzdw | jd{d4td|d | jd}d~tg ddd | jddtdd | jddtdd | jddtdd | jddtdd | jddtdd | jddEtdd | jddtddgdd | jddtdd | jdddtdd | jddtd*dd8 | jddtd*dd8 | jddtdd | jddtdd | jddt
d*dd8 | jddt
d*dd8 | jddt
d*dd8 | jddtd*dd8 | jdddd dd | jddtd*dd8 | jdddd dd | jddt
dd | jddt
dd | S )zGet default arguments.NzOTrain a neural machine translation (NMT) model on one CPU, one or multiple GPUs)descriptionconfig_file_parser_classformatter_classz--configTzconfig file path)is_config_filehelpz	--config2zCsecond config file path that overwrites the settings in `--config`.z	--config3zRthird config file path that overwrites the settings in `--config` and `--config2`.z--ngpuz5Number of GPUs. If not given, use all visible devices)defaulttyper	   z--train-dtypefloat32)float16r   float64O0O1O2O3zData type for training (only pytorch backend). O0,O1,.. flags require apex. See https://nvidia.github.io/apex/amp.html#opt-levels)r
   choicesr	   z	--backendchainerpytorchzBackend library)r
   r   r   r	   z--outdirzOutput directory)r   requiredr	   z--debugmode   	Debugmodez--dictz&Dictionary for source/target languages)r   r	   z--seedzRandom seedz
--debugdirzOutput directory for debugging)r   r	   z--resumez-r ?z!Resume the training from snapshot)r
   nargsr	   z--minibatchesz-Nz-1z&Process only N minibatches (for debug))r   r
   r	   z	--verbosez-Vr   zVerbose optionz--tensorboard-dirzTensorboard log dir path)r
   r   r   r	   z--report-interval-itersd   zReport interval iterationsz--save-interval-itersz!Save snapshot interval iterationsz--train-jsonz#Filename of train label data (json)z--valid-jsonz(Filename of validation label data (json)z--model-modulezBmodel defined module (default: espnet.nets.xxx_backend.e2e_mt:E2E)z--lsm-weightg        zLabel smoothing weightz--report-bleu
store_truezCompute BLEU on development set)r
   actionr	   z--nbestzOutput N-best hypothesesz--beam-size   z	Beam sizez	--penaltyzIncertion penaltyz--maxlenratiozInput length ratio to obtain max output length.
                        If maxlenratio=0.0 (default), it uses a end-detect function
                        to automatically find maximum hypothesis lengthsz--minlenratioz.Input length ratio to obtain min output lengthz--rnnlmzRNNLM model file to readz--rnnlm-confzRNNLM model config file to readz--lm-weightzRNNLM weight.z--sym-spacez<space>zSpace symbolz--sym-blank<blank>zBlank symbolz--sortagradzFHow many epochs to use sortagrad for. 0 = deactivated, -1 = all epochsz--batch-countautozKHow to count batch_size. The default (auto) will find how to count by args.z--batch-sizez--batch-seqsz-bz*Maximum seqs in a minibatch (0 to disable)z--batch-binsz*Maximum bins in a minibatch (0 to disable)z--batch-frames-inz2Maximum input frames in a minibatch (0 to disable)z--batch-frames-outz3Maximum output frames in a minibatch (0 to disable)z--batch-frames-inoutz9Maximum input+output frames in a minibatch (0 to disable)z--maxlen-inz--batch-seq-maxlen-inMLzPWhen --batch-count=seq, batch size is reduced if the input sequence length > ML.)r
   r   metavarr	   z--maxlen-outz--batch-seq-maxlen-outzPWhen --batch-count=seq, batch size is reduced if the output sequence length > MLz--n-iter-processeszNumber of processes of iteratorz--optadadelta)r$   adamnoam	Optimizerz--accum-gradzNumber of gradient accumurationz--epsg:0yE>zEpsilon constant for optimizerz--eps-decayg{Gz?zDecaying ratio of epsilonz--lrgMbP?zLearning rate for optimizerz
--lr-decayg      ?zDecaying ratio of learning ratez--weight-decayzWeight decay ratioz--criterionacclossz"Criterion to perform epsilon decayz--thresholdg-C6?zThreshold to stop iterationz--epochsz-e   zMaximum number of epochsz--early-stop-criterionzvalidation/main/accz=Value to monitor to trigger an early stopping of the trainingz
--patience   zINumber of epochs to wait without improvement before stopping the trainingz--grad-clip   zGradient norm threshold to clipz--num-save-attentionz*Number of samples of attention to be savedz--context-residualFzHThe flag to switch to use context vector residual in the decoder networkz--tie-src-tgt-embeddingz8Tie parameters of source embedding and target embedding.z--tie-classifierz?Tie parameters of target embedding and output projection layer.z
--enc-initz,Pre-trained ASR model to initialize encoder.z--enc-init-modszenc.enc.c                        fdd  dD S )Nc                       g | ]
} d krt |qS r   str.0mods G/home/ubuntu/.local/lib/python3.10/site-packages/espnet/bin/mt_train.py
<listcomp>O      0get_parser.<locals>.<lambda>.<locals>.<listcomp>,splitr5   r7   r5   r8   <lambda>O      zget_parser.<locals>.<lambda>z<List of encoder modules to initialize, separated by a comma.z
--dec-initz6Pre-trained ASR, MT or LM model to initialize decoder.z--dec-init-modsz
att., dec.c                    r-   )Nc                    r.   r/   r0   r2   r5   r7   r8   r9   \  r:   r;   r<   r=   r5   r7   r5   r8   r?   \  r@   z<List of decoder modules to initialize, separated by a comma.z--multilingualzPrepend target language ID to the source sentence. Both source/target language IDs must be prepend in the pre-processing stage.z--replace-sosz_Replace <sos> in the decoder with a target language ID (the first token in the target sequence))configargparseArgumentParserYAMLConfigFileParserArgumentDefaultsHelpFormatteraddadd_argumentintr1   floatr   r   )parserr   r7   r7   r8   
get_parser   s|  		
rJ   c              	   C   s  t  }|| \}}|jdkr|jdkrtd|j d|jdkr0|jdv r0td|j ddd	lm} |j	d
u rCd|j d }n|j	}||}|
| || }||_	d|j	v r_d|_d|j	v rgd|_t|_|jdkrxtjtjdd ntjtjdd td |jd
u rtjd}|d
urt|d}n.td ztjddgtjtjd}	W n tjtfy   d}Y nw t|	j dd }||_n|jdkrt d |j}t!d|  t!dtjdd  t!d |j"  t#"|j" t$j#"|j" |j%d
ur;t&|j%d!}
|
' }W d
   n	1 s w   Y  d"d# |D }|(dd$ |)d% ||_*nd
|_*t!d&|j  |jdkrXdd'l+m,} || d
S td())zRun the main training function.r   r   z/chainer backend does not support --train-dtype z.Use --dtype float32.r   )r   r   r   r   r   z--train-dtype z" does not support the CPU backend.)dynamic_importNzespnet.nets.z_backend.e2e_mt:E2Echainer_backendpytorch_backendr   z>%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s)levelformatzSkip DEBUG/INFO messagesCUDA_VISIBLE_DEVICESr<   z CUDA_VISIBLE_DEVICES is not set.z
nvidia-smiz-L)stdoutstderr
r   zsThere are some bugs with multi-GPU processing in PyTorch 1.2+ (see https://github.com/pytorch/pytorch/issues/21108)zngpu: zpython path = 
PYTHONPATHz(None)zrandom seed = %drbc                 S   s    g | ]}| d dd qS )zutf-8 r   )decoder>   )r3   entryr7   r7   r8   r9     s     zmain.<locals>.<listcomp>r    z<eos>z
backend = )trainzOnly pytorch are supported.)-rJ   parse_known_argsbackendtrain_dtypeNotImplementedErrorngpu
ValueErrorespnet.utils.dynamic_importrK   model_moduleadd_arguments
parse_argsr   versionverboseloggingbasicConfigINFOWARNwarningosenvirongetlenr>   
subprocessrunPIPECalledProcessErrorFileNotFoundErrorrR   rW   debuginfoseedrandomnpdictopen	readlinesinsertappend	char_listespnet.mt.pytorch_backend.mtrY   )cmd_argsrI   args_rK   ra   model_classcvdr^   pf
dictionaryr~   rY   r7   r7   r8   mainr  s   












r   __main__r   )NT)__doc__rf   rk   rw   ro   sysrA   numpyrx   espnetr   espnet.utils.cli_utilsr   espnet.utils.training.batchfyr   rJ   r   __name__argvr7   r7   r7   r8   <module>   s$   
  \h