o
    i4                  
   @   sT  d dl Z d dlZd dlmZmZmZmZmZmZ d dl	Z
d dlZd dlmZmZ d dlmZ d dlmZ d dlmZmZmZmZmZ d dlmZ d dlmZ d d	lmZ d d
lm Z  d dl!m"Z" d dl#m$Z$ d dl%m&Z& d dl'm(Z( d dl)m*Z* d dl+m,Z, d dl-m.Z. d dl/m0Z0 d dl1m2Z2 d dl3m4Z4 d dl5m6Z6 d dl7m8Z8 d dl9m:Z: d dl;m<Z< d dl=m>Z> d dl?m@Z@ d dlAmBZB d dlCmDZD d dlEmFZF d dlGmHZH d dlImJZJmKZKmLZL e>d eMe6d!e(d"d#ZNe>d$eMe2e0d%e.dd&d'ZOe>d(eMee$e e&e"ed)ed*d+ZPe>d,eMe,d-e*dd&d'ZQe>d.eMeeeeeed/ed*d+ZRG d0d1 d1e8ZSdS )2    N)Callable
CollectionDictListOptionalTuple)check_argument_typescheck_return_type)
AbsDecoder)
RNNDecoder)&DynamicConvolution2DTransformerDecoder$DynamicConvolutionTransformerDecoder*LightweightConvolution2DTransformerDecoder(LightweightConvolutionTransformerDecoderTransformerDecoder)
AbsEncoder)BranchformerEncoder)ConformerEncoder)!ContextualBlockTransformerEncoder)
RNNEncoder)TransformerEncoder)VGGRNNEncoder)AbsFrontend)AbsPostEncoder)"HuggingFaceTransformersPostEncoder)AbsPreEncoder)LinearProjection)LightweightSincConvs)ESPnetMTModel)	Embedding)AbsTask)g2p_choices)
initialize)ClassChoices)CommonCollateFn) MutliTokenizerCommonPreprocessor)Trainer)get_default_kwargs)NestedDictAction)int_or_nonestr2boolstr_or_nonefrontend)embedr-   )nameclasses
type_checkdefault
preencoder)sinclinearT)r.   r/   r0   r1   optionalencoder)	conformertransformercontextual_block_transformervgg_rnnrnnbranchformerr;   )r/   r0   r1   postencoder)hugging_face_transformersdecoder)r8   lightweight_convlightweight_conv2ddynamic_convdynamic_conv2dr;   c                   @   s6  e Zd ZU dZeed< eeee	e
gZeZedejfddZedejdedeeeeeeejf f  geee eeejf f f fd	d
Zedejdedeeeeeej f geeejf f  fddZ!e	ddededeedf fddZ"e	ddededeedf fddZ#edejde$fddZ%dS )MTTask   num_optimizersparserc                 C   sR  |j dd}|d}|ddg7 }|jdtd dd |jd	td d
d |jddd d dg dd |jdtd dd |jdtttdd |j dd}|jdtddd |jdt	dg ddd |jdt	dg dd d |jd!td d"d |jd#td d$d |jd%td&d' |jd(tg d)d d*d+ |jd,tt
d d-d+ | jD ]}|| qd S ).NzTask related)descriptionrequiredsrc_token_list
token_listz--token_listz4A text mapping int-id to token (for target language))typer1   helpz--src_token_listz4A text mapping int-id to token (for source language)z--initc                 S   s   t |  S )N)r+   lower)x rP   D/home/ubuntu/.local/lib/python3.10/site-packages/espnet2/tasks/mt.py<lambda>   s    z+MTTask.add_task_arguments.<locals>.<lambda>zThe initialization method)chainerxavier_uniformxavier_normalkaiming_uniformkaiming_normalN)rL   r1   rM   choicesz--input_sizez,The number of input dimension of the featurez--model_confz&The keyword arguments for model class.)actionr1   rM   zPreprocess relatedz--use_preprocessorTz"Apply preprocessing to data or notz--token_typebpe)rZ   charwordphnz>The target text will be tokenized in the specified level token)rL   r1   rX   rM   z--src_token_typez>The source text will be tokenized in the specified level tokenz
--bpemodelz5The model file of sentencepiece (for target language)z--src_bpemodelz5The model file of sentencepiece (for source language)z--non_linguistic_symbolsz non_linguistic_symbols file path)rL   rM   z	--cleaner)Ntacotronjaconv
vietnamesezApply text cleaning)rL   rX   r1   rM   z--g2pz&Specify g2p method if --token_type=phn)add_argument_groupget_defaultadd_argumentr+   r)   r(   r'   r   r*   strr!   class_choices_listadd_arguments)clsrG   grouprI   class_choicesrP   rP   rQ   add_task_arguments|   s   

zMTTask.add_task_argumentsargstrainreturnc                 C   s   t  sJ tdddS )Ng        )float_pad_valueint_pad_value)r   r$   )rg   rk   rl   rP   rP   rQ   build_collate_fn   s   
zMTTask.build_collate_fnc              
   C   s`   t  sJ |jr&t||j|jg|j|jg|j|jg|j	|j
|jddgd}nd }t|s.J |S )Ntextsrc_text)rl   
token_typerK   bpemodelnon_linguistic_symbolstext_cleanerg2p_type	text_name)r   use_preprocessorr%   rt   src_token_typerK   rJ   ru   src_bpemodelrv   cleanerg2pr	   )rg   rk   rl   retvalrP   rP   rQ   build_preprocess_fn   s   



zMTTask.build_preprocess_fnTF	inference.c                 C   s   |sd}|S d}|S )N)rs   rr   )rs   rP   rg   rl   r   r   rP   rP   rQ   required_data_names  s
   zMTTask.required_data_namesc                 C   s   |sd}nd}t |sJ |S )NrP   )r	   r   rP   rP   rQ   optional_data_names  s
   zMTTask.optional_data_namesc                 C   s  t  sJ t|jtr/t|jdd}dd |D }W d    n1 s$w   Y  t||_nt|jttfr=t|j}ntdt|}t	
d|  |jd urt|jtr|t|jdd}dd |D }W d    n1 sqw   Y  t||_nt|jttfrt|j}ntdt|}t	
d|  nd	\}}|jd u rt|j}|dd
|i|j}| }	nd |_i |_d }|j}	t|dd d urt|j}
|
di |j}| }	nd }t|j}|dd
|	i|j}| }t|dd d urt|j}|dd
|i|j}| }nd }t|j}|d||d|j}td|||||||||d	|j }|j!d urDt"||j! t#|sKJ |S )Nzutf-8)encodingc                 S      g | ]}|  qS rP   rstrip.0linerP   rP   rQ   
<listcomp>#      z&MTTask.build_model.<locals>.<listcomp>ztoken_list must be str or listzVocabulary size: c                 S   r   rP   r   r   rP   rP   rQ   r   1  r   zSource vocabulary size: )NN
input_sizer2   r=   )
vocab_sizeencoder_output_size)	r   src_vocab_sizer,   r2   r6   r=   r?   rK   rJ   rP   )$r   
isinstancerK   rd   openlisttupleRuntimeErrorlenlogginginforJ   r   frontend_choices	get_classr,   frontend_confoutput_sizegetattrpreencoder_choicesr2   preencoder_confencoder_choicesr6   encoder_confpostencoder_choicesr=   postencoder_confdecoder_choicesr?   decoder_confr   
model_confinitr"   r	   )rg   rk   frK   r   rJ   r   frontend_classr,   r   preencoder_classr2   encoder_classr6   r   postencoder_classr=   decoder_classr?   modelrP   rP   rQ   build_model  s   






zMTTask.build_modelN)TF)&__name__
__module____qualname__rF   int__annotations__r   r   r   r   r   re   r&   trainerclassmethodargparseArgumentParserrj   	Namespaceboolr   r   r   rd   r   npndarrayr   torchTensorrq   r   arrayr   r   r   r   r   rP   rP   rP   rQ   rD   g   sf   
 k
&



rD   )Tr   r   typingr   r   r   r   r   r   numpyr   r   	typeguardr   r	   espnet2.asr.decoder.abs_decoderr
   espnet2.asr.decoder.rnn_decoderr   'espnet2.asr.decoder.transformer_decoderr   r   r   r   r   espnet2.asr.encoder.abs_encoderr   (espnet2.asr.encoder.branchformer_encoderr   %espnet2.asr.encoder.conformer_encoderr   8espnet2.asr.encoder.contextual_block_transformer_encoderr   espnet2.asr.encoder.rnn_encoderr   'espnet2.asr.encoder.transformer_encoderr   #espnet2.asr.encoder.vgg_rnn_encoderr   !espnet2.asr.frontend.abs_frontendr   'espnet2.asr.postencoder.abs_postencoderr   =espnet2.asr.postencoder.hugging_face_transformers_postencoderr   %espnet2.asr.preencoder.abs_preencoderr   espnet2.asr.preencoder.linearr   espnet2.asr.preencoder.sincr   espnet2.mt.espnet_modelr   espnet2.mt.frontend.embeddingr   espnet2.tasks.abs_taskr    espnet2.text.phoneme_tokenizerr!   espnet2.torch_utils.initializer"   espnet2.train.class_choicesr#   espnet2.train.collate_fnr$   espnet2.train.preprocessorr%   espnet2.train.trainerr&    espnet2.utils.get_default_kwargsr'    espnet2.utils.nested_dict_actionr(   espnet2.utils.typesr)   r*   r+   dictr   r   r   r   r   rD   rP   rP   rP   rQ   <module>   s     
	