o
    i                     @   s(  d dl Z d dlZd dlmZmZmZmZmZmZ d dl	Z
d dlZd dlmZmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZ d dlm Z  d dl!m"Z" d dl#m$Z$ d dl%m&Z& d dl'm(Z( d dl)m*Z*m+Z+ ede,eededdZ-G dd deZ.dS )    N)Callable
CollectionDictListOptionalTuple)check_argument_typescheck_return_type)AbsLM)ESPnetLanguageModel)SequentialRNNLM)TransformerLM)AbsTask)g2p_choices)
initialize)ClassChoices)CommonCollateFn)CommonPreprocessor)Trainer)get_default_kwargs)NestedDictAction)str2boolstr_or_nonelm)seq_rnntransformerr   )classes
type_checkdefaultc                   @   s.  e Zd ZU dZeed< egZeZ	e
dejfddZe
dejdedeeeeeeejf f  geee eeejf f f fd	d
Ze
dejdedeeeeeejf geeejf f  fddZe
	ddededeedf fddZe
	ddededeedf fddZe
dejde fddZ!dS )LMTask   num_optimizersparserc                 C   s  t  sJ |jdd}|d}|dg7 }|jdtd dd |jdd	d
 d dg dd |jdtttdd |jdd}|jdtddd |jdt	dg ddd |jdtd dd |jdtdd |jdtg d d d!d" |jd#tt
d d$d" | jD ]}|| q|t|sJ |S )%NzTask related)descriptionrequired
token_listz--token_listzA text mapping int-id to token)typer   helpz--initc                 S   s   t |  S )N)r   lower)x r*   D/home/ubuntu/.local/lib/python3.10/site-packages/espnet2/tasks/lm.py<lambda>@   s    z+LMTask.add_task_arguments.<locals>.<lambda>zThe initialization method)chainerxavier_uniformxavier_normalkaiming_uniformkaiming_normalN)r&   r   r'   choicesz--model_confz&The keyword arguments for model class.)actionr   r'   zPreprocess relatedz--use_preprocessorTz"Apply preprocessing to data or notz--token_typebpe)r4   charword )r&   r   r2   r'   z
--bpemodelzThe model file fo sentencepiecez--non_linguistic_symbolsz non_linguistic_symbols file path)r&   r'   z	--cleaner)Ntacotronjaconv
vietnamesezApply text cleaning)r&   r2   r   r'   z--g2pz&Specify g2p method if --token_type=phn)r   add_argument_groupget_defaultadd_argumentr   r   r   r   r   strr   class_choices_listadd_argumentsr	   )clsr"   groupr$   class_choicesr*   r*   r+   add_task_arguments-   s   



zLMTask.add_task_argumentsargstrainreturnc                 C   s   t  sJ tddS )Nr   )int_pad_value)r   r   )rA   rE   rF   r*   r*   r+   build_collate_fn   s   

zLMTask.build_collate_fnc              	   C   sH   t  sJ |jrt||j|j|j|j|j|jd}nd }t	|s"J |S )N)rF   
token_typer%   bpemodeltext_cleanerg2p_typenon_linguistic_symbols)
r   use_preprocessorr   rJ   r%   rK   cleanerg2prN   r	   )rA   rE   rF   retvalr*   r*   r+   build_preprocess_fn   s   

zLMTask.build_preprocess_fnTF	inference.c                 C      d}|S )N)textr*   rA   rF   rT   rR   r*   r*   r+   required_data_names      zLMTask.required_data_namesc                 C   rU   )Nr*   r*   rW   r*   r*   r+   optional_data_names   rY   zLMTask.optional_data_namesc                 C   s   t  sJ t|jtr/t|jdd}dd |D }W d    n1 s$w   Y  | |_nt|jttfr=|j }ntdt	|}t
d|  t|j}|d	d|i|j}td	||d|j}|jd urst||j t|syJ |S )
Nzutf-8)encodingc                 S   s   g | ]}|  qS r*   )rstrip).0liner*   r*   r+   
<listcomp>   s    z&LMTask.build_model.<locals>.<listcomp>ztoken_list must be str or dictzVocabulary size: 
vocab_size)r   r`   r*   )r   
isinstancer%   r>   opencopytuplelistRuntimeErrorlenlogginginfo
lm_choices	get_classr   lm_confr   
model_confinitr   r	   )rA   rE   fr%   r`   lm_classr   modelr*   r*   r+   build_model   s$   

zLMTask.build_modelN)TF)"__name__
__module____qualname__r!   int__annotations__rj   r?   r   trainerclassmethodargparseArgumentParserrD   	Namespaceboolr   r   r   r>   r   npndarrayr   torchTensorrI   r   arrayrS   rX   rZ   r   rr   r*   r*   r*   r+   r   #   s\   
 U	&

r   )/rz   rh   typingr   r   r   r   r   r   numpyr~   r   	typeguardr   r	   espnet2.lm.abs_modelr
   espnet2.lm.espnet_modelr   espnet2.lm.seq_rnn_lmr   espnet2.lm.transformer_lmr   espnet2.tasks.abs_taskr   espnet2.text.phoneme_tokenizerr   espnet2.torch_utils.initializer   espnet2.train.class_choicesr   espnet2.train.collate_fnr   espnet2.train.preprocessorr   espnet2.train.trainerr    espnet2.utils.get_default_kwargsr    espnet2.utils.nested_dict_actionr   espnet2.utils.typesr   r   dictrj   r   r*   r*   r*   r+   <module>   s<     