o
    i*C                  6   @   s  d dl Z d dlZd dlZd dlmZ d dlmZmZmZm	Z	m
Z
mZ d dlZd dlZd dlmZmZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZ d dl m!Z! d dl"m#Z#m$Z$m%Z% d dl&m'Z' d dl(m)Z)m*Z* d dl+m,Z, d dl-m.Z. d dl/m0Z0 d dl1m2Z2 G dd dZ3de4de5de5de6de4de6de6de6de5de5d e5d!e6d"e6d#ee6e4f d$e	e
e4e4e4f  d%ee4 d&ee4 d'ee4 d(ee4 d)ee4 d*ee4 d+ee4 d,ee4 d-ee4 d.ee4 d/ee4 d0e7f6d1d2Z8d3d4 Z9d8d5d6Z:e;d7kre:  dS dS )9    N)Path)AnyListOptionalSequenceTupleUnion)check_argument_typescheck_return_type)DatadirWriter)LMTask)MTTask)build_tokenizer)TokenIDConverter)	to_device)set_all_random_seed)config_argparse)str2boolstr2triple_strstr_or_none)BatchBeamSearch)
BeamSearch
Hypothesis)TooShortUttError)BatchScorerInterface)LengthBonus)get_commandline_argsc                %   @   s  e Zd ZdZ																	
		d'deeef deeef deeef deeef dedeeef dedededededededededededef$ddZ	e
 dee
jejf d eeee ee ee ef  fd!d"Ze	d(d#ee d$ee fd%d&ZdS ))	Text2TextzText2Text class

    Examples:
        >>> text2text = Text2Text("mt_config.yml", "mt.pth")
        >>> text2text(audio)
        [(text, token, token_int, hypothesis object), ...]

    Nfullcpu           float32         ??mt_train_configmt_model_filelm_train_configlm_filengram_scorer
ngram_file
token_typebpemodeldevicemaxlenratiominlenratio
batch_sizedtype	beam_size	lm_weightngram_weightpenaltynbestc           #   
   C   sl  t  sJ i }t|||	\}}|jtt|d  |j}|j}|j	|t
t|d |d ur>t|||	\}}|j|d< |d ur^|dkrRddlm} |||}nddlm} |||}nd }||d< td	|||d
}t||||j|jt||dd}|dkrdd |j D }t|dkrt|_td n	td| d |j|	tt|d  | D ]} t| tjjr| j|	tt|d  qtd|  td|	 d|  |d u r|j }|d u r|j!}|d u rd }!n|dkr|d urt"||d}!nd }!nt"|d}!t#|d}"td|!  || _$|| _%|"| _&|!| _'|| _(|
| _)|| _*|	| _+|| _,|| _-d S )N)r2   )decoderlength_bonuslmr   r   )NgramFullScorer)NgramPartScorerngramr$   )r8   r:   r=   r9   )r3   weightsscorerssoseos
vocab_size
token_listpre_beam_score_keyr!   c                 S   s   g | ]\}}t |ts|qS  )
isinstancer   .0kvrE   rE   L/home/ubuntu/.local/lib/python3.10/site-packages/espnet2/bin/mt_inference.py
<listcomp>t   s    z&Text2Text.__init__.<locals>.<listcomp>z+BatchBeamSearch implementation is selected.zAs non-batch scorers z2 are found, fall back to non-batch implementation.)r.   r2   zBeam_search: zDecoding device=z, dtype=bpe)r,   r-   )r,   )rC   zText tokenizer: ).r	   r   build_model_from_filetogetattrtorchevalr8   rC   updater   lenr   r:   espnet.nets.scorers.ngramr;   r<   dictr   r@   rA   full_scorersitemsr   	__class__logginginfowarningvaluesrF   nnModuler,   r-   r   r   mt_modelmt_train_args	converter	tokenizerbeam_searchr/   r0   r.   r2   r7   )#selfr&   r'   r(   r)   r*   r+   r,   r-   r.   r/   r0   r1   r2   r3   r4   r5   r6   r7   r?   r`   ra   r8   rC   r:   lm_train_argsr;   r=   r<   r>   rd   	non_batchscorerrc   rb   rE   rE   rK   __init__'   s   








zText2Text.__init__src_textreturnc                    sn  t  sJ t|tjrt|}|dtj}|j	dgtj|
dd}||d}t| jd} jjdi |\}}t|dksIJ t| j|d  j jd}|d j }g }|D ]N}t|tsmJ t||j }	tt fdd	|	}	tt fd
d	|	}	ttdd	 |	}	 j|	}
 jdur j|
}nd}|||
|	|f q`t|sJ |S )z}Inference

        Args:
            data: Input text data
        Returns:
            text, token, token_int, hyp

        r   r!   )r2   
fill_value)rj   src_text_lengths)r.   )xr/   r0   Nc                       |  j jkS N)r`   r@   rn   re   rE   rK   <lambda>       z$Text2Text.__call__.<locals>.<lambda>c                    ro   rp   )r`   rA   rq   rr   rE   rK   rs      rt   c                 S   s   | dkS )Nr   rE   rq   rE   rE   rK   rs          rE   ) r	   rF   npndarrayrQ   tensor	unsqueezerO   longnew_fullsizer   r.   r`   encoderT   rd   r/   r0   r7   r   typeyseqtolistlistfilterrb   
ids2tokensrc   tokens2textappendr
   )re   rj   lengthsbatchenc_
nbest_hypsresultshyp	token_inttokentextrE   rr   rK   __call__   s6   




zText2Text.__call__	model_tagkwargsc                 K   s^   | dur(zddl m} W n ty   td  w | }|jdi ||  tdi |S )a  Build Text2Text instance from the pretrained model.

        Args:
            model_tag (Optional[str]): Model tag of the pretrained models.
                Currently, the tags of espnet_model_zoo are supported.
        Returns:
            Text2Text: Text2Text instance.

        Nr   )ModelDownloaderzZ`espnet_model_zoo` is not installed. Please install via `pip install -U espnet_model_zoo`.rE   )espnet_model_zoo.downloaderr   ImportErrorrZ   errorrS   download_and_unpackr   )r   r   r   drE   rE   rK   from_pretrained   s   zText2Text.from_pretrained)NNNNr   NNNr   r    r    r!   r"   r#   r$   r%   r    r!   rp   )__name__
__module____qualname____doc__r   r   strfloatintri   rQ   no_gradTensorrv   rw   r   r   r   r   r   staticmethodr   r   rE   rE   rE   rK   r      s    




	

~>r   
output_dirr/   r0   r1   r2   r3   ngpuseedr4   r5   r6   r7   num_workers	log_leveldata_path_and_name_and_typekey_filer&   r'   r(   r)   word_lm_train_configword_lm_filer+   r   r,   r-   allow_variable_data_keysc           ,      C   s  t  sJ |dkrtd|d urtd|dkrtdtj|dd |dkr+d}nd}t| td.i d	|d
|d|d|d|d|d|d|d|d|d|d|d|d|	d|
d|}tjd.d|i|}tj	|||||t
|jdt|jd|dd	}t| }|D ]\} }!t|!tsJ t|!tdd | D sJ | ttt|! }"t| |"ksJ t|  d|" d d! |! D }!z	|d.i |!}#W n1 ty	 }$ z$td"|  d#|$  td$i i g d%}%d#d&gd'g|%gg| }#W Y d }$~$nd }$~$ww | d( }&ttd|d |#D ]9\}'\}(})}*}%||' d) }+d#|)|+d* |&< d#tt|*|+d+ |&< t|%j|+d, |&< |(d urP|(|+d- |&< qqW d    d S 1 s_w   Y  d S )/Nr!   z!batch decoding is not implementedzWord LM is not implementedz%only single GPU decoding is supportedz>%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s)levelformatcudar   r&   r'   r(   r)   r+   r,   r-   r.   r/   r0   r2   r3   r4   r5   r6   r7   r   FT)r2   r1   r   r   preprocess_fn
collate_fnr   	inferencec                 s   s    | ]}t |tV  qd S rp   )rF   r   )rH   srE   rE   rK   	<genexpr>^  s    zinference.<locals>.<genexpr>z != c                 S   s$   i | ]\}}| d s||d qS )_lengthsr   )endswithrG   rE   rE   rK   
<dictcomp>a  s   $ zinference.<locals>.<dictcomp>z
Utterance  r    )scorescoresstatesr   z<space>   r   
best_recogr   r   r   r   rE   )r	   NotImplementedErrorrZ   basicConfigr   rV   r   r   r   build_streaming_iteratorbuild_preprocess_fnra   build_collate_fnr   rF   r~   allrT   nextiterr]   rX   r   r\   r   ziprangejoinmapr   r   ),r   r/   r0   r1   r2   r3   r   r   r4   r5   r6   r7   r   r   r   r   r&   r'   r(   r)   r   r   r+   r   r,   r-   r   r.   text2text_kwargs	text2textloaderwriterkeysr   _bsr   er   keynr   r   r   ibest_writerrE   rE   rK   r     s   
	

""$
$r   c                  C   s  t jdtjd} | jddd dddd	 | jd
tdd | jdtddd | jdtddd | jddg ddd | jdtddd | d}|jdtddd |jdt	d  |jd!t
d"d# | d$}|jd%td&d' |jd(td)d' |jd*td+d' |jd,td-d' |jd.td/d' |jd0td1d' |jd2td3d' |jd4td5d' | d6}|jd7tdd8d |jd9tdd:d |jd;td<d=d |jd>td?d@d |jdAtd?dBd |jdCtd?dDd |jdEtdFdGd |jdHtdIdJd | dK}|jdLt	d g dMdNd	 |jdOt	d dPd | S )QNzMT Decoding)descriptionformatter_classz--log_levelc                 S   s   |   S rp   )upperrq   rE   rE   rK   rs     ru   zget_parser.<locals>.<lambda>INFO)CRITICALERRORWARNINGr   DEBUGNOTSETzThe verbose level of logging)r~   defaultchoiceshelpz--output_dirT)r~   requiredz--ngpur   z(The number of gpus. 0 indicates CPU mode)r~   r   r   z--seedzRandom seedz--dtyper"   )float16r"   float64z	Data type)r   r   r   z--num_workersr!   z)The number of workers used for DataLoaderzInput data relatedz--data_path_and_name_and_typer   )r~   r   actionz
--key_file)r~   z--allow_variable_data_keysF)r~   r   zThe model configuration relatedz--mt_train_configzST training configuration)r~   r   z--mt_model_filezMT model parameter filez--lm_train_configzLM training configurationz	--lm_filezLM parameter filez--word_lm_train_configzWord LM training configurationz--word_lm_filezWord LM parameter filez--ngram_filezN-gram parameter filez--model_tagz[Pretrained model tag. If specify this option, *_train_config and *_file will be overwrittenzBeam-search relatedz--batch_sizezThe batch size for inferencez--nbestzOutput N-best hypothesesz--beam_sizer#   z	Beam sizez	--penaltyr    zInsertion penaltyz--maxlenratiozInput length ratio to obtain max output length. If maxlenratio=0.0 (default), it uses a end-detect function to automatically find maximum hypothesis lengths.If maxlenratio<0.0, its absolute value is interpretedas a constant max output lengthz--minlenratioz.Input length ratio to obtain min output lengthz--lm_weightr$   zRNNLM weightz--ngram_weightr%   zngram weightzText converter relatedz--token_type)charrM   NzHThe token type for ST model. If not given, refers from the training argsz
--bpemodelzLThe model path of sentencepiece. If not given, refers from the training args)r   ArgumentParserargparseArgumentDefaultsHelpFormatteradd_argumentr   r   add_argument_groupr   r   r   r   )parsergrouprE   rE   rK   
get_parserz  s   



r   c                 C   sF   t t tjd t }|| }t|}|dd  tdi | d S )N)fileconfigrE   )	printr   sysstderrr   
parse_argsvarspopr   )cmdr   argsr   rE   rE   rK   main  s   
r   __main__rp   )<r   rZ   r   pathlibr   typingr   r   r   r   r   r   numpyrv   rQ   	typeguardr	   r
   espnet2.fileio.datadir_writerr   espnet2.tasks.lmr   espnet2.tasks.mtr   espnet2.text.build_tokenizerr   espnet2.text.token_id_converterr    espnet2.torch_utils.device_funcsr   'espnet2.torch_utils.set_all_random_seedr   espnet2.utilsr   espnet2.utils.typesr   r   r   espnet.nets.batch_beam_searchr   espnet.nets.beam_searchr   r   3espnet.nets.pytorch_backend.transformer.subsamplingr   espnet.nets.scorer_interfacer    espnet.nets.scorers.length_bonusr   espnet.utils.cli_utilsr   r   r   r   r   boolr   r   r   r   rE   rE   rE   rK   <module>   s     f	


x 

	
