o
    i5                     @   sr   d Z ddlZddlZddlZddlZddlZddlZddlm	Z	 dd Z
dd Zedkr7eejd	d  dS dS )
z4End-to-end speech recognition model decoding script.    N)	strtoboolc                  C   s  t jdt jt jd} | jdddd | jdddd | jd	dd
d | jdtddd | jddddd | jdtdddgdd | jdtddd | jdtddd | jddtdd d | jd!tdd"d | jd#td$d%d | jd&d'd'd(gd)d* | jd+td,d- | jd.tdd/d0 | jd1tdd2d0 | jd3td$d4d | jd5tddd6gd7d | jd8dtd9d: | jd;tdd<d | jd=tdd>d | jd?td@dAd | jdBtd@dCd | jdDtd@dEd | jdFtd@dGd | jdHtdIdJdK | jdLtddMd | jdNtdOg dPdQd | jdRtddSd | jdTtd6dUd | jdVtd6dWd | jdXtdYdZd | jd[td\d]d | jd^td6d_d | jd`t	daddbdc | jddtdedfd | jdgtd$dhd | jditd$djd | jdktd$dld | jdmtd$dnd | jdotd$dpd | jdqtdrdsd | jdttd$dud | jdvtdrdwd | jdxtdydzd{d | jd|td$d}d~gdd | jdtddd | jdtddd | jdtddd | jdtddd | jdtddd | jdtddd | jdddd | jdtdddgdd | jdt
ddd | jdt
ddd | S )zGet default arguments.zNTranscribe text from speech using a speech recognition model on one CPU or GPU)descriptionconfig_file_parser_classformatter_classz--configTzConfig file path)is_config_filehelpz	--config2zBSecond config file path that overwrites the settings in `--config`z	--config3zQThird config file path that overwrites the settings in `--config` and `--config2`z--ngpur   zNumber of GPUs)typedefaultr   z--dtype)float16float32float64r   z,Float precision (only available in --api v2))choicesr	   r   z	--backendchainerpytorchzBackend library)r   r	   r   r   z--debugmode   	Debugmodez--seedzRandom seedz	--verbosez-VzVerbose optionz--batchsizez9Batch size for beam search (0: means no batch processing)z--preprocess-confNz-The configuration file for the pre-processingz--apiv1v2zBeam search APIs v1: Default API. It only supports the ASRInterface.recognize method and DefaultRNNLM. v2: Experimental API. It supports any models that implements ScorerInterface.)r	   r   r   z--recog-jsonz#Filename of recognition data (json))r   r   z--result-labelz$Filename of result label data (json))r   requiredr   z--modelzModel file parameters to readz--model-confzModel config filez--num-spkrs   z Number of speakers in the speechz
--num-encsz Number of encoders in the model.)r	   r   r   z--nbestzOutput N-best hypothesesz--beam-sizez	Beam sizez	--penaltyg        zIncertion penaltyz--maxlenratioaS  Input length ratio to obtain max output length.
                        If maxlenratio=0.0 (default), it uses a end-detect function
                        to automatically find maximum hypothesis lengths.
                        If maxlenratio<0.0, its absolute value is interpreted
                        as a constant max output lengthz--minlenratioz.Input length ratio to obtain min output lengthz--ctc-weightzCTC weight in joint decodingz--weights-ctc-decappendzPctc weight assigned to each encoder during decoding.[in multi-encoder mode only])r   actionr   z--ctc-window-margina  Use CTC window with margin parameter to accelerate
                        CTC/attention decoding especially on GPU. Smaller magin
                        makes decoding faster, but may increase search errors.
                        If margin=0 (default), this function is disabledz--search-typer	   )r	   nsctsdalsdmaesa5  Type of beam search implementation to use during inference.
        Can be either: default beam search ("default"),
        N-Step Constrained beam search ("nsc"), Time-Synchronous Decoding ("tsd"),
        Alignment-Length Synchronous Decoding ("alsd") or
        modified Adaptive Expansion Search ("maes").z--nstepzpNumber of expansion steps allowed in NSC beam search or mAES
        (nstep > 0 for NSC and nstep > 1 for mAES).z--prefix-alphaz<Length prefix difference allowed in NSC beam search or mAES.z--max-sym-expz+Number of symbol expansions allowed in TSD.z--u-maxi  z)Length prefix difference allowed in ALSD.z--expansion-gammagffffff@z:Allowed logp difference for prune-by-value method in mAES.z--expansion-betazZNumber of additional candidates for expanded hypotheses
                selection in mAES.z--score-norm?z+Normalize final hypotheses' score by length)r   nargsr	   r   z--softmax-temperatureg      ?z'Penalization term for softmax function.z--rnnlmzRNNLM model file to readz--rnnlm-confzRNNLM model config file to readz--word-rnnlmzWord RNNLM model file to readz--word-rnnlm-confz$Word RNNLM model config file to readz--word-dictzWord list to readz--lm-weightg?zRNNLM weightz--ngram-modelzngram model file to readz--ngram-weightzngram weightz--ngram-scorerpart)fullr   a  if the ngram is set as a part scorer, similar with CTC scorer,
                ngram scorer only scores topK hypethesis.
                if the ngram is set as full scorer, ngram scorer scores all hypthesis
                the decoding speed of part scorer is musch faster than full onez--streaming-modewindowsegmentzrUse streaming recognizer for inference.
                        `--batchsize` must be set to 0 to enable this modez--streaming-window
   zWindow sizez--streaming-min-blank-durz Minimum blank duration thresholdz--streaming-onset-marginzOnset marginz--streaming-offset-marginzOffset marginz--maskctc-n-iterationszINumber of decoding iterations.For Mask CTC, set 0 to predict 1 mask/iter.z--maskctc-probability-thresholdg+?z$Threshold probability for CTC outputz--quantize-config*a  Config for dynamic quantization provided as a list of modules,
        separated by a comma. E.g.: --quantize-config=[Linear,LSTM,GRU].
        Each specified module should be an attribute of 'torch.nn', e.g.:
        torch.nn.Linear, torch.nn.LSTM, torch.nn.GRU, ...)r   r   z--quantize-dtypeqint8r
   zDtype for dynamic quantization.z--quantize-asr-modelFz(Apply dynamic quantization to ASR model.z--quantize-lm-modelz!Apply dynamic quantization to LM.)configargparseArgumentParserYAMLConfigFileParserArgumentDefaultsHelpFormatteraddadd_argumentintstrfloatr   bool)parser r0   H/home/ubuntu/.local/lib/python3.10/site-packages/espnet/bin/asr_recog.py
get_parser   s  


r2   c                 C   s  t  }|| } | jdkr| jdkrtd| j d| jdkr)tjtjdd n| jdkr7tjtj	dd ntjtj
dd td	 | jdkr|tjd
}|du rYtd n| jt|dkrmtd td | jdkr|td td tdtjdd  t| j tj| j td| j  | jdur| jdurtd td td| j  | jdkr'| jdkrddlm} ||  dS | jdkr#| jdkr| jdkrddl m!} ||  dS ddl"m} | jdkrt#d| j d||  dS | jdkrt#d| j dddl"m} ||  dS td | jdkrC| jdkr?ddl$m} ||  dS td!dS )"zRun the main decoding function.r   r
   z--dtype z" does not support the CPU backend.r   z>%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s)levelformatr   zSkip DEBUG/INFO messagesCUDA_VISIBLE_DEVICESNz CUDA_VISIBLE_DEVICES is not set.,z/#gpus is not matched with CUDA_VISIBLE_DEVICES.z!The program only supports ngpu=1.zpython path = 
PYTHONPATHz(None)zset random seed = %dzTIt seems that both --rnnlm and --word-rnnlm are specified. Please use either option.z
backend = r   )recogr   r   )recog_v2r   z	`--dtype z#` is only available with `--api v2`z--num-encs z! > 1 is not supported in --api v2z'Only chainer and pytorch are supported.zOnly pytorch is supported.)%r2   
parse_argsngpudtype
ValueErrorverboseloggingbasicConfigINFODEBUGWARNwarningosenvirongetlenspliterrorsysexitinforandomseednprnnlm
word_rnnlmbackend	num_spkrsespnet.asr.chainer_backend.asrr8   num_encsapi espnet.asr.pytorch_backend.recogr9   espnet.asr.pytorch_backend.asrNotImplementedError"espnet.asr.pytorch_backend.asr_mix)argsr/   cvdr8   r9   r0   r0   r1   main8  s   












r^   __main__r   )__doc__r?   rE   rN   rK   r%   numpyrP   espnet.utils.cli_utilsr   r2   r^   __name__argvr0   r0   r0   r1   <module>   s     $b