o
    i                     @   s\  d dl Z d dlZd dlZd dlmZ d dlmZmZmZm	Z	 d dl
Zd dlZd dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZ d dlmZmZm Z m!Z! d dl"m#Z# de$de%de$de%de%de%de	e%e$f deee$e$e$f  dee$ dee$ dee$ dee& de'fddZ(dd Z)d"dd Z*e+d!kre*  dS dS )#    N)Path)OptionalSequenceTupleUnion)data_parallel)check_argument_types)DatadirWriter)LMTask)	to_device)ForwardAdaptor)set_all_random_seed)config_argparse)float_or_nonestr2boolstr2triple_strstr_or_none)get_commandline_args
output_dir
batch_sizedtypengpuseednum_workers	log_leveldata_path_and_name_and_typekey_filetrain_config
model_filelog_baseallow_variable_data_keysc           !      C   sv  t  sJ tj|dd |dkrd}nd}t| t|	|
|\}}t|d}|jtt	|d
  td|  tj|||||t|d	t|d	|d
d	}t| [}d}d}|D ]\}}t|tskJ t|tdd |D sxJ |ttt| }t||ksJ t| d| t	 ' t||}|dkr|di |\}}nt|dt||d\}}W d    n1 sw   Y  |t|  krt|ksn J |t|t|f|   d}|   }|| 7 }|| 7 }t |||D ].\}}}|d u rt!"|| }n||| t!#|  }t$||d |< t$||d |< qq\|d u rBt!"|| }n||| t!#|  }t%| d j&ddd}|'| d W d    n	1 slw   Y  t%| d j&ddd}|d u rt!j(} n|} |'|  d W d    n	1 sw   Y  td|  W d    d S 1 sw   Y  d S )Nz>%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s)levelformat   cudacpunll)r   zModel:
FT)r   r   r   r   preprocess_fn
collate_fnr    	inferenceg        r   c                 s   s    | ]}t |tV  qd S N)
isinstancestr).0s r/   R/home/ubuntu/.local/lib/python3.10/site-packages/espnet2/bin/lm_calc_perplexity.py	<genexpr>N   s    z"calc_perplexity.<locals>.<genexpr>z != r/   )module_kwargsutt2pplutt2ntokenspplwzutf-8)encoding
basezPPL=))r   loggingbasicConfigr   r
   build_model_from_filer   togetattrtorchevalinfobuild_streaming_iteratorbuild_preprocess_fnbuild_collate_fnr	   r+   dicttypealllennextitervaluesno_gradr   r   rangedetachr%   numpysumzipnpexplogr,   r   openwritee)!r   r   r   r   r   r   r   r   r   r   r   r   r    devicemodel
train_argswrapped_modelloaderwriter	total_nlltotal_ntokenskeysbatch_bsr&   lengthskey_nllntokenutt_pplr5   f	_log_baser/   r/   r0   calc_perplexity   s   



"


4



$rj   c                  C   s  t jdtjd} | jddd dddd	 | jd
tdd | jdtddd | jdtddd | jddg ddd | jdtddd | jdtddd | jdtd dd | d}|jd t	dd!d" |jd#t
d$ |jd%td&d' | d(}|jd)td$ |jd*td$ | S )+NzCalc perplexity)descriptionformatter_classz--log_levelc                 S   s   |   S r*   )upper)xr/   r/   r0   <lambda>   s    zget_parser.<locals>.<lambda>INFO)CRITICALERRORWARNINGrp   DEBUGNOTSETzThe verbose level of logging)rF   defaultchoiceshelpz--output_dirT)rF   requiredz--ngpur   z(The number of gpus. 0 indicates CPU mode)rF   rv   rx   z--seedzRandom seedz--dtypefloat32)float16rz   float64z	Data type)rv   rw   rx   z--num_workersr#   z)The number of workers used for DataLoaderz--batch_sizezThe batch size for inferencez
--log_basezIThe base of logarithm for Perplexity. If None, napier's constant is used.zInput data relatedz--data_path_and_name_and_typeappend)rF   ry   actionz
--key_file)rF   z--allow_variable_data_keysF)rF   rv   zThe model configuration relatedz--train_configz--model_file)r   ArgumentParserargparseArgumentDefaultsHelpFormatteradd_argumentr,   intr   add_argument_groupr   r   r   )parsergroupr/   r/   r0   
get_parser   sp   

r   c                 C   sF   t t tjd t }|| }t|}|dd  tdi | d S )N)fileconfigr/   )	printr   sysstderrr   
parse_argsvarspoprj   )cmdr   argskwargsr/   r/   r0   main   s   
r   __main__r*   ),r   r:   r   pathlibr   typingr   r   r   r   rO   rR   r?   torch.nn.parallelr   	typeguardr   espnet2.fileio.datadir_writerr	   espnet2.tasks.lmr
    espnet2.torch_utils.device_funcsr   #espnet2.torch_utils.forward_adaptorr   'espnet2.torch_utils.set_all_random_seedr   espnet2.utilsr   espnet2.utils.typesr   r   r   r   espnet.utils.cli_utilsr   r,   r   floatboolrj   r   r   __name__r/   r/   r/   r0   <module>   sb   
	

h
C	
