o
    ib                     @   s&  d dl Z d dlmZ d dlmZ d dlmZmZmZm	Z	m
Z
 d dlZd dlZd dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZ d dlmZ e dedeolee
ee eeej f f  deo}ee
ee eeej f f  dede	e! de	e! de"ddfddZ#dS )    N)defaultdict)Path)DictIterableListOptionalTuple)data_parallel)
DataLoader)check_argument_types)DatadirWriter)NpyScpWriter)	to_device)ForwardAdaptor)AbsESPnetModelmodel
train_iter
valid_iter
output_dirngpulog_intervalwrite_collected_featsreturnc                 C   s  t  sJ i }t||gddgD ]\}}	|du r0ztt|d d}W n ty/   d}Y nw tdd }
td	d }td
d }t||	 }t|dD ] \}\}}t||dkr^dnd}|D ]@}|	drkqctt||| D ].\}\}}| d|v rt
|| d | }|d| }dtt|j|| d |< qtqc|dkr| jdi |}ntt| ddt||d}| D ]~\}}tt||  D ]n\}\}}| d|v r|| d | }|d| }n|d }|
|  |d7  < ||  |d d7  < ||  t|7  < |r?||	f|vr7||	 d }t|d|  || d |||	f< ||||	f |< qq|| dkrPtd|  qOW d   n	1 s\w   Y  |
D ]}tj||	 | d || |
| || d qc||	 d jddd}|dtd d |d  W d   n	1 sw   Y  ||	 d! jddd}|d|
d  W d   n	1 sw   Y  qdS )"zPerform on collect_stats mode.

    Running for deriving the shape information from data
    and gathering statistics.
    This method is used before executing train().

    trainvalidN   
   d   c                   S      dS Nr    r    r    r    T/home/ubuntu/.local/lib/python3.10/site-packages/espnet2/main_funcs/collect_stats.py<lambda>.       zcollect_stats.<locals>.<lambda>c                   S   r   r   r    r    r    r    r!   r"   /   r#   c                   S   r   r   r    r    r    r    r!   r"   0   r#      r   cudacpu_lengths,_shapecollect_featsr    )module_kwargs   data_z.scpzNiter: z
_stats.npz)countsum
sum_square
batch_keyswzutf-8)encoding
c                 S   s   |  d S )Nr'   )endswith)xr    r    r!   r"   w   s    
stats_keys)r   zipmaxlen	TypeErrorr   r   	enumerater   r5   intjoinmapstrshaper*   r	   r   rangeitemsr&   numpyr/   r   logginginfonpsavezopenwritefilter)r   r   r   r   r   r   r   npy_scp_writersitrmodesum_dictsq_dict
count_dictdatadir_writeriiterkeysbatchnameikeydatalgvuttidseqlengthpfr    r    r!   collect_stats   s   


	":
ra   )$rE   collectionsr   pathlibr   typingr   r   r   r   r   rD   rG   torchtorch.nn.parallelr	   torch.utils.datar
   	typeguardr   espnet2.fileio.datadir_writerr   espnet2.fileio.npy_scpr    espnet2.torch_utils.device_funcsr   #espnet2.torch_utils.forward_adaptorr   espnet2.train.abs_espnet_modelr   no_gradr@   Tensorr=   boolra   r    r    r    r!   <module>   s@      