o
    i`                     @   st   d dl Z d dlZd dlZd dlmZ d dlZd dlZd dlZd dl	Z	d dl
mZ G dd deZG dd deZdS )    N)OrderedDict)Transformationc                   @   s^   e Zd ZdZ									dddZddd	Zd
d Zdd Zdd Zdd Z	dd Z
dS )LoadInputsAndTargetsa  Create a mini-batch from a list of dicts

    >>> batch = [('utt1',
    ...           dict(input=[dict(feat='some.ark:123',
    ...                            filetype='mat',
    ...                            name='input1',
    ...                            shape=[100, 80])],
    ...                output=[dict(tokenid='1 2 3 4',
    ...                             name='target1',
    ...                             shape=[4, 31])]]))
    >>> l = LoadInputsAndTargets()
    >>> feat, target = l(batch)

    :param: str mode: Specify the task mode, "asr" or "tts"
    :param: str preprocess_conf: The path of a json file for pre-processing
    :param: bool load_input: If False, not to load the input data
    :param: bool load_output: If False, not to load the output data
    :param: bool sort_in_input_length: Sort the mini-batch in descending order
        of the input length
    :param: bool use_speaker_embedding: Used for tts mode only
    :param: bool use_second_target: Used for tts mode only
    :param: dict preprocess_args: Set some optional arguments for preprocessing
    :param: Optional[dict] preprocess_args: Used for tts mode only
    asrNTFc
           
      C   s   i | _ |dvrtd||d ur!t|| _td| j nd | _|r0|r0|dkr0td|s4|rA|dkrA|dkrAtd || _|| _|| _	|| _
|| _|| _|d u r[i | _nt|tsfJ t|t|| _|	| _d S )N)r   ttsmtvcz$Only asr or tts are allowed: mode={}z[[Experimental feature] Some preprocessing will be done for the mini-batch creation using {}r   z>Choose one of "use_second_target" and "use_speaker_embedding "r   zO"use_second_target" and "use_speaker_embedding" is used only for tts or vc mode)_loaders
ValueErrorformatr   preprocessingloggingwarningmodeload_output
load_inputsort_in_input_lengthuse_speaker_embeddinguse_second_targetpreprocess_args
isinstancedicttypekeep_all_data_on_mem)
selfr   preprocess_confr   r   r   r   r   r   r    r   I/home/ubuntu/.local/lib/python3.10/site-packages/espnet/utils/io_utils.py__init__(   sD   


zLoadInputsAndTargets.__init__c                 C   s  t  }t  }g }|D ]\}}|| | jr:t|d D ]\}}	| j|	d |	ddd}
||	d g |
 qn:| jdkrt| jrtt|d D ]+\}}	|dkr[t	|d dkr[d	}
n| j|	d |	ddd}
||	d g |
 qH| j
r| jd
krtjtt|d d d  tjd}
||d d d g |
 t|d D ]0\}}	d|	v rtjtt|	d  tjd}
n| j|	d |	ddd}
||	d g |
 qq
| jdkr| |||\}}nJ| jdkr
|d \}}t|d d d d d }| ||||\}}n%| jd
kr| |||\}}n| jdkr*| |||\}}nt| j| jd	urR| D ]}|drP| j|| |fi | j||< q9|r]t| |fS t| S )aE  Function to load inputs and targets from list of dicts

        :param List[Tuple[str, dict]] batch: list of dict which is subset of
            loaded data.json
        :param bool return_uttid: return utterance ID information for visualization
        :return: list of input token id sequences [(L_1), (L_2), ..., (L_B)]
        :return: list of input feature sequences
            [(T_1, D), (T_2, D), ..., (T_B, D)]
        :rtype: list of float ndarray
        :return: list of target token id sequences [(L_1), (L_2), ..., (L_B)]
        :rtype: list of int ndarray

        inputfeatfiletypemat)filepathr!   namer      Nr   outputtokeniddtyper   r   shaper   )r   appendr   	enumerate_get_from_loaderget
setdefaultr   r   lenr   npfromitermapintsplitint64_create_batch_asr_create_batch_tts_create_batch_mt_create_batch_vcNotImplementedErrorr   keys
startswithr   tuplevalues)r   batchreturn_uttidx_feats_dicty_feats_dict
uttid_listuttidinfoidxinpxreturn_batch_eosx_namer   r   r   __call__]   s   


	






zLoadInputsAndTargets.__call__c                    s  t | | jrOt | td td ks)J td td ft tfddttd }tdt|D ] t fdd|}qAn
t ttd }| jrgt|fdddn|ttd krt	d
td t fd	d
D fdd
D t | }| jrȇfdd
D t | }tg dd
 t|D dd
 t|D }|fS tdd
 t|D }|fS )a  Create a OrderedDict for the mini-batch

        :param OrderedDict x_feats_dict:
            e.g. {"input1": [ndarray, ndarray, ...],
                  "input2": [ndarray, ndarray, ...]}
        :param OrderedDict y_feats_dict:
            e.g. {"target1": [ndarray, ndarray, ...],
                  "target2": [ndarray, ndarray, ...]}
        :param: List[str] uttid_list:
            Give uttid_list to sort in the same order as the mini-batch
        :return: batch, uttid_list
        :rtype: Tuple[OrderedDict, List[str]]
        r   c                    s   t  d |  dkS Nr   r0   iysr   r   <lambda>       z8LoadInputsAndTargets._create_batch_asr.<locals>.<lambda>r%   c                    s   t   |  dkS rO   rP   rQ   )nrT   r   r   rU      rV   c                    s   t  d |   S rO   rP   rQ   xsr   r   rU      s    key8Target sequences include empty tokenid (batch {} -> {}).c                       g | ]  fd dD qS )c                       g | ]} | qS r   r   .0rR   rI   r   r   
<listcomp>   rV   ELoadInputsAndTargets._create_batch_asr.<locals>.<listcomp>.<listcomp>r   r`   nonzero_sorted_idxra   r   rb          z:LoadInputsAndTargets._create_batch_asr.<locals>.<listcomp>c                    r^   r   r   r_   rD   r   r   rb      rV   c                    r]   )c                    r^   r   r   r_   yr   r   rb      rV   rc   r   rd   re   ri   r   rb      rg   c                 S      g | ]\}}||fqS r   r   r`   rM   rI   r   r   r   rb          c                 S   rk   r   r   )r`   y_namerj   r   r   r   rb      rm   c                 S   rk   r   r   rl   r   r   r   rb      rm   )listr?   r   r0   filterranger   sortedr   r   r   r<   r   zip)r   rB   rC   rD   nonzero_idxx_namesy_namesrJ   r   )rW   rf   rD   rY   rT   r   r7      sD   4"z&LoadInputsAndTargets._create_batch_asrc           	         sJ  t | d | jr3t | d ttks%J ttftfddtt}ntt}| jrGt|fddd}n|}t|tkr^t	d
tt| fdd|D  fd	d|D  t | d }| jrfd
d|D t | d }t|f|fg}| fS t|fg}| fS )zCreate a OrderedDict for the mini-batch

        :param OrderedDict x_feats_dict:
        :param OrderedDict y_feats_dict:
        :return: batch, uttid_list
        :rtype: Tuple[OrderedDict, List[str]]
        r   c                       t  |  dkS rO   rP   rQ   rS   r   r   rU         z7LoadInputsAndTargets._create_batch_mt.<locals>.<lambda>c                       t  |   S NrP   rQ   rX   r   r   rU         rZ   r\   c                    r^   r   r   r_   rX   r   r   rb   %  rV   z9LoadInputsAndTargets._create_batch_mt.<locals>.<listcomp>c                    r^   r   r   r_   rh   r   r   rb   &  rV   c                    r^   r   r   r_   rS   r   r   rb   *  rV   )ro   r?   r   r0   rp   rq   r   rr   r   r   r   r<   r   )	r   rB   rC   rD   rt   rf   rM   rn   rJ   r   )rD   rY   rT   r   r9     s2   	$z%LoadInputsAndTargets._create_batch_mtc                    sN  t | d t tfddtt}| jr%t|fddd}n|}fdd|D fdd|D  fd	dD | jrt | d ttks_J ttffd
d|D ddd}d}| jrt | d fdd|D t |	 d }| j
rt | d fdd|D t |	 d }t |	 d }	t |	 d }
t|	f|
f|f|fg}|fS | j
rt|dkrtdt|dkrd}nd}t | | fdd|D t |	 d }	t |	 | }t|	f|fg}|fS t |	 d }	t|	fg}|fS )a  Create a OrderedDict for the mini-batch

        :param OrderedDict x_feats_dict:
            e.g. {"input1": [ndarray, ndarray, ...],
                  "input2": [ndarray, ndarray, ...]}
        :param OrderedDict y_feats_dict:
            e.g. {"target1": [ndarray, ndarray, ...],
                  "target2": [ndarray, ndarray, ...]}
        :param: List[str] uttid_list:
        :param int eos:
        :return: batch, uttid_list
        :rtype: Tuple[OrderedDict, List[str]]
        r   c                    rw   rO   rP   rQ   rX   r   r   rU   C  rx   z8LoadInputsAndTargets._create_batch_tts.<locals>.<lambda>c                    ry   rz   rP   rQ   rX   r   r   rU   G  r{   rZ   c                    r^   r   r   r_   rX   r   r   rb   K  rV   z:LoadInputsAndTargets._create_batch_tts.<locals>.<listcomp>c                    r^   r   r   r_   rh   r   r   rb   L  rV   c                    s   g | ]}t | qS r   )r1   r+   )r`   rI   )rL   r   r   rb   N  rm   c                    r^   r   r   r_   rS   r   r   rb   S  rV   Nspembs_none	spcs_noner%   c                    r^   r   r   r_   spcsr   r   rb   \  rV   c                    r^   r   r   r_   spembsr   r   rb   a  rV    No speaker embedding is providedc                    r^   r   r   r_   r   r   r   rb   s  rV   )ro   r?   rp   rq   r0   r   rr   r   r   r<   r   r   
IndexError)r   rB   rC   rD   rL   rt   rf   spembs_name	spcs_namerM   rn   rJ   
spembs_idxr   )rL   r   r   rD   rY   rT   r   r8   2  sZ   $z&LoadInputsAndTargets._create_batch_ttsc                    s  t | d t tfddtt}| jr%t|fddd}n|}fdd|D fdd|D | jrt | d ttksVJ ttffd	d|D d
d
 d}d}| jrnt	d| jrt | d fdd|D t |
 d }t |
 d }t |
 d }	t|f|	f|f| fg}
|
fS | jrt|dkrtdt|dkrd}nd}t | | fdd|D t |
 d }t |
 | }t|f|fg}
|
fS t |
 d }t|fg}
|
fS )a  Create a OrderedDict for the mini-batch

        :param OrderedDict x_feats_dict:
            e.g. {"input1": [ndarray, ndarray, ...],
                  "input2": [ndarray, ndarray, ...]}
        :param OrderedDict y_feats_dict:
            e.g. {"target1": [ndarray, ndarray, ...],
                  "target2": [ndarray, ndarray, ...]}
        :param: List[str] uttid_list:
        :return: batch, uttid_list
        :rtype: Tuple[OrderedDict, List[str]]
        r   c                    rw   rO   rP   rQ   rX   r   r   rU     rx   z7LoadInputsAndTargets._create_batch_vc.<locals>.<lambda>c                    ry   rz   rP   rQ   rX   r   r   rU     r{   rZ   c                    r^   r   r   r_   rX   r   r   rb     rV   z9LoadInputsAndTargets._create_batch_vc.<locals>.<listcomp>c                    r^   r   r   r_   rh   r   r   rb     rV   c                    r^   r   r   r_   rS   r   r   rb     rV   Nr|   r}   z&Currently second target not supported.r%   c                    r^   r   r   r_   r~   r   r   rb     rV   c                    r^   r   r   r_   r   r   r   rb     rV   r   c                    r^   r   r   r_   r   r   r   rb     rV   )ro   r?   rp   rq   r0   r   rr   r   r   r
   r<   r   r   r   )r   rB   rC   rD   rt   rf   r   r   rM   rn   rJ   r   r   )r   r   rD   rY   rT   r   r:     sT   $z%LoadInputsAndTargets._create_batch_vcc                 C   s  |dkr'| dd\}}| j|}|du r!t|d}|| j|< || d S |dkrQ| dd\}}| j|}|du rIt|ddd	}|| j|< || \}}|S |d
kr{| jsctj|dd	\}}|S || jvrvtj|dd	\}}|| j|< | j| S |dkr| dd\}}| j|}|du rt	
|}|| j|< || S |dkr| jst	
|S || jvrt	
|| j|< | j| S |dv r| jst|S || jvrt|| j|< | j| S |dkr| dd\}}| j|}|du rt|}|| j|< || S td|)aq  Return ndarray

        In order to make the fds to be opened only at the first referring,
        the loader are stored in self._loaders

        >>> ndarray = loader.get_from_loader(
        ...     'some/path.h5:F01_050C0101_PED_REAL', filetype='hdf5')

        :param: str filepath:
        :param: str filetype:
        :return:
        :rtype: np.ndarray
        hdf5:r%   Nrr   z
sound.hdf5int16r(   soundnpznpy)r"   vecscpzNot supported: loader_type={})r5   r	   r.   h5pyFileSoundHDF5Filer   	soundfilereadr1   loadkaldiioload_matload_scpr;   r   )r   r#   r!   r[   loaderarrayraterK   r   r   r   r-     sd   














z%LoadInputsAndTargets._get_from_loader)	r   NTTTFFNF)F)__name__
__module____qualname____doc__r   rN   r7   r9   r8   r:   r-   r   r   r   r   r      s$    

5h>/MNr   c                   @   s   e Zd ZdZd!ddZdd Zd"d	d
Zdd Zdd Zdd Z	dd Z
dd Zdd Zdd Zdd Zdd Zdd Zdd  ZdS )#r   aq  Collecting sound files to a HDF5 file

    >>> f = SoundHDF5File('a.flac.h5', mode='a')
    >>> array = np.random.randint(0, 100, 100, dtype=np.int16)
    >>> f['id'] = (array, 16000)
    >>> array, rate = f['id']


    :param: str filepath:
    :param: str mode:
    :param: str format: The type used when saving wav. flac, nist, htk, etc.
    :param: str dtype:

    r+Nr   c                 K   sv   || _ || _|| _tj||fi || _|d u r6tjtj|d d }|dd  }|	 t
 vr6d}|| _d S )Nr   r%   flac)r#   r   r)   r   r   fileospathsplitextupperr   available_formatsr   )r   r#   r   r   r)   kwargs
second_extr   r   r   r   A  s   
zSoundHDF5File.__init__c                 C   s   d | j| j| j | jS )Nz3<SoundHDF5 file "{}" (mode {}, format {}, type {})>)r   r#   r   r)   r   r   r   r   __repr__R  s   zSoundHDF5File.__repr__c                 K   sL   t  }|\}}tj|||| jd | jj|f|t|	 d| d S )N)r   )r*   data)
ioBytesIOr   writer   r   create_datasetr1   voidgetvalue)r   r$   r*   r   kwdsfr   r   r   r   r   r   W  s   (zSoundHDF5File.create_datasetc                 C   s   | j ||d d S )N)r   )r   )r   r$   r   r   r   r   __setitem__]  s   zSoundHDF5File.__setitem__c                 C   s8   | j | d }t| }tj|| jd\}}||fS )Nr   r(   )r   r   r   tobytesr   r   r)   )r   r[   r   r   r   r   r   r   r   __getitem__`  s   zSoundHDF5File.__getitem__c                 C   s
   | j  S rz   )r   r<   r   r   r   r   r<   f     
zSoundHDF5File.keysc                 c   s    | j D ]}| | V  qd S rz   r   r   kr   r   r   r?   i  s   
zSoundHDF5File.valuesc                 c   s     | j D ]	}|| | fV  qd S rz   r   r   r   r   r   itemsm  s   
zSoundHDF5File.itemsc                 C   
   t | jS rz   )iterr   r   r   r   r   __iter__q  r   zSoundHDF5File.__iter__c                 C   s
   || j v S rz   r   r   itemr   r   r   __contains__t  r   zSoundHDF5File.__contains__c                 C   r   rz   )r0   r   r   r   r   r   __len__w  r   zSoundHDF5File.__len__c                 C   s   | S rz   r   r   r   r   r   	__enter__z  s   zSoundHDF5File.__enter__c                 C      | j   d S rz   r   close)r   exc_typeexc_valexc_tbr   r   r   __exit__}     zSoundHDF5File.__exit__c                 C   r   rz   r   r   r   r   r   r     r   zSoundHDF5File.close)r   Nr   )NN)r   r   r   r   r   r   r   r   r   r<   r?   r   r   r   r   r   r   r   r   r   r   r   r   1  s     

r   )r   r   r   collectionsr   r   r   numpyr1   r   espnet.transform.transformationr   objectr   r   r   r   r   r   <module>   s        '