o
    ix:                     @   s&  d dl Z d dlZd dlZd dlZd dlZd dlZd dlmZmZ d dl	m
Z
mZmZmZmZmZmZ d dlZd dlZd dlZd dlZd dlZd dlmZ d dlmZmZ d dlmZ d dlmZm Z  d dl!m"Z"m#Z# d d	l$m%Z% d d
l&m'Z' d dl(m)Z) G dd de jjZ*G dd dZ+d0ddZ,d1de-fddZ.dd Z/e0e,dgdde0e.dgdde0eg dde0ej1e"ddg dde0ej1e"ddg d de0ej1e"d!dg d"de0ej1e"d#dg d$de0e#g d%de0e+g d&de0eg d'de0e/d(gd)de0e%g d*dd+Z2G d,d- d-eeZ3G d.d/ d/e3Z4dS )2    N)ABCabstractmethod)AnyCallable
CollectionDictMappingTupleUnion)Dataset)check_argument_typescheck_return_type)NpyScpReader)FloatRandomGenerateDatasetIntRandomGenerateDataset)load_num_sequence_textread_2column_text)
RttmReader)SoundScpReader)	SizedDictc                   @   sB   e Zd ZdddZdd Zdd Zdd	 Zd
edej	fddZ
dS )AdapterForSoundScpReaderNc                 C   s    t  sJ || _|| _d | _d S N)r   loaderdtyperate)selfr   r    r   I/home/ubuntu/.local/lib/python3.10/site-packages/espnet2/train/dataset.py__init__   s   

z!AdapterForSoundScpReader.__init__c                 C   s
   | j  S r   )r   keysr   r   r   r   r   $      
zAdapterForSoundScpReader.keysc                 C   
   t | jS r   )lenr   r    r   r   r   __len__'   r!   z AdapterForSoundScpReader.__len__c                 C   r"   r   )iterr   r    r   r   r   __iter__*   r!   z!AdapterForSoundScpReader.__iter__keyreturnc                 C   s6  | j | }t|trtt|dksJ t|t|d tr*t|d tjr*|\}}n&t|d tr>t|d tjr>|\}}ntdt|d  dt|d  | j	d ure| j	|kretd| j	 d| || _	| j
d urs|| j
}nt|tjsJ t||}| j
d ur|| j
}t|tjsJ t||S )N   r      zUnexpected type: z, zSampling rates are mismatched: z != )r   
isinstancetupler#   intnpndarrayRuntimeErrortyper   r   astype)r   r'   retvalr   arrayr   r   r   __getitem__-   s0   





z$AdapterForSoundScpReader.__getitem__r   )__name__
__module____qualname__r   r   r$   r&   strr.   r/   r5   r   r   r   r   r      s    
r   c                   @   sN   e Zd ZdefddZdefddZdefddZd	d
 Zde	j
fddZdS )H5FileWrapperpathc                 C   s   || _ t|d| _d S )Nr)r;   h5pyFileh5_file)r   r;   r   r   r   r   S   s   zH5FileWrapper.__init__r(   c                 C   r"   r   )r9   r?   r    r   r   r   __repr__W   r!   zH5FileWrapper.__repr__c                 C   r"   r   )r#   r?   r    r   r   r   r$   Z   r!   zH5FileWrapper.__len__c                 C   r"   r   )r%   r?   r    r   r   r   r&   ]   r!   zH5FileWrapper.__iter__c                 C   s   | j | }|d S )Nr   )r?   )r   r'   valuer   r   r   r5   `   s   
zH5FileWrapper.__getitem__N)r6   r7   r8   r9   r   r@   r-   r$   r&   r.   r/   r5   r   r   r   r   r:   R   s    r:   c                 C   s   t | ddd}t||S )NTF)	normalize	always_2d)r   r   )r;   float_dtyper   r   r   r   sound_loadere   s   
rE   max_cache_fdc                 C   s   t j| |d}t||S )N)rF   )kaldiioload_scpr   )r;   rD   rF   r   r   r   r   kaldi_loadert   s   
rI   c                 C   sN   zt t|tdd  d\}}W n ty    td| w t| ||S )N	rand_int__ze.g rand_int_3_10: but got )mapr-   r#   split
ValueErrorr0   r   )filepathloader_typelowhighr   r   r   rand_int_loadery   s   $rS   rD   zuAudio format types which supported by sndfile wav, flac, etc.

   utterance_id_a a.wav
   utterance_id_b b.wav
   ...)funckwargshelpzlKaldi-ark file type.

   utterance_id_A /some/where/a.ark:123
   utterance_id_B /some/where/a.ark:456
   ...z`Npy file format.

   utterance_id_A /some/where/a.npy
   utterance_id_B /some/where/b.npy
   ...text_int)rP   zA text file in which is written a sequence of interger numbers separated by space.

   utterance_id_A 12 0 1 3
   utterance_id_B 3 3 1
   ...csv_intzA text file in which is written a sequence of interger numbers separated by comma.

   utterance_id_A 100,80
   utterance_id_B 143,80
   ...
text_floatzA text file in which is written a sequence of float numbers separated by space.

   utterance_id_A 12. 3.1 3.4 4.4
   utterance_id_B 3. 3.12 1.1
   ...	csv_floatzA text file in which is written a sequence of float numbers separated by comma.

   utterance_id_A 12.,3.1,3.4,4.4
   utterance_id_B 3.,3.12,1.1
   ...zReturn text as is. The text must be converted to ndarray by 'preprocess'.

   utterance_id_A hello world
   utterance_id_B foo bar
   ...zA HDF5 file which contains arrays at the first level or the second level.   >>> f = h5py.File('file.h5')
   >>> array1 = f['utterance_id_A']
   >>> array2 = f['utterance_id_B']
zzGenerate random float-ndarray which has the given shapes in the file.

   utterance_id_A 3,4
   utterance_id_B 10,4
   ...rP   ze.g. 'rand_int_0_10'. Generate random int-ndarray which has the given shapes in the path. Give the lower and upper value by the file type. e.g. rand_int_0_10 -> Generate integers from 0 to 10.

   utterance_id_A 3,4
   utterance_id_B 10,4
   ...a  rttm file loader, currently support for speaker diarization

    SPEAKER file1 1 0 1023 <NA> <NA> spk1 <NA>    SPEAKER file1 2 4000 3023 <NA> <NA> spk2 <NA>    SPEAKER file1 3 500 4023 <NA> <NA> spk1 <NA>    END     file1 <NA> 4023 <NA> <NA> <NA> <NA>   ...)sound	kaldi_arknpyrW   rX   rY   rZ   texthdf5
rand_floatzrand_int_\d+_\d+rttmc                   @   s\   e Zd ZedefddZedeedf fddZedee	e
eejf f fddZd	S )

AbsDatasetr(   c                 C      t r   NotImplementedErrorr   namer   r   r   has_name      zAbsDataset.has_name.c                 C   rc   r   rd   r    r   r   r   names  ri   zAbsDataset.namesc                 C   rc   r   rd   )r   uidr   r   r   r5     ri   zAbsDataset.__getitem__N)r6   r7   r8   r   boolrh   r	   r9   rj   r   r   r.   r/   r5   r   r   r   r   rb      s    &rb   c                   @   s  e Zd ZdZ					d deeeeef  deeeee	j
f geee	j
f f d	ed
edeeeef defddZdededeeee	j
ejeejf f fddZdefddZdeedf fddZdd Zdd Zdeeef deeeee	j
f f fddZdS )!ESPnetDatasetaO  Pytorch Dataset class for ESPNet.

    Examples:
        >>> dataset = ESPnetDataset([('wav.scp', 'input', 'sound'),
        ...                          ('token_int', 'output', 'text_int')],
        ...                         )
        ... uttid, data = dataset['uttid']
        {'input': per_utt_array, 'output': per_utt_array}
    Nfloat32long        r   path_name_type_list
preprocessrD   	int_dtypemax_cache_sizerF   c                 C   s   t  sJ t|dkrtdt|}|| _|| _|| _|| _i | _	i | _
|D ]4\}}}	|| j	v r:td| d| ||	}
|
| j	|< ||	f| j
|< t| j	| dkr\t| dq(t|trgt|}|| _|dkrvtdd| _d S d | _d S )Nr   z91 or more elements are required for "path_name_type_list""z" is duplicated for data-keyz has no samplesT)shared)r   r#   rN   copydeepcopyrr   rD   rs   rF   loader_dict
debug_infor0   _build_loaderr+   r9   humanfriendly
parse_sizert   r   cache)r   rq   rr   rD   rs   rt   rF   r;   rg   _typer   r   r   r   r     s6   






zESPnetDataset.__init__r;   rP   r(   c           	      C   s   t  D ]q\}}t||rui }|d D ]/}|dkr||d< q|dkr)| j|d< q|dkr3| j|d< q|dkr=| j|d< qtd| |d }z||fi |W   S  tyt   t	|drc|j
}nt|}td	| d
| d  w qtd| )zHelper function to instantiate Loader.

        Args:
            path:  The file path
            loader_type:  loader_type. sound, npy, text_int, text_float, etc
        rU   rP   rD   rs   rF   z"Not implemented keyword argument: rT   r6   zAn error happened with ()zNot supported: loader_type=)
DATA_TYPESitemsrematchrD   rs   rF   r0   	Exceptionhasattrr6   r9   loggingerror)	r   r;   rP   r'   dicrU   key2rT   rg   r   r   r   r{   C  s2   	

zESPnetDataset._build_loaderc                 C   s
   || j v S r   )ry   rf   r   r   r   rh   j  r!   zESPnetDataset.has_name.c                 C   r"   r   )r,   ry   r    r   r   r   rj   m  r!   zESPnetDataset.namesc                 C   s   t tt | j S r   )r%   nextry   valuesr    r   r   r   r&   p  s   zESPnetDataset.__iter__c              	   C   sZ   | j j}|d7 }| j D ]\}\}}|d| d| d| d7 }q|d| j d7 }|S )Nr   z
  z: {"path": "z", "type": "z"}z
  preprocess: r   )	__class__r6   rz   r   rr   )r   _mesrg   r;   r   r   r   r   r@   s  s   zESPnetDataset.__repr__rk   c           
      C   s  t  sJ t|trtt| j }t|| }| jd ur,|| jv r,| j| }||fS i }| j	 D ]d\}}z'|| }t|tt
frHt|}t|tjtjttjfs]tdt| W n ty{   | j| \}}td| d| d|   w t|tjr| }nt|tjrt|g}|||< q3| jd ur| ||}|D ]?}|| }t|tjstd| dt| d|jjdkr|| j}n|jjd	kr|| j }nt!d
|j |||< q| jd ur| jj"| j#k r|| j|< ||f}	t$|	sJ |	S )Nz.Must be ndarray, torch.Tensor, str or Number: zError happened with path=z, type=z, id=zIAll values must be converted to np.ndarray object by preprocessing, but "z" is still .fizNot supported dtype: )%r   r+   r-   r   r%   ry   r   listr~   r   r,   r.   r4   r/   torchTensorr9   numbersNumber	TypeErrorr1   r   rz   r   r   numpyrr   r0   r   kindr2   rD   rs   re   sizert   r   )
r   rk   ddatarg   r   rA   r;   r   r3   r   r   r   r5   {  sn   








zESPnetDataset.__getitem__)Nrn   ro   rp   r   )r6   r7   r8   __doc__r   r	   r9   r   r   r.   r/   r
   floatr-   r   r   r   r   r   r   r{   rl   rh   rj   r&   r@   r5   r   r   r   r   rm   
  sD    	
.
'0rm   r   )Nr   )5collectionsrw   	functoolsr   r   r   abcr   r   typingr   r   r   r   r   r	   r
   r=   r|   rG   r   r.   r   torch.utils.data.datasetr   	typeguardr   r   espnet2.fileio.npy_scpr   espnet2.fileio.rand_gen_datasetr   r   espnet2.fileio.read_textr   r   espnet2.fileio.rttmr   espnet2.fileio.sound_scpr   espnet2.utils.sized_dictr   r   r:   rE   r-   rI   rS   dictpartialr   rb   rm   r   r   r   r   <module>   s    $5

			





z