o
    i                     @   s   d dl Z d dlmZmZmZmZmZmZmZ d dl	Z
d dlZd dlmZ d dlmZ d dlmZ d dlmZ G dd deZdS )	    N)AnyDictIteratorListSequenceTupleUnion)check_argument_types)AbsIterFactory)SequenceIterFactory)
AbsSamplerc                   @   s   e Zd ZdZ								ddedeeeee  f d	eee	f d
e
dedededededefddZ	ddededeeee	 ee	ejf f  fddZdee	 dee	eej f dedejjfddZdS )ChunkIterFactorya  Creates chunks from a sequence

    Examples:
        >>> batches = [["id1"], ["id2"], ...]
        >>> batch_size = 128
        >>> chunk_length = 1000
        >>> iter_factory = ChunkIterFactory(dataset, batches, batch_size, chunk_length)
        >>> it = iter_factory.build_iter(epoch)
        >>> for ids, batch in it:
        ...     ...

    - The number of mini-batches are varied in each epochs and
      we can't get the number in advance
      because IterFactory doesn't be given to the length information.
    - Since the first reason, "num_iters_per_epoch" can't be implemented
      for this iterator. Instead of it, "num_samples_per_epoch" is implemented.

          ?   Nr   F
batch_sizebatcheschunk_lengthchunk_shift_rationum_cache_chunksnum_samples_per_epochseedshufflenum_workers
pin_memoryc              
   C   s:  t  sJ tdd |D sJ dt|||||	|
||d| _t||| _t|trt|dkr4t	dg | _
|dD ]M}zttt|d}W n t	yX   t	d	| w t|d
krft	d	| t|d
kr|  j
tt|d |d d 7  _
q<|  j
|d g7  _
q<n|g| _
|| _|| _|| _|	| _d S )Nc                 s       | ]	}t |d kV  qdS    Nlen.0x r"   X/home/ubuntu/.local/lib/python3.10/site-packages/espnet2/iterators/chunk_iter_factory.py	<genexpr>1       z,ChunkIterFactory.__init__.<locals>.<genexpr>zbatch-size must be 1)datasetr   num_iters_per_epochr   r   r   
collate_fnr   r   z%e.g. 5,8 or 3-5: but got empty string,-ze.g. 5,8 or 3-5: but got    r   )r	   allr   per_sample_iter_factorymaxr   
isinstancestrr   
ValueErrorchunk_lengthssplitlistmapintranger   r   r   r   )selfr&   r   r   r   r   r   r   r   r   r   r(   r   r!   spsr"   r"   r#   __init__!   sD   

&
zChunkIterFactory.__init__epochreturnc              
   #   s   | j ||}|d u r| j}tj|| j }i }i }|D ](\}}t|dks2J dt| tdd |	 D s?J g }	|D ]}
|
d |v rP|	
|
 qCdd | D }|d |	D ]$}
t||
 t||	d  krtd	t||
  d
t||	d   q`t||	d    fdd| jD }t|dkrtd d  d| j  qt||d|g }|i }t| j    d }|r|d   d nd| D ]7\}||vrg ||< ||	v r||  fddt|D 7  < q||  fddt|D 7  < q|fddt|D 7 }t|| jkr?| ||||E d H \}}||< ||< q|D ]|g }|i }| ||||E d H  qJd S )Nr   zMust be per-sample-loader: c                 s   r   r   r   r   r"   r"   r#   r$   m   r%   z.ChunkIterFactory.build_iter.<locals>.<genexpr>_lengthsc                 S   s$   i | ]\}}| d s||d qS )r=   r   )endswithr    kvr"   r"   r#   
<dictcomp>u      $ z/ChunkIterFactory.build_iter.<locals>.<dictcomp>r   z$All sequences must has same length: z != c                    s   g | ]}| k r|qS r"   r"   )r    lg)Lr"   r#   
<listcomp>   s    z/ChunkIterFactory.build_iter.<locals>.<listcomp>zThe length of 'z' is z9, but it is shorter than any candidates of chunk-length: c                    s,   g | ]}|   |     qS r"   r"   r    i)SWZrA   r"   r#   rF      s   , c                       g | ]} qS r"   r"   r    _rA   r"   r#   rF          c                    rL   r"   r"   rM   )id_r"   r#   rF      rP   )r-   
build_iterr   nprandomRandomStater   r   r,   valuesappenditemsRuntimeErrorr2   loggingwarningr6   choice
setdefaultr   randintr7   r   _generate_mini_batches)r8   r;   r   per_sample_loaderstatecache_chunks_dictcache_id_list_dictidsbatchsequence_keyskeyr2   cache_id_listcache_chunksNr@   r"   )rE   rI   rJ   rK   rQ   rA   r#   rR   Z   s   


*$
zChunkIterFactory.build_iterid_listra   c                 #   s    |r$t dt| fdd| D }fddD | j t krVd    fdd| D fV   d   fdd| D }t ks-|fS )Nr   c                    s$   i | ]\} | fd dD qS )c                       g | ]} | qS r"   r"   rG   rO   r"   r#   rF          zFChunkIterFactory._generate_mini_batches.<locals>.<dictcomp>.<listcomp>r"   )r    r@   )indicesrO   r#   rB      rC   z;ChunkIterFactory._generate_mini_batches.<locals>.<dictcomp>c                    rl   r"   r"   rG   )rk   r"   r#   rF      rm   z;ChunkIterFactory._generate_mini_batches.<locals>.<listcomp>c                    s&   i | ]\}}|t |d   dqS )Nr   )torchstackr?   bsr"   r#   rB      s   & c                    s   i | ]\}}|| d  qS Nr"   r?   rq   r"   r#   rB      s    )rS   aranger   r   rX   r   )r8   rk   r   r   ra   r"   )rr   rk   rn   r#   r_      s   

	z'ChunkIterFactory._generate_mini_batches)r   r   Nr   Fr   NFrs   )__name__
__module____qualname____doc__r6   r   r   r   r   r0   floatboolr:   r   r   r   r   ro   TensorrR   rS   rT   rU   r_   r"   r"   r"   r#   r      s`    
	

<
`r   )rZ   typingr   r   r   r   r   r   r   numpyrS   ro   	typeguardr	   "espnet2.iterators.abs_iter_factoryr
   'espnet2.iterators.sequence_iter_factoryr   espnet2.samplers.abs_samplerr   r   r"   r"   r"   r#   <module>   s    $