o
    i                     @   s   d dl mZmZmZmZ d dlmZmZ d dlm	Z	 d dl
mZ d dlmZ d dlmZ d dlmZ d dlmZ ed	d
ddddZ							d%dedededeeedf ee f dededededee d ed!ed"e	fd#d$ZdS )&    )ListSequenceTupleUnion)check_argument_typescheck_return_type)
AbsSampler)FoldedBatchSampler)LengthBatchSampler)NumElementsBatchSampler)SortedBatchSampler)UnsortedBatchSamplera  UnsortedBatchSampler has nothing in particular feature and just creates mini-batches which has constant batch_size. This sampler doesn't require any length information for each feature. 'key_file' is just a text file which describes each sample name.

    utterance_id_a
    utterance_id_b
    utterance_id_c

The fist column is referred, so 'shape file' can be used, too.

    utterance_id_a 100,80
    utterance_id_b 400,80
    utterance_id_c 512,80
a  SortedBatchSampler sorts samples by the length of the first input  in order to make each sample in a mini-batch has close length. This sampler requires a text file which describes the length for each sample 

    utterance_id_a 1000
    utterance_id_b 1453
    utterance_id_c 1241

The first element of feature dimensions is referred, so 'shape_file' can be also used.

    utterance_id_a 1000,80
    utterance_id_b 1453,80
    utterance_id_c 1241,80
a  FoldedBatchSampler supports variable batch_size. The batch_size is decided by
    batch_size = base_batch_size // (L // fold_length)
L is referred to the largest length of samples in the mini-batch. This samples requires length information as same as SortedBatchSampler
a  LengthBatchSampler supports variable batch_size. This sampler makes mini-batches which have same number of 'bins' as possible counting by the total lengths of each feature in the mini-batch. This sampler requires a text file which describes the length for each sample. 

    utterance_id_a 1000
    utterance_id_b 1453
    utterance_id_c 1241

The first element of feature dimensions is referred, so 'shape_file' can be also used.

    utterance_id_a 1000,80
    utterance_id_b 1453,80
    utterance_id_c 1241,80
a  NumElementsBatchSampler supports variable batch_size. Just like LengthBatchSampler, this sampler makes mini-batches which have same number of 'bins' as possible counting by the total number of elements of each feature instead of the length. Thus this sampler requires the full information of the dimension of the features. 

    utterance_id_a 1000,80
    utterance_id_b 1453,80
    utterance_id_c 1241,80
)unsortedsortedfoldedlengthnumel
descending	ascendingF    TNtype
batch_size
batch_binsshape_files.sort_in_batch
sort_batch	drop_lastmin_batch_sizefold_lengthspaddingutt2category_filereturnc              
   C   s  t  sJ t|dkrtd| dkrt||d |d}n^| dkr-t||d |||d}nN| dkrTt|t|krGtdt| d	t| t||||||||
d
}n'| dkrdt||||||	|d}n| dkrtt||||||	|d}ntd|  t|sJ |S )a  Helper function to instantiate BatchSampler.

    Args:
        type: mini-batch type. "unsorted", "sorted", "folded", "numel", or, "length"
        batch_size: The mini-batch size. Used for "unsorted", "sorted", "folded" mode
        batch_bins: Used for "numel" model
        shape_files: Text files describing the length and dimension
            of each features. e.g. uttA 1330,80
        sort_in_batch:
        sort_batch:
        drop_last:
        min_batch_size:  Used for "numel" or "folded" mode
        fold_lengths: Used for "folded" mode
        padding: Whether sequences are input as a padded tensor or not.
            used for "numel" mode
    r   zNo shape file are givenr   )r   key_filer   r   )r   
shape_filer   r   r   r   zGThe number of fold_lengths must be equal to the number of shape_files: z != )r   r   r   r   r   r   r   r!   r   )r   r   r   r   r   r    r   r   zNot supported: )	r   len
ValueErrorr   r   r	   r   r
   r   )r   r   r   r   r   r   r   r   r   r    r!   retvalr   r   X/home/ubuntu/.local/lib/python3.10/site-packages/espnet2/samplers/build_batch_sampler.pybuild_batch_samplerH   sr   


r)   )r   r   Fr   r   TN)typingr   r   r   r   	typeguardr   r   espnet2.samplers.abs_samplerr   %espnet2.samplers.folded_batch_samplerr	   %espnet2.samplers.length_batch_samplerr
   +espnet2.samplers.num_elements_batch_samplerr   %espnet2.samplers.sorted_batch_samplerr   'espnet2.samplers.unsorted_batch_samplerr   dictBATCH_TYPESstrintboolr)   r   r   r   r(   <module>   s^    A	
