o
    i                     @   s   d dl Z d dlZd dlZd dlZd dlmZ d dlm	Z	 d dlm
Z
mZ d dlmZ d dlZd dlmZ edddd Zd dl Z d d	lmZm	Z	 d dlZd dlZG d
d de	ZdS )    N)DistributedSampler)BatchSamplerSampler)tablesbatch_sampler_classesEspnetStyleBatchSamplerc                 K   s@   i }t | fi |}||d< |dd|d< |dd|d< |S )Nbatch_samplernum_workers   
pin_memoryT)r   get)datasetkwargsdataloader_argsr    r   b/home/ubuntu/.local/lib/python3.10/site-packages/funasr/datasets/audio_datasets/espnet_samplers.pyEspnetStyleBatchSampler_fn   s   r   )Datasetr   c                   @   sN   e Zd Z									ddeded	efd
dZdd Zdd Zdd ZdS )r   tokenNFT   r   is_training	sort_size
start_stepc                 K   s   z
t  }t  }W n   d}d}Y || _|| _|| _|| _|| _|	| _|o(|	| _	|| _
t| j| _tt| j| j | _d| _|
| | _|dd| _|dd| _|dd| _|| _d| _| jdkrrtd| j  d S d S )	Nr      max_token_lengthi   min_token_lengthlength_scale_sourceg      ?z5Warning, start_step > 0, dataloader start from step: )distget_rankget_world_sizeranknum_replicasr   
batch_size
batch_typer   shuffle	drop_lastlen
total_sizeintmathceilnum_samplesepochr   r   r   r   r   r   	batch_numlogginginfo)selfr   r"   r#   r    r!   
rank_splitr$   r%   r   r   r   r   r   r   r   __init__    s4   


z EspnetStyleBatchSampler.__init__c                    s   j r t }| j t j tjt j	|d
 }n	ttt j	}t| fddd}g }g }d}|D ]K} jdkrEd}n jdkr[ j	|t j	|d	  }n j	|}t||t|d  }	|	 jkr||| t||}q;|| |g}|}q;|r jrt||  jkr||  j rt j t | ttt| j }
|
 j }|t| }|tj||d
7 } j|
 }||
 }|| j | }t| _td j d j d||  dt|  t|S )N)	generatorc                    s    j | S N)r   get_source_len)idxr0   r   r   <lambda>[   s    z2EspnetStyleBatchSampler.__iter__.<locals>.<lambda>)keyr   exampler   r   g333333?)kzrank: z, dataloader start from step: z, batch_num: z, batch_num_after_step: ) r$   torch	Generatormanual_seedr,   randomseedrandpermr&   r   tolistlistrangesortedr#   r5   r(   get_target_lenmaxr"   appendr%   r)   r*   r!   choicesr    r   r-   r.   r/   iter)r0   gindicessorted_indicesbuffer_batchesbatchmax_len_in_batchr6   sample_lengthpotential_batch_lengthbatches_per_ranktotal_batches_neededextra_batches	start_idxend_idxrank_batchesr   r7   r   __iter__Q   sT   









&z EspnetStyleBatchSampler.__iter__c                 C   s   | j S r4   )r-   r7   r   r   r   __len__   s   zEspnetStyleBatchSampler.__len__c                 C   s
   || _ d S r4   )r,   )r0   r,   r   r   r   	set_epoch   s   
z!EspnetStyleBatchSampler.set_epoch)	r   NNFTFTr   r   )	__name__
__module____qualname__boolr(   r2   rY   rZ   r[   r   r   r   r   r      s(    

1N)r<   numpynpr.   r)   torch.distributeddistributedr   torch.utils.datar   r   r   r?   funasr.registerr   registerr   r   r   r   r   r   r   <module>   s"    

