o
    }oi-                     @   sL   d dl mZ d dlZd dlmZ d dlmZ d dlmZ G dd deZ	dS )    )OptionalN)Dataset) MegatronPretrainingRandomSampler)RandomSeedDatasetc                       sb   e Zd Z			ddededededed	ed
ededee dee ddf fddZdd Z  Z	S )&MegatronVisionPretrainingRandomSamplerTNFdatasettotal_samplesconsumed_samplesmicro_batch_sizedata_parallel_rankdata_parallel_sizedata_sharding	drop_lastglobal_batch_size pad_samples_to_global_batch_sizereturnc              
      s,   t  j|||||||	|
d || _|| _d S )N)r   r	   r
   r   r   r   r   r   )super__init__r   r   )selfr   r   r	   r
   r   r   r   r   r   r   	__class__ g/home/ubuntu/.local/lib/python3.10/site-packages/nemo/collections/vision/data/megatron/data_samplers.pyr      s   

z/MegatronVisionPretrainingRandomSampler.__init__c                 #   s|   | j | j }| j| | _| j| }|| j dksJ t| jtr(| j| j | j	r_| j | j | j
 }|| j }| j|  t }|| j tj||d } fdd||d  D }n-| j | j | j }|}	t }|| j tj||d }
|
|	d  }|| jd | j }g }|D ]}|| t|| j
kr|  j| j7  _|V  g }qt|dkr| js|V  d S d S d S )Nr   )	generatorc                    s   g | ]} | qS r   r   ).0x	start_idxr   r   
<listcomp>D   s    zCMegatronVisionPretrainingRandomSampler.__iter__.<locals>.<listcomp>)r   last_batch_sizer	   epoch$micro_batch_times_data_parallel_size
isinstancer   r   	set_epochr   r
   r   r   torch	Generatormanual_seedrandpermtolistappendlenr   )r   active_total_samplescurrent_epoch_samplesbucket_sizebucket_offsetg
random_idx	idx_rangefull_bucket_sizefull_bucket_offsetidx_range_totalidx_range_activebatchidxr   r   r   __iter__2   sF   





z/MegatronVisionPretrainingRandomSampler.__iter__)TNF)
__name__
__module____qualname__r   intboolr   r   r8   __classcell__r   r   r   r   r      s8    
	
r   )
typingr   r$   torch.utils.datar   Bnemo.collections.nlp.data.language_modeling.megatron.data_samplersr   1nemo.collections.vision.data.megatron.vit_datasetr   r   r   r   r   r   <module>   s   