o
    ߥi5                     @   sx   d Z ddlZddlZddlZddlZddlZddlmZ G dd dej	j
ZG dd dej	j
ZG dd	 d	ej	jZdS )
zGbatch samplers that work with either random or sequential data samplers    N)datac                       sF   e Zd ZdZd fdd	Zedd Zdd	 Zd
d Zdd Z	  Z
S )RandomSamplera5  
    Based off of pytorch RandomSampler and DistributedSampler. Essentially a RandomSampler,
    but this class lets the user set an epoch like DistributedSampler
    Samples elements randomly. If without replacement, then sample from a shuffled dataset.
    If with replacement, then user can specify ``num_samples`` to draw.
    Arguments:
        data_source (Dataset): dataset to sample from
        num_samples (int): number of samples to draw, default=len(dataset)
        replacement (bool): samples are drawn with replacement if ``True``, default=False
    FNc                    s   t t| | || _|| _|| _d| _| jd ur!|du r!tdt| j	t
r,| j	dkr4td| j	t| jtsBtd| jd S )NFzfWith replacement=False, num_samples should not be specified, since a random permute will be performed.r   zFnum_samples should be a positive integer value, but got num_samples={}z=replacement should be a boolean value, but got replacement={})superr   __init__data_sourcereplacement_num_samplesepoch
ValueError
isinstancenum_samplesintformatbool)selfr   r   r   	__class__ b/home/ubuntu/.local/lib/python3.10/site-packages/modelscope/models/nlp/mglm/data_utils/samplers.pyr   $   s(   zRandomSampler.__init__c                 C   s   | j d u r
t| jS | j S N)r	   lenr   r   r   r   r   r   8   s   

zRandomSampler.num_samplesc                 c   s    t | j}t }| jdkr|| j | jrFt| jd D ]}tj	|dtj
|d E d H  qtj	|| jd ftj
|d E d H  d S tj|| jd E d H  d S )Nr       )r   )highsizedtype	generator)r   )r   r   torch	Generatorr
   manual_seedr   ranger   randintint64tolistrandpermr   )r   ng_r   r   r   __iter__?   s,   



zRandomSampler.__iter__c                 C      | j S r   r   r   r   r   r   __len__Q      zRandomSampler.__len__c                 C   s
   || _ d S r   )r
   )r   r
   r   r   r   	set_epochT   s   
zRandomSampler.set_epoch)FN)__name__
__module____qualname____doc__r   propertyr   r)   r,   r.   __classcell__r   r   r   r   r      s    
r   c                       s:   e Zd Z		d fdd	Zdd Zdd Zd	d
 Z  ZS )DistributedSequentialSamplerr      c                    s^   t   |dkrd}d}| _|| _|| _d| _|| _ | _ fddt D | _	d S )Nr   r      c                    s   g | ]}|   qS r   r   ).0i
batch_sizer   r   r   
<listcomp>j   s    z9DistributedSequentialSampler.__init__.<locals>.<listcomp>)
r   r   r   rank
world_size
start_itertrain_itersr;   r!   
batch_bias)r   r   r@   r;   r=   r>   r   r:   r   r   Z   s   z%DistributedSequentialSampler.__init__c                 #   sD    t jjd D ]  fddjD }|}|V  q
d S )N
   c                    s   g | ]	} | j  qS r   r+   )r8   biasidxr   r   r   r<   p   s    z9DistributedSequentialSampler.__iter__.<locals>.<listcomp>)r!   r?   r@   rA   _batch)r   batchtbatchr   rD   r   r)   n   s   
z%DistributedSequentialSampler.__iter__c                 C   r*   r   )r@   r   r   r   r   r,   u   r-   z$DistributedSequentialSampler.__len__c                 C   4   | j | j | j }| j d | j | j }||| S z7extracts samples only pertaining to this worker's batchr7   r=   r;   r>   r   rG   startendr   r   r   rF   x      z#DistributedSequentialSampler._batch)r   r6   )r/   r0   r1   r   r)   r,   rF   r4   r   r   r   r   r5   X   s    r5   c                       sD   e Zd ZdZ				d fdd	Zdd	 Zdd
dZdd Z  ZS )DistributedBatchSamplera  
    similar to normal implementation of distributed sampler, except implementation is at the
    batch sampler level, instead of just the sampler level. This allows wrapping of arbitrary
    data samplers (sequential, random, WeightedRandomSampler, etc.) with this batch sampler.
    r   r6   FNc                    sj   t t| ||| |dkrJ d|| _|| _d| j_d| _|| _d| _|d u r.|| _	d S || | _	d S )Nr   Fzshould not be herer   )
r   rP   r   r=   r>   samplerwrap_around	wrap_lastr?   effective_batch_size)r   rQ   r;   	drop_lastr=   r>   rS   gradient_accumulation_stepsr   r   r   r      s    z DistributedBatchSampler.__init__c                 c   s    g }d}| j | jddD ])}|| t|| jkr6| |}|| j| j kr.|V  d| _|t|7 }g }qt|}|dkre| jse| j	r_| j j
| j8  _
|  j
t|7  _
|  j
| j;  _
| |V  | j	rs| j j
| j7  _
d S d S )Nr   F)rR   )data_iteratorrQ   appendr   r;   rF   r?   rT   rU   rS   rR   )r   rG   r9   rE   rH   	batch_lenr   r   r   r)      s.   

z DistributedBatchSampler.__iter__c                 c   sR    t |D ]!\}}|| j| j k rq|r#|  jd7  _|  j| j;  _|V  qdS )z-iterates through data and handles wrap aroundr7   N)	enumeraterR   r;   )r   _iterrR   r9   rE   r   r   r   rW      s   z%DistributedBatchSampler.data_iteratorc                 C   rI   rJ   rK   rL   r   r   r   rF      rO   zDistributedBatchSampler._batch)r   r6   FN)F)	r/   r0   r1   r2   r   r)   rW   rF   r4   r   r   r   r   rP      s    


rP   )r2   mathossysnumpynpr   torch.utilsr   rQ   Samplerr   r5   BatchSamplerrP   r   r   r   r   <module>   s   @'