import numpy as np
import torch
from torch.utils.data.distributed import DistributedSampler


class DistributedSamplerWrapper(DistributedSampler):
  ZS )DistributedSamplerWrappera  Wrapper over Sampler for distributed training. It allows you to use any sampler in distributed mode.
    It is especially useful in conjunction with torch.nn.parallel.DistributedDataParallel. In such a case, each
    process can pass a torch.utils.data.DistributedSampler instance as a torch.utils.data.DataLoader sampler,
    and load a subset of the original dataset that is exclusive to it.

    .. note::
        Dataset is assumed to be of constant size.

    Args:
        sampler: Sampler used for subsampling.
        num_replicas (int, optional): Number of processes participating in distributed training. By default,
            world_size is retrieved from the current distributed group.
        rank (int, optional): Rank of the current process within num_replicas. By default, rank is retrieved
            from the current distributed group.
        shuffle (bool, optional): If True, sampler will shuffle the indices. Default: True.
        seed (int, optional): random seed used to shuffle the sampler if shuffle=True. This number should be
            identical across all processes in the distributed group. Default: 0.

    Reference: https://github.com/pytorch/pytorch/issues/23430

    """

    def __init__(
        self,
        sampler,
        num_replicas: int = None,
        rank: int = None,
        shuffle: bool = True,
        seed: int = 0,
    ):
        super().__init__(sampler, num_replicas=num_replicas, rank=rank, shuffle=shuffle, seed=seed)

    def __iter__(self):
        indices = list(self.dataset)[: self.total_size]

        # Pad with leading indices so the length is evenly divisible across replicas.
        indices += indices[: (self.total_size - len(indices))]
        assert len(indices) == self.total_size, f"{len(indices)} != {self.total_size}"

        # Subsample the contiguous shard that belongs to this rank.
        offset = self.num_samples * self.rank
        indices = indices[offset : offset + self.num_samples]
        assert len(indices) == self.num_samples, f"{len(indices)} != {self.num_samples}"

        return iter(indices)

    def set_epoch(self, epoch):
        super().set_epoch(epoch)
        if hasattr(self.dataset, "set_epoch"):
            self.dataset.set_epoch(epoch)
        elif hasattr(self.dataset, "generator"):
            self.dataset.generator = torch.Generator().manual_seed(self.seed + epoch)

    def state_dict(self):
        return self.dataset.state_dict()

    def load_state_dict(self, state_dict):
        self.dataset.load_state_dict(state_dict)
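

# Usage sketch (illustrative, not part of this module): wrap a non-distributed
# sampler so each rank draws a distinct shard of its output. The `weights`,
# `dataset`, and `num_epochs` names below are hypothetical placeholders, and an
# initialized process group (torch.distributed.init_process_group) is assumed.
#
#   weighted = torch.utils.data.WeightedRandomSampler(weights, num_samples=len(weights))
#   sampler = DistributedSamplerWrapper(weighted)
#   loader = torch.utils.data.DataLoader(dataset, sampler=sampler, batch_size=32)
#   for epoch in range(num_epochs):
#       sampler.set_epoch(epoch)  # keep per-epoch shuffling deterministic across ranks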


class NoamLR(torch.optim.lr_scheduler._LRScheduler):
    """Noam (Transformer) learning rate schedule: the rate grows linearly for the
    first ``warmup_steps`` updates, then decays with the inverse square root of
    the step number."""

    def __init__(self, optimizer, warmup_steps=0.1, last_epoch=-1):
        self.warmup_steps = float(warmup_steps)
        super().__init__(optimizer, last_epoch)

    def get_lr(self):
        step = max(self.last_epoch, 1)
        return [
            base_lr * self.warmup_steps**0.5 * min(step * self.warmup_steps**-1.5, step**-0.5)
            for base_lr in self.base_lrs
        ]
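

# Usage sketch (illustrative): step the scheduler once per optimizer update, after
# `optimizer.step()`. The `model`, `loader`, and the warmup value of 4000 are
# hypothetical choices, not defaults of this module.
#
#   optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
#   scheduler = NoamLR(optimizer, warmup_steps=4000)
#   for batch in loader:
#       optimizer.step()
#       scheduler.step()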


class StepwiseGradualLR(torch.optim.lr_scheduler._LRScheduler):
    """Hardcoded step-wise learning rate scheduling.

    Necessary for CapacitronVAE."""

    def __init__(self, optimizer, gradual_learning_rates, last_epoch=-1):
        self.gradual_learning_rates = gradual_learning_rates
        super().__init__(optimizer, last_epoch)

    def get_lr(self):
        step = max(self.last_epoch, 1)
        step_thresholds = []
        rates = []
        for values in self.gradual_learning_rates:
            step_thresholds.append(values[0])
            rates.append(values[1])

        # Index of the last threshold the current step has passed.
        boolean_indices = np.less_equal(step_thresholds, step)
        last_true = 0  # fall back to the first rate when no threshold is passed
        try:
            last_true = np.where(boolean_indices)[0][-1]
        except IndexError:
            pass
        lr = rates[max(last_true, 0)]

        # Clamp to the last rate above the final threshold, and to the first rate
        # below the second threshold (the first entry is the initial lr).
        lr = rates[-1] if step > step_thresholds[-1] else lr
        lr = rates[0] if step < step_thresholds[1] else lr
        return np.tile(lr, len(self.base_lrs))