o
    TiI                     @   s(   d dl Zd dlmZ G dd deZdS )    N)log_distc                       s:   e Zd ZdZd fdd	Zdd Zdd	 Zd
d Z  ZS )ProgressiveLayerDropa   Progressive Layer Dropping (PLD) for model training.
        This implements the PLD technique for compressed model training
        from this paper: https://arxiv.org/pdf/2010.13369.pdf
    Args:
        theta (float): a hyper-parameter that controls the trade-off between training time and robustness.
        The lower the theta value, the faster the training speed. Default value: 0.5.
        gamma (float): a hyper-parameter that controls how fast the drop ratio increases. Default value: 0.001.
          ?MbP?c                    s8   t    || _|| _d| _td| j ddgd d S )N      ?z,Enabled progressive layer dropping (theta = )r   )ranks)super__init__thetagammacurrent_thetar   )selfr   r   	__class__ \/home/ubuntu/.local/lib/python3.10/site-packages/deepspeed/runtime/progressive_layer_drop.pyr
      s
   
zProgressiveLayerDrop.__init__c                 C   s   d|   d}|S )NT)progressive_layer_drop	pld_theta)	get_theta)r   kwargsr   r   r   	get_state   s   zProgressiveLayerDrop.get_statec                 C   s   | j S )N)r   )r   r   r   r   r       s   zProgressiveLayerDrop.get_thetac                 C   s   dd }||| j | j| _d S )Nc                 S   s   d| t | |   | S )Nr   )npexp)xr   pr   r   r   _prob%   s   z0ProgressiveLayerDrop.update_state.<locals>._prob)r   r   r   )r   global_stepr   r   r   r   update_state#   s   z!ProgressiveLayerDrop.update_state)r   r   )	__name__
__module____qualname____doc__r
   r   r   r   __classcell__r   r   r   r   r   
   s    	r   )numpyr   deepspeed.utilsr   objectr   r   r   r   r   <module>   s   