import math
from typing import Union

import torch
from torch.optim.lr_scheduler import LambdaLR

from torchtune.training.memory import OptimizerInBackwardWrapper


def get_cosine_schedule_with_warmup(
    optimizer: torch.optim.Optimizer,
    num_warmup_steps: int,
    num_training_steps: int,
    num_cycles: float = 0.5,
    last_epoch: int = -1,
) -> LambdaLR:
    """
    Create a learning rate schedule that linearly increases the learning rate from
    0.0 to lr over ``num_warmup_steps``, then decreases to 0.0 on a cosine schedule over
    the remaining ``num_training_steps-num_warmup_steps`` (assuming ``num_cycles`` = 0.5).

    This is based on the Hugging Face implementation
    https://github.com/huggingface/transformers/blob/v4.23.1/src/transformers/optimization.py#L104.

    Args:
        optimizer (torch.optim.Optimizer): The optimizer for which to
            schedule the learning rate.
        num_warmup_steps (int): The number of steps for the warmup phase.
        num_training_steps (int): The total number of training steps.
        num_cycles (float): The number of waves in the cosine schedule. Defaults to 0.5
            (decrease from the max value to 0 following a half-cosine).
        last_epoch (int): The index of the last epoch when resuming training. Defaults to -1

    Returns:
        torch.optim.lr_scheduler.LambdaLR with the appropriate schedule.
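
    Example:
        >>> # Minimal usage sketch: warm up for 100 steps, then follow a
        >>> # half-cosine decay over the remaining 900 steps.
        >>> import torch
        >>> model = torch.nn.Linear(2, 2)
        >>> optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3)
        >>> scheduler = get_cosine_schedule_with_warmup(
        ...     optimizer, num_warmup_steps=100, num_training_steps=1000
        ... )
        >>> for _ in range(1000):
        ...     optimizer.step()
        ...     scheduler.step()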
    """

    def lr_lambda(current_step: int) -> float:
        # Linear warmup: scale the LR linearly from 0 up to its base value.
        if current_step < num_warmup_steps:
            return current_step / max(1, num_warmup_steps)

        # Cosine decay from the base LR towards 0 over the remaining steps.
        progress = (current_step - num_warmup_steps) / max(
            1, num_training_steps - num_warmup_steps
        )
        cosine_lr_multiple = 0.5 * (
            1.0 + math.cos(math.pi * num_cycles * 2.0 * progress)
        )
        return max(0.0, cosine_lr_multiple)

    return LambdaLR(optimizer, lr_lambda, last_epoch)


def get_lr(
    optimizer: Union[torch.optim.Optimizer, OptimizerInBackwardWrapper]
) -> float:
    """
    Full_finetune_distributed and full_finetune_single_device assume all optimizers have
    the same LR. This helper validates that every param group uses the same LR and returns it.

    Args:
        optimizer (Union[torch.optim.Optimizer, OptimizerInBackwardWrapper]): The input,
            which may either be a standard optimizer or an optimizer wrapper based on
            optimizer_in_backward.

    Returns:
        lr (float): The learning rate of the input optimizers.

    Raises:
        RuntimeError: If the learning rates of the input optimizer are not the same.
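
    Example:
        >>> # Minimal sketch: a single optimizer whose param groups all share lr=0.01.
        >>> import torch
        >>> model = torch.nn.Linear(2, 2)
        >>> optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
        >>> get_lr(optimizer)
        0.01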
    """
    if isinstance(optimizer, OptimizerInBackwardWrapper):
        # Each wrapped optimizer holds a single param group; gather them all.
        param_groups = []
        for param in optimizer.state_dict().values():
            param_groups.append(param["param_groups"][0])
    else:
        param_groups = optimizer.param_groups

    if len(param_groups) < 1:
        raise RuntimeError(
            f"Invalid optimizer param groups with len of: {len(param_groups)}"
        )

    # LR schedulers are assumed to be identical across all param groups.
    lr = param_groups[0]["lr"]
    for group in param_groups:
        if group["lr"] != lr:
            raise RuntimeError("LR Schedulers are different across all param groups ")
    return lr