o
    `۷i                     @   sp   d dl mZ d dlmZmZ d dlmZ d dlmZm	Z	 d dl
mZ e \ZZZe \ZZeG dd dZdS )	    )Optional)try_import_tftry_import_torch)PiecewiseSchedule)LearningRateOrSchedule
TensorType)DeveloperAPIc                	   @   s   e Zd ZdZddddededee fdd	Zeded
ededdfddZ	de
fddZdedefddZdede
fddZdS )	SchedulerzClass to manage a scheduled (framework-dependent) tensor variable.

    Uses the PiecewiseSchedule (for maximum configuration flexibility)
    torchN)	frameworkdevicefixed_value_or_scheduler   r   c                C   s^   || _ || _t|ttf| _| jr*t||d d dd| _| j|d d d| _	dS || _	dS )a  Initializes a Scheduler instance.

        Args:
            fixed_value_or_schedule: A fixed, constant value (in case no schedule should
                be used) or a schedule configuration in the format of
                [[timestep, value], [timestep, value], ...]
                Intermediary timesteps will be assigned to linerarly interpolated
                values. A schedule config's first entry must
                start with timestep 0, i.e.: [[0, initial_value], [...]].
            framework: The framework string, for which to create the tensor variable
                that hold the current value. This is the variable that can be used in
                the graph, e.g. in a loss function.
            device: Optional device (for torch) to place the tensor variable on.
        N)outside_valuer   r      )initial_value)
r   r   
isinstancelisttupleuse_scheduler   	_schedule_create_tensor_variable_curr_value)selfr   r   r    r   Y/home/ubuntu/vllm_env/lib/python3.10/site-packages/ray/rllib/utils/schedules/scheduler.py__init__   s   


zScheduler.__init__setting_namedescriptionreturnc                 C   s   t | ttfs| du rdS t | ttfrt| dk r(td| d|  d| d| d d dkrLtd| d	| d
| d d  d| d| d d  dtdd | D r`td| d| ddS )a  Performs checking of a certain schedule configuration.

        The first entry in `value_or_schedule` (if it's not a fixed value) must have a
        timestep of 0.

        Args:
            fixed_value_or_schedule: A fixed, constant value (in case no schedule should
                be used) or a schedule configuration in the format of
                [[timestep, value], [timestep, value], ...]
                Intermediary timesteps will be assigned to linerarly interpolated
                values. A schedule config's first entry must
                start with timestep 0, i.e.: [[0, initial_value], [...]].
            setting_name: The property name of the schedule setting (within a config),
                e.g. `lr` or `entropy_coeff`.
            description: A full text description of the property that's being scheduled,
                e.g. `learning rate`.

        Raises:
            ValueError: In case, errors are found in the schedule's format.
        N   z	Invalid `z` (zP) specified! Must be a list of 2 or more tuples, each of the form (`timestep`, `zG to reach`), for example `[(0, 0.001), (1e6, 0.0001), (2e6, 0.00005)]`.r   zWhen providing a `zW` schedule, the first timestep must be 0 and the corresponding lr value is the initial z! You provided ts= =r   .c                 s   s    | ]	}t |d kV  qdS )r    N)len).0pairr   r   r   	<genexpr>o   s    z%Scheduler.validate.<locals>.<genexpr>z_` schedule, each tuple in the schedule list must have exctly 2 items of the form (`timestep`, `)r   intfloatr   r   r$   
ValueErrorany)r   r   r   r   r   r   validate=   s<   

zScheduler.validatec                 C   s   | j S )a  Returns the current value (as a tensor variable).

        This method should be used in loss functions of other (in-graph) places
        where the current value is needed.

        Returns:
            The tensor variable (holding the current value to be used).
        )r   )r   r   r   r   get_current_valuew   s   	zScheduler.get_current_valuetimestepc                 C   sJ   | j r | jj|d}| jdkrt|| j_|S | j| |S | j}|S )af  Updates the underlying (framework specific) tensor variable.

        In case of a fixed value, this method does nothing and only returns the fixed
        value as-is.

        Args:
            timestep: The current timestep that the update might depend on.

        Returns:
            The current value of the tensor variable as a python float.
        )tr
   )	r   r   valuer   r
   tensorr   dataassign)r   r.   python_valuer   r   r   update   s   
zScheduler.updater   c                 C   s2   | j dkrtj|dtj| jdS tj|dtjdS )zCreates a framework-specific tensor variable to be scheduled.

        Args:
            initial_value: The initial (float) value for the variable to hold.

        Returns:
            The created framework-specific tensor variable.
        r
   F)requires_graddtyper   )	trainabler7   )r   r
   r1   float32r   tfVariable)r   r   r   r   r   r      s   
	z!Scheduler._create_tensor_variable)__name__
__module____qualname____doc__r   strr   r   staticmethodr,   r   r-   r(   r)   r5   r   r   r   r   r   r	      s0    	
*9r	   N)typingr   ray.rllib.utils.frameworkr   r   ,ray.rllib.utils.schedules.piecewise_scheduler   ray.rllib.utils.typingr   r   ray.util.annotationsr   _r:   r
   r	   r   r   r   r   <module>   s    
