o
    ci#                     @   s   d dl mZ d dlZd dlmZmZ d dlmZ d dl	m
Z
 d dlmZmZ d dlmZ d dlmZ d d	lmZmZmZmZ d d
lmZ d dlmZ d dlmZ d dlmZ e \ZZ Z!e \Z"Z#eG dd deZ$dS )    )SpaceN)UnionOptional)ActionDistribution)ModelV2)OldAPIStackoverride)Exploration)Random)try_import_tftry_import_torchget_variable
TensorType)convert_to_numpy)Schedule)PiecewiseSchedule)zero_logps_from_actionsc                       s  e Zd ZdZdddddddd	ed
ededededededede	e
 f fddZeedddedeeef defddZdededeeef fddZdededeeef fddZeed(d e	d! fd"d#Zeed(d$ed e	d! d%dfd&d'Z  ZS ))GaussianNoisea  An exploration that adds white noise to continuous actions.

    If explore=True, returns actions plus scale (annealed over time) x
    Gaussian noise. Also, some completely random period is possible at the
    beginning.

    If explore=False, returns the deterministic action.
    i  g?g      ?g{Gz?i'  N)random_timestepsstddevinitial_scalefinal_scalescale_timestepsscale_scheduleaction_space	frameworkmodelr   r   r   r   r   r   c                   s   |dusJ t  j|f||d|
 || _t|f| j| jd|
| _|| _|	p8t||f|| |fg|| jd| _	t
tdtj| jdtjd| _| jdkrV|  | _dS dS )a  Initializes a GaussianNoise instance.

        Args:
            random_timesteps: The number of timesteps for which to act
                completely randomly. Only after this number of timesteps, the
                `self.scale` annealing process will start (see below).
            stddev: The stddev (sigma) to use for the
                Gaussian noise to be added to the actions.
            initial_scale: The initial scaling weight to multiply
                the noise with.
            final_scale: The final scaling weight to multiply
                the noise with.
            scale_timesteps: The timesteps over which to linearly anneal
                the scaling factor (after(!) having used random actions for
                `random_timesteps` steps).
            scale_schedule: An optional Schedule object
                to use (instead of constructing one from the given parameters).
        N)r   r   )	endpointsoutside_valuer   r   timestep)r   tf_namedtypetf)super__init__r   r
   r   r   random_explorationr   r   r   r   nparrayint64last_timestep	get_state_tf_state_op)selfr   r   r   r   r   r   r   r   r   kwargs	__class__ ^/home/ubuntu/.local/lib/python3.10/site-packages/ray/rllib/utils/exploration/gaussian_noise.pyr$   $   s6    


zGaussianNoise.__init__Texploreaction_distributionr   r3   c                C   s&   | j dkr| |||S | |||S )Ntorch)r   _get_torch_exploration_action_get_tf_exploration_action_op)r,   r4   r   r3   r0   r0   r1   get_exploration_actione   s   
	z$GaussianNoise.get_exploration_actionaction_distc           	         s`  |d ur|nj }|  |tjjt jd j	||\}tj
t|jk fdd fdddtj
t|trNtj|tjdn|fdd fddd}t }jd	kr|d u rsj d
 ||fS j t|tj ||fS |d u rtj d
ntj |}t|g ||fW  d    S 1 sw   Y  d S )N)r   c                          S Nr0   r0   )random_actionsr0   r1   <lambda>       z=GaussianNoise._get_tf_exploration_action_op.<locals>.<lambda>c                      s.   t   jjt   jjt   S r;   )r"   clip_by_valuer   low	ones_likehighr0   )deterministic_actionsgaussian_sampler,   r0   r1   r=      s
    )predtrue_fnfalse_fn)r!   c                      r:   r;   r0   r0   )stochastic_actionsr0   r1   r=      r>   c                      r:   r;   r0   r0   )rC   r0   r1   r=      r>   tf2   )r)   deterministic_sampler   r"   randomnormalshaper   r%   get_tf_exploration_action_opcondconvert_to_tensorr   
isinstanceboolconstantr   r   
assign_addassigncastr(   tf1control_dependencies)	r,   r9   r3   r   ts_actionlogp	assign_opr0   )rC   rD   r<   r,   rH   r1   r7   w   sD   



$z+GaussianNoise._get_tf_exploration_action_opc           
   
   C   s   |d ur|n| j d | _ |r]| j | jk r| jj|dd\}}nB| }| | j }|tjt|	 | j
d| j }tt|| tj| jjtj| jdtj| jjtj| jd}n| }tj|	 d ftj| jd}	||	fS )NrJ   Tr2   )meanstd)r!   devicer   )r)   r   r%   get_torch_exploration_actionrK   r   r5   rM   zerossizer   tora   minmaxtensorr   r@   float32rB   )
r,   r9   r3   r   r\   r[   det_actionsscalerD   r]   r0   r0   r1   r6      s<   z+GaussianNoise._get_torch_exploration_actionsessz
tf.Sessionc                 C   sR   |r| | jS | | j}| jdkrt|n|| jdkr$t| jdS | jdS )zReturns the current scale value.

        Returns:
            Union[float,tf.Tensor[float]]: The current scale value.
        r"   )	cur_scaler)   )runr+   r   r)   r   r   )r,   rl   rk   r0   r0   r1   r*      s   
zGaussianNoise.get_statestatereturnc                 C   sP   | j dkr| jj|d |d d S t| jtr|d | _d S | j|d  d S )Nr"   r)   )session)r   r)   loadrR   intrV   )r,   ro   rl   r0   r0   r1   	set_state   s
   
zGaussianNoise.set_stater;   )__name__
__module____qualname____doc__r   strr   rs   floatr   r   r$   r   r	   r   r   r   rS   r8   r7   r6   r*   dictrt   __classcell__r0   r0   r.   r1   r      sl    	
A


9

/&r   )%gymnasium.spacesr   numpyr&   typingr   r   ray.rllib.models.action_distr   ray.rllib.models.modelv2r   ray.rllib.utils.annotationsr   r   'ray.rllib.utils.exploration.explorationr	   "ray.rllib.utils.exploration.randomr
   ray.rllib.utils.frameworkr   r   r   r   ray.rllib.utils.numpyr   ray.rllib.utils.schedulesr   ,ray.rllib.utils.schedules.piecewise_scheduler   ray.rllib.utils.tf_utilsr   rX   r"   tfvr5   r[   r   r0   r0   r0   r1   <module>   s"    
