o
    ci2                     @   s   d dl Z d dlZd dlZd dlmZmZ d dlm	Z	 d dl
mZ d dlmZmZ d dlmZ d dlmZ d dlmZmZmZmZ d d	lmZ e \ZZZe \ZZeG d
d deZdS )    N)OptionalUnion)ActionDistribution)ModelV2)OldAPIStackoverride)Exploration)Random)get_variabletry_import_tftry_import_torch
TensorType)zero_logps_from_actionsc                	       s   e Zd ZdZdddejjdedede	f fdd	Z
eed
dddedeee	ef  defddZdd Zdedeee	f deeef fddZ  ZS )StochasticSamplinga+  An exploration that simply samples from a distribution.

    The sampling can be made deterministic by passing explore=False into
    the call to `get_exploration_action`.
    Also allows for scheduled parameters for the distributions, such as
    lowering stddev, temperature, etc.. over time.
    r   )random_timestepsaction_space	frameworkmodelr   c                   sl   |dusJ t  j|f||d| || _t|f| j| jd|| _tt	dtj
| jdtj
d| _dS )a  Initializes a StochasticSampling Exploration object.

        Args:
            action_space: The gym action space used by the environment.
            framework: One of None, "tf", "torch".
            model: The ModelV2 used by the owning Policy.
            random_timesteps: The number of timesteps for which to act
                completely randomly. Only after this number of timesteps,
                actual samples will be drawn to get exploration actions.
        N)r   r   r   timestep)r   tf_namedtype)super__init__r   r	   r   r   random_explorationr
   nparrayint64last_timestep)selfr   r   r   r   kwargs	__class__ c/home/ubuntu/.local/lib/python3.10/site-packages/ray/rllib/utils/exploration/stochastic_sampling.pyr   !   s    zStochasticSampling.__init__NT)r   exploreaction_distributionr   r$   c                C   s&   | j dkr| |||S | |||S )Ntorch)r   _get_torch_exploration_action_get_tf_exploration_action_op)r   r%   r   r$   r"   r"   r#   get_exploration_actionF   s   
z)StochasticSampling.get_exploration_actionc                    s$  j d }tjt|jk  fdd fddd  tjt|tr,t|n|fddfddd}tjtj	
|t|jk fd	dttd}jd
krdj d ||fS |d u rotj dntj |}t|g ||fW  d    S 1 sw   Y  d S )N   c                      s   j j ddd S )NTr$   r   )r   get_tf_exploration_action_opr"   )action_distr   r"   r#   <lambda>\   s
   zBStochasticSampling._get_tf_exploration_action_op.<locals>.<lambda>c                            S N)sampler"   r-   r"   r#   r.   a       )predtrue_fnfalse_fnc                          S r0   r"   r"   )stochastic_actionsr"   r#   r.   g       c                      r7   r0   r"   r"   )deterministic_actionsr"   r#   r.   h   r9   )r5   r6   c                      r/   r0   )sampled_action_logpr"   r2   r"   r#   r.   o   r3   tf2)r   tfcondconvert_to_tensorr   deterministic_sample
isinstanceboolconstantmathlogical_and	functoolspartialr   r   
assign_addtf1assigncontrol_dependencies)r   r-   r   r$   tsactionlogp	assign_opr"   )r-   r:   r   r8   r#   r(   W   s:   

	




	$z0StochasticSampling._get_tf_exploration_action_opr-   c                 C   sz   |d ur|n| j d | _ |r.| j | jk r"| jj|dd\}}||fS | }| }||fS | }t| }||fS )Nr*   Tr+   )	r   r   r   get_torch_exploration_actionr1   r;   r@   r&   
zeros_like)r   r-   r   r$   rM   rN   r"   r"   r#   r'      s   
z0StochasticSampling._get_torch_exploration_action)__name__
__module____qualname____doc__gymspacesSpacestrr   intr   r   r   r   r   r   r   rB   r)   r(   r'   __classcell__r"   r"   r    r#   r      s<    %)

r   ) rF   	gymnasiumrV   numpyr   typingr   r   ray.rllib.models.action_distr   ray.rllib.models.modelv2r   ray.rllib.utils.annotationsr   r   'ray.rllib.utils.exploration.explorationr   "ray.rllib.utils.exploration.randomr	   ray.rllib.utils.frameworkr
   r   r   r   ray.rllib.utils.tf_utilsr   rI   r=   tfvr&   _r   r"   r"   r"   r#   <module>   s    
