o
    ciB                     @   s   d dl mZmZmZ d dlmZmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZmZ d dlmZ d dlmZ eG d	d
 d
eZdS )    )DiscreteMultiDiscreteSpace)UnionOptional)ActionDistribution)Categorical)TorchCategorical)OldAPIStackoverride)StochasticSampling)
TensorTypec                       sf   e Zd ZdZdddedee def fddZe	e
		dd
edeeef def fddZ  ZS )SoftQzSpecial case of StochasticSampling w/ Categorical and temperature param.

    Returns a stochastic sample from a Categorical parameterized by the model
    output divided by the temperature. Returns the argmax iff explore=False.
    g      ?temperatureaction_space	frameworkr   c                   s4   t |ttfs	J t j|fd|i| || _dS )aK  Initializes a SoftQ Exploration object.

        Args:
            action_space: The gym action space used by the environment.
            temperature: The temperature to divide model outputs by
                before creating the Categorical distribution to sample from.
            framework: One of None, "tf", "torch".
        r   N)
isinstancer   r   super__init__r   )selfr   r   r   kwargs	__class__ V/home/ubuntu/.local/lib/python3.10/site-packages/ray/rllib/utils/exploration/soft_q.pyr      s   
zSoftQ.__init__Taction_distributiontimestepexplorec                    s@   t |}t|ttfsJ ||j| j| jd}t j|||dS )Nr   )r   r   r   )	type
issubclassr   r	   inputsmodelr   r   get_exploration_action)r   r   r   r   clsdistr   r   r   r#   (   s   zSoftQ.get_exploration_action)T)__name__
__module____qualname____doc__r   r   strfloatr   r   r   r   r   intr   boolr#   __classcell__r   r   r   r   r      s&    
r   N)gymnasium.spacesr   r   r   typingr   r   ray.rllib.models.action_distr   "ray.rllib.models.tf.tf_action_distr   (ray.rllib.models.torch.torch_action_distr	   ray.rllib.utils.annotationsr
   r   /ray.rllib.utils.exploration.stochastic_samplingr   ray.rllib.utils.frameworkr   r   r   r   r   r   <module>   s    