o
    $ia                     @   sV   d dl Zd dlZd dlmZ d dlmZ d dlm	Z	m
Z
mZmZ eG dd dZdS )    N)ModelV2)OldAPIStack)ListModelConfigDict
TensorTypeUnionc                
   @   s   e Zd ZdZdee defddZdefddZdefd	d
Z	defddZ
dedefddZdd defddZdefddZdd defddZdefddZeedejdedeeejf fddZdS )ActionDistributionzThe policy action distribution of an agent.

    Attributes:
        inputs: input vector to compute samples from.
        model (ModelV2): reference to model producing the inputs.
    inputsmodelc                 C   s   || _ || _dS )a  Initializes an ActionDist object.

        Args:
            inputs: input vector to compute samples from.
            model (ModelV2): reference to model producing the inputs. This
                is mainly useful if you want to use model variables to compute
                action outputs (i.e., for autoregressive action distributions,
                see examples/autoregressive_action_dist.py).
        N)r	   r
   )selfr	   r
    r   Y/home/ubuntu/veenaModal/venv/lib/python3.10/site-packages/ray/rllib/models/action_dist.py__init__   s   

zActionDistribution.__init__returnc                 C      t )z+Draw a sample from the action distribution.NotImplementedErrorr   r   r   r   sample      zActionDistribution.samplec                 C   r   )z
        Get the deterministic "sampling" output from the distribution.
        This is usually the max likelihood output, i.e. mean for Normal, argmax
        for Categorical, etc..
        r   r   r   r   r   deterministic_sample#   s   z'ActionDistribution.deterministic_samplec                 C   r   )z7Returns the log probability of the last sampled action.r   r   r   r   r   sampled_action_logp+   r   z&ActionDistribution.sampled_action_logpxc                 C   r   )z.The log-likelihood of the action distribution.r   )r   r   r   r   r   logp/   r   zActionDistribution.logpotherc                 C   r   )z3The KL-divergence between two action distributions.r   r   r   r   r   r   kl3   r   zActionDistribution.klc                 C   r   )z'The entropy of the action distribution.r   r   r   r   r   entropy7   r   zActionDistribution.entropyc                 C   s
   |  |S )zThe KL-divergence between two action distributions.

        This differs from kl() in that it can return an array for
        MultiDiscrete. TODO(ekl) consider removing this.
        )r   r   r   r   r   multi_kl;   s   
zActionDistribution.multi_klc                 C   s   |   S )zThe entropy of the action distribution.

        This differs from entropy() in that it can return an array for
        MultiDiscrete. TODO(ekl) consider removing this.
        )r   r   r   r   r   multi_entropyC   s   z ActionDistribution.multi_entropyaction_spacemodel_configc                 C   r   )a|  Returns the required shape of an input parameter tensor for a
        particular action space and an optional dict of distribution-specific
        options.

        Args:
            action_space (gym.Space): The action space this distribution will
                be used for, whose shape attributes will be used to determine
                the required shape of the input parameter tensor.
            model_config: Model's config dict (as defined in catalog.py)

        Returns:
            model_output_shape (int or np.ndarray of ints): size of the
                required input vector (minus leading batch dimension).
        r   )r    r!   r   r   r   required_model_output_shapeK   s   z.ActionDistribution.required_model_output_shapeN)__name__
__module____qualname____doc__r   r   r   r   r   r   r   r   r   r   r   r   staticmethodr   gymSpacer   r   intnpndarrayr"   r   r   r   r   r   	   s(    r   )	gymnasiumr(   numpyr+   ray.rllib.models.modelv2r   ray.rllib.utils.annotationsr   ray.rllib.utils.typingr   r   r   r   r   r   r   r   r   <module>   s    