o
    `Û·ia  ã                   @   sV   d dl Zd dlZd dlmZ d dlmZ d dlm	Z	m
Z
mZmZ eG dd„ dƒƒZdS )é    N)ÚModelV2)ÚOldAPIStack)ÚListÚModelConfigDictÚ
TensorTypeÚUnionc                
   @   sÌ   e Zd ZdZdee defdd„Zdefdd„Zdefd	d
„Z	defdd„Z
dedefdd„Zdd defdd„Zdefdd„Zdd defdd„Zdefdd„Zeedejdedeeejf fdd„ƒƒZdS )ÚActionDistributionz¹The policy action distribution of an agent.

    Attributes:
        inputs: input vector to compute samples from.
        model (ModelV2): reference to model producing the inputs.
    ÚinputsÚmodelc                 C   s   || _ || _dS )a™  Initializes an ActionDist object.

        Args:
            inputs: input vector to compute samples from.
            model (ModelV2): reference to model producing the inputs. This
                is mainly useful if you want to use model variables to compute
                action outputs (i.e., for autoregressive action distributions,
                see examples/autoregressive_action_dist.py).
        N)r	   r
   )Úselfr	   r
   © r   úR/home/ubuntu/vllm_env/lib/python3.10/site-packages/ray/rllib/models/action_dist.pyÚ__init__   s   

zActionDistribution.__init__Úreturnc                 C   ó   t ‚)z+Draw a sample from the action distribution.©ÚNotImplementedError©r   r   r   r   Úsample   ó   zActionDistribution.samplec                 C   r   )z¿
        Get the deterministic "sampling" output from the distribution.
        This is usually the max likelihood output, i.e. mean for Normal, argmax
        for Categorical, etc..
        r   r   r   r   r   Údeterministic_sample#   s   z'ActionDistribution.deterministic_samplec                 C   r   )z7Returns the log probability of the last sampled action.r   r   r   r   r   Úsampled_action_logp+   r   z&ActionDistribution.sampled_action_logpÚxc                 C   r   )z.The log-likelihood of the action distribution.r   )r   r   r   r   r   Úlogp/   r   zActionDistribution.logpÚotherc                 C   r   )z3The KL-divergence between two action distributions.r   ©r   r   r   r   r   Úkl3   r   zActionDistribution.klc                 C   r   )z'The entropy of the action distribution.r   r   r   r   r   Úentropy7   r   zActionDistribution.entropyc                 C   s
   |   |¡S )z¸The KL-divergence between two action distributions.

        This differs from kl() in that it can return an array for
        MultiDiscrete. TODO(ekl) consider removing this.
        )r   r   r   r   r   Úmulti_kl;   s   
zActionDistribution.multi_klc                 C   s   |   ¡ S )z±The entropy of the action distribution.

        This differs from entropy() in that it can return an array for
        MultiDiscrete. TODO(ekl) consider removing this.
        )r   r   r   r   r   Úmulti_entropyC   s   z ActionDistribution.multi_entropyÚaction_spaceÚmodel_configc                 C   r   )a|  Returns the required shape of an input parameter tensor for a
        particular action space and an optional dict of distribution-specific
        options.

        Args:
            action_space (gym.Space): The action space this distribution will
                be used for, whose shape attributes will be used to determine
                the required shape of the input parameter tensor.
            model_config: Model's config dict (as defined in catalog.py)

        Returns:
            model_output_shape (int or np.ndarray of ints): size of the
                required input vector (minus leading batch dimension).
        r   )r    r!   r   r   r   Úrequired_model_output_shapeK   s   z.ActionDistribution.required_model_output_shapeN)Ú__name__Ú
__module__Ú__qualname__Ú__doc__r   r   r   r   r   r   r   r   r   r   r   r   Ústaticmethodr   ÚgymÚSpacer   r   ÚintÚnpÚndarrayr"   r   r   r   r   r   	   s(    ÿÿþr   )Ú	gymnasiumr(   Únumpyr+   Úray.rllib.models.modelv2r   Úray.rllib.utils.annotationsr   Úray.rllib.utils.typingr   r   r   r   r   r   r   r   r   Ú<module>   s    