o
    ci!                     @   sb   d Z ddlmZ ddlZddlZddlmZ ddlm	Z	m
Z
 ddlmZ eG dd dejZdS )	z;This is the next version of action distribution base class.    )TupleN)ExperimentalAPI)
TensorTypeUnion)overridec                   @   s   e Zd ZdZejddddeedf dede	e
ee
e
f f fd	d
Zejddddeedf dede	e
ee
e
f f fddZejde
de
fddZejdd de
fddZejde
fddZeejdejdefddZede
dd fddZe		 		 d ddZd!ddZdS )"DistributionaE  The base class for distribution over a random variable.

    Examples:

    .. testcode::

        import torch
        from ray.rllib.core.models.configs import MLPHeadConfig
        from ray.rllib.models.torch.torch_distributions import TorchCategorical

        model = MLPHeadConfig(input_dims=[1]).build(framework="torch")

        # Create an action distribution from model logits
        action_logits = model(torch.Tensor([[1]]))
        action_dist = TorchCategorical.from_logits(action_logits)
        action = action_dist.sample()

        # Create another distribution from a dummy Tensor
        action_dist2 = TorchCategorical.from_logits(torch.Tensor([0]))

        # Compute some common metrics
        logp = action_dist.logp(action)
        kl = action_dist.kl(action_dist2)
        entropy = action_dist.entropy()
    NF)sample_shapereturn_logpr   .r	   returnc                K      dS )a  Draw a sample from the distribution.

        Args:
            sample_shape: The shape of the sample to draw.
            return_logp: Whether to return the logp of the sampled values.
            **kwargs: Forward compatibility placeholder.

        Returns:
            The sampled values. If return_logp is True, returns a tuple of the
            sampled values and its logp.
        N selfr   r	   kwargsr   r   R/home/ubuntu/.local/lib/python3.10/site-packages/ray/rllib/models/distributions.pysample'       zDistribution.samplec                K   r   )a  Draw a re-parameterized sample from the action distribution.

        If this method is implemented, we can take gradients of samples w.r.t. the
        distribution parameters.

        Args:
            sample_shape: The shape of the sample to draw.
            return_logp: Whether to return the logp of the sampled values.
            **kwargs: Forward compatibility placeholder.

        Returns:
            The sampled values. If return_logp is True, returns a tuple of the
            sampled values and its logp.
        Nr   r   r   r   r   rsample;   r   zDistribution.rsamplevaluec                 K   r   )a	  The log-likelihood of the distribution computed at `value`

        Args:
            value: The value to compute the log-likelihood at.
            **kwargs: Forward compatibility placeholder.

        Returns:
            The log-likelihood of the value.
        Nr   )r   r   r   r   r   r   logpR   r   zDistribution.logpotherc                 K   r   )zThe KL-divergence between two distributions.

        Args:
            other: The other distribution.
            **kwargs: Forward compatibility placeholder.

        Returns:
            The KL-divergence between the two distributions.
        Nr   )r   r   r   r   r   r   kl^   r   zDistribution.klc                 K   r   )zThe entropy of the distribution.

        Args:
            **kwargs: Forward compatibility placeholder.

        Returns:
            The entropy of the distribution.
        Nr   )r   r   r   r   r   entropyj   r   zDistribution.entropyspacec                 K   r   )a  Returns the required length of an input parameter tensor.

        Args:
            space: The space this distribution will be used for,
                whose shape attributes will be used to determine the required shape of
                the input parameter tensor.
            **kwargs: Forward compatibility placeholder.

        Returns:
            size of the required input vector (minus leading batch dimension).
        Nr   )r   r   r   r   r   required_input_dimu   r   zDistribution.required_input_dimlogitsc                 K   s   t )a  Creates a Distribution from logits.

        The caller does not need to have knowledge of the distribution class in order
        to create it and sample from it. The passed batched logits vectors might be
        split up and are passed to the distribution class' constructor as kwargs.

        Args:
            logits: The logits to create the distribution from.
            **kwargs: Forward compatibility placeholder.

        Returns:
            The created distribution.

        .. testcode::

            import numpy as np
            from ray.rllib.models.distributions import Distribution

            class Uniform(Distribution):
                def __init__(self, lower, upper):
                    self.lower = lower
                    self.upper = upper

                def sample(self):
                    return self.lower + (self.upper - self.lower) * np.random.rand()

                def logp(self, x):
                    ...

                def kl(self, other):
                    ...

                def entropy(self):
                    ...

                @staticmethod
                def required_input_dim(space):
                    ...

                def rsample(self):
                    ...

                @classmethod
                def from_logits(cls, logits, **kwargs):
                    return Uniform(logits[:, 0], logits[:, 1])

            logits = np.array([[0.0, 1.0], [2.0, 3.0]])
            my_dist = Uniform.from_logits(logits)
            sample = my_dist.sample()
        )NotImplementedError)clsr   r   r   r   r   from_logits   s   4zDistribution.from_logits
parent_clsc                    s&   G  fddd }  d|_ |S )zReturns a partial child of TorchMultiActionDistribution.

        This is useful if inputs needed to instantiate the Distribution from logits
        are available, but the logits are not.
        c                       sr   e Zd Z fddZefddZeedej	de
ffddZeed	edd
ffddZ  ZS )z>Distribution.get_partial_dist_cls.<locals>.DistributionPartialc                    s   t  j|i | d S )N)super__init__)r   argsr   	__class__r   r   r!      s   zGDistribution.get_partial_dist_cls.<locals>.DistributionPartial.__init__c                     s4   t | t  @ }|rtd| di  | }|S )z9Checks if keys in kwargs don't clash with partial_kwargs.z&Cannot override the following kwargs: zS.
This is because they were already set at the time this partial class was defined.)set
ValueError)r   overlapmerged_kwargs)partial_kwargsr   r   _merge_kwargs   s   
zLDistribution.get_partial_dist_cls.<locals>.DistributionPartial._merge_kwargsr   r
   c                    s0   | j di |}||d ksJ  jdi |S )Nr   r   )r*   r   )r   r   r   r(   r   r   r   r      s   zQDistribution.get_partial_dist_cls.<locals>.DistributionPartial.required_input_dimr   DistributionPartialc                    s,   | j di |} j|fi |}| |_|S )Nr   )r*   r   r$   )r   r   r   r(   distributionr+   r   r   r      s   zJDistribution.get_partial_dist_cls.<locals>.DistributionPartial.from_logits)__name__
__module____qualname__r!   staticmethodr*   classmethodr   gymSpaceintr   r   r   __classcell__r   r   r)   r#   r   r,      s    r,   Partial)r.   )r   r)   r,   r   r7   r   get_partial_dist_cls   s   
(z!Distribution.get_partial_dist_clsc                 C   s   | S )a^  Returns a deterministic equivalent for this distribution.

        Specifically, the deterministic equivalent for a Categorical distribution is a
        Deterministic distribution that selects the action with maximum logit value.
        Generally, the choice of the deterministic replacement is informed by
        established conventions.
        r   )r   r   r   r   to_deterministic   s   zDistribution.to_deterministic)r   r   r
   r   )r
   r   )r.   r/   r0   __doc__abcabstractmethodr   r5   boolr   r   r   r   r   r   r   r1   r3   r4   r   r2   r   r9   r:   r   r   r   r   r      sP    


55r   )r;   typingr   	gymnasiumr3   r<   ray.rllib.utils.annotationsr   ray.rllib.utils.typingr   r   r   ABCr   r   r   r   r   <module>   s    