o
    ci1I                     @   s  d Z ddlZddlZddlZddlmZmZm	Z	m
Z
 ddlZddlmZ ddlmZmZ ddlmZmZ ddlmZmZmZ e \ZZZe ZeG dd	 d	eejZeG d
d deZeG dd deZeG dd deZeG dd deZ eG dd deZ!dS )a  The main difference between this and the old ActionDistribution is that this one
has more explicit input args. So that the input format does not have to be guessed from
the code. This matches the design pattern of torch distribution which developers may
already be familiar with.
    N)DictIterableListOptional)Distribution)overrideDeveloperAPI)try_import_tftry_import_tfp)
TensorTypeUnionTuplec                       s   e Zd ZdZ fddZejdddZee	de
de
fd	d
Zee	de
fddZee	ddde
fddZee	dddee
ee
e
f f fddZee	dddee
ee
e
f f fddZ  ZS )TfDistributionz$Wrapper class for tfp.distributions.c                    s    t    | j|i || _d S N)super__init___get_tf_distribution_distselfargskwargs	__class__ X/home/ubuntu/.local/lib/python3.10/site-packages/ray/rllib/models/tf/tf_distributions.pyr      s   
zTfDistribution.__init__returntfp.distributions.Distributionc                 O   s   dS )z9Returns the tfp.distributions.Distribution object to use.Nr   r   r   r   r   r   !   s    z#TfDistribution._get_tf_distributionvaluec                 K   s   | j j|fi |S r   )r   log_probr   r   r   r   r   r   logp%      zTfDistribution.logpc                 C   s
   | j  S r   )r   entropyr   r   r   r   r#   )   s   
zTfDistribution.entropyotherr   c                 C   s   | j |j S r   )r   kl_divergencer   r%   r   r   r   kl-   s   zTfDistribution.klr   sample_shapec                C   s   | j |}|S r   )r   sample)r   r*   r+   r   r   r   r+   1   s   zTfDistribution.samplec                C      t r   NotImplementedError)r   r*   r   r   r   rsample8   s   zTfDistribution.rsampler   r   )__name__
__module____qualname____doc__r   abcabstractmethodr   r   r   r   r!   r#   r(   r   r   r+   r/   __classcell__r   r   r   r   r      s*    r   c                       s   e Zd ZdZee		d					d fddZeed	edefd
dZ	ee		d					dddZ
eeedejdefddZeedddZeeededd fddZdddZ  ZS )TfCategoricala  Wrapper class for Categorical distribution.

    Creates a categorical distribution parameterized by either :attr:`probs` or
    :attr:`logits` (but not both).

    Samples are integers from :math:`\{0, \ldots, K-1\}` where `K` is
    ``probs.size(-1)``.

    If `probs` is 1-dimensional with length-`K`, each element is the relative
    probability of sampling the class at that index.

    If `probs` is N-dimensional, the first N-1 dimensions are treated as a batch of
    relative probability vectors.

    .. testcode::
        :skipif: True

        m = TfCategorical([ 0.25, 0.25, 0.25, 0.25 ])
        m.sample(sample_shape=(2,))  # equal probability of 0, 1, 2, 3

    .. testoutput::

        tf.Tensor([2 3], shape=(2,), dtype=int32)

    Args:
        probs: The probabilities of each event.
        logits: Event log probabilities (unnormalized)
        temperature: In case of using logits, this parameter can be used to determine
            the sharpness of the distribution. i.e.
            ``probs = softmax(logits / temperature)``. The temperature must be strictly
            positive. A low value (e.g. 1e-10) will result in argmax sampling while a
            larger value will result in uniform sampling.
    Nprobs	tf.Tensorlogitsr   c                    sJ   |d u |d u ksJ d|| _ || _tjj||d| _t j||d d S )Nz4Exactly one out of `probs` and `logits` must be set!)r;   r9   )r9   r;   tfpdistributionsOneHotCategoricalone_hotr   r   r   r9   r;   r   r   r   r   c   s   zTfCategorical.__init__r   c                 K   s4   t jj| jd ur| jnt | jt |t jd S )N)r;   labels)tfnn(sparse_softmax_cross_entropy_with_logitsr;   logr9   castint32r    r   r   r   r!   s   s   zTfCategorical.logpr   c                 C      t jj||dS )N)r9   r;   )r<   r=   Categoricalr@   r   r   r   r   |   s   z"TfCategorical._get_tf_distributionspacec                 K   s   t | tjjs	J t| jS r   )
isinstancegymspacesDiscreteintnrJ   r   r   r   r   required_input_dim   s   
z TfCategorical.required_input_dimr   c                 C   s"   | j |}t|| j | j S r   )r?   r+   rB   stop_gradientsr9   )r   r*   one_hot_sampler   r   r   r/      s   zTfCategorical.rsamplec                 K   s   t dd|i|S )Nr;   r   r8   clsr;   r   r   r   r   from_logits   s   zTfCategorical.from_logitsTfDeterministicc                 C   s.   | j d ur	| j }n| j}ttjj|dddS )Naxisloc)r9   r;   rY   rB   mathargmax)r   probs_or_logitsr   r   r   to_deterministic   s   
zTfCategorical.to_deterministic)NN)r9   r:   r;   r:   r   N)r9   r:   r;   r:   r   r   r   r   rY   )r1   r2   r3   r4   r   r   r   r   r   r!   r   staticmethodrL   SpacerO   rR   r/   classmethodrX   rb   r7   r   r   r   r   r8   ?   sB    "r8   c                	       s  e Zd ZdZee	d deeef de	eeef  f fddZ
eed!d	d
Zeededef fddZeedef fddZeedddef fddZeeedejdefddZeed"ddZeeededd fddZd#ddZ  ZS )$TfDiagGaussiana  Wrapper class for Normal distribution.

    Creates a normal distribution parameterized by :attr:`loc` and :attr:`scale`. In
    case of multi-dimensional distribution, the variance is assumed to be diagonal.

    .. testcode::
        :skipif: True

        m = TfDiagGaussian(loc=[0.0, 0.0], scale=[1.0, 1.0])
        m.sample(sample_shape=(2,))  # 2d normal dist with loc=0 and scale=1

    .. testoutput::

        tensor([[ 0.1046, -0.6120], [ 0.234, 0.556]])

    .. testcode::
        :skipif: True

        # scale is None
        m = TfDiagGaussian(loc=[0.0, 1.0])
        m.sample(sample_shape=(2,))  # normally distributed with loc=0 and scale=1

    .. testoutput::

        tensor([0.1046, 0.6120])


    Args:
        loc: mean of the distribution (often referred to as mu). If scale is None, the
            second half of the `loc` will be used as the log of scale.
        scale: standard deviation of the distribution (often referred to as sigma).
            Has to be positive.
    Nr^   scalec                    s   || _ t j||d d S Nr^   ri   )r^   r   r   r   r^   ri   r   r   r   r      s   zTfDiagGaussian.__init__r   r   c                 C   rH   rj   )r<   r=   Normalrl   r   r   r   r      s   z#TfDiagGaussian._get_tf_distributionr   c                       t jjt |ddS NrZ   r[   )rB   r_   
reduce_sumr   r!   )r   r   r   r   r   r!         zTfDiagGaussian.logpc                    s   t jjt  ddS ro   )rB   r_   rp   r   r#   r$   r   r   r   r#      s   zTfDiagGaussian.entropyr%   r   c                    rn   ro   )rB   r_   rp   r   r(   r'   r   r   r   r(      rq   zTfDiagGaussian.klrJ   c                 K   s,   t | tjjs	J ttj| jtjdd S )Ndtype   	rK   rL   rM   BoxrO   npprodshaperG   rQ   r   r   r   rR      s   z!TfDiagGaussian.required_input_dimr   c                 C   s    t j|}| jj|| jj  S r   )rB   randomnormalr   r^   ri   )r   r*   epsr   r   r   r/      s   zTfDiagGaussian.rsampler;   c                 K   s,   t j|ddd\}}t j|}t||dS )Nrt   rZ   )num_or_size_splitsr\   rk   )rB   splitr_   exprh   )rW   r;   r   r^   log_stdri   r   r   r   rX      s   zTfDiagGaussian.from_logitsrY   c                 C   s   t | jdS Nr]   )rY   r^   r$   r   r   r   rb      s   zTfDiagGaussian.to_deterministicr   r0   rc   rd   )r1   r2   r3   r4   r   r   r   floatr   r   r   r   r!   r#   r(   re   r   rL   rf   rO   rR   r/   rg   rX   rb   r7   r   r   r   r   rh      s4    "
rh   c                
       s,  e Zd ZdZeed! fddZeedd	d
eedf de	e
ee
e
f f fddZeedd	d
eedf de	e
ee
e
f f fddZeede
de
fddZeede
fddZeeddde
fddZeeedejdefddZeeede
dd fddZd"dd Z  ZS )#rY   a  The distribution that returns the input values directly.

    This is similar to DiagGaussian with standard deviation zero (thus only
    requiring the "mean" values as NN output).

    Note: entropy is always zero, ang logp and kl are not implemented.

    .. testcode::
        :skipif: True

        m = TfDeterministic(loc=tf.constant([0.0, 0.0]))
        m.sample(sample_shape=(2,))

    .. testoutput::

        Tensor([[ 0.0, 0.0], [ 0.0, 0.0]])

    Args:
        loc: the determinsitic value to return
    r^   r:   r   Nc                       t    || _d S r   )r   r   r^   )r   r^   r   r   r   r     s   

zTfDeterministic.__init__r   r)   r*   .c                K   s$   || j j }tj|| j jd| j  S Nrr   )r^   ry   rB   onesrs   )r   r*   r   ry   r   r   r   r+     s   zTfDeterministic.samplec                K   r,   r   r-   )r   r*   r   r   r   r   r/     s   zTfDeterministic.rsampler   c                 K   s   t | jS r   )rB   
zeros_liker^   r    r   r   r   r!     s   zTfDeterministic.logpc                 K      t d| jj d)Nz`entropy()` not supported for .RuntimeErrorr   r1   )r   r   r   r   r   r#   #  r"   zTfDeterministic.entropyr%   r   c                 K   r   )Nz`kl()` not supported for r   r   )r   r%   r   r   r   r   r(   '  r"   zTfDeterministic.klrJ   c                 K   s(   t | tjjs	J ttj| jtjdS r   ru   rQ   r   r   r   rR   +  s   z"TfDeterministic.required_input_dimr;   c                 K   s
   t |dS r   )rY   rV   r   r   r   rX   1  s   
zTfDeterministic.from_logitsc                 C   s   | S r   r   r$   r   r   r   rb   6  s   z TfDeterministic.to_deterministic)r^   r:   r   Nrd   )r1   r2   r3   r4   r   r   r   r   rO   r   r   r+   r/   r!   r#   r(   re   rL   rf   rR   rg   rX   rb   r7   r   r   r   r   rY      s>    
	
rY   c                       s   e Zd ZdZeedee f fddZeede	fddZ
eedd	d
Zeedejde	fddZeede	fddZeedede	fddZeeedejdefddZeeedejdee dd fddZdddZ  ZS )TfMultiCategoricalz>MultiCategorical distribution for MultiDiscrete action spaces.categoricalsc                    r   r   )r   r   _cats)r   r   r   r   r   r   >  s   

zTfMultiCategorical.__init__r   c                 C   s"   dd | j D }tj|dd}|S )Nc                 S      g | ]}|  qS r   r+   .0catr   r   r   
<listcomp>H      z-TfMultiCategorical.sample.<locals>.<listcomp>rZ   r[   r   rB   stack)r   arrsample_r   r   r   r+   F     zTfMultiCategorical.sampler   c                 C   s"   dd | j D }tj|dd}|S )Nc                 S   r   r   )r/   r   r   r   r   r   N  r   z.TfMultiCategorical.rsample.<locals>.<listcomp>rZ   r[   r   )r   r*   r   r   r   r   r   r/   L  r   zTfMultiCategorical.rsampler   c                 C   sB   t jt |t jdd}t dd t| j|D }t j|ddS )NrZ   r[   c                 S      g | ]	\}}| |qS r   )r!   )r   r   actr   r   r   r   U      z+TfMultiCategorical.logp.<locals>.<listcomp>r   )rB   unstackrF   rG   r   zipr   rp   )r   r   actionslogpsr   r   r   r!   R  s   zTfMultiCategorical.logpc                 C   s$   t jt jdd | jD ddddS )Nc                 S   r   r   r#   r   r   r   r   r   [  r   z.TfMultiCategorical.entropy.<locals>.<listcomp>rZ   r[   )rB   rp   r   r   r$   r   r   r   r#   X  s   zTfMultiCategorical.entropyr%   c                 C   s0   t jdd t| j|jD dd}t j|ddS )Nc                 S   r   r   r(   )r   r   oth_catr   r   r   r   a  r   z)TfMultiCategorical.kl.<locals>.<listcomp>rZ   r[   )rB   r   r   r   rp   )r   r%   klsr   r   r   r(   ^  s   zTfMultiCategorical.klrJ   c                 K   s"   t | tjjs	J tt| jS r   )rK   rL   rM   MultiDiscreterO   rw   sumnvecrQ   r   r   r   rR   e  s   z%TfMultiCategorical.required_input_dimr;   
input_lensc                 K   s$   dd t j||ddD }t|dS )a  Creates this Distribution from logits (and additional arguments).

        If you wish to create this distribution from logits only, please refer to
        `Distribution.get_partial_dist_cls()`.

        Args:
            logits: The tensor containing logits to be separated by logit_lens.
                child_distribution_cls_struct: A struct of Distribution classes that can
                be instantiated from the given logits.
            input_lens: A list of integers that indicate the length of the logits
                vectors to be passed into each child distribution.
            **kwargs: Forward compatibility kwargs.
        c                 S   s   g | ]}t |d qS ))r;   rU   )r   r;   r   r   r   r     s    z2TfMultiCategorical.from_logits.<locals>.<listcomp>rZ   r[   )r   )rB   r~   r   )rW   r;   r   r   r   r   r   r   rX   k  s   
zTfMultiCategorical.from_logitsTfMultiDistributionc                 C   s   t dd | jD S )Nc                 S   r   r   rb   r   r   r   r   r     r   z7TfMultiCategorical.to_deterministic.<locals>.<listcomp>)r   r   r$   r   r   r   rb     s   z#TfMultiCategorical.to_deterministicrc   r   r   )r1   r2   r3   r4   r   r   r   r8   r   r   r+   r/   rB   Tensorr!   r#   r(   re   rL   rf   rO   rR   rg   rX   rb   r7   r   r   r   r   r   :  s:    r   c                       s  e Zd ZdZdeeeef f fddZe	e
dddeedf d	eeeeef f fd
dZe	e
dd Ze	e
dd Ze	e
dd Ze	e
dd Zee	e
dejdee d	efddZee	e
dejdeeef deeee f dejd	d f
ddZdddZ  ZS )r   zGAction distribution that operates on multiple, possibly nested actions.child_distribution_structc                    s    t    || _t|| _dS )zInitializes a TfMultiDistribution object.

        Args:
            child_distribution_struct: Any struct
                that contains the child distribution classes to use to
                instantiate the child distributions from `logits`.
        N)r   r   _original_structtreeflatten_flat_child_distributions)r   r   r   r   r   r     s   
zTfMultiDistribution.__init__Nr)   r*   .r   c                K   s@   g }| j D ]}|jdd|i|}|| qt| j|}|S )Nr*   r   )r   r/   appendr   unflatten_asr   )r   r*   r   rsamplesdistr/   r   r   r   r/     s   
zTfMultiDistribution.rsamplec                 C   s   t |tjtjfrNg }| jD ]6}t |tr|d qt |tr)|t	|j
 q| }t	|jdkr:|d q|t|d  qtj||dd}nt|}dd }t||| j}t|S )N   r[   c                 S   s>   t |trt| jdkr| jd dkrtj| dd} || S )Nr   rZ   r[   )rK   r8   lenry   rB   squeezer!   )valr   r   r   r   map_  s   
z&TfMultiDistribution.logp.<locals>.map_)rK   rB   r   rw   ndarrayr   r8   r   r   r   r   r+   ry   r~   r   r   map_structurer   )r   r   split_indicesr   r+   split_valuer   
flat_logpsr   r   r   r!     s$   



zTfMultiDistribution.logpc                 C   s    dd t | j|jD }t|S )Nc                 S   r   r   r   )r   dor   r   r   r     s    z*TfMultiDistribution.kl.<locals>.<listcomp>)r   r   r   )r   r%   kl_listr   r   r   r(     s   zTfMultiDistribution.klc                 C   s   dd | j D }t|S )Nc                 S   r   r   r   )r   r   r   r   r   r     r   z/TfMultiDistribution.entropy.<locals>.<listcomp>)r   r   )r   entropy_listr   r   r   r#     s   zTfMultiDistribution.entropyc                 C   s    t | j| j}t dd |S )Nc                 S   s   |   S r   r   )sr   r   r   <lambda>  s    z,TfMultiDistribution.sample.<locals>.<lambda>)r   r   r   r   r   )r   child_distributions_structr   r   r   r+     s   zTfMultiDistribution.samplerJ   r   c                 K   s   t |S r   )r   )rJ   r   r   r   r   r   rR     s   z&TfMultiDistribution.required_input_dimr;   child_distribution_cls_structc                 K   sP   t |}t |}tj||dd}t dd |t|}	t ||	}
t|
dS )a  Creates this Distribution from logits (and additional arguments).

        If you wish to create this distribution from logits only, please refer to
        `Distribution.get_partial_dist_cls()`.

        Args:
            logits: The tensor containing logits to be separated by `input_lens`.
                child_distribution_cls_struct: A struct of Distribution classes that can
                be instantiated from the given logits.
            child_distribution_cls_struct: A struct of Distribution classes that can
                be instantiated from the given logits.
            input_lens: A list or dict of integers that indicate the length of each
                logit. If this is given as a dict, the structure should match the
                structure of child_distribution_cls_struct.
            space: The possibly nested output space.
            **kwargs: Forward compatibility kwargs.

        Returns:
            A TfMultiDistribution object.
        r   r[   c                 S   s
   |  |S r   )rX   )r   input_r   r   r   r     s   
 z1TfMultiDistribution.from_logits.<locals>.<lambda>)r   )r   r   rB   r~   r   listr   r   )rW   r;   r   r   rJ   r   
logit_lenschild_distribution_cls_listsplit_logitschild_distribution_listr   r   r   r   rX     s   

zTfMultiDistribution.from_logitsc                 C   s&   dd | j D }t| j|}t|S )Nc                 S   s   g | ]}|j qS r   r   )r   r   r   r   r   r   "  s    z8TfMultiDistribution.to_deterministic.<locals>.<listcomp>)r   r   r   r   r   )r   flat_deterministic_distsdeterministic_distsr   r   r   rb   !  s   z$TfMultiDistribution.to_deterministicr   )r1   r2   r3   r4   r   r   r   r   r   r   r   rO   r   r/   r!   r(   r#   r+   re   rL   rf   rR   rg   rB   r   r   rX   rb   r7   r   r   r   r   r     sJ    

(
	

 
.r   )"r4   	gymnasiumrL   r   numpyrw   typingr   r   r   r   r5   ray.rllib.models.distributionsr   ray.rllib.utils.annotationsr   r   ray.rllib.utils.frameworkr	   r
   ray.rllib.utils.typingr   r   r   _rB   r<   ABCr   r8   rh   rY   r   r   r   r   r   r   <module>   s0    %]RIP