from collections import OrderedDict
from types import MappingProxyType
from typing import List, Optional

import numpy as np
import tree  # pip install dm_tree
from gymnasium.spaces import Discrete, MultiDiscrete

from ray._common.deprecation import Deprecated
from ray.rllib.utils.annotations import PublicAPI
from ray.rllib.utils.framework import try_import_tf, try_import_torch
from ray.rllib.utils.typing import SpaceStruct, TensorStructType, TensorType, Union

tf1, tf, tfv = try_import_tf()
torch, _ = try_import_torch()

SMALL_NUMBER = 1e-6
# Some large int number. May be increased here, if needed.
LARGE_INTEGER = 100000000

# Min and max outputs (clipped) from an NN-output layer interpreted as the
# log(x) of some x (e.g. a stddev of a normal distribution).
MIN_LOG_NN_OUTPUT = -5
MAX_LOG_NN_OUTPUT = 2


@Deprecated(help="RLlib itself has no use for this anymore.", error=False)
def aligned_array(size: int, dtype, align: int = 64) -> np.ndarray:
    """Returns an array of a given size that is 64-byte aligned.

    The returned array can be efficiently copied into GPU memory by TensorFlow.

    Args:
        size: The size (total number of items) of the array. For example,
            array([[0.0, 1.0], [2.0, 3.0]]) would have size=4.
        dtype: The numpy dtype of the array.
        align: The alignment to use.

    Returns:
        A np.ndarray with the given specifications.
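
    For example, a minimal sketch (note that a concrete ``np.dtype`` instance
    is expected, e.g. ``np.dtype(np.float32)``):

    .. testcode::
        :skipif: True

        import numpy as np
        from ray.rllib.utils.numpy import aligned_array

        arr = aligned_array(16, np.dtype(np.float32))
        assert arr.ctypes.data % 64 == 0 and len(arr) == 16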
       dtyper   )itemsizenpemptyuint8ctypesdataviewlen)r   r   r   nr   
data_alignoffsetoutput r'   R/home/ubuntu/veenaModal/venv/lib/python3.10/site-packages/ray/rllib/utils/numpy.pyaligned_array   s   
 r)   items
time_majorc                 C   sv  t | dkrg S t | dkr| d S t| d tjr| d jtjtjtjfv r| d j}tt	dd | D |}|durx|du r\t	dd | D }| d j
d |f| d j
dd  }n1t	d	d | D }|| d j
d f| d j
dd  }nt	d
d | D }|f| d j
dd  }||}|jjd dksJ |jjtj| ||rdndd |S tj| |rddS ddS )a  Concatenate arrays, ensuring the output is 64-byte aligned.

    We only align float arrays; other arrays are concatenated as normal.

    This should be used instead of np.concatenate() to improve performance
    when the output array is likely to be fed into TensorFlow.

    Args:
        items: The list of items to concatenate and align.
        time_major: Whether the data in items is time-major, in which
            case, we will concatenate along axis=1.

    Returns:
        The concat'd and aligned array.
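
    For example, a minimal sketch (both inputs share dtype and trailing dims,
    so they get concatenated along the batch axis):

    .. testcode::
        :skipif: True

        import numpy as np
        from ray.rllib.utils.numpy import concat_aligned

        a = np.zeros((2, 3), dtype=np.float32)
        b = np.ones((4, 3), dtype=np.float32)
        print(concat_aligned([a, b]).shape)  # -> (6, 3)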
    """
    if len(items) == 0:
        return []
    elif len(items) == 1:
        # We assume the input is aligned. In any case, it doesn't help
        # performance to force-align it, since that incurs a needless copy.
        return items[0]
    elif isinstance(items[0], np.ndarray) and items[0].dtype in [
        np.float32,
        np.float64,
        np.uint8,
    ]:
        dtype = items[0].dtype
        flat = aligned_array(sum(s.size for s in items), dtype)
        if time_major is not None:
            if time_major is True:
                batch_dim = sum(s.shape[1] for s in items)
                new_shape = (items[0].shape[0], batch_dim) + items[0].shape[2:]
            else:
                batch_dim = sum(s.shape[0] for s in items)
                new_shape = (batch_dim, items[0].shape[1]) + items[0].shape[2:]
        else:
            batch_dim = sum(s.shape[0] for s in items)
            new_shape = (batch_dim,) + items[0].shape[1:]
        output = flat.reshape(new_shape)
        assert output.ctypes.data % 64 == 0, output.ctypes.data
        np.concatenate(items, out=output, axis=1 if time_major else 0)
        return output
    else:
        return np.concatenate(items, axis=1 if time_major else 0)


@PublicAPI
def convert_to_numpy(
    x: TensorStructType, reduce_type: bool = True
) -> TensorStructType:
    """Converts values in `x` to non-Tensor numpy or python types.

    Args:
        x: Any (possibly nested) struct, the values in which will be
            converted and returned as a new struct with all torch/tf tensors
            being converted to numpy types.
        reduce_type: Whether to automatically reduce all float64 and int64 data
            into float32 and int32 data, respectively.

    Returns:
        A new struct with the same structure as `x`, but with all
        values converted to numpy arrays (on CPU).
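
    For example, a minimal sketch (the int64 -> int32 reduction assumes a
    platform where numpy's default integer is 64-bit):

    .. testcode::
        :skipif: True

        import numpy as np
        from ray.rllib.utils.numpy import convert_to_numpy

        stats = {
            "advantages": np.arange(3),  # int64 on most platforms
            "loss": np.array([0.5], dtype=np.float64),
        }
        out = convert_to_numpy(stats)
        print(out["advantages"].dtype, out["loss"].dtype)  # -> int32 float32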
    """

    # The mapping function used to numpyize torch/tf tensors (moving them
    # to the CPU beforehand).
    def mapping(item):
        if torch and isinstance(item, torch.Tensor):
            ret = (
                item.cpu().item()
                if len(item.size()) == 0
                else item.detach().cpu().numpy()
            )
        elif (
            tf
            and isinstance(item, (tf.Tensor, tf.Variable))
            and hasattr(item, "numpy")
        ):
            assert tf.executing_eagerly()
            ret = item.numpy()
        else:
            ret = item
        # Reduce float64 -> float32 and int64 -> int32 (`np.integer` is used
        # instead of the builtin `int` to stay platform independent).
        if reduce_type and isinstance(ret, np.ndarray):
            if np.issubdtype(ret.dtype, np.floating):
                ret = ret.astype(np.float32)
            elif np.issubdtype(ret.dtype, np.integer):
                ret = ret.astype(np.int32)
        return ret

    return tree.map_structure(mapping, x)


@PublicAPI
def fc(
    x: np.ndarray,
    weights: np.ndarray,
    biases: Optional[np.ndarray] = None,
    framework: Optional[str] = None,
) -> np.ndarray:
    """Calculates FC (dense) layer outputs given weights/biases and input.

    Args:
        x: The input to the dense layer.
        weights: The weights matrix.
        biases: The biases vector. All 0s if None.
        framework: An optional framework hint (to figure out,
            e.g. whether to transpose torch weight matrices).

    Returns:
        The dense layer's output.
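
    For example, a minimal sketch (plain numpy inputs, so no framework hint
    is needed):

    .. testcode::
        :skipif: True

        import numpy as np
        from ray.rllib.utils.numpy import fc

        x = np.ones((2, 4), dtype=np.float32)  # batch of 2
        w = np.ones((4, 3), dtype=np.float32)  # 4 inputs -> 3 units
        b = np.zeros(3, dtype=np.float32)
        print(fc(x, w, b))  # -> [[4. 4. 4.] [4. 4. 4.]]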
    """

    def map_(data, transpose=False):
        if torch and isinstance(data, torch.Tensor):
            data = data.cpu().detach().numpy()
        if tf and tf.executing_eagerly() and isinstance(data, tf.Variable):
            data = data.numpy()
        if transpose:
            data = np.transpose(data)
        return data

    x = map_(x)
    # Torch stores matrices in transpose (faster for backprop).
    transpose = framework == "torch" and (
        x.shape[1] != weights.shape[0] and x.shape[1] == weights.shape[1]
    )
    weights = map_(weights, transpose)
    biases = map_(biases)

    return np.matmul(x, weights) + (0.0 if biases is None else biases)


@PublicAPI
def flatten_inputs_to_1d_tensor(
    inputs: TensorStructType,
    spaces_struct: Optional[SpaceStruct] = None,
    time_axis: bool = False,
    batch_axis: bool = True,
) -> TensorType:
    """Flattens arbitrary input structs according to the given spaces struct.

    Returns a single 1D tensor resulting from the different input
    components' values.

    Thereby:
    - Boxes (any shape) get flattened to (B, [T]?, -1). Note that image boxes
    are not treated differently from other types of Boxes and get
    flattened as well.
    - Discrete (int) values are one-hot'd, e.g. a batch of [1, 0, 3] (B=3 with
    Discrete(4) space) results in [[0, 1, 0, 0], [1, 0, 0, 0], [0, 0, 0, 1]].
    - MultiDiscrete values are multi-one-hot'd, e.g. a batch of
    [[0, 2], [1, 4]] (B=2 with MultiDiscrete([2, 5]) space) results in
    [[1, 0,  0, 0, 1, 0, 0], [0, 1,  0, 0, 0, 0, 1]].

    Args:
        inputs: The inputs to be flattened.
        spaces_struct: The (possibly nested) structure of the spaces that `inputs`
            belongs to.
        time_axis: Whether all inputs have a time-axis (after the batch axis).
            If True, will keep not only the batch axis (0th), but the time axis
            (1st) as-is and flatten everything from the 2nd axis up.
        batch_axis: Whether all inputs have a batch axis.
            If True, will keep that batch axis as-is and flatten everything from the
            other dims up.

    Returns:
        A single 1D tensor resulting from concatenating all
        flattened/one-hot'd input components. Depending on the time_axis flag,
        the shape is (B, n) or (B, T, n).

    .. testcode::
        :skipif: True

        # B=2
        from ray.rllib.utils.numpy import flatten_inputs_to_1d_tensor
        from gymnasium.spaces import Discrete, Box
        out = flatten_inputs_to_1d_tensor(
            {"a": [1, 0], "b": [[[0.0], [0.1]], [1.0], [1.1]]},
            spaces_struct=dict(a=Discrete(2), b=Box(-1.0, 1.0, shape=(2, 1)))
        )
        print(out)

        # B=2; T=2
        out = flatten_inputs_to_1d_tensor(
            ([[1, 0], [0, 1]],
             [[[0.0, 0.1], [1.0, 1.1]], [[2.0, 2.1], [3.0, 3.1]]]),
            spaces_struct=tuple([Discrete(2), Box(-1.0, 1.0, shape=(2, ))]),
            time_axis=True
        )
        print(out)

    .. testoutput::

        [[0.0, 1.0,  0.0, 0.1], [1.0, 0.0,  1.0, 1.1]]  # B=2 n=4
        [[[0.0, 1.0, 0.0, 0.1], [1.0, 0.0, 1.0, 1.1]],
        [[1.0, 0.0, 2.0, 2.1], [0.0, 1.0, 3.0, 3.1]]]  # B=2 T=2 n=4
    """
    # A time axis requires a batch axis as well.
    if time_axis:
        assert batch_axis

    flat_inputs = tree.flatten(inputs)
    flat_spaces = (
        tree.flatten(spaces_struct)
        if spaces_struct is not None
        else [None] * len(flat_inputs)
    )

    B = None
    T = None
    out = []
    for input_, space in zip(flat_inputs, flat_spaces):
        input_ = np.asarray(input_)
        # Store batch and (if applicable) time dimension.
        if B is None and batch_axis:
            B = input_.shape[0]
            if time_axis:
                T = input_.shape[1]

        # One-hot encoding.
        if isinstance(space, Discrete):
            if time_axis:
                input_ = np.reshape(input_, [B * T])
            out.append(one_hot(input_, depth=space.n).astype(np.float32))
        # Multi one-hot encoding.
        elif isinstance(space, MultiDiscrete):
            if time_axis:
                input_ = np.reshape(input_, [B * T, -1])
            out.append(
                np.concatenate(
                    [
                        one_hot(input_[:, i], depth=n).astype(np.float32)
                        for i, n in enumerate(space.nvec)
                    ],
                    axis=-1,
                )
            )
        # Flatten.
        else:
            if batch_axis:
                if time_axis:
                    input_ = np.reshape(input_, [B * T, -1])
                else:
                    input_ = np.reshape(input_, [B, -1])
            else:
                input_ = np.reshape(input_, [-1])
            out.append(input_.astype(np.float32))

    merged = np.concatenate(out, axis=-1)
    # Restore the time axis, if applicable.
    if time_axis:
        merged = np.reshape(merged, [B, T, -1])
    return merged


@PublicAPI
def make_action_immutable(obj):
    """Flags actions immutable to notify users when trying to change them.

    Can also be used with any tree-like structure containing either
    dictionaries, numpy arrays or already immutable objects per se.
    Note, however, that `tree.map_structure()` will in general not
    include the shallow object containing all others and therefore
    immutability will hold only for all objects contained in it.
    Use `tree.traverse(fun, action, top_down=False)` to include
    also the containing object.

    Args:
        obj: The object to be made immutable.

    Returns:
        The immutable object.

    .. testcode::
        :skipif: True

        import tree
        import numpy as np
        from ray.rllib.utils.numpy import make_action_immutable
        arr = np.arange(1,10)
        d = dict(a = 1, b = (arr, arr))
        tree.traverse(make_action_immutable, d, top_down=False)
    """
    if isinstance(obj, np.ndarray):
        obj.setflags(write=False)
        return obj
    elif isinstance(obj, OrderedDict):
        return MappingProxyType(dict(obj))
    elif isinstance(obj, dict):
        return MappingProxyType(obj)
    else:
        return obj


@PublicAPI
def huber_loss(x: np.ndarray, delta: float = 1.0) -> np.ndarray:
    """Reference: https://en.wikipedia.org/wiki/Huber_loss."""
    return np.where(
        np.abs(x) < delta,
        np.power(x, 2.0) * 0.5,
        delta * (np.abs(x) - 0.5 * delta),
    )


@PublicAPI
def l2_loss(x: np.ndarray) -> np.ndarray:
    """Computes half the L2 norm of a tensor (w/o the sqrt): sum(x**2) / 2.

    Args:
        x: The input tensor.

    Returns:
        The l2-loss output according to the above formula given `x`.
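
    For example:

    .. testcode::
        :skipif: True

        import numpy as np
        from ray.rllib.utils.numpy import l2_loss

        print(l2_loss(np.array([2.0, 2.0])))  # -> (4 + 4) / 2 = 4.0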
    """
    return np.sum(np.square(x)) / 2.0


@PublicAPI
def lstm(
    x,
    weights: np.ndarray,
    biases: Optional[np.ndarray] = None,
    initial_internal_states: Optional[np.ndarray] = None,
    time_major: bool = False,
    forget_bias: float = 1.0,
):
    """Calculates LSTM layer output given weights/biases, states, and input.

    Args:
        x: The inputs to the LSTM layer including time-rank
            (0th if time-major, else 1st) and the batch-rank
            (1st if time-major, else 0th).
        weights: The weights matrix.
        biases: The biases vector. All 0s if None.
        initial_internal_states: The initial internal
            states to pass into the layer. All 0s if None.
        time_major: Whether to use time-major or not. Default: False.
        forget_bias: Gets added to first sigmoid (forget gate) output.
            Default: 1.0.

    Returns:
        Tuple consisting of 1) The LSTM layer's output and
        2) Tuple: Last (c-state, h-state).
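
    For example, a minimal sketch (the packed kernel/bias shapes follow the
    tf-style gate layout this function assumes):

    .. testcode::
        :skipif: True

        import numpy as np
        from ray.rllib.utils.numpy import lstm

        B, T, input_dim, units = 2, 5, 4, 3
        x = np.random.randn(B, T, input_dim)
        W = np.random.randn(input_dim + units, 4 * units)
        b = np.zeros(4 * units)
        out, (c, h) = lstm(x, W, b)
        print(out.shape, c.shape, h.shape)  # -> (2, 5, 3) (2, 3) (2, 3)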
    """
    sequence_length = x.shape[0 if time_major else 1]
    batch_size = x.shape[1 if time_major else 0]
    units = weights.shape[1] // 4  # 4 internal layers (3x sigmoid, 1x tanh).

    # Biases default to all 0s (see docstring).
    if biases is None:
        biases = np.zeros(weights.shape[1])

    if initial_internal_states is None:
        c_states = np.zeros(shape=(batch_size, units))
        h_states = np.zeros(shape=(batch_size, units))
    else:
        c_states = initial_internal_states[0]
        h_states = initial_internal_states[1]

    # Create a placeholder for all n-time-step outputs.
    if time_major:
        unrolled_outputs = np.zeros(shape=(sequence_length, batch_size, units))
    else:
        unrolled_outputs = np.zeros(shape=(batch_size, sequence_length, units))

    # Push the batch through the LSTM cell one time step at a time and
    # capture the outputs plus the final h- and c-states.
    for t in range(sequence_length):
        input_matrix = x[t, :, :] if time_major else x[:, t, :]
        input_matrix = np.concatenate((input_matrix, h_states), axis=1)
        input_matmul_matrix = np.matmul(input_matrix, weights) + biases
        # Forget gate (3rd slot in the tf output matrix). Add the static
        # forget bias.
        sigmoid_1 = sigmoid(
            input_matmul_matrix[:, units * 2 : units * 3] + forget_bias
        )
        c_states = np.multiply(c_states, sigmoid_1)
        # Add gate (1st and 2nd slots in the tf output matrix).
        sigmoid_2 = sigmoid(input_matmul_matrix[:, 0:units])
        tanh_3 = np.tanh(input_matmul_matrix[:, units : units * 2])
        c_states = np.add(c_states, np.multiply(sigmoid_2, tanh_3))
        # Output gate (last slot in the tf output matrix).
        sigmoid_4 = sigmoid(input_matmul_matrix[:, units * 3 : units * 4])
        h_states = np.multiply(sigmoid_4, np.tanh(c_states))

        # Store this output time slice.
        if time_major:
            unrolled_outputs[t, :, :] = h_states
        else:
            unrolled_outputs[:, t, :] = h_states

    return unrolled_outputs, (c_states, h_states)


@PublicAPI
def one_hot(
    x: Union[TensorType, int],
    depth: int = 0,
    on_value: float = 1.0,
    off_value: float = 0.0,
    dtype: type = np.float32,
) -> np.ndarray:
    """One-hot utility function for numpy.

    Thanks to qianyizhang:
    https://gist.github.com/qianyizhang/07ee1c15cad08afb03f5de69349efc30.

    Args:
        x: The input to be one-hot encoded.
        depth: The max. number to be one-hot encoded (size of last rank).
        on_value: The value to use for on. Default: 1.0.
        off_value: The value to use for off. Default: 0.0.

    Returns:
        The one-hot encoded equivalent of the input array.
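
    For example:

    .. testcode::
        :skipif: True

        import numpy as np
        from ray.rllib.utils.numpy import one_hot

        print(one_hot(np.array([1, 0, 3]), depth=4))
        # -> [[0. 1. 0. 0.]
        #     [1. 0. 0. 0.]
        #     [0. 0. 0. 1.]]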
    """
    # Handle simple ints properly.
    if isinstance(x, int):
        x = np.array(x, dtype=np.int32)
    # Handle torch tensors properly.
    elif torch and isinstance(x, torch.Tensor):
        x = x.numpy()

    # Handle bool arrays correctly.
    if x.dtype == np.bool_:
        x = x.astype(np.int_)
        depth = 2

    # If depth is not given, try to infer it from the values in the array.
    if depth == 0:
        depth = np.max(x) + 1
    assert (
        np.max(x) < depth
    ), "ERROR: The max. index of `x` ({}) is larger than depth ({})!".format(
        np.max(x), depth
    )
    shape = x.shape

    out = np.ones((*shape, depth)) * off_value
    # Build index grids for all leading dims, then place `on_value` at the
    # positions given by `x` along the last (depth) axis.
    indices = []
    for i in range(x.ndim):
        tiles = [1] * x.ndim
        s = [1] * x.ndim
        s[i] = -1
        r = np.arange(x.shape[i]).reshape(s)
        if i > 0:
            tiles[i - 1] = x.shape[i - 1]
            r = np.tile(r, tiles)
        indices.append(r)
    indices.append(x)
    out[tuple(indices)] = on_value
    return out.astype(dtype)


@PublicAPI
def one_hot_multidiscrete(x, depths: List[int]):
    """Multi-one-hot encoding: One-hot per column, then concatenated.

    Args:
        x: The input to be encoded (one column per sub-space).
        depths: The number of categories per column.

    Returns:
        The multi-one-hot encoded equivalent of the input array.
    """
    # Handle torch tensors properly.
    if torch and isinstance(x, torch.Tensor):
        x = x.numpy()

    shape = x.shape
    return np.concatenate(
        [
            one_hot(x[:, i] if len(shape) > 1 else x, depth=n).astype(np.float32)
            for i, n in enumerate(depths)
        ],
        axis=-1,
    )


@PublicAPI
def relu(x: np.ndarray, alpha: float = 0.0) -> np.ndarray:
    """Implementation of the leaky ReLU function.

    y = x * alpha if x < 0 else x

    Args:
        x: The input values.
        alpha: A scaling ("leak") factor to use for negative x.

    Returns:
        The leaky ReLU output for x.
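
    For example:

    .. testcode::
        :skipif: True

        import numpy as np
        from ray.rllib.utils.numpy import relu

        print(relu(np.array([-2.0, 3.0]), alpha=0.1))  # -> [-0.2  3. ]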
    """
    # Note: The third positional argument makes numpy write the result
    # directly back into `x` (out=x).
    return np.maximum(x, x * alpha, x)


@PublicAPI
def sigmoid(x: np.ndarray, derivative: bool = False) -> np.ndarray:
    """
    Returns the sigmoid function applied to x.
    Alternatively, can return the derivative of the sigmoid function.

    Args:
        x: The input to the sigmoid function.
        derivative: Whether to return the derivative or not.
            Default: False.

    Returns:
        The sigmoid function (or its derivative) applied to x.
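
    For example (note that for `derivative=True`, `x` is expected to already
    be a sigmoid output):

    .. testcode::
        :skipif: True

        import numpy as np
        from ray.rllib.utils.numpy import sigmoid

        y = sigmoid(np.array([0.0]))
        print(y)                            # -> [0.5]
        print(sigmoid(y, derivative=True))  # -> y * (1 - y) = [0.25]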
    """
    if derivative:
        # Note: `x` is assumed to already be a sigmoid output here, such
        # that d/dz sigmoid(z) = sigmoid(z) * (1 - sigmoid(z)) = x * (1 - x).
        return x * (1.0 - x)
    else:
        return 1.0 / (1.0 + np.exp(-x))


@PublicAPI
def softmax(
    x: Union[np.ndarray, list], axis: int = -1, epsilon: Optional[float] = None
) -> np.ndarray:
    """Returns the softmax values for x.

    The exact formula used is:
    S(xi) = e^xi / SUMj(e^xj), where j goes over all elements in x.

    Args:
        x: The input to the softmax function.
        axis: The axis along which to softmax.
        epsilon: Optional epsilon as a minimum value. If None, use
            `SMALL_NUMBER`.

    Returns:
        The softmax over x.
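
    For example (rows sum to ~1.0 after normalization):

    .. testcode::
        :skipif: True

        import numpy as np
        from ray.rllib.utils.numpy import softmax

        probs = softmax(np.array([[1.0, 2.0, 3.0]]))
        print(probs.sum(axis=-1))  # -> approx. [1.]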
    """
    epsilon = epsilon or SMALL_NUMBER
    x = np.asarray(x)
    # Subtract the max along `axis` for numerical stability (prevents inf in
    # np.exp() for large inputs); this does not change the softmax result.
    x_exp = np.exp(x - np.max(x, axis=axis, keepdims=True))
    return np.maximum(x_exp / np.sum(x_exp, axis=axis, keepdims=True), epsilon)