o
    `۷ic%                  
   @   s   d dl mZ d dlmZmZmZ d dlZd dlZd dl	m
Z
mZ d dlmZ edededeeef fd	d
Zedeee
ejjef  dedeejj fddZedejjdee dedeejj fddZedee dejjdejjfddZdS )    )deque)ListTupleUnionN)BatchedNdArraybatch)DeveloperAPIepisode_lenTreturnc                 C   s   g }g }t | |}|| tdg| dg||   tj}|| | | }|dkrTt ||}|| tdg| dg||   tj}|| ||8 }|dks,||fS )a7  Creates loss mask and a seq_lens array, given an episode length and T.

    Args:
        episode_lens: A list of episode lengths to infer the loss mask and seq_lens
            array from.
        T: The maximum number of timesteps in each "row", also known as the maximum
            sequence length (max_seq_len). Episodes are split into chunks that are at
            most `T` long and remaining timesteps will be zero-padded (and masked out).

    Returns:
         Tuple consisting of a) list of the loss masks to use (masking out areas that
         are past the end of an episode (or rollout), but had to be zero-added due to
         the added extra time rank (of length T) and b) the list of sequence lengths
         resulting from splitting the given episodes into chunks of at most `T`
         timesteps.
       r   )minappendnparraybool_)r	   r
   maskseq_lenslen_rowoverflow	extra_row r   a/home/ubuntu/vllm_env/lib/python3.10/site-packages/ray/rllib/utils/postprocessing/zero_padding.pycreate_mask_and_seq_lens   s   

"


"
r   	item_listmax_seq_lenc                 C   sT  t dd | d }g }g }d}t| } t| dkr|  }|| }t |}g }	g }
d}|D ].}t|trV|
|d|  t||krJt|}q/|}|	||d  q/|
| d}q/||7 }|t 	||
 |	rv| 
t 	||	 ||kr|t|dd g }d}t| dks|dkr||k r||g||   |t|dd |S )a  Splits the contents of `item_list` into a new list of ndarrays and returns it.

    In the returned list, each item is one ndarray of len (axis=0) `max_seq_len`.
    The last item in the returned list may be (right) zero-padded, if necessary, to
    reach `max_seq_len`.

    If `item_list` contains one or more `BatchedNdArray` (instead of individual
    items), these will be split accordingly along their axis=0 to yield the returned
    structure described above.

    .. testcode::

        from ray.rllib.utils.postprocessing.zero_padding import (
            BatchedNdArray,
            split_and_zero_pad,
        )
        from ray.rllib.utils.test_utils import check

        # Simple case: `item_list` contains individual floats.
        check(
            split_and_zero_pad([0, 1, 2, 3, 4, 5, 6, 7], 5),
            [[0, 1, 2, 3, 4], [5, 6, 7, 0, 0]],
        )

        # `item_list` contains BatchedNdArray (ndarrays that explicitly declare they
        # have a batch axis=0).
        check(
            split_and_zero_pad([
                BatchedNdArray([0, 1]),
                BatchedNdArray([2, 3, 4, 5]),
                BatchedNdArray([6, 7, 8]),
            ], 5),
            [[0, 1, 2, 3, 4], [5, 6, 7, 8, 0]],
        )

    Args:
        item_list: A list of individual items or BatchedNdArrays to be split into
            `max_seq_len` long pieces (the last of which may be zero-padded).
        max_seq_len: The maximum length of each item in the returned list.

    Returns:
        A list of np.ndarrays (all of length `max_seq_len`), which contains the same
        data as `item_list`, but split into sub-chunks of size `max_seq_len`.
        The last item in the returned list may be zero-padded, if necessary.
    c                 S   s    t t| tr| d gS | S )Nr   )r   
zeros_like
isinstancer   )sr   r   r   <lambda>d   s     z$split_and_zero_pad.<locals>.<lambda>r   Nr   auto)'individual_items_already_have_batch_dim)treemap_structurer   lenpopleftflattenr   r   r   unflatten_as
appendleftr   extend)r   r   zero_elementretcurrent_time_row	current_titemt	item_flatitem_list_appendcurrent_time_row_flat_itemsadd_to_current_titmr   r   r   split_and_zero_pad1   sX   2	



+
r6   nd_arrayepisode_lensc                 C   sD   g }d}|D ]}t | |||  }|t|g| ||7 }q|S )a  Splits and zero-pads a single np.ndarray based on episode lens and a maxlen.

    Args:
        nd_array: The single np.ndarray to be split into n chunks, based on the given
            `episode_lens` and the `max_seq_len` argument. For example, if `nd_array`
            has a batch dimension (axis 0) of 21, `episode_lens` is [15, 3, 3], and
            `max_seq_len` is 6, then the returned list would have np.ndarrays in it of
            batch dimensions (axis 0): [6, 6, 6 (zero-padded), 6 (zero-padded),
            6 (zero-padded)].
            Note that this function doesn't work on nested data, such as dicts of
            ndarrays.
        episode_lens: A list of episode lengths along which to split and zero-pad the
            given `nd_array`.
        max_seq_len: The maximum sequence length to split at (and zero-pad).

    Returns: A list of n np.ndarrays, resulting from splitting and zero-padding the
        given `nd_array`.
    r   )r   r*   r6   )r7   r8   r   r,   cursorr	   itemsr   r   r   split_and_zero_pad_n_episodes   s   
r;   datac           	      C   s   t |jdkr	|S t |jdksJ g }d}|jd }| D ].}t||\}}t|D ]}|||  |d7 }q*|dkrK|||d|f  |d7 }qt|S )a  Removes right-side zero-padding from data based on `episode_lens`.

    ..testcode::

        from ray.rllib.utils.postprocessing.zero_padding import unpad_data_if_necessary
        import numpy as np

        unpadded = unpad_data_if_necessary(
            episode_lens=[4, 2],
            data=np.array([
                [2, 4, 5, 3, 0, 0, 0, 0],
                [-1, 3, 0, 0, 0, 0, 0, 0],
            ]),
        )
        assert (unpadded == [2, 4, 5, 3, -1, 3]).all()

        unpadded = unpad_data_if_necessary(
            episode_lens=[1, 5],
            data=np.array([
                [2, 0, 0, 0, 0],
                [-1, -2, -3, -4, -5],
            ]),
        )
        assert (unpadded == [2, -1, -2, -3, -4, -5]).all()

    Args:
        episode_lens: A list of actual episode lengths.
        data: A 2D np.ndarray with right-side zero-padded rows.

    Returns:
        A 1D np.ndarray resulting from concatenation of the un-padded
        input data along the 0-axis.
    r      r   N)r%   shapedivmodranger   r   concatenate)	r8   r<   new_datarow_idxr
   r   num_rowscol_idxir   r   r   unpad_data_if_necessary   s    '


rG   )collectionsr   typingr   r   r   numpyr   r#   "ray.rllib.utils.spaces.space_utilsr   r   ray.util.annotationsr   intr   _typingNDArrayfloatr6   r;   rG   r   r   r   r   <module>   sD     %
u
"