o
    bi7                     @   sj  d dl Z d dlZd dlmZmZmZmZmZmZm	Z	 d dl
Zd dlZd dlZd dlmZ d dlmZ d dlmZ d dlmZmZ d dlmZmZ d dlmZmZ d d	lm Z  zd d
lm!Z! W n e"ym   d dlm#Z! Y nw dee$ fddZ%dee$ fddZ&dZ'e(e'ddkreej)edk rd dl*m+Z+ e+Z,nd dl*m-Z- e-Z,e,j.e,_/edeej)  kredk rn ne%e,_.ne&e,_.de,_0e ddej1j2j3G dd dej1j2j4Z5G dd dej1j2j6Z7G dd  d Z8e ddG d!d" d"e7e8Z9e ddG d#d$ d$ej1j2j:e7e8Z;e9<  e9=  e9>  e;<  e;=  e;>  e ddd%ej?de@fd&d'ZAdS )(    N)AnyCallableListOptionalSequenceTupleUnion)Version)Dtype)set_function_name)ABCDataFrame	ABCSeries)check_array_indexervalidate_indices)_create_possibly_ragged_ndarray"_is_ndarray_variable_shaped_tensor)	PublicAPI)ABCIndex)ABCIndexClassreturnc           
         s   ddl m} ddlm  tjts S |jdd}t	|}|j
dkr+ S  fdd}j}|d u r?|jdd	}|d
}t	|||}|jjrRdnd}|j|j|d}	||	d S )Nr   extract_arrayformat_arrayTextract_numpy   c                    s0    | |j jjjjjjjd
}|S )Nfloat_formatna_repdigitsspacejustifydecimalleading_spacequotingr   array_
formatter_
fmt_valuesr   self Y/home/ubuntu/.local/lib/python3.10/site-packages/ray/air/util/tensor_extensions/pandas.pyformat_array_wrapM   s   z2_format_strings_patched.<locals>.format_array_wrapboxedKFCorder)pandas.core.constructionr   pandas.io.formats.formatr   
isinstancevaluesTensorArray_format_strings_orignpasarrayndim	formatter
_formatterravelflagsf_contiguousreshapeshape)
r+   r   r9   arrayr.   flat_formatter
flat_arrayfmt_flat_arrayr5   	fmt_arrayr,   r*   r-   _format_strings_patched@   s"   



rK   c                    s   ddl m ddlm} ddlm  ddlm tj	t
s" S |j	dd}t|}|jdkr7 S  fd	d
}j}|d u rK|jdd}|d}t|||}|jjr^dnd}|j|j|d}	fdd}
|
|	jS )Nr   )partialr   r   )pprint_thingTr   r   c                    s,    | |j jjjjjjd	}|S )Nr   r   r    r!   r"   r#   r$   rN   r&   r*   r,   r-   r.   z   s   z9_format_strings_patched_v1_0_0.<locals>.format_array_wrapr/   r1   r2   r3   r4   c                    sP   dd  fdd}g }| D ]}|du rdnd}| |j||d q|S )	N)	
)escape_charsc                    s   t  | S Nstr)xr?   r,   r-   _format   s   zL_format_strings_patched_v1_0_0.<locals>.format_strings_slim.<locals>._formatFz{v}z {v})v)appendformat)r'   r$   rX   r)   rY   tpl)rL   rM   rW   r-   format_strings_slim   s   z;_format_strings_patched_v1_0_0.<locals>.format_strings_slim)	functoolsrL   r6   r   r7   r   pandas.io.formats.printingrM   r8   r9   r:   r;   r<   r=   r>   r?   r@   rA   rB   rC   rD   rE   r$   )r+   r   r9   rF   r.   rG   rH   rI   r5   rJ   r]   r,   )r   rL   rM   r+   r-   _format_strings_patched_v1_0_0j   s(   


r`   )TENSOR_COLUMN_EXTENSION_FORMATTER_ENABLED1z2.2.0)ExtensionArrayFormatter)_ExtensionArrayFormatterz1.1.0z1.3.0Tbeta)	stabilityc                   @   s   e Zd ZdZdZdeee df dej	fddZ
edd	 Zed
d Zedd Zedd ZedefddZedefddZedd Zdeejejf fddZdefddZdefddZedd  ZdS )!TensorDtypea  
    Pandas extension type for a column of homogeneous-typed tensors.

    This extension supports tensors in which the elements have different shapes.
    However, each tensor element must be non-ragged, i.e. each tensor element must have
    a well-defined, non-ragged shape.

    See:
    https://github.com/pandas-dev/pandas/blob/master/pandas/core/dtypes/base.py
    for up-to-date interface documentation and the subclassing contract. The
    docstrings of the below properties and methods were copied from the base
    ExtensionDtype.

    Examples:
        >>> # Create a DataFrame with a list of ndarrays as a column.
        >>> import pandas as pd
        >>> import numpy as np
        >>> import ray
        >>> df = pd.DataFrame({
        ...     "one": [1, 2, 3],
        ...     "two": list(np.arange(24).reshape((3, 2, 2, 2)))})
        >>> # Note the opaque np.object dtype for this column.
        >>> df.dtypes # doctest: +SKIP
        one     int64
        two    object
        dtype: object
        >>> # Cast column to our TensorDtype extension type.
        >>> from ray.data.extensions import TensorDtype
        >>> df["two"] = df["two"].astype(TensorDtype(np.int64, (3, 2, 2, 2)))
        >>> # Note that the column dtype is now TensorDtype instead of
        >>> # np.object.
        >>> df.dtypes # doctest: +SKIP
        one          int64
        two    TensorDtype(shape=(3, 2, 2, 2), dtype=int64)
        dtype: object
        >>> # Pandas is now aware of this tensor column, and we can do the
        >>> # typical DataFrame operations on this column.
        >>> col = 2 * (df["two"] + 10)
        >>> # The ndarrays underlying the tensor column will be manipulated,
        >>> # but the column itself will continue to be a Pandas type.
        >>> type(col) # doctest: +SKIP
        pandas.core.series.Series
        >>> col # doctest: +SKIP
        0   [[[ 2  4]
              [ 6  8]]
             [[10 12]
               [14 16]]]
        1   [[[18 20]
              [22 24]]
             [[26 28]
              [30 32]]]
        2   [[[34 36]
              [38 40]]
             [[42 44]
              [46 48]]]
        Name: two, dtype: TensorDtype(shape=(3, 2, 2, 2), dtype=int64)
        >>> # Once you do an aggregation on that column that returns a single
        >>> # row's value, you get back our TensorArrayElement type.
        >>> tensor = col.mean()
        >>> type(tensor) # doctest: +SKIP
        ray.data.extensions.tensor_extension.TensorArrayElement
        >>> tensor # doctest: +SKIP
        array([[[18., 20.],
                [22., 24.]],
               [[26., 28.],
                [30., 32.]]])
        >>> # This is a light wrapper around a NumPy ndarray, and can easily
        >>> # be converted to an ndarray.
        >>> type(tensor.to_numpy()) # doctest: +SKIP
        numpy.ndarray
        >>> # In addition to doing Pandas operations on the tensor column,
        >>> # you can now put the DataFrame into a Dataset.
        >>> ds = ray.data.from_pandas(df) # doctest: +SKIP
        >>> # Internally, this column is represented the corresponding
        >>> # Arrow tensor extension type.
        >>> ds.schema() # doctest: +SKIP
        one: int64
        two: extension<arrow.py_extension_type<ArrowTensorType>>
        >>> # You can write the dataset to Parquet.
        >>> ds.write_parquet("/some/path") # doctest: +SKIP
        >>> # And you can read it back.
        >>> read_ds = ray.data.read_parquet("/some/path") # doctest: +SKIP
        >>> read_ds.schema() # doctest: +SKIP
        one: int64
        two: extension<arrow.py_extension_type<ArrowTensorType>>
        >>> read_df = ray.get(read_ds.to_pandas_refs())[0] # doctest: +SKIP
        >>> read_df.dtypes # doctest: +SKIP
        one          int64
        two    TensorDtype(shape=(3, 2, 2, 2), dtype=int64)
        dtype: object
        >>> # The tensor extension type is preserved along the
        >>> # Pandas --> Arrow --> Parquet --> Arrow --> Pandas
        >>> # conversion chain.
        >>> read_df.equals(df) # doctest: +SKIP
        True
    NrE   .dtypec                 C   s   || _ || _d S rS   _shape_dtype)r+   rE   rh   r,   r,   r-   __init__&  s   
zTensorDtype.__init__c                 C      t S )a   
        The scalar type for the array, e.g. ``int``
        It's expected ``ExtensionArray[item]`` returns an instance
        of ``ExtensionDtype.type`` for scalar ``item``, assuming
        that value is valid (not NA). NA values do not need to be
        instances of `type`.
        )TensorArrayElementr+   r,   r,   r-   type*     	zTensorDtype.typec                 C      | j S )z>
        The dtype of the underlying tensor elements.
        )rk   ro   r,   r,   r-   element_dtype5  s   zTensorDtype.element_dtypec                 C   rr   )z
        The shape of the underlying tensor elements. This will be a tuple of Nones if
        the corresponding TensorArray for this TensorDtype holds variable-shaped tensor
        elements.
        )rj   ro   r,   r,   r-   element_shape<  s   zTensorDtype.element_shapec                 C   s   t dd | jD S )z{
        Whether the corresponding TensorArray for this TensorDtype holds variable-shaped
        tensor elements.
        c                 s   s    | ]}|d u V  qd S rS   r,   ).0dim_sizer,   r,   r-   	<genexpr>K  s    z1TensorDtype.is_variable_shaped.<locals>.<genexpr>)allrE   ro   r,   r,   r-   is_variable_shapedE  s   zTensorDtype.is_variable_shapedr   c                 C   s   d| j  d| j dS )zp
        A string identifying the data type.
        Will be used for display in, e.g. ``Series.dtype``
        znumpy.ndarray(shape=z, dtype=)ri   ro   r,   r,   r-   nameM  s   zTensorDtype.namestringc                 C   s   ddl }ddl}t|tstdt| d}|||}d| j d| d}|du r0t|| }t	|dkr>t||\}}	}
|
|	}	t|
}
| |	|
S )	a=  
        Construct this type from a string.

        This is useful mainly for data types that accept parameters.
        For example, a period dtype accepts a frequency parameter that
        can be set as ``period[H]`` (where H means hourly frequency).

        By default, in the abstract class, just the name of the type is
        expected. But subclasses can overwrite this method to accept
        parameters.

        Parameters
        ----------
        string : str
            The name of the type, for example ``category``.

        Returns
        -------
        ExtensionDtype
            Instance of the dtype.

        Raises
        ------
        TypeError
            If a class cannot be constructed from this 'string'.

        Examples
        --------
        For extension dtypes with arguments the following may be an
        adequate implementation.

        >>> import re
        >>> @classmethod
        ... def construct_from_string(cls, string):
        ...     pattern = re.compile(r"^my_type\[(?P<arg_name>.+)\]$")
        ...     match = pattern.match(string)
        ...     if match:
        ...         return cls(**match.groupdict())
        ...     else:
        ...         raise TypeError(
        ...             f"Cannot construct a '{cls.__name__}' from '{string}'"
        ...         )
        r   Nz.'construct_from_string' expects a string, got zP^(TensorDtype|numpy.ndarray)\(shape=(\((?:(?:\d+|None),?\s?)*\)), dtype=(\w+)\)$zCannot construct a 'z' from 'zF'; expected a string like 'TensorDtype(shape=(1, 2, 3), dtype=int64)'.   )astrer8   rU   	TypeErrorrp   search__name__groupslenliteral_evalr<   rh   )clsr|   r~   r   regexmerr_msgr   _rE   rh   r,   r,   r-   construct_from_stringU  s(   -




z!TensorDtype.construct_from_stringc                 C   rm   )zq
        Return the array type associated with this dtype.

        Returns
        -------
        type
        )r:   )r   r,   r,   r-   construct_array_type  rq   z TensorDtype.construct_array_typerF   c                 C   s\   t |tjr&|jdkrtdd | D }t	|S |d }t	|S | }t	|S )a|  
        Convert a pyarrow (chunked) array to a TensorArray.

        This and TensorArray.__arrow_array__ make up the
        Pandas extension type + array <--> Arrow extension type + array
        interoperability protocol. See
        https://pandas.pydata.org/pandas-docs/stable/development/extending.html#compatibility-with-apache-arrow
        for more information.
        r   c                 S   s   g | ]}|  qS r,   )to_numpy)ru   chunkr,   r,   r-   
<listcomp>      z.TensorDtype.__from_arrow__.<locals>.<listcomp>r   )
r8   paChunkedArray
num_chunksr<   concatenate
iterchunksr   r   r:   )r+   rF   r9   r,   r,   r-   __from_arrow__  s   

zTensorDtype.__from_arrow__c                 C   rr   rS   )r{   ro   r,   r,   r-   __str__  s   zTensorDtype.__str__c                 C   s   t | S rS   rT   ro   r,   r,   r-   __repr__  s   zTensorDtype.__repr__c                 C   s   ddl m} || jS )B  
        Whether this extension array should be considered boolean.

        By default, ExtensionArrays are assumed to be non-numeric.
        Setting this to True will affect the behavior of several places,
        e.g.

        * is_bool
        * boolean indexing

        Returns
        -------
        bool
        r   )is_bool_dtype)pandas.core.dtypes.commonr   rk   )r+   r   r,   r,   r-   _is_boolean  s   
zTensorDtype._is_boolean)r   
__module____qualname____doc__baser   r   intr<   rh   rl   propertyrp   rs   rt   ry   rU   r{   classmethodr   r   r   r   Arrayr   r   r   r   r   r,   r,   r,   r-   rg      s.    d 




H

rg   c                   @   s*   e Zd ZdZedddZedd ZdS )	_TensorOpsMixinze
    Mixin for TensorArray operator support, applying operations on the
    underlying ndarrays.
    TNc                    s*    fdd}dj  dt| S )zc
        Add support for binary operators by unwrapping, applying, and
        rewrapping.
        c                    s   | j }t|tttfrtS dv rtt|ttfr|j }n|}||}t| tr:t|tr4t	
|s:t|}|S  |}|S )N)
__divmod____rdivmod__)_tensorr8   r   r   r   NotImplementedNotImplementedErrorr:   rn   r<   isscalar)r+   otherlvaluesrvaluesresultresult_wrappedr   opop_namer,   r-   _binop  s$   

z._TensorOpsMixin._create_method.<locals>._binop__)r   r   )r   r   coerce_to_dtyperesult_dtyper   r,   r   r-   _create_method  s   
z_TensorOpsMixin._create_methodc                 C   s
   |  |S rS   )r   )r   r   r,   r,   r-   _create_logical_method  s   
z&_TensorOpsMixin._create_logical_method)TN)r   r   r   r   r   r   r   r,   r,   r,   r-   r     s    *r   c                   @   sP   e Zd ZdZdeegef fddZdd Zdd Zd	d
 Z	dd Z
dd ZdS )_TensorScalarCastMixinzH
    Mixin for casting scalar tensors to a particular numeric type.
    funcc                 C   s
   || j S rS   r   )r+   r   r,   r,   r-   _scalarfunc     
z"_TensorScalarCastMixin._scalarfuncc                 C   
   |  tS rS   )r   complexro   r,   r,   r-   __complex__  r   z"_TensorScalarCastMixin.__complex__c                 C   r   rS   )r   floatro   r,   r,   r-   	__float__  r   z _TensorScalarCastMixin.__float__c                 C   r   rS   )r   r   ro   r,   r,   r-   __int__!  r   z_TensorScalarCastMixin.__int__c                 C   r   rS   )r   hexro   r,   r,   r-   __hex__$  r   z_TensorScalarCastMixin.__hex__c                 C   r   rS   )r   octro   r,   r,   r-   __oct__'  r   z_TensorScalarCastMixin.__oct__N)r   r   r   r   r   r   r   r   r   r   r   r   r,   r,   r,   r-   r     s    r   c                   @   s   e Zd ZdZdejfddZdd Zdd Ze	d	d
 Z
e	dd Ze	dd Ze	dd Zdd ZddejdejfddZdS )rn   zJ
    Single element of a TensorArray, wrapping an underlying ndarray.
    r9   c                 C   s
   || _ dS )z
        Construct a TensorArrayElement from a NumPy ndarray.

        Args:
            values: ndarray that underlies this TensorArray element.
        Nr   r+   r9   r,   r,   r-   rl   1  s   
zTensorArrayElement.__init__c                 C   
   | j  S rS   r   r   ro   r,   r,   r-   r   :  r   zTensorArrayElement.__repr__c                 C   r   rS   r   r   ro   r,   r,   r-   r   =  r   zTensorArrayElement.__str__c                 C      | j jS zf
        Get the dtype of the tensor.
        :return: The numpy dtype of the backing ndarray
        r   rh   ro   r,   r,   r-   numpy_dtype@     zTensorArrayElement.numpy_dtypec                 C   r   zl
        Get the number of tensor dimensions.
        :return: integer for the number of dimensions
        r   r>   ro   r,   r,   r-   
numpy_ndimH  r   zTensorArrayElement.numpy_ndimc                 C   r   z~
        Get the shape of the tensor.
        :return: A tuple of integers for the numpy shape of the backing ndarray
        r   rE   ro   r,   r,   r-   numpy_shapeP  r   zTensorArrayElement.numpy_shapec                 C   r   zo
        Get the size of the tensor.
        :return: integer for the number of elements in the tensor
        r   sizero   r,   r,   r-   
numpy_sizeX  r   zTensorArrayElement.numpy_sizec                 C   s   t | jS )zG
        Return the values of this element as a NumPy ndarray.
        r<   r=   r   ro   r,   r,   r-   r   `  s   zTensorArrayElement.to_numpyNrh   r   c                 K      t j| jfd|i|S Nrh   r   r+   rh   kwargsr,   r,   r-   	__array__f     zTensorArrayElement.__array__rS   )r   r   r   r   r<   ndarrayrl   r   r   r   r   r   r   r   r   rh   r   r,   r,   r,   r-   rn   +  s    	



rn   c                       s  e Zd ZdZejejejejej	ej
ejejejejd
Zdeejeeeejef  eef fddZedddd	ee d
efddZedejdejjjfddZ dee!e"ejf ded fddZ#de!fddZ$e%dejjj&fddZ'e%dd Z(e%de!fddZ)dUddZ*	dVdee! d ed!edd fd"d#Z+dUd$d%Z,ed&ed  dd fd'd(Z-d)ee!ejf d*eddfd+d,Z.def fd-d.Z/d/d0 Z0d1d2 Z1de2ejef fd3d4Z3dWd6e4d7efd8d9Z5dXd	ej'dejfd:d;Z6d<e7d=e4fd>d?Z8ddejjj9fd	ej'd
ed@efdAdBZ:e%dCdD Z;e%dEdF Z<e%dGdH Z=e%dIdJ Z>dWdKdLZ?dYdMdNZdYdOdPZdXdQdRZ@e%dSdT ZA  ZBS )Zr:   a  
    Pandas `ExtensionArray` representing a tensor column, i.e. a column
    consisting of ndarrays as elements.

    This extension supports tensors in which the elements have different shapes.
    However, each tensor element must be non-ragged, i.e. each tensor element must have
    a well-defined, non-ragged shape.

    Examples:
        >>> # Create a DataFrame with a list of ndarrays as a column.
        >>> import pandas as pd
        >>> import numpy as np
        >>> import ray
        >>> from ray.data.extensions import TensorArray
        >>> df = pd.DataFrame({
        ...     "one": [1, 2, 3],
        ...     "two": TensorArray(np.arange(24).reshape((3, 2, 2, 2)))})
        >>> # Note that the column dtype is TensorDtype.
        >>> df.dtypes # doctest: +SKIP
        one          int64
        two    TensorDtype(shape=(3, 2, 2, 2), dtype=int64)
        dtype: object
        >>> # Pandas is aware of this tensor column, and we can do the
        >>> # typical DataFrame operations on this column.
        >>> col = 2 * (df["two"] + 10)
        >>> # The ndarrays underlying the tensor column will be manipulated,
        >>> # but the column itself will continue to be a Pandas type.
        >>> type(col) # doctest: +SKIP
        pandas.core.series.Series
        >>> col # doctest: +SKIP
        0   [[[ 2  4]
              [ 6  8]]
             [[10 12]
               [14 16]]]
        1   [[[18 20]
              [22 24]]
             [[26 28]
              [30 32]]]
        2   [[[34 36]
              [38 40]]
             [[42 44]
              [46 48]]]
        Name: two, dtype: TensorDtype(shape=(3, 2, 2, 2), dtype=int64)
        >>> # Once you do an aggregation on that column that returns a single
        >>> # row's value, you get back our TensorArrayElement type.
        >>> tensor = col.mean() # doctest: +SKIP
        >>> type(tensor) # doctest: +SKIP
        ray.data.extensions.tensor_extension.TensorArrayElement
        >>> tensor # doctest: +SKIP
        array([[[18., 20.],
                [22., 24.]],
               [[26., 28.],
                [30., 32.]]])
        >>> # This is a light wrapper around a NumPy ndarray, and can easily
        >>> # be converted to an ndarray.
        >>> type(tensor.to_numpy()) # doctest: +SKIP
        numpy.ndarray
        >>> # In addition to doing Pandas operations on the tensor column,
        >>> # you can now put the DataFrame into a Dataset.
        >>> ds = ray.data.from_pandas(df) # doctest: +SKIP
        >>> # Internally, this column is represented the corresponding
        >>> # Arrow tensor extension type.
        >>> ds.schema() # doctest: +SKIP
        one: int64
        two: extension<arrow.py_extension_type<ArrowTensorType>>
        >>> # You can write the dataset to Parquet.
        >>> ds.write_parquet("/some/path") # doctest: +SKIP
        >>> # And you can read it back.
        >>> read_ds = ray.data.read_parquet("/some/path") # doctest: +SKIP
        >>> read_ds.schema() # doctest: +SKIP
        one: int64
        two: extension<arrow.py_extension_type<ArrowTensorType>>

        >>> read_df = ray.get(read_ds.to_pandas_refs())[0] # doctest: +SKIP
        >>> read_df.dtypes # doctest: +SKIP
        one          int64
        two    TensorDtype(shape=(3, 2, 2, 2), dtype=int64)
        dtype: object
        >>> # The tensor extension type is preserved along the
        >>> # Pandas --> Arrow --> Parquet --> Arrow --> Pandas
        >>> # conversion chain.
        >>> read_df.equals(df) # doctest: +SKIP
        True
    )
sumrx   anyminmaxmeanmedianprodstdvarr9   c                 C   s  t |tr
t|}n!t |trdd |D }t|}nt |tr+tjt|gdd}t |tjra|j	j
tju r`t|dkr?n9tdd |D rTdd |D }t|}n$td	t
|d  d
nt |trjtdtd| dt
|j dt |tjsJ || _d| _dS )zy
        Args:
            values: A NumPy ndarray or sequence of NumPy ndarrays of equal
                shape.
        c                 S   $   g | ]}t |trt|n|qS r,   r8   rn   r<   r=   ru   rY   r,   r,   r-   r     s    z(TensorArray.__init__.<locals>.<listcomp>Fcopyr   c                 s   s.    | ]}t |tjttfot |t V  qd S rS   )r8   r<   r   rn   r   rU   r   r,   r,   r-   rw     s    

z'TensorArray.__init__.<locals>.<genexpr>c                 S   s   g | ]}t |qS r,   )r<   r=   r   r,   r,   r-   r     s    zExpected a well-typed ndarray or an object-typed ndarray of ndarray pointers, but got an object-typed ndarray whose subndarrays are of type .z8Use the copy() method to create a copy of a TensorArray.zDExpected a numpy.ndarray or sequence of numpy.ndarray, but received z	 of type z	 instead.N)r8   r   r   r   rn   r<   rF   r=   r   rh   rp   object_r   rx   r   r:   r   r   _is_variable_shapedr   r,   r,   r-   rl     sH   








zTensorArray.__init__NFrh   r   rh   r   c                C   sF   |rt |tjr| }t|S t |tr|r|j n|j}t|S )aZ  
        Construct a new ExtensionArray from a sequence of scalars.

        Parameters
        ----------
        scalars : Sequence
            Each element will be an instance of the scalar type for this
            array, ``cls.dtype.type`` or be converted into this type in this
            method.
        dtype : dtype, optional
            Construct for this particular dtype. This should be a Dtype
            compatible with the ExtensionArray.
        copy : bool, default False
            If True, copy the underlying data.

        Returns
        -------
        ExtensionArray
        )r8   r<   r   r   r:   r   )r   scalarsrh   r   r,   r,   r-   _from_sequence  s   
zTensorArray._from_sequenceoriginalc                 C      t )a  
        Reconstruct an ExtensionArray after factorization.

        Parameters
        ----------
        values : ndarray
            An integer ndarray with the factorized values.
        original : ExtensionArray
            The original ExtensionArray that factorize was called on.

        See Also
        --------
        factorize : Top-level factorize method that dispatches here.
        ExtensionArray.factorize : Encode the extension array as an enumerated
            type.
        r   )r   r9   r   r,   r,   r-   _from_factorized,  s   zTensorArray._from_factorizeditemr   )r:   rn   c                 C   s   t |tr| j| }t|r|S t|S t |tr4t|dkr4|d tkr4t|dkr0t	d|d }t |t
r>t|}t| |}t
| j| S )aA  
        Select a subset of self.

        Parameters
        ----------
        item : int, slice, or ndarray
            * int: The position in 'self' to get.
            * slice: A slice object, where 'start', 'stop', and 'step' are
              integers or None
            * ndarray: A 1-d boolean NumPy ndarray the same length as 'self'

        Returns
        -------
        item : scalar or ExtensionArray

        Notes
        -----
        For scalar ``item``, return a scalar value suitable for the array's
        type. This should be an instance of ``self.dtype.type``.
        For slice ``key``, return an instance of ``ExtensionArray``, even
        if the slice is length 0 or 1.
        For a boolean mask, return an instance of ``ExtensionArray``, filtered
        to the values where ``item`` is True.
        r   r      zCWorkaround Pandas issue #42430 not implemented for tuple length > 2)r8   r   r   r<   r   rn   tupler   Ellipsis
ValueErrorr:   r=   r   )r+   r   valuer,   r,   r-   __getitem__B  s   


"


zTensorArray.__getitem__c                 C   s
   t | jS )z]
        Length of this array.

        Returns
        -------
        length : int
        )r   r   ro   r,   r,   r-   __len__u  s   
zTensorArray.__len__c                 C   sB   | j r| jd j}d| jd j }n
| j}| jdd }t||S )z2
        An instance of 'ExtensionDtype'.
        r   rS   r   N)ry   r   rh   r>   r   r   rg   )r+   rh   rE   r,   r,   r-   rh     s   
zTensorArray.dtypec                 C   s   | j du rt| j| _ | j S )zQ
        Whether this TensorArray holds variable-shaped tensor elements.
        N)r   r   r   ro   r,   r,   r-   ry     s   
zTensorArray.is_variable_shapedc                 C   r   )zL
        The number of bytes needed to store this object in memory.
        )r   nbytesro   r,   r,   r-   r    s   zTensorArray.nbytesc                    s    j jjtju rtj fddtt D tdS  j jjtj	u r3tj
 j dkttd j jdS tj
t j ttd j jdS )a  
        A 1-D array indicating if each value is missing.

        Returns
        -------
        na_values : Union[np.ndarray, ExtensionArray]
            In most cases, this should return a NumPy ndarray. For
            exceptional cases like ``SparseArray``, where returning
            an ndarray would be expensive, an ExtensionArray may be
            returned.

        Notes
        -----
        If returning an ExtensionArray, then

        * ``na_values._is_boolean`` should be True
        * `na_values` should implement :func:`ExtensionArray._reduce`
        * ``na_values.any`` and ``na_values.all`` should be implemented
        c                    s   g | ]	} j | d u qS rS   r   )ru   iro   r,   r-   r     s    z$TensorArray.isna.<locals>.<listcomp>rh    r   axis)r   rh   rp   r<   r   rF   ranger   boolstr_rx   r   r>   isnanro   r,   ro   r-   isna  s   "zTensorArray.isnaindices
allow_fill
fill_valuec                 C   s   |rLt j|t jd}t|t| j t |dk }|rL|du r"t j}t t|f| jj	dd  |}|dk}t 
|t |d | j||   t|S | jj|dd}t|S )a  
        Take elements from an array.

        Parameters
        ----------
        indices : sequence of int
            Indices to be taken.
        allow_fill : bool, default False
            How to handle negative values in `indices`.

            * False: negative values in `indices` indicate positional indices
              from the right (the default). This is similar to
              :func:`numpy.take`.

            * True: negative values in `indices` indicate
              missing values. These values are set to `fill_value`. Any other
              other negative values raise a ``ValueError``.

        fill_value : any, optional
            Fill value to use for NA-indices when `allow_fill` is True.
            This may be ``None``, in which case the default NA value for
            the type, ``self.dtype.na_value``, is used.

            For many ExtensionArrays, there will be two representations of
            `fill_value`: a user-facing "boxed" scalar, and a low-level
            physical NA value. `fill_value` should be the user-facing version,
            and the implementation should handle translating that to the
            physical version for processing the take if necessary.

        Returns
        -------
        ExtensionArray

        Raises
        ------
        IndexError
            When the indices are out of bounds for the array.
        ValueError
            When `indices` contains negative values other than ``-1``
            and `allow_fill` is True.

        See Also
        --------
        numpy.take : Take elements from an array along an axis.
        api.extensions.take : Take elements from an array.

        Notes
        -----
        ExtensionArray.take is called by ``Series.__getitem__``, ``.loc``,
        ``iloc``, when `indices` is a sequence of values. Additionally,
        it's called by :meth:`Series.reindex`, or any other method
        that causes realignment, with a `fill_value`.

        Examples
        --------
        Here's an example implementation, which relies on casting the
        extension array to object dtype. This uses the helper method
        :func:`pandas.api.extensions.take`.

        .. code-block:: python

           def take(self, indices, allow_fill=False, fill_value=None):
               from pandas.core.algorithms import take

               # If the ExtensionArray is backed by an ndarray, then
               # just pass that here instead of coercing to object.
               data = self.astype(object)

               if allow_fill and fill_value is None:
                   fill_value = self.dtype.na_value

               # fill value should always be translated from the scalar
               # type for the array, to the physical storage type for
               # the data, before passing to take.

               result = take(data, indices, fill_value=fill_value,
                             allow_fill=allow_fill)
               return self._from_sequence(result, dtype=self.dtype)
        r  r   Nr   r  )r<   r=   intpr   r   r   r   nanfullrE   putwherer:   take)r+   r  r  r  has_missingr9   	is_nonnegr,   r,   r-   r    s   R""zTensorArray.takec                 C   s   t | j S )ze
        Return a copy of the array.

        Returns
        -------
        ExtensionArray
        )r:   r   r   ro   r,   r,   r-   r   ,  s   	zTensorArray.copy	to_concatc                 C   s|   d}d}|D ]}|du r|j j}|js|j j|krd} nq|r0ttjdd |D td}|S ttdd |D }|S )z
        Concatenate multiple array of this dtype.

        Parameters
        ----------
        to_concat : sequence of this type

        Returns
        -------
        ExtensionArray
        FNTc                 S   s   g | ]
}|j D ]}|qqS r,   r   )ru   aer,   r,   r-   r   N  s    z1TensorArray._concat_same_type.<locals>.<listcomp>r  c                 S   s   g | ]}|j qS r,   r   )ru   r  r,   r,   r-   r   Q  s    )rh   rt   ry   r:   r<   rF   objectr   )r   r  should_flattenrE   r  concatedr,   r,   r-   _concat_same_type7  s    zTensorArray._concat_same_typekeyr  c                 C   s   t | |}t|tst|rt|}t|tr dd |D }t|tr.t|jt	r.|j
}|du s=t|trLt|dkrLt| j| tj| j|< dS t|tttjfr\|| j|< dS tdt| d)a^  
        Set one or more values inplace.

        This method is not required to satisfy the pandas extension array
        interface.

        Parameters
        ----------
        key : int, ndarray, or slice
            When called from, e.g. ``Series.__setitem__``, ``key`` will be
            one of

            * scalar int
            * ndarray of integers.
            * boolean ndarray
            * slice object

        value : ExtensionDtype.type, Sequence[ExtensionDtype.type], or object
            value or values to be set of ``key``.

        Returns
        -------
        None
        c                 S   r   r,   r   r   r,   r,   r-   r   q  s    z+TensorArray.__setitem__.<locals>.<listcomp>Nr   z__setitem__ with key type 'z' not implemented)r   r8   rn   r<   r   r=   listr   rh   rg   r9   r   r   	full_liker   r  r   slicer   r   rp   )r+   r!  r  r,   r,   r-   __setitem__T  s    


zTensorArray.__setitem__c                    sD   t |trt|}|jdkrt| r|   S t	 
|S )z,
        Return for `item in self`.
        r   )r8   rn   r<   r=   r   r  rx   r  r   super__contains__)r+   r   np_item	__class__r,   r-   r'    s
   

zTensorArray.__contains__c                 C   r   rS   r   ro   r,   r,   r-   r     r   zTensorArray.__repr__c                 C   r   rS   r   ro   r,   r,   r-   r     r   zTensorArray.__str__c                 C   r   rS   r   ro   r,   r,   r-   _values_for_factorize  s   z!TensorArray._values_for_factorizeTr{   skipnac              	   K   sz   dg}i }|D ]}z|| ||< W q t y   Y qw zt| j| | jfddi|W S  t y<   td| ddw )a  
        Return a scalar result of performing the reduction operation.

        Parameters
        ----------
        name : str
            Name of the function, supported values are:
            { any, all, min, max, sum, mean, median, prod,
            std, var, sem, kurt, skew }.
        skipna : bool, default True
            If True, skip NaN values.
        **kwargs
            Additional keyword arguments passed to the reduction function.
            Currently, `ddof` is the only supported kwarg.

        Returns
        -------
        scalar

        Raises
        ------
        TypeError : subclass does not define reductions
        ddofr	  r   'z' aggregate not implemented.N)KeyErrorrn   SUPPORTED_REDUCERSr   r   )r+   r{   r,  r   supported_kwargsreducer_kwargskwr,   r,   r-   _reduce  s   zTensorArray._reducec                 K   r   r   r   r   r,   r,   r-   r     r   zTensorArray.__array__ufuncmethodc                    s   | dd}|| D ]}t|ttjtjfst  S q
tdd |D }|r1tdd |D |d< t	|||i |}t
|tu rLt fdd|D S |dkrRdS t
 |S )	z`
        Supports NumPy ufuncs without requiring sloppy coercion to an
        ndarray.
        outr,   c                 s   $    | ]}t |tr|jn|V  qd S rS   r8   r:   r   ru   rV   r,   r,   r-   rw     s   " z.TensorArray.__array_ufunc__.<locals>.<genexpr>c                 s   r8  rS   r9  r:  r,   r,   r-   rw     s    
c                 3   s    | ]	}t  |V  qd S rS   )rp   r:  ro   r,   r-   rw     s    atN)getr8   r:   r<   r   numbersNumberr   r   getattrrp   )r+   r5  r6  inputsr   r7  rV   r   r,   ro   r-   __array_ufunc__  s    zTensorArray.__array_ufunc__na_valuec                 C   s\   |dur t jj|}|rtj| j|dd}|S | j|}|S |r)| j }|S | j}|S )aj  
        Convert to a NumPy ndarray.

        .. versionadded:: 1.0.0

        This is similar to :meth:`numpy.asarray`, but may provide additional
        control over how the conversion is done.

        Parameters
        ----------
        dtype : str or numpy.dtype, optional
            The dtype to pass to :meth:`numpy.asarray`.
        copy : bool, default False
            Whether to ensure that the returned value is a not a view on
            another array. Note that ``copy=False`` does not *ensure* that
            ``to_numpy()`` is no-copy. Rather, ``copy=True`` ensure that
            a copy is made, even if not strictly necessary.
        na_value : Any, optional
            The value to use for missing values. The default value depends
            on `dtype` and the type of the array.

        Returns
        -------
        numpy.ndarray
        NTr   )	pdapitypespandas_dtyper<   rF   r   astyper   )r+   rh   r   rB  r9   r,   r,   r-   r     s   
zTensorArray.to_numpyc                 C   r   r   r   ro   r,   r,   r-   r     r   zTensorArray.numpy_dtypec                 C   r   r   r   ro   r,   r,   r-   r     r   zTensorArray.numpy_ndimc                 C   r   r   r   ro   r,   r,   r-   r     r   zTensorArray.numpy_shapec                 C   r   r   r   ro   r,   r,   r-   r     r   zTensorArray.numpy_sizec                 C   s   t jj|}t|tr|rt| j }|S | }|S t jj	|r)t jj
|sEtdd | jD }t|t jrC| j|ddS |S t jj	|rgtjt| td}tt| D ]	}| j| ||< q[|S | jj||d}|S )a  
        Cast to a NumPy array with 'dtype'.

        Parameters
        ----------
        dtype : str or dtype
            Typecode or data-type to which the array is cast.
        copy : bool, default True
            Whether to copy the data, even if not necessary. If False,
            a copy is made only if the old dtype does not match the
            new dtype.

        Returns
        -------
        array : ndarray
            NumPy ndarray with 'dtype' for its dtype.
        c                 S   s   g | ]}t |qS r,   rT   )ru   tr,   r,   r-   r   <  r   z&TensorArray.astype.<locals>.<listcomp>Fr   r  )rC  rD  rE  rF  r8   rg   r:   r   r   is_object_dtypeis_string_dtyper<   rF   StringDtyper   r   emptyr   r  r
  rG  )r+   rh   r   r9   r  r,   r,   r-   rG  #  s*   
zTensorArray.astypec                 C   &   | j j|||d}|du r|S t|S )ab  
        Test whether any array element along a given axis evaluates to True.

        See numpy.any() documentation for more information
        https://numpy.org/doc/stable/reference/generated/numpy.any.html#numpy.any

        :param axis: Axis or axes along which a logical OR reduction is
            performed.
        :param out: Alternate output array in which to place the result.
        :param keepdims: If this is set to True, the axes which are reduced are
            left in the result as dimensions with size one.
        :return: single boolean unless axis is not None else TensorArray
        r	  r7  keepdimsN)r   r   r:   r+   r	  r7  rO  r   r,   r,   r-   r   J  s   zTensorArray.anyc                 C   rM  )a  
        Test whether all array elements along a given axis evaluate to True.

        :param axis: Axis or axes along which a logical AND reduction is
            performed.
        :param out: Alternate output array in which to place the result.
        :param keepdims: If this is set to True, the axes which are reduced are
            left in the result as dimensions with size one.
        :return: single boolean unless axis is not None else TensorArray
        rN  N)r   rx   r:   rP  r,   r,   r-   rx   [  s   zTensorArray.allc                 C   s.   ddl m}m} | jr|| jS || jS )a  
        Convert this TensorArray to an ArrowTensorArray extension array.

        This and TensorDtype.__from_arrow__ make up the
        Pandas extension type + array <--> Arrow extension type + array
        interoperability protocol. See
        https://pandas.pydata.org/pandas-docs/stable/development/extending.html#compatibility-with-apache-arrow
        for more information.
        r   )ArrowTensorArrayArrowVariableShapedTensorArray)$ray.air.util.tensor_extensions.arrowrQ  rR  ry   
from_numpyr   )r+   rp   rQ  rR  r,   r,   r-   __arrow_array__i  s   
zTensorArray.__arrow_array__c                 C   r   )r   )rh   r   ro   r,   r,   r-   r   }  s   
zTensorArray._is_boolean)r   r:   )FN)TrS   )NNF)Cr   r   r   r   r<   r   rx   r   r   r   r   r   r   r   r   r0  r   r   r   r   rn   r   rl   r   r   r
   r  r   rC  rD  
extensionsExtensionArrayr   r   r$  r  r  r   ExtensionDtyperh   ry   r  r  r  r   r   r%  r'  r   r   r   r+  rU   r4  r   r   rA  
no_defaultr   r   r   r   r   rG  rU  r   __classcell__r,   r,   r)  r-   r:   j  s    V
;
3


"

m +
&
+





'

r:   sc                 C   s(   | j jtju o| j ot| jd tjS )a  Return whether the provided pandas Series column needs a tensor extension
    representation. This tensor extension representation provides more efficient slicing
    and interop with ML frameworks.

    Args:
        s: The pandas Series column that may need to be represented using the tensor
            extension.

    Returns:
        Whether the provided Series needs a tensor extension representation.
    r   )rh   rp   r<   r   rL  r8   ilocr   )r[  r,   r,   r-   column_needs_tensor_extension  s   &r]  )Br=  ostypingr   r   r   r   r   r   r   numpyr<   pandasrC  pyarrowr   packaging.versionr	   pandas._typingr
   pandas.compatr   pandas.core.dtypes.genericr   r   pandas.core.indexersr   r   $ray.air.util.tensor_extensions.utilsr   r   ray.util.annotationsr   r   ImportErrorr   rU   rK   r`   _FORMATTER_ENABLED_ENV_VARgetenv__version__r7   rc   formatter_clsrd   _format_stringsr;   _patched_by_ray_datasetsrD  rV  register_extension_dtyperX  rg   ExtensionScalarOpsMixinr   r   rn   rW  r:   _add_arithmetic_ops_add_comparison_ops_add_logical_opsSeriesr  r]  r,   r,   r,   r-   <module>   sz    $
*<&   6>
      .