o
    bi                     @   s  d dl Z d dlmZ d dlZd dlZd dlZd dlZd dlmZmZm	Z	m
Z
mZmZmZmZ d dlZd dlZd dlmZ d dlmZ d dlmZmZmZmZ d dlmZmZ d dl m!Z! d d	l"m#Z#m$Z$ d d
l%m&Z& e Z'edZ(edZ)dZ*e+e,Z-e#G dd de.Z/dd Z0e#dee1ee
e ej2f f dej3fddZ4e#dddee
e ej5ef de1dej2fddZ6dee
e ej5f de1dej2fddZ7de
eej2ej8f  fddZ9dej5d ej:de1fd!d"Z;dee
e ej5f deej< fd#d$Z=d%d%d%d%d%d%d%d&d'd(d(d(d(d)Z>dee
e ej5f deej< fd*d+Z?e#d,d- Z@e#d.d/ ZAe#d0d1 ZBG d2d3 d3ejCe jDZEe$d4dG d5d6 d6eEZFe$ddG d7d8 d8eEZGe0 rhe$d4dG d9d: d:ejHZIG d;d< d<ZJe$d4dG d=d> d>eJejKZLe$ddG d?d@ d@ejCZMe$ddG dAdB dBeJejKZNdCej5dDeej5 deOfdEdFZPdGej5deQfdHdIZRdJdK ZSdLdM ZTz!eUeFdNeV  eUeGdNeV  eUeMeV d  W dS  ejWy   Y dS w )O    N)datetime)AnyDictIterableListOptionalSequenceTupleUnion)parse)get_pyarrow_version)"_is_ndarray_variable_shaped_tensorcreate_ragged_ndarray_should_convert_to_tensor	ArrayLike)convert_to_numpy _convert_datetime_to_np_datetime)log_once)DeveloperAPI	PublicAPI)	INT32_MAXz9.0.0z13.0.0   c                       s*   e Zd ZdZdZdef fddZ  ZS )ArrowConversionErrorz=Error raised when there is an issue converting data to Arrow.   data_strc                    s:   t || jkr|d | j d }d| }t | d S )Nz...z Error converting data to Arrow: )lenMAX_DATA_STR_LENsuper__init__)selfr   message	__class__ X/home/ubuntu/.local/lib/python3.10/site-packages/ray/air/util/tensor_extensions/arrow.pyr   0   s   
zArrowConversionError.__init__)__name__
__module____qualname____doc__r   strr   __classcell__r#   r#   r!   r$   r   *   s    r   c                   C   s   t du pt tkS )z
    Whether Arrow ExtensionScalars support subclassing in the current pyarrow version.

    This returns True if the pyarrow version is 9.0.0+, or if the pyarrow version is
    unknown.
    N)PYARROW_VERSION#MIN_PYARROW_VERSION_SCALAR_SUBCLASSr#   r#   r#   r$   )_arrow_extension_scalars_are_subclassable7   s   	r-   pydictreturnc              
   C   s6   zt j| W S  ty } ztt| |d}~ww )z}
    Convert a Python dictionary to a pyarrow Table.

    Raises:
        ArrowConversionError: if the conversion fails.
    N)paTablefrom_pydict	Exceptionr   r)   )r.   er#   r#   r$   pyarrow_table_from_pydictE   s   
r5   alpha)	stabilitycolumn_valuescolumn_namec           
   
   C   s  z t | dkrt| |rddlm} |t| |W S t| |W S  ty } zWddlm	} ddl
m}m} | j}| sJd}dt dt d}	n|d	u pO|}|rUd
}	nd}	|d	u s]|sqtdrqtjd| d| d|	 |d |st || W  Y d	}~S d	}~ww )a  Converts provided NumPy `ndarray` into PyArrow's `array` while utilizing
    both Arrow's natively supported types as well as custom extension types:

        - ArrowTensorArray (for tensors)
        - ArrowPythonObjectArray (for user-defined python class objects, as well as
        any python object that aren't represented by a corresponding Arrow's native
        scalar type)
    r   ArrowTensorArrayDataContext)ArrowPythonObjectArray_object_extension_type_allowedFz[skipping fallback to serialize as pickled python objects (due to unsupported Arrow version z, min required version is )Nz3falling back to serialize as pickled python objectsz~skipping fallback to serialize as pickled python objects (due to DataContext.enable_fallback_to_arrow_object_ext_type = False)0_fallback_to_arrow_object_extension_type_warningzFailed to convert column 'z' into pyarrow array due to: z; exc_info)r   r   $ray.data.extensions.tensor_extensionr;   
from_numpyr    _convert_to_pyarrow_native_arrayr   ray.datar=   $ray.data.extensions.object_extensionr>   r?   get_current(enable_fallback_to_arrow_object_ext_typer+   r,   r   loggerwarningfrom_objects)
r8   r9   r;   acer=   r>   r?   enable_fallback_config object_ext_type_fallback_allowedobject_ext_type_detailr#   r#   r$   convert_to_pyarrow_arrayU   sZ   
	rR   c              
   C   s   zlt | dkrt| d trt| } t | dkr(t| d tjtjfr(t| W S t| }|r@tj	
|r@t| tjr@t| ||} ttdd| d| d t | dkret| d tjredd | D } tj| |dW S  ty } ztt| |d	}~ww )
zConverts provided NumPy `ndarray` into PyArrow's `array` while only utilizing
    Arrow's natively supported types (ie no custom extension types)r   TRACEzInferred dtype of 'z' for column ''c                 S   s   g | ]	}|j r	|nd qS N)is_valid.0vr#   r#   r$   
<listcomp>       z4_convert_to_pyarrow_native_array.<locals>.<listcomp>typeN)r   
isinstancer   r   r0   ArrayChunkedArray_combine_as_list_array_infer_pyarrow_typetypesis_timestampnpndarray-_coerce_np_datetime_to_pa_timestamp_precisionrK   loglogginggetLevelName
ListScalararrayr3   r   r)   )r8   r9   pa_typer4   r#   r#   r$   rF      s0   

rF   c                 C   sd   dd | D }t jtdgt|gt  d}t tjdd | D  }t j	
||t |jS )z7Combines list of Arrow arrays into a single `ListArray`c                 S   s   g | ]}t |qS r#   )r   rW   r#   r#   r$   rZ          z*_combine_as_list_array.<locals>.<listcomp>r   r\   c                 S   s$   g | ]}t |tjr|jn|gqS r#   )r^   r0   r`   chunksrW   r#   r#   r$   rZ      s    )r0   rl   re   concatenatecumsumint32concat_arrays	itertoolschain	ListArrayfrom_arrayslist_r]   )r8   lensoffsetscombinedr#   r#   r$   ra      s   $
ra   dtypec                 C   sp   t | jt js
J t | j\}}|j}||kr6| d| d} td| dr6t	d|d| d | S )Nzdatetime64[]column__timestamp_warningzConverting a z$ precision datetime NumPy array to 'z' precision Arrow timestamp. This conversion occurs because Arrow supports fewer precisions than Arrow and might result in a loss of precision or unrepresentable values.)
re   
issubdtyper|   
datetime64datetime_dataunitastyper   rK   rL   )r8   r|   r9   numpy_precision_arrow_precisionr#   r#   r$   rg      s   rg   c                    s   t | dkrdS t| }|dur|S t| }dtdtfdd tj|r5t fdd| D r5t	 S tj
|rJt fd	d| D rJt S |S )
a  Infers target Pyarrow `DataType` based on the provided
    columnar values.

    NOTE: This is a wrapper on top of `pa.infer_type(...)` utility
          performing up-casting of `binary` and `string` types to
          corresponding `large_binary` and `large_string` types in case
          any of the array elements exceeds 2Gb in size therefore
          making it impossible for original types to accommodate such
          values.

          Unfortunately, for unknown reasons PA doesn't perform
          that upcasting itself henceforth we have to do perform
          it manually

    Args:
        column_values: List of columnar values

    Returns:
        Instance of PyArrow's `DataType` based on the provided
        column values
    r   Nobjr/   c                 S   s   t | ttfrt| tkS dS NF)r^   r)   bytesr   r   )r   r#   r#   r$   _len_gt_overflow_threshold;  s   z7_infer_pyarrow_type.<locals>._len_gt_overflow_thresholdc                       g | ]} |qS r#   r#   rW   r   r#   r$   rZ   I  rn   z'_infer_pyarrow_type.<locals>.<listcomp>c                    r   r#   r#   rW   r   r#   r$   rZ   M  rn   )r   _try_infer_pa_timestamp_typer0   
infer_typer   boolrc   	is_binaryanylarge_binary	is_stringlarge_string)r8   dtype_with_timestamp_typeinferred_pa_dtyper#   r   r$   rb     s    
rb   smsusns)YDMWhmr   r   r   r   psfsasc                 C   sr   t | trt| dkrt| d }|rt|S d S t | tjr7t| j	tj
r7t| j	\}}tt| S d S )Nr   )r^   listr   r   r0   rx   re   rf   r   r|   r   r   	timestamp_NUMPY_TO_ARROW_PRECISION_MAP)r8   element_typenp_precisionr   r#   r#   r$   r   g  s   r   c                   C   s   g t  t R S )zXReturns list of extension types of Arrow Array holding
    multidimensional tensors
    ),get_arrow_extension_fixed_shape_tensor_types/get_arrow_extension_variable_shape_tensor_typesr#   r#   r#   r$    get_arrow_extension_tensor_types{  s
   r   c                   C   s   t tfS z`Returns list of Arrow extension types holding multidimensional
    tensors of *fixed* shape
    )ArrowTensorTypeArrowTensorTypeV2r#   r#   r#   r$   r     s   r   c                   C   s   t fS r   )ArrowVariableShapedTensorTyper#   r#   r#   r$   r        r   c                       s   e Zd ZdZdeedf dejdef fddZ	e
dd	 Ze
d
d Zdd Zdd Zdd Zdd Ze r;dd ZdddejfddZdefddZdefddZedeed   defd!d"Z  ZS )#_BaseFixedShapeArrowTensorTypea  
    Arrow ExtensionType for an array of fixed-shaped, homogeneous-typed
    tensors.

    This is the Arrow side of TensorDtype.

    See Arrow extension type docs:
    https://arrow.apache.org/docs/python/extending_types.html#defining-extension-types-user-defined-types
    shape.tensor_dtypeext_type_idc                    s   || _ t || d S rU   )_shaper   r   )r   r   r   r   r!   r#   r$   r     s   z'_BaseFixedShapeArrowTensorType.__init__c                 C      | j S )z-
        Shape of contained tensors.
        )r   r   r#   r#   r$   r     r   z$_BaseFixedShapeArrowTensorType.shapec                 C   s   | j jS )3Returns the type of the underlying tensor elements.)storage_type
value_typer   r#   r#   r$   scalar_type     z*_BaseFixedShapeArrowTensorType.scalar_typec                 C   s   ddl m} || j| j S )
        Convert Arrow extension type to corresponding Pandas dtype.

        Returns:
            An instance of pd.api.extensions.ExtensionDtype.
        r   TensorDtype)%ray.air.util.tensor_extensions.pandasr   r   r   to_pandas_dtyper   r   r#   r#   r$   r     s   z._BaseFixedShapeArrowTensorType.to_pandas_dtypec                 C      | j | j|  ffS rU   __arrow_ext_deserialize__r   __arrow_ext_serialize__r   r#   r#   r$   
__reduce__     z)_BaseFixedShapeArrowTensorType.__reduce__c                 C      t | j S rU   )jsondumpsr   encoder   r#   r#   r$   r        z6_BaseFixedShapeArrowTensorType.__arrow_ext_serialize__c                 C      t S z
        ExtensionArray subclass with custom logic for this array of tensors
        type.

        Returns:
            A subclass of pd.api.extensions.ExtensionArray.
        r:   r   r#   r#   r$   __arrow_ext_class__     z2_BaseFixedShapeArrowTensorType.__arrow_ext_class__c                 C   r   zd
            ExtensionScalar subclass with custom logic for this array of tensors type.
            ArrowTensorScalarr   r#   r#   r$   __arrow_ext_scalar_class__     z9_BaseFixedShapeArrowTensorType.__arrow_ext_scalar_class__scalarpa.ExtensionScalarr/   c                 C   s6   |j j}|jj}|j}|j}| d }t||||S )A
        Convert an ExtensionScalar to a tensor element.
           )valuevaluesr]   r   offsetbuffers_to_ndarray_helper)r   r   
raw_valuesr   r   r   data_bufferr#   r#   r$   _extension_scalar_to_ndarray  s   z;_BaseFixedShapeArrowTensorType._extension_scalar_to_ndarrayc                 C   s   d| j  d| jj dS )Nznumpy.ndarray(shape=, dtype=r@   )r   r   r   r   r#   r#   r$   __str__  s   z&_BaseFixedShapeArrowTensorType.__str__c                 C      t | S rU   r)   r   r#   r#   r$   __repr__     z'_BaseFixedShapeArrowTensorType.__repr__array_types)r   r   r   c                 C   sZ   d}|D ]&}t |tr dS t |t std| |dur'|j|kr' dS |j}qdS )a  
        Whether the provided list of tensor types needs a variable-shaped
        representation (i.e. `ArrowVariableShapedTensorType`) when concatenating
        or chunking. If one or more of the tensor types in `array_types` are
        variable-shaped and/or any of the tensor arrays have a different shape
        than the others, a variable-shaped tensor array representation will be
        required and this method will return True.

        Args:
            array_types: List of tensor types to check if a variable-shaped
                representation is required for concatenation

        Returns:
            True if concatenating arrays with types `array_types` requires
            a variable-shaped representation
        NTzqAll provided array types must be an instance of either ArrowTensorType or ArrowVariableShapedTensorType, but got F)r^   r   r   
ValueErrorr   )clsr   r   arr_typer#   r#   r$   "_need_variable_shaped_tensor_array  s   
zA_BaseFixedShapeArrowTensorType._need_variable_shaped_tensor_array)r%   r&   r'   r(   r	   intr0   DataTyper)   r   propertyr   r   r   r   r   r   r-   r   re   rf   r   r   r   classmethodr   r
   r   r   r*   r#   r#   r!   r$   r     s@    




r   betac                       N   e Zd ZdZejZdeedf de	j
f fddZedd Zd	d
 Z  ZS )r   zArrow ExtensionType (v1) for tensors.

    NOTE: This type does *NOT* support tensors larger than 4Gb (due to
          overflow of int32 offsets utilized inside Pyarrow `ListType`)
    r   .r|   c                       t  |t|d dS )
        Construct the Arrow extension type for array of fixed-shaped tensors.

        Args:
            shape: Shape of contained tensors.
            dtype: pyarrow dtype of tensor elements.
        zray.data.arrow_tensorN)r   r   r0   rx   r   r   r|   r!   r#   r$   r   $     	zArrowTensorType.__init__c                 C      t t|}| ||jS rU   tupler   loadsr   r   r   
serializedr   r#   r#   r$   r   /     z)ArrowTensorType.__arrow_ext_deserialize__c                 C   "   t |to|j| jko|j| jkS rU   )r^   r   r   r   r   otherr#   r#   r$   __eq__4  
   


zArrowTensorType.__eq__)r%   r&   r'   r(   re   rr   OFFSET_DTYPEr	   r   r0   r   r   r   r   r  r*   r#   r#   r!   r$   r     s     
r   c                       r   )r   z@Arrow ExtensionType (v2) for tensors (supporting tensors > 4Gb).r   .r|   c                    r   )r   zray.data.arrow_tensor_v2N)r   r   r0   
large_listr   r!   r#   r$   r   B  r   zArrowTensorTypeV2.__init__c                 C   r   rU   r   r   r#   r#   r$   r   M  r   z+ArrowTensorTypeV2.__arrow_ext_deserialize__c                 C   r   rU   )r^   r   r   r   r  r#   r#   r$   r  R  r  zArrowTensorTypeV2.__eq__)r%   r&   r'   r(   re   int64r  r	   r   r0   r   r   r   r   r  r*   r#   r#   r!   r$   r   <  s     
r   c                   @   s,   e Zd ZdejfddZdejfddZdS )r   r/   c                 K   s   | j | S rU   )r]   r   )r   kwargsr#   r#   r$   as_py^  s   zArrowTensorScalar.as_pyc                 C   s   |   S rU   )r	  r   r#   r#   r$   	__array__a  r   zArrowTensorScalar.__array__N)r%   r&   r'   re   rf   r	  r
  r#   r#   r#   r$   r   \  s    r   c                       s>   e Zd ZdZe sdd Zdd Z fddZ  ZS   ZS )_ArrowTensorScalarIndexingMixinz
    A mixin providing support for scalar indexing in tensor extension arrays for
    Arrow < 9.0.0, before full ExtensionScalar support was added. This mixin overrides
    __getitem__, __iter__, and to_pylist.
    c                 c   s$    t t| D ]}| |V  qd S rU   )ranger   __getitem__)r   ir#   r#   r$   __iter__q  s   z(_ArrowTensorScalarIndexingMixin.__iter__c                 C   r   rU   )r   r   r#   r#   r$   	to_pylisty  r   z)_ArrowTensorScalarIndexingMixin.to_pylistc                    s&   t  |}t|ts|j|}|S rU   )r   r  r^   slicer]   r   )r   keyitemr!   r#   r$   r  ~  s   

z+_ArrowTensorScalarIndexingMixin.__getitem__)	r%   r&   r'   r(   r-   r  r  r  r*   r#   r#   r!   r$   r  f  s    r  c                	   @   s   e Zd ZdZe	ddeejeej f de	e
 ded fddZedejded fd	d
Zdde	e defddZddefddZe	d deed  deded fddZedeed  dejfddZd!ddZdS )"r;   z
    An array of fixed-shape, homogeneous-typed tensors.

    This is the Arrow side of TensorArray.

    See Arrow docs for customizing extension arrays:
    https://arrow.apache.org/docs/python/extending_types.html#custom-extension-array-class
    Narrr9   r/   )r;   ArrowVariableShapedTensorArrayc              
   C   s>  t |tjst |trt|}t |ttfrN|rNt |d tjrNz	tj|dd}W n$ tyM } ztj	d| d|d tj
|td}W Y d}~nd}~ww t |tjs`tdt| d	| zt|}|rmt|||}| |W S  ty } z d
}|r|d| d7 }|d|j d|j d| 7 }t||d}~ww )aN  
        Convert an ndarray or an iterable of ndarrays to an array of homogeneous-typed
        tensors. If given fixed-shape tensor elements, this will return an
        ``ArrowTensorArray``; if given variable-shape tensor elements, this will return
        an ``ArrowVariableShapedTensorArray``.

        Args:
            arr: An ndarray or an iterable of ndarrays.
            column_name: Optional. Used only in logging outputs to provide
                additional details.

        Returns:
            - If fixed-shape tensor elements, an ``ArrowTensorArray`` containing
              ``len(arr)`` tensors of fixed shape.
            - If variable-shaped tensor elements, an ``ArrowVariableShapedTensorArray``
              containing ``len(arr)`` tensors of variable shape.
            - If scalar elements, a ``pyarrow.Array``.
        r   )axiszFailed to stack lists due to: z3; falling back to using np.array(..., dtype=object)rB   )r|   Nz/Must give ndarray or iterable of ndarrays, got   z	column: 'z', zshape: z	, dtype: z, data: )r^   re   rf   r   r   r   stackr   rK   rL   rl   objectr]   r   rg   _from_numpyr3   r   r|   r   )r   r  r9   vetimestamp_dtyper4   r   r#   r#   r$   rE     s@   "


zArrowTensorArray.from_numpyc                    s  t |dkrt|d rt|S t|rt|S |jj	s$t
|}t|j}tj|rP|jjdksA|jjdkrItjdkrItd|j t|jj}|jd }|jdd  }|j}|rft|nd tj|rutj|dd}t|}tj||d |g}dd	lm} | jrt ||}	nt!||}	t|	" fd
dt#|d D }
tjj|	j$|d |
g|gd}tj%&|	|S )Nr   >=big:Only little-endian string tensors are supported, but got: r   littlebitorderr<   c                    s   g | ]}|  qS r#   r#   rX   r  num_items_per_elementr#   r$   rZ     rn   z0ArrowTensorArray._from_numpy.<locals>.<listcomp>)children)'r   re   isscalarr0   rl   r   r  rE   flagsc_contiguousascontiguousarrayfrom_numpy_dtyper|   rc   r   	byteordersysr   binaryitemsizer   sizeprod
is_booleanpackbits	py_bufferr_   from_buffersrG   r=   rI   use_arrow_tensor_v2r   r   r  r  r   ExtensionArrayfrom_storage)r   r  scalar_dtype	outer_lenelement_shapetotal_num_itemsr   
data_arrayr=   pa_type_offset_bufferstorager#   r&  r$   r    sT   







zArrowTensorArray._from_numpyFindexzero_copy_onlyc                 C   s`  |   }|d }| jj}|j}| }| jj}tj|r!|j	}	n|j	d }	|r-t
|nd}
| j|
 }|	| }|durX|d }t
jt| f|| jjd}|| }||	| 7 }nt| f| }tj|r|d }|d }d|t
| d d  }t
j|ft
j||d}t
j|dd}t
j|t
j||dS tj|rt
d	|jt  }t
j||||dS )
at  
        Helper for getting either an element of the array of tensors as an
        ndarray, or the entire array of tensors as a single ndarray.

        Args:
            index: The index of the tensor element that we wish to return as
                an ndarray. If not given, the entire array of tensors is
                returned as an ndarray.
            zero_copy_only: If True, an exception will be raised if the
                conversion to a NumPy array would require copying the
                underlying data (e.g. in presence of nulls, or for
                non-primitive types). This argument is currently ignored, so
                zero-copy isn't enforced even if this argument is true.

        Returns:
            The corresponding tensor element as an ndarray if an index was
            given, or the entire array of tensors as an ndarray otherwise.
              r   N)bufferr|   r|   rG  r   r"  r#  <U)r   rB  r]   r   r   r   r0   rc   r4  	bit_widthre   r3  r   rf   r   r  uint8
unpackbitsbool_is_fixed_size_binaryr|   
byte_widthNUM_BYTES_PER_UNICODE_CHAR)r   rC  rD  r   r   storage_list_typer   	ext_dtyper   buffer_item_widthr'  buffer_offsetr   rA  offset_arrayindex_offsetbyte_bucket_offsetbool_offsetnum_boolean_byte_bucketsr  r#   r#   r$   	_to_numpy  sJ   

zArrowTensorArray._to_numpyTc                 C      | j |dS )a  
        Convert the entire array of tensors into a single ndarray.

        Args:
            zero_copy_only: If True, an exception will be raised if the
                conversion to a NumPy array would require copying the
                underlying data (e.g. in presence of nulls, or for
                non-primitive types). This argument is currently ignored, so
                zero-copy isn't enforced even if this argument is true.

        Returns:
            A single ndarray representing the entire array of tensors.
        rD  rZ  r   rD  r#   r#   r$   to_numpyu  s   zArrowTensorArray.to_numpy	to_concatensure_copyc                 C   sj   dd |D }t |rtdd |D S |s"t|dkr"|d S tdd |D }t|d j	|S )a  
        Concatenate multiple tensor arrays.

        If one or more of the tensor arrays in to_concat are variable-shaped and/or any
        of the tensor arrays have a different shape than the others, a variable-shaped
        tensor array will be returned.

        Args:
            to_concat: Tensor arrays to concat
            ensure_copy: Skip copying when ensure_copy is False and there is exactly 1 chunk.
        c                 S      g | ]}|j qS r#   r\   rX   r  r#   r#   r$   rZ         z6ArrowTensorArray._concat_same_type.<locals>.<listcomp>c                 S   s   g | ]	}|D ]}|qqS r#   r#   )rX   ar4   r#   r#   r$   rZ     r[   r   r   c                 S   rb  r#   )rB  )rX   cr#   r#   r$   rZ     rd  )
r   r   r  rE   r   r0   rs   r;   r:  r]   )r   r`  ra  to_concat_typesrB  r#   r#   r$   _concat_same_type  s   
z"ArrowTensorArray._concat_same_typearrsc                 C   sd   dd |D }t |r-g }|D ]}t|jt r| }t|jts%J || q|}t	|S )zD
        Create a ChunkedArray from multiple tensor arrays.
        c                 S   rb  r#   r\   rc  r#   r#   r$   rZ     rd  z9ArrowTensorArray._chunk_tensor_arrays.<locals>.<listcomp>)
r   r   r^   r]   r   to_variable_shaped_tensor_arrayr   appendr0   chunked_array)r   ri  
arrs_typesnew_arrsre  r#   r#   r$   _chunk_tensor_arrays  s   

z%ArrowTensorArray._chunk_tensor_arraysr  c                 C   s   t |  S )a  
        Convert this tensor array to a variable-shaped tensor array.

        This is primarily used when concatenating multiple chunked tensor arrays where
        at least one chunked array is already variable-shaped and/or the shapes of the
        chunked arrays differ, in which case the resulting concatenated tensor array
        will need to be in the variable-shaped representation.
        )r  rE   r_  r   r#   r#   r$   rj    s   z0ArrowTensorArray.to_variable_shaped_tensor_arrayrU   r   T)F)r/   r  )r%   r&   r'   r(   r   r
   re   rf   r   r   r)   rE   r  r   r   rZ  r_  r   rh  r0   r`   ro  rj  r#   r#   r#   r$   r;     sJ    	A@V%
r;   c                       s   e Zd ZdZdejdef fddZdd Ze	defd	d
Z
e	dd Zdd Zdd Zedd Zdd Ze r>dd ZdefddZdefddZdddejfddZ  ZS )r   a  
    Arrow ExtensionType for an array of heterogeneous-shaped, homogeneous-typed
    tensors.

    This is the Arrow side of TensorDtype for tensor elements with different shapes.
    Note that this extension only supports non-ragged tensor elements; i.e., when
    considering each tensor element in isolation, they must have a well-defined,
    non-ragged shape.

    See Arrow extension type docs:
    https://arrow.apache.org/docs/python/extending_types.html#defining-extension-types-user-defined-types
    r|   ndimc              
      s:   || _ t tdt|fdtt fgd dS )z
        Construct the Arrow extension type for array of heterogeneous-shaped tensors.

        Args:
            dtype: pyarrow dtype of tensor elements.
            ndim: The number of dimensions in the tensor elements.
        datar   z%ray.data.arrow_variable_shaped_tensorN)_ndimr   r   r0   structr  rx   r  )r   r|   rq  r!   r#   r$   r     s   z&ArrowVariableShapedTensorType.__init__c                 C   s*   ddl m} |d| j | jd jj S )r   r   r   rU   rr  )r   r   rq  r   r]   r   r   r   r#   r#   r$   r     s
   z-ArrowVariableShapedTensorType.to_pandas_dtyper/   c                 C   r   )z7Return the number of dimensions in the tensor elements.)rs  r   r#   r#   r$   rq    s   z"ArrowVariableShapedTensorType.ndimc                 C   s   | j d}| j | jjS )r   rr  )r   get_field_indexr]   r   )r   data_field_indexr#   r#   r$   r     s   z)ArrowVariableShapedTensorType.scalar_typec                 C   r   rU   r   r   r#   r#   r$   r     r   z(ArrowVariableShapedTensorType.__reduce__c                 C   r   rU   )r   r   rs  r   r   r#   r#   r$   r   
  r   z5ArrowVariableShapedTensorType.__arrow_ext_serialize__c                 C   s    t |}|d jj}| ||S )Nrr  )r   r   r]   r   )r   r   r   rq  r|   r#   r#   r$   r     s   

z7ArrowVariableShapedTensorType.__arrow_ext_deserialize__c                 C   r   r   )r  r   r#   r#   r$   r     r   z1ArrowVariableShapedTensorType.__arrow_ext_class__c                 C   r   r   r   r   r#   r#   r$   r     r   z8ArrowVariableShapedTensorType.__arrow_ext_scalar_class__c                 C   s"   | j d jj}d| j d| dS )Nrr  znumpy.ndarray(ndim=r   r@   )r   r]   r   rq  )r   r|   r#   r#   r$   r   %  s   z%ArrowVariableShapedTensorType.__str__c                 C   r   rU   r   r   r#   r#   r$   r   )  r   z&ArrowVariableShapedTensorType.__repr__r   r   c                 C   sL   |j d}|j}t|j d }|j}|j}| d }t||||S )r   rr  r   r   )	r   getr   r   r	  r]   r   r   r   )r   r   rr  r   r   r   r   r   r#   r#   r$   r   ,  s   z:ArrowVariableShapedTensorType._extension_scalar_to_ndarray)r%   r&   r'   r(   r0   r   r   r   r   r   rq  r   r   r   r   r   r   r-   r   r)   r   r   re   rf   r   r*   r#   r#   r!   r$   r     s$    


r   c                   @   sf   e Zd ZdZedeejeej e	ej f dd fddZ
ddee d	efd
dZdd	efddZdS )r  aB  
    An array of heterogeneous-shaped, homogeneous-typed tensors.

    This is the Arrow side of TensorArray for tensor elements that have differing
    shapes. Note that this extension only supports non-ragged tensor elements; i.e.,
    when considering each tensor element in isolation, they must have a well-defined
    shape. This extension also only supports tensor elements that all have the same
    number of dimensions.

    See Arrow docs for customizing extension arrays:
    https://arrow.apache.org/docs/python/extending_types.html#custom-extension-array-class
    r  r/   c                 C   s  t |tttjfstdt| t|dkrtdg g g }}}d}|D ]6}t|}|durC|j	|krCtd| d|j	 |j	}|
|j |
|j tj|dd}|
| q(t|}t|}|d	 }td
d t|D r~|d	 j}	nt|}	|	j}
t|
}tj|r|
jdks|
jdkrtjdkrtd|
 t|
j}|
jtju rtj|	dd}	t|	}tj !||d|g}t"|dd}t|}tj#$||}t|}tj%$||gddg}t&||}tj'(||S )ax  
        Convert an ndarray or an iterable of heterogeneous-shaped ndarrays to an array
        of heterogeneous-shaped, homogeneous-typed tensors.

        Args:
            arr: An ndarray or an iterable of heterogeneous-shaped ndarrays.

        Returns:
            An ArrowVariableShapedTensorArray containing len(arr) tensors of
            heterogeneous shape.
        zmArrowVariableShapedTensorArray can only be constructed from an ndarray or a list/tuple of ndarrays, but got: r   z5Creating empty ragged tensor arrays is not supported.NzArrowVariableShapedTensorArray only supports tensor elements that all have the same number of dimensions, but got tensor elements with dimensions: z, C)orderc                 s   s    | ]
\}}t ||V  qd S rU   )_is_contiguous_view)rX   prevcurrr#   r#   r$   	<genexpr>  s    z<ArrowVariableShapedTensorArray.from_numpy.<locals>.<genexpr>r  r  r   r!  r"  r#  rr  r   ))r^   r   r   re   rf   r   r]   r   asarrayrq  rk  r   r2  ravelrl   rq   all	_pairwisebaserp   r|   r0   r-  rc   r   r.  r/  r0  r1  rM  r5  r6  r_   r7  insertLargeListArrayrw   StructArrayr   r9  r:  )r   r  shapessizesraveledrq  re  size_offsets
total_sizenp_data_bufferr|   pa_dtyper   value_arrayrU  r?  shape_arrayrB  type_r#   r#   r$   rE   N  sp   









z)ArrowVariableShapedTensorArray.from_numpyNFrC  rD  c           
         s   |du r fddt t D }t|S  jd} jd}||  }|jj}|j|  }|	 d }	t
||||	S )ad  
        Helper for getting either an element of the array of tensors as an ndarray, or
        the entire array of tensors as a single ndarray.

        Args:
            index: The index of the tensor element that we wish to return as an
                ndarray. If not given, the entire array of tensors is returned as an
                ndarray.
            zero_copy_only: If True, an exception will be raised if the conversion to a
                NumPy array would require copying the underlying data (e.g. in presence
                of nulls, or for non-primitive types). This argument is currently
                ignored, so zero-copy isn't enforced even if this argument is true.

        Returns:
            The corresponding tensor element as an ndarray if an index was given, or
            the entire array of tensors as an ndarray otherwise.
        Nc                    s   g | ]}  |qS r#   r]  r%  r^  r#   r$   rZ     s    z<ArrowVariableShapedTensorArray._to_numpy.<locals>.<listcomp>rr  r   rE  )r  r   r   rB  fieldr	  r]   r   rz   r   r   )
r   rC  rD  ri  rr  r  r   r   r   r   r#   r^  r$   rZ    s   z(ArrowVariableShapedTensorArray._to_numpyTc                 C   r[  )a
  
        Convert the entire array of tensors into a single ndarray.

        Args:
            zero_copy_only: If True, an exception will be raised if the conversion to a
                NumPy array would require copying the underlying data (e.g. in presence
                of nulls, or for non-primitive types). This argument is currently
                ignored, so zero-copy isn't enforced even if this argument is true.

        Returns:
            A single ndarray representing the entire array of tensors.
        r\  r]  r^  r#   r#   r$   r_    s   z'ArrowVariableShapedTensorArray.to_numpyr   rp  )r%   r&   r'   r(   r   r
   re   rf   r   r	   rE   r   r   r   rZ  r_  r#   r#   r#   r$   r  =  s    c#r  r}  r|  c                 C   sZ   | j du s| jjr|dur| j |j urdS |dur+t| t| |j jj|j kr+dS dS )ab  Check if the provided tensor element is contiguous with the previous tensor
    element.

    Args:
        curr: The tensor element whose contiguity that we wish to check.
        prev: The previous tensor element in the tensor array.

    Returns:
        Whether the provided tensor element is contiguous with the previous tensor
        element.
    NFT)r  rr  r+  _get_buffer_addressr|   r1  r2  )r}  r|  r#   r#   r$   r{    s   
r{  r  c                 C   s   | j d d S )zDGet the address of the buffer underlying the provided NumPy ndarray.rr  r   )__array_interface__)r  r#   r#   r$   r  	  s   r  c                 C   s"   t | \}}t|d  t||S rU   )rt   teenextzip)iterablere  br#   r#   r$   r    s   

r  c                 C   s   t j|r
|j}n|jd }|| }t j|rJ|d }|d }d|t|  d d  }tj|ftj||d}	tj|	dd}	tj| tj	|	|dS |
 }
t j|r_td|jt  }
tj| |
||dS )NrF  r   rH  r"  r#  rI  )r0   rc   r4  rJ  re   r3  rf   rK  rL  rM  r   rN  r|   rO  rP  )r   r   r   r   rS  data_offsetrW  rX  rY  r  rR  r#   r#   r$   r     s(   
r   )r   )Xabcr   rt   r   ri   r/  typingr   r   r   r   r   r   r	   r
   numpyre   pyarrowr0   packaging.versionr   parse_versionray._private.arrow_utilsr   $ray.air.util.tensor_extensions.utilsr   r   r   r    ray.data._internal.numpy_supportr   r   ray.utilr   ray.util.annotationsr   r   ray.util.commonr   r+   r,   9MIN_PYARROW_VERSION_CHUNKED_ARRAY_TO_NUMPY_ZERO_COPY_ONLYrP  	getLoggerr%   rK   r3   r   r-   r)   r_   r1   r5   rf   rR   rF   r`   ra   TimestampTyperg   r   rb   r   r   r   r   r   ExtensionTypeABCr   r   r   ExtensionScalarr   r  r9  r;   r   r  r   r{  r   r  r  r   register_extension_typer  ArrowKeyErrorr#   r#   r#   r$   <module>   s    (
Y
7

A




 !	+  <p
 ($+