o
    bi0                  	   @   s  d dl Z d dlmZ d dlmZmZmZmZ d dlZ	d dl
mZ d dlmZ d dlmZmZ er4d dlZzd dlZW n eyE   dZY nw dadd ZeG d	d
 d
eeZeG dd deeZ	d(dededdfddZ	d(dddededefddZe	d(dedefddZe	d(dddedefddZdedee	jeee	jf f fddZ de	jdedee	j f fdd Z!de	jde	jfd!d"Z"d)d$d%Z#d)d&d'Z$dS )*    N)Enum)TYPE_CHECKINGDictListUnion)TENSOR_COLUMN_NAME)DataBatchType)
DeprecatedDeveloperAPIc                  C   s   t d u r
dd l} | a t S )Nr   )_pandaspandas)r    r   V/home/ubuntu/.local/lib/python3.10/site-packages/ray/air/util/data_batch_conversion.py_lazy_import_pandas   s   r   c                   @   s   e Zd ZdZdZdZdS )BatchFormatr   arrownumpyN)__name__
__module____qualname__PANDASARROWNUMPYr   r   r   r   r   "   s    r   c                   @   s   e Zd ZdZdZdZdZdS )BlockFormatz#Internal Dataset block format enum.r   r   simpleN)r   r   r   __doc__r   r   SIMPLEr   r   r   r   r   *   s
    r   Fdatacast_tensor_columnsreturnpd.DataFramec                 C   s   t  }t| tjr|tt| i} nPt| trBi }|  D ]\}}t|tjs5t	dt
| d| dt|||< q||} n!tdurQt| tjrQ|  } nt| |jsct	dt
|  dt |rit| } | S )a  Convert the provided data to a Pandas DataFrame.

    Args:
        data: Data of type DataBatchType
        cast_tensor_columns: Whether tensor columns should be cast to NumPy ndarrays.

    Returns:
        A pandas Dataframe representation of the input data.

    GAll values in the provided dict must be of type np.ndarray. Found type 	 for key 	 instead.NReceived data of type: , but expected it to be one of )r   
isinstancenpndarray	DataFramer   _ndarray_to_columndictitems
ValueErrortypepyarrowTable	to_pandasr    _cast_tensor_columns_to_ndarrays)r   r   pdtensor_dictcol_namecolr   r   r   _convert_batch_type_to_pandas3   s6   

r7   r.   c                 C   s   |rt | } |tjkr| S |tjkr5t| jdkr$| jdddf  S i }| D ]
}| |  ||< q(|S |tjkrFt	s@t
dt	j| S t
d| dt )`  Convert the provided Pandas dataframe to the provided ``type``.

    Args:
        data: A Pandas DataFrame
        type: The specific ``BatchFormat`` to convert to.
        cast_tensor_columns: Whether tensor columns should be cast to our tensor
            extension type.

    Returns:
        The input data represented with the provided type.
       Nr   z|Attempted to convert data to Pyarrow Table but Pyarrow is not installed. Please do `pip install pyarrow` to install Pyarrow.zReceived type r%   ))_cast_ndarray_columns_to_tensor_extensionr   r   r   lencolumnsilocto_numpyr   r/   r-   r0   from_pandasr   )r   r.   r   output_dictcolumnr   r   r   _convert_pandas_to_batch_type\   s(   


rB   c                 C   s   t dt t| |dS )a5  Convert the provided data to a Pandas DataFrame.

    This API is deprecated from Ray 2.4.

    Args:
        data: Data of type DataBatchType
        cast_tensor_columns: Whether tensor columns should be cast to NumPy ndarrays.

    Returns:
        A pandas Dataframe representation of the input data.

    z`convert_batch_type_to_pandas` is deprecated as a developer API starting from Ray 2.4. All batch format conversions should be done manually instead of relying on this API.r   r   )warningswarnPendingDeprecationWarningr7   rC   r   r   r   convert_batch_type_to_pandas   s   rG   c                 C   s   t dt t| ||dS )r8   z`convert_pandas_to_batch_type` is deprecated as a developer API starting from Ray 2.4. All batch format conversions should be done manually instead of relying on this API.r   r.   r   )rD   rE   rF   rB   rH   r   r   r   convert_pandas_to_batch_type   s   rI   c           	      C   s"  t  }t| tjr| S t| tr.|  D ]\}}t|tjs+tdt| d| dq| S tduryt| tj	ryddl
m} ddlm} g }| jD ]}||}||j|dd	 qI| }| jtgkrqt| jjd |rq|d S tt| j|S t| |jrt| tjS td
t|  dt )zConvert the provided data to a NumPy ndarray or dict of ndarrays.

    Args:
        data: Data of type DataBatchType

    Returns:
        A numpy representation of the input data.
    r!   r"   r#   Nr   ),get_arrow_extension_fixed_shape_tensor_types)transform_pyarrowF)zero_copy_onlyr$   r%   )r   r&   r'   r(   r+   r,   r-   r.   r/   r0   $ray.air.util.tensor_extensions.arrowrJ   ray.data._internal.arrow_opsrK   r<   combine_chunked_arrayappendr>   column_namesr   schematypeszipr)   rB   r   r   r   )	r   r3   r5   r6   rJ   rK   column_values_ndarrayscombined_arrayarrow_fixed_shape_tensor_typesr   r   r   _convert_batch_type_to_numpy   sJ   


rX   arrz	pd.Seriesc                 C   s.   t  }z|| W S  ty   t|  Y S w )zConvert a NumPy ndarray into an appropriate column format for insertion into a
    pandas DataFrame.

    If conversion to a pandas Series fails (e.g. if the ndarray is multi-dimensional),
    fall back to a list of NumPy ndarrays.
    )r   Seriesr-   list)rY   r3   r   r   r   r*      s   r*   c                 C   sB   | j jtju rztdd | D } W | S  ty   Y | S w | S )zUnwrap an object-dtyped NumPy ndarray containing ndarray pointers into a single
    contiguous ndarray, if needed/possible.
    c                 S   s   g | ]}t |qS r   )r'   asarray).0vr   r   r   
<listcomp>  s    z9_unwrap_ndarray_object_type_if_needed.<locals>.<listcomp>)dtyper.   r'   object_array	Exception)rY   r   r   r   %_unwrap_ndarray_object_type_if_needed  s   rd   dfc                 C   s   t  }z|jjj}W n ty   |jj}Y nw ddlm}m} | 	 D ]H\}}||rlz*t
  t
jdtd t
jd|d ||| |< W d   n1 sPw   Y  W q$ tyk } z	td| d|d}~ww q$| S )zY
    Cast all NumPy ndarray columns in df to our tensor extension type, TensorArray.
    r   )TensorArraycolumn_needs_tensor_extensionignorecategoryNzTried to cast column z to the TensorArray tensor extension type but the conversion failed. To disable automatic casting to this tensor extension, set ctx = DataContext.get_current(); ctx.enable_tensor_extension_casting = False.)r   corecommonSettingWithCopyWarningAttributeErrorerrors%ray.air.util.tensor_extensions.pandasrf   rg   r,   rD   catch_warningssimplefilterFutureWarningrc   r-   )re   r3   rm   rf   rg   r5   r6   er   r   r   r:     s4   

r:   c              	   C   s   t  }z|jjj}W n ty   |jj}Y nw ddlm} |  D ]4\}}t	|j
|rVt  tjdtd tjd|d t| | |< W d   n1 sQw   Y  q"| S )z:Cast all tensor extension columns in df to NumPy ndarrays.r   )TensorDtyperh   ri   N)r   rk   rl   rm   rn   ro   rp   ru   r,   r&   r`   rD   rq   rr   rs   r[   r>   )re   r3   rm   ru   r5   r6   r   r   r   r2   I  s    
r2   )F)re   r    r   r    )%rD   enumr   typingr   r   r   r   r   r'   ray.air.constantsr   ray.air.data_batch_typer   ray.util.annotationsr	   r
   r   r3   r/   ImportErrorr   r   strr   r   boolr7   rB   rG   rI   r(   rX   r*   rd   r:   r2   r   r   r   r   <module>   s~    	

,
/
";
-