o
    `۷i)                  	   @   sX  d dl Z d dlmZ d dlmZmZmZmZ d dlZ	d dl
mZ d dlmZ d dlmZ er2d dlZzd dlZW n eyC   dZY nw dadd ZeG d	d
 d
eeZ	d"dededdfddZ	d"dddededefddZdedee	jeee	jf f fddZde	jdedee	j f fddZde	jde	jfddZd#ddZd#d d!Z dS )$    N)Enum)TYPE_CHECKINGDictListUnion)DataBatchType)TENSOR_COLUMN_NAME)DeveloperAPIc                  C   s   t d u r
dd l} | a t S )Nr   )_pandaspandas)r    r   Y/home/ubuntu/vllm_env/lib/python3.10/site-packages/ray/data/util/data_batch_conversion.py_lazy_import_pandas   s   r   c                   @   s   e Zd ZdZdZdZdS )BatchFormatr   arrownumpyN)__name__
__module____qualname__PANDASARROWNUMPYr   r   r   r   r   "   s    r   Fdatacast_tensor_columnsreturnpd.DataFramec                 C   s   t  }t| tjr|tt| i} nPt| trBi }|  D ]\}}t|tjs5t	dt
| d| dt|||< q||} n!tdurQt| tjrQ|  } nt| |jsct	dt
|  dt |rit| } | S )a  Convert the provided data to a Pandas DataFrame.

    Args:
        data: Data of type DataBatchType
        cast_tensor_columns: Whether tensor columns should be cast to NumPy ndarrays.

    Returns:
        A pandas Dataframe representation of the input data.

    GAll values in the provided dict must be of type np.ndarray. Found type 	 for key 	 instead.NReceived data of type: , but expected it to be one of )r   
isinstancenpndarray	DataFramer   _ndarray_to_columndictitems
ValueErrortypepyarrowTable	to_pandasr    _cast_tensor_columns_to_ndarrays)r   r   pdtensor_dictcol_namecolr   r   r   _convert_batch_type_to_pandas*   s6   

r2   r)   c                 C   s   |rt | } |tjkr| S |tjkr5t| jdkr$| jdddf  S i }| D ]
}| |  ||< q(|S |tjkrFt	s@t
dt	j| S t
d| dt )a`  Convert the provided Pandas dataframe to the provided ``type``.

    Args:
        data: A Pandas DataFrame
        type: The specific ``BatchFormat`` to convert to.
        cast_tensor_columns: Whether tensor columns should be cast to our tensor
            extension type.

    Returns:
        The input data represented with the provided type.
       Nr   z|Attempted to convert data to Pyarrow Table but Pyarrow is not installed. Please do `pip install pyarrow` to install Pyarrow.zReceived type r    ))_cast_ndarray_columns_to_tensor_extensionr   r   r   lencolumnsilocto_numpyr   r*   r(   r+   from_pandasr   )r   r)   r   output_dictcolumnr   r   r   _convert_pandas_to_batch_typeS   s(   


r<   c           	      C   s"  t  }t| tjr| S t| tr.|  D ]\}}t|tjs+tdt| d| dq| S tduryt| tj	ryddl
m} ddlm} g }| jD ]}||}||j|dd	 qI| }| jtgkrqt| jjd |rq|d S tt| j|S t| |jrt| tjS td
t|  dt )zConvert the provided data to a NumPy ndarray or dict of ndarrays.

    Args:
        data: Data of type DataBatchType

    Returns:
        A numpy representation of the input data.
    r   r   r   Nr   )transform_pyarrow),get_arrow_extension_fixed_shape_tensor_typesF)zero_copy_onlyr   r    )r   r!   r"   r#   r&   r'   r(   r)   r*   r+   ray.data._internal.arrow_opsr=   *ray.data._internal.tensor_extensions.arrowr>   r6   combine_chunked_arrayappendr8   column_namesr   schematypeszipr$   r<   r   r   r   )	r   r.   r0   r1   r=   r>   column_values_ndarrayscombined_arrayarrow_fixed_shape_tensor_typesr   r   r   _convert_batch_type_to_numpy   sJ   


rK   arrz	pd.Seriesc                 C   s.   t  }z|| W S  ty   t|  Y S w )zConvert a NumPy ndarray into an appropriate column format for insertion into a
    pandas DataFrame.

    If conversion to a pandas Series fails (e.g. if the ndarray is multi-dimensional),
    fall back to a list of NumPy ndarrays.
    )r   Seriesr(   list)rL   r.   r   r   r   r%      s   r%   c                 C   sB   | j jtju rztdd | D } W | S  ty   Y | S w | S )zUnwrap an object-dtyped NumPy ndarray containing ndarray pointers into a single
    contiguous ndarray, if needed/possible.
    c                 S   s   g | ]}t |qS r   )r"   asarray).0vr   r   r   
<listcomp>   s    z9_unwrap_ndarray_object_type_if_needed.<locals>.<listcomp>)dtyper)   r"   object_array	Exception)rL   r   r   r   %_unwrap_ndarray_object_type_if_needed   s   rW   dfc                 C   s   t  }z|jjj}W n ty   |jj}Y nw ddlm}m} | 	 D ]H\}}||rlz*t
  t
jdtd t
jd|d ||| |< W d   n1 sPw   Y  W q$ tyk } z	td| d|d}~ww q$| S )zY
    Cast all NumPy ndarray columns in df to our tensor extension type, TensorArray.
    r   )TensorArraycolumn_needs_tensor_extensionignorecategoryNzTried to cast column z to the TensorArray tensor extension type but the conversion failed. To disable automatic casting to this tensor extension, set ctx = DataContext.get_current(); ctx.enable_tensor_extension_casting = False.)r   corecommonSettingWithCopyWarningAttributeErrorerrors+ray.data._internal.tensor_extensions.pandasrY   rZ   r'   warningscatch_warningssimplefilterFutureWarningrV   r(   )rX   r.   r`   rY   rZ   r0   r1   er   r   r   r4      s4   

r4   c              	   C   s   t  }z|jjj}W n ty   |jj}Y nw ddlm} |  D ]4\}}t	|j
|rVt  tjdtd tjd|d t| | |< W d   n1 sQw   Y  q"| S )z:Cast all tensor extension columns in df to NumPy ndarrays.r   )TensorDtyper[   r\   N)r   r^   r_   r`   ra   rb   rc   ri   r'   r!   rS   rd   re   rf   rg   rN   r8   )rX   r.   r`   ri   r0   r1   r   r   r   r-     s    
r-   )F)rX   r   r   r   )!rd   enumr   typingr   r   r   r   r   r"   ray.air.data_batch_typer   ray.data.constantsr   ray.util.annotationsr	   r   r.   r*   ImportErrorr
   r   strr   boolr2   r<   r#   rK   r%   rW   r4   r-   r   r   r   r   <module>   sZ    		
,
.
";
-