o
    ci                     @   sX   d dl mZmZ d dlmZ d dlmZ d dlmZ er"d dl	m
Z
 G dd deZdS )	    )TYPE_CHECKINGOptional)BatchFormat)Dataset)Preprocessor)DataBatchTypec                   @   s   e Zd ZdZdd ZdefddZdedefd	d
ZdedefddZ				ddede
e de
e de
e de
e defddZdddZdd ZdefddZdS )Chaina  Combine multiple preprocessors into a single :py:class:`Preprocessor`.

    When you call ``fit``, each preprocessor is fit on the dataset produced by the
    preceeding preprocessor's ``fit_transform``.

    Example:
        >>> import pandas as pd
        >>> import ray
        >>> from ray.data.preprocessors import *
        >>>
        >>> df = pd.DataFrame({
        ...     "X0": [0, 1, 2],
        ...     "X1": [3, 4, 5],
        ...     "Y": ["orange", "blue", "orange"],
        ... })
        >>> ds = ray.data.from_pandas(df)  # doctest: +SKIP
        >>>
        >>> preprocessor = Chain(
        ...     StandardScaler(columns=["X0", "X1"]),
        ...     Concatenator(columns=["X0", "X1"], output_column_name="X"),
        ...     LabelEncoder(label_column="Y")
        ... )
        >>> preprocessor.fit_transform(ds).to_pandas()  # doctest: +SKIP
           Y                                         X
        0  1  [-1.224744871391589, -1.224744871391589]
        1  0                                [0.0, 0.0]
        2  1    [1.224744871391589, 1.224744871391589]

    Args:
        preprocessors: The preprocessors to sequentially compose.
    c                 C   s   d}d}| j D ].}| tjjkr|d7 }|d7 }q| tjjtjjfv r+|d7 }q| tjjks5J q|dkrN||krBtjjS |dkrJtjjS tjjS tjjS )Nr      )preprocessors
fit_statusr   	FitStatusFITTED
NOT_FITTEDPARTIALLY_FITTEDNOT_FITTABLE)selffittable_countfitted_countp r   P/home/ubuntu/.local/lib/python3.10/site-packages/ray/data/preprocessors/chain.pyr   ,   s&   


zChain.fit_statusr
   c                 G   s
   || _ d S N)r
   )r   r
   r   r   r   __init__D   s   
zChain.__init__dsreturnc                 C   s2   | j d d D ]}||}q| j d | | S )N)r
   fit_transformfitr   r   preprocessorr   r   r   _fitG   s   z
Chain._fitc                 C      | j D ]}||}q|S r   )r
   r   r   r   r   r   r   M      
zChain.fit_transformN
batch_sizenum_cpusmemoryconcurrencyc                 C   s$   | j D ]}|j|||||d}q|S )N)r#   r$   r%   r&   )r
   	transform)r   r   r#   r$   r%   r&   r   r   r   r   
_transformR   s   
zChain._transformdfr   c                 C   r!   r   )r
   transform_batch)r   r)   r   r   r   r   _transform_batchd   r"   zChain._transform_batchc                 C   s*   d dd | jD }| jj d| dS )Nz, c                 s   s    | ]}t |V  qd S r   )repr).0r   r   r   r   	<genexpr>j   s    z!Chain.__repr__.<locals>.<genexpr>())joinr
   	__class____name__)r   	argumentsr   r   r   __repr__i   s   zChain.__repr__c                 C   s   | j d  S )Nr   )r
   _determine_transform_to_use)r   r   r   r   r6   m   s   z!Chain._determine_transform_to_use)NNN)r)   r   r   r   )r3   
__module____qualname____doc__r   r   r   r   r    r   r   intfloatr(   r+   r5   r   r6   r   r   r   r   r      s2     	

r   N)typingr   r   "ray.air.util.data_batch_conversionr   ray.datar   ray.data.preprocessorr   ray.air.data_batch_typer   r   r   r   r   r   <module>   s    