o
    ci                     @   sV   d dl mZmZ d dlZd dlZd dlmZ d dl	m
Z
 e
ddG dd deZdS )	    )ListOptionalN)Preprocessor)	PublicAPIalpha)	stabilityc                   @   sl   e Zd ZdZdd dd dd dZdZ	dd	d
dee deee  fddZ	de
jfddZdd Zd	S )
Normalizera
  Scales each sample to have unit norm.

    This preprocessor works by dividing each sample (i.e., row) by the sample's norm.
    The general formula is given by

    .. math::

        s' = \frac{s}{\lVert s \rVert_p}

    where :math:`s` is the sample, :math:`s'` is the transformed sample,
    :math:\lVert s \rVert`, and :math:`p` is the norm type.

    The following norms are supported:

    * `"l1"` (:math:`L^1`): Sum of the absolute values.
    * `"l2"` (:math:`L^2`): Square root of the sum of the squared values.
    * `"max"` (:math:`L^\infty`): Maximum value.

    Examples:
        >>> import pandas as pd
        >>> import ray
        >>> from ray.data.preprocessors import Normalizer
        >>>
        >>> df = pd.DataFrame({"X1": [1, 1], "X2": [1, 0], "X3": [0, 1]})
        >>> ds = ray.data.from_pandas(df)  # doctest: +SKIP
        >>> ds.to_pandas()  # doctest: +SKIP
           X1  X2  X3
        0   1   1   0
        1   1   0   1

        The :math:`L^2`-norm of the first sample is :math:`\sqrt{2}`, and the
        :math:`L^2`-norm of the second sample is :math:`1`.

        >>> preprocessor = Normalizer(columns=["X1", "X2"])
        >>> preprocessor.fit_transform(ds).to_pandas()  # doctest: +SKIP
                 X1        X2  X3
        0  0.707107  0.707107   0
        1  1.000000  0.000000   1

        The :math:`L^1`-norm of the first sample is :math:`2`, and the
        :math:`L^1`-norm of the second sample is :math:`1`.

        >>> preprocessor = Normalizer(columns=["X1", "X2"], norm="l1")
        >>> preprocessor.fit_transform(ds).to_pandas()  # doctest: +SKIP
            X1   X2  X3
        0  0.5  0.5   0
        1  1.0  0.0   1

        The :math:`L^\infty`-norm of the both samples is :math:`1`.

        >>> preprocessor = Normalizer(columns=["X1", "X2"], norm="max")
        >>> preprocessor.fit_transform(ds).to_pandas()  # doctest: +SKIP
            X1   X2  X3
        0  1.0  1.0   0
        1  1.0  0.0   1

        :class:`Normalizer` can also be used in append mode by providing the
        name of the output_columns that should hold the normalized values.

        >>> preprocessor = Normalizer(columns=["X1", "X2"], output_columns=["X1_normalized", "X2_normalized"])
        >>> preprocessor.fit_transform(ds).to_pandas()  # doctest: +SKIP
           X1  X2  X3  X1_normalized  X2_normalized
        0   1   1   0       0.707107       0.707107
        1   1   0   1       1.000000       0.000000

    Args:
        columns: The columns to scale. For each row, these colmumns are scaled to
            unit-norm.
        norm: The norm to use. The supported values are ``"l1"``, ``"l2"``, or
            ``"max"``. Defaults to ``"l2"``.
        output_columns: The names of the transformed columns. If None, the transformed
            columns will be the same as the input columns. If not None, the length of
            ``output_columns`` must match the length of ``columns``, othwerwise an error
            will be raised.

    Raises:
        ValueError: if ``norm`` is not ``"l1"``, ``"l2"``, or ``"max"``.
    c                 C   s   t | jddS N   axis)npabssumcols r   U/home/ubuntu/.local/lib/python3.10/site-packages/ray/data/preprocessors/normalizer.py<lambda>\       zNormalizer.<lambda>c                 C   s   t t | djddS )N   r
   r   )r   sqrtpowerr   r   r   r   r   r   ]   s    c                 C   s   t jt| ddS r	   )r   maxr   r   r   r   r   r   ^   r   )l1l2r   Fr   N)output_columnscolumnsr   c                C   sB   || _ || _|| jvrtd| d| j  t||| _d S )NzNorm z( is not supported.Supported values are: )r   norm	_norm_fns
ValueErrorkeysr   #_derive_and_validate_output_columnsr   )selfr   r   r   r   r   r   __init__c   s   

zNormalizer.__init__dfc                 C   s<   |j d d | jf }| j| j |}|j|dd|| j< |S )Nr   r   )locr   r   r   divr   )r#   r%   r   column_normsr   r   r   _transform_pandasw   s   zNormalizer._transform_pandasc                 C   s&   | j j d| jd| jd| jdS )Nz	(columns=z, norm=z, output_columns=))	__class____name__r   r   r   )r#   r   r   r   __repr__~   s   zNormalizer.__repr__)r   )r,   
__module____qualname____doc__r   _is_fittabler   strr   r$   pd	DataFramer)   r-   r   r   r   r   r   
   s"    P

r   )typingr   r   numpyr   pandasr3   ray.data.preprocessorr   ray.util.annotationsr   r   r   r   r   r   <module>   s    