o
    `۷i                     @  s  d dl mZ d dlZd dlmZmZ d dlmZmZ d dl	m
Z
 d dlmZmZmZmZmZmZmZmZmZmZmZ d dlZd dlmZ d dlmZ d dlmZ d d	lm Z m!Z! erxd d
l"m#Z# d dl$m%Z% d dl&m'Z' d dl(m)Z) d dl*m+Z+ edZ,ed Z-ee-ee, f Z.e ddG dd de
Z/G dd deee, Z0G dd de0d Z1e ddeddG dd deZ2e ddeddddG d d! d!e2Z3e ddeddddG d"d# d#e2Z4e ddeddddG d$d% d%e2Z5e ddeddddG d&d' d'e2Z6eddG d(d) d)Z7G d*d+ d+Z8e ddeddddG d,d- d-e2Z9d\d4d5Z:e!ddd]d6d7Z;d^d8d9Z<e!ddd]d:d;Z=	d_d`d@dAZ>e ddeddddG dBdC dCe2Z?e ddeddddG dDdE dEe2Z@e ddeddddG dFdG dGe2ZAe!dHddadKdLZBe!dHddbdOdPZCe dddcdQdRZDe!ddddSdddWdXZEg dYZFdedZd[ZGdS )f    )annotationsN)ABCabstractmethod)	dataclassfield)Enum)TYPE_CHECKINGAnyCallableDictGenericListOptionalTupleTypeTypeVarUnion)BatchColumn)DataType)DeveloperAPI	PublicAPI_ArrayNamespace_DatetimeNamespace_ListNamespace_StringNamespace_StructNamespaceT).UDFExpralpha)	stabilityc                   @  s\   e Zd ZdZdZdZdZdZdZdZ	dZ
d	Zd
ZdZdZdZdZdZdZdZdZdZdZdS )	Operationa  Enumeration of supported operations in expressions.

    This enum defines all the binary operations that can be performed
    between expressions, including arithmetic, comparison, and boolean operations.

    Attributes:
        ADD: Addition operation (+)
        SUB: Subtraction operation (-)
        MUL: Multiplication operation (*)
        DIV: Division operation (/)
        FLOORDIV: Floor division operation (//)
        GT: Greater than comparison (>)
        LT: Less than comparison (<)
        GE: Greater than or equal comparison (>=)
        LE: Less than or equal comparison (<=)
        EQ: Equality comparison (==)
        NE: Not equal comparison (!=)
        AND: Logical AND operation (&)
        OR: Logical OR operation (|)
        NOT: Logical NOT operation (~)
        IS_NULL: Check if value is null
        IS_NOT_NULL: Check if value is not null
        IN: Check if value is in a list
        NOT_IN: Check if value is not in a list
    addsubmuldivmodfloordivgtltgeleeqneandornotis_nullis_not_nullinnot_inN)__name__
__module____qualname____doc__ADDSUBMULDIVMODFLOORDIVGTLTGELEEQNEANDORNOTIS_NULLIS_NOT_NULLINNOT_IN rP   rP   J/home/ubuntu/vllm_env/lib/python3.10/site-packages/ray/data/expressions.pyr%   )   s*    r%   c                   @  s   e Zd ZdZd!ddZed"d	d
Zed#ddZed$ddZed%ddZ	ed&ddZ
ed'ddZed(ddZed)ddZd S )*_ExprVisitorz<Base visitor with generic dispatch for Ray Data expressions.expr'Expr'returnr!   c                 C  s   t |tr
| |S t |tr| |S t |tr| |S t |tr(| |S t |t	r2| 
|S t |tr<| |S t |trF| |S t |trP| |S tdt| )Nz,Unsupported expression type for conversion: )
isinstance
ColumnExprvisit_columnLiteralExprvisit_literal
BinaryExprvisit_binary	UnaryExprvisit_unary	AliasExprvisit_aliasr"   	visit_udfDownloadExprvisit_downloadStarExpr
visit_star	TypeErrortypeselfrS   rP   rP   rQ   visit]   s"   















z_ExprVisitor.visit'ColumnExpr'c                 C     d S NrP   rh   rP   rP   rQ   rX   q      z_ExprVisitor.visit_column'LiteralExpr'c                 C  rl   rm   rP   rh   rP   rP   rQ   rZ   u   rn   z_ExprVisitor.visit_literal'BinaryExpr'c                 C  rl   rm   rP   rh   rP   rP   rQ   r\   y   rn   z_ExprVisitor.visit_binary'UnaryExpr'c                 C  rl   rm   rP   rh   rP   rP   rQ   r^   }   rn   z_ExprVisitor.visit_unary'AliasExpr'c                 C  rl   rm   rP   rh   rP   rP   rQ   r`      rn   z_ExprVisitor.visit_alias	'UDFExpr'c                 C  rl   rm   rP   rh   rP   rP   rQ   ra      rn   z_ExprVisitor.visit_udf
'StarExpr'c                 C  rl   rm   rP   rh   rP   rP   rQ   re      rn   z_ExprVisitor.visit_star'DownloadExpr'c                 C  rl   rm   rP   rh   rP   rP   rQ   rc      rn   z_ExprVisitor.visit_downloadN)rS   rT   rU   r!   )rS   rk   rU   r!   )rS   ro   rU   r!   )rS   rp   rU   r!   )rS   rq   rU   r!   )rS   rr   rU   r!   )rS   rs   rU   r!   )rS   rt   rU   r!   )rS   ru   rU   r!   )r9   r:   r;   r<   rj   r   rX   rZ   r\   r^   r`   ra   re   rc   rP   rP   rP   rQ   rR   Z   s&    
rR   c                   @  s`   e Zd ZdZdddZdd	d
Zd ddZd!ddZd"ddZd#ddZ	d$ddZ
d%ddZdS )&_PyArrowExpressionVisitorzJVisitor that converts Ray Data expressions to PyArrow compute expressions.rS   rk   rU   'pyarrow.compute.Expression'c                 C     t |jS rm   )pcr   namerh   rP   rP   rQ   rX         z&_PyArrowExpressionVisitor.visit_columnro   c                 C  rx   rm   )ry   scalarvaluerh   rP   rP   rQ   rZ      r{   z'_PyArrowExpressionVisitor.visit_literalrp   c                 C  s   dd l }|jtjtjfv rM| |j}t|jt	r.|jj
}t|tr'||n||g}ntdt|jj dt||}|jtjkrKt|S |S | |j}| |j}ddlm} |j|v rl||j ||S td|j )Nr   zLis_in/not_in operations require the right operand to be a literal list, got ._ARROW_EXPR_OPS_MAPz*Unsupported binary operation for PyArrow: )pyarrowopr%   rN   rO   rj   leftrV   rightrY   r}   listarray
ValueErrorrg   r9   ry   is_ininvert?ray.data._internal.planner.plan_expression.expression_evaluatorr   )ri   rS   par   right_valuer   resultr   rP   rP   rQ   r\      s,   


z&_PyArrowExpressionVisitor.visit_binaryrq   c                 C  s@   |  |j}ddlm} |j|v r||j |S td|j )Nr   r   z)Unsupported unary operation for PyArrow: )rj   operandr   r   r   r   )ri   rS   r   r   rP   rP   rQ   r^      s
   
z%_PyArrowExpressionVisitor.visit_unaryrr   c                 C  s   |  |jS rm   )rj   rS   rh   rP   rP   rQ   r`      r{   z%_PyArrowExpressionVisitor.visit_aliasrs   c                 C     t d)Nz:UDF expressions cannot be converted to PyArrow expressionsrf   rh   rP   rP   rQ   ra         z#_PyArrowExpressionVisitor.visit_udfru   c                 C  r   )Nz?Download expressions cannot be converted to PyArrow expressionsr   rh   rP   rP   rQ   rc      s   z(_PyArrowExpressionVisitor.visit_downloadrt   c                 C  r   )Nz;Star expressions cannot be converted to PyArrow expressionsr   rh   rP   rP   rQ   re      r   z$_PyArrowExpressionVisitor.visit_starN)rS   rk   rU   rw   )rS   ro   rU   rw   )rS   rp   rU   rw   )rS   rq   rU   rw   )rS   rr   rU   rw   )rS   rs   rU   rw   )rS   ru   rU   rw   )rS   rt   rU   rw   )r9   r:   r;   r<   rX   rZ   r\   r^   r`   ra   rc   re   rP   rP   rP   rQ   rv      s    







rv   zpyarrow.compute.ExpressionT)frozenc                   @  s\  e Zd ZU dZded< edddZedddZdddZ	dddZ
dddZdddZdddZdddZdddZdd d!Zdd"d#Zdd$d%Zdd&d'Zdd(d)Zdd*d+Zdd,d-Zdd.d/Zdd0d1Zdd2d3Zdd4d5Zdd6d7Zdd8d9Zdd:d;Zdd<d=Zdd>d?Zdd@dAZ ddBdCZ!ddDdEZ"ddHdIZ#ddJdKZ$ddMdNZ%ddPdQZ&ddRdSZ'ddTdUZ(ddVdWZ)ddXdYZ*ddZd[Z+dd\d]Z,dd^d_Z-dd`daZ.ddbdcZ/ddddeZ0ddfdgZ1ddhdiZ2ddjdkZ3ddldmZ4ddndoZ5ddqdrZ6ddsdtZ7eddvdwZ8eddydzZ9edd|d}Z:edddZ;edddZ<dddZ=dS )Expras  Base class for all expression nodes.

    This is the abstract base class that all expression types inherit from.
    It provides operator overloads for building complex expressions using
    standard Python operators.

    Expressions form a tree structure where each node represents an operation
    or value. The tree can be evaluated against data batches to compute results.

    Example:
        >>> from ray.data.expressions import col, lit
        >>> # Create an expression tree: (col("x") + 5) * col("y")
        >>> expr = (col("x") + lit(5)) * col("y")
        >>> # This creates a BinaryExpr with operation=MUL
        >>> # left=BinaryExpr(op=ADD, left=ColumnExpr("x"), right=LiteralExpr(5))
        >>> # right=ColumnExpr("y")

    Note:
        This class should not be instantiated directly. Use the concrete
        subclasses like ColumnExpr, LiteralExpr, etc.
    r   	data_typerU   
str | Nonec                 C  s   dS )zGet the name associated with this expression.

        Returns:
            The name for expressions that have one (ColumnExpr, AliasExpr),
            None otherwise.
        NrP   ri   rP   rP   rQ   rz      s   z	Expr.nameotherr	   boolc                 C  s   t )z4Compare two expression ASTs for structural equality.)NotImplementedErrorri   r   rP   rP   rQ   structurally_equals   s   zExpr.structurally_equalsrw   c                 C  s   t  | S )ad  Convert this Ray Data expression to a PyArrow compute expression.

        Returns:
            A PyArrow compute expression equivalent to this Ray Data expression.

        Raises:
            ValueError: If the expression contains operations not supported by PyArrow.
            TypeError: If the expression type cannot be converted to PyArrow.
        )rv   rj   r   rP   rP   rQ   
to_pyarrow   s   
zExpr.to_pyarrowstrc                 C  s   ddl m} | | S )u8  Return a tree-structured string representation of the expression.

        Returns:
            A multi-line string showing the expression tree structure using
            box-drawing characters for visual clarity.

        Example:
            >>> from ray.data.expressions import col, lit
            >>> expr = (col("x") + lit(5)) * col("y")
            >>> print(expr)
            MUL
                ├── left: ADD
                │   ├── left: COL('x')
                │   └── right: LIT(5)
                └── right: COL('y')
        r   )_TreeReprVisitor)>ray.data._internal.planner.plan_expression.expression_visitorsr   rj   )ri   r   rP   rP   rQ   __repr__  s   zExpr.__repr__r   r%   rT   c                 C  s   t |ts	t|}t|| |S )ai  Create a binary expression with the given operation.

        Args:
            other: The right operand expression or literal value
            op: The operation to perform

        Returns:
            A new BinaryExpr representing the operation

        Note:
            If other is not an Expr, it will be automatically converted to a LiteralExpr.
        )rV   r   rY   r[   )ri   r   r   rP   rP   rQ   _bin  s   
z	Expr._binc                 C     |  |tjS )zAddition operator (+).)r   r%   r=   r   rP   rP   rQ   __add__4     zExpr.__add__c                 C     t || tjS )z/Reverse addition operator (for literal + expr).)rY   r   r%   r=   r   rP   rP   rQ   __radd__8     zExpr.__radd__c                 C  r   )zSubtraction operator (-).)r   r%   r>   r   rP   rP   rQ   __sub__<  r   zExpr.__sub__c                 C  r   )z2Reverse subtraction operator (for literal - expr).)rY   r   r%   r>   r   rP   rP   rQ   __rsub__@  r   zExpr.__rsub__c                 C  r   )zMultiplication operator (*).)r   r%   r?   r   rP   rP   rQ   __mul__D  r   zExpr.__mul__c                 C  r   )z5Reverse multiplication operator (for literal * expr).)rY   r   r%   r?   r   rP   rP   rQ   __rmul__H  r   zExpr.__rmul__c                 C  r   zModulation operator (%).)r   r%   rA   r   rP   rP   rQ   __mod__L  r   zExpr.__mod__c                 C  r   r   )rY   r   r%   rA   r   rP   rP   rQ   __rmod__P  r   zExpr.__rmod__c                 C  r   )zDivision operator (/).)r   r%   r@   r   rP   rP   rQ   __truediv__T  r   zExpr.__truediv__c                 C  r   )z/Reverse division operator (for literal / expr).)rY   r   r%   r@   r   rP   rP   rQ   __rtruediv__X  r   zExpr.__rtruediv__c                 C  r   )zFloor division operator (//).)r   r%   rB   r   rP   rP   rQ   __floordiv__\  r   zExpr.__floordiv__c                 C  r   )z6Reverse floor division operator (for literal // expr).)rY   r   r%   rB   r   rP   rP   rQ   __rfloordiv__`  r   zExpr.__rfloordiv__c                 C  r   )zGreater than operator (>).)r   r%   rC   r   rP   rP   rQ   __gt__e  r   zExpr.__gt__c                 C  r   )zLess than operator (<).)r   r%   rD   r   rP   rP   rQ   __lt__i  r   zExpr.__lt__c                 C  r   )z$Greater than or equal operator (>=).)r   r%   rE   r   rP   rP   rQ   __ge__m  r   zExpr.__ge__c                 C  r   )z!Less than or equal operator (<=).)r   r%   rF   r   rP   rP   rQ   __le__q  r   zExpr.__le__c                 C  r   )zEquality operator (==).)r   r%   rG   r   rP   rP   rQ   __eq__u  r   zExpr.__eq__c                 C  r   )zNot equal operator (!=).)r   r%   rH   r   rP   rP   rQ   __ne__y  r   zExpr.__ne__c                 C  r   )zLogical AND operator (&).)r   r%   rI   r   rP   rP   rQ   __and__~  r   zExpr.__and__c                 C  r   )zLogical OR operator (|).)r   r%   rJ   r   rP   rP   rQ   __or__  r   zExpr.__or__c                 C     t tj| S )zLogical NOT operator (~).)r]   r%   rK   r   rP   rP   rQ   
__invert__     zExpr.__invert__c                 C  r   )z&Check if the expression value is null.)r]   r%   rL   r   rP   rP   rQ   r5     r   zExpr.is_nullc                 C  r   )z*Check if the expression value is not null.)r]   r%   rM   r   rP   rP   rQ   r6     r   zExpr.is_not_nullvaluesUnion[List[Any], 'Expr']c                 C      t |ts	t|}| |tjS )z5Check if the expression value is in a list of values.)rV   r   rY   r   r%   rN   ri   r   rP   rP   rQ   r        
z
Expr.is_inc                 C  r   )z9Check if the expression value is not in a list of values.)rV   r   rY   r   r%   rO   r   rP   rP   rQ   r8     r   zExpr.not_inrz   c                 C     t | j| |ddS )a  Rename the expression.

        This method allows you to assign a new name to an expression result.
        This is particularly useful when you want to specify the output column name
        directly within the expression rather than as a separate parameter.

        Args:
            name: The new name for the expression

        Returns:
            An AliasExpr that wraps this expression with the specified name

        Example:
            >>> from ray.data.expressions import col, lit
            >>> # Create an expression with a new aliased name
            >>> expr = (col("price") * col("quantity")).alias("total")
            >>> # Can be used with Dataset operations that support named expressions
        F)r   rS   _name
_is_renamer_   r   ri   rz   rP   rP   rQ   alias  s   
z
Expr.aliasrs   c                 C     t tj| S )z'Round values up to the nearest integer.)_create_pyarrow_compute_udfry   ceilr   rP   rP   rQ   r     r   z	Expr.ceilc                 C  r   )z)Round values down to the nearest integer.)r   ry   floorr   rP   rP   rQ   r     r   z
Expr.floorc                 C  r   )z<Round values to the nearest integer using PyArrow semantics.)r   ry   roundr   rP   rP   rQ   r     r   z
Expr.roundc                 C  r   )z'Truncate fractional values toward zero.)r   ry   truncr   rP   rP   rQ   r     r   z
Expr.truncc                 C     t tjt d| S )z0Compute the natural logarithm of the expression.return_dtype)r   ry   lnr   float64r   rP   rP   rQ   r        zExpr.lnc                 C  r   )z0Compute the base-10 logarithm of the expression.r   )r   ry   log10r   r   r   rP   rP   rQ   r        z
Expr.log10c                 C  r   )z/Compute the base-2 logarithm of the expression.r   )r   ry   log2r   r   r   rP   rP   rQ   r     r   z	Expr.log2c                 C  r   )z2Compute the natural exponential of the expression.r   )r   ry   expr   r   r   rP   rP   rQ   r     r   zExpr.expc                 C  r   )z0Compute the sine of the expression (in radians).r   )r   ry   sinr   r   r   rP   rP   rQ   r     r   zExpr.sinc                 C  r   )z2Compute the cosine of the expression (in radians).r   )r   ry   cosr   r   r   rP   rP   rQ   r     r   zExpr.cosc                 C  r   )z3Compute the tangent of the expression (in radians).r   )r   ry   tanr   r   r   rP   rP   rQ   r     r   zExpr.tanc                 C  r   )zHCompute the arcsine (inverse sine) of the expression, returning radians.r   )r   ry   asinr   r   r   rP   rP   rQ   r     r   z	Expr.asinc                 C  r   )zLCompute the arccosine (inverse cosine) of the expression, returning radians.r   )r   ry   acosr   r   r   rP   rP   rQ   r     r   z	Expr.acosc                 C  r   )zNCompute the arctangent (inverse tangent) of the expression, returning radians.r   )r   ry   atanr   r   r   rP   rP   rQ   r     r   z	Expr.atanc                 C  r   )a  Compute the negation of the expression.

        Returns:
            A UDFExpr that computes the negation (multiplies values by -1).

        Example:
            >>> from ray.data.expressions import col
            >>> import ray
            >>> ds = ray.data.from_items([{"x": 5}, {"x": -3}])
            >>> ds = ds.with_column("neg_x", col("x").negate())
            >>> # Result: neg_x = [-5, 3]
        )r   ry   negate_checkedr   rP   rP   rQ   negate     zExpr.negatec                 C  r   )a  Compute the sign of the expression.

        Returns:
            A UDFExpr that returns -1 for negative values, 0 for zero, and 1 for positive values.

        Example:
            >>> from ray.data.expressions import col
            >>> import ray
            >>> ds = ray.data.from_items([{"x": 5}, {"x": -3}, {"x": 0}])
            >>> ds = ds.with_column("sign_x", col("x").sign())
            >>> # Result: sign_x = [1, -1, 0]
        )r   ry   signr   rP   rP   rQ   r     r   z	Expr.signexponentc                 C  s   t tj| |S )aO  Raise the expression to the given power.

        Args:
            exponent: The exponent to raise the expression to.

        Returns:
            A UDFExpr that computes the power operation.

        Example:
            >>> from ray.data.expressions import col, lit
            >>> import ray
            >>> ds = ray.data.from_items([{"x": 2}, {"x": 3}])
            >>> ds = ds.with_column("x_squared", col("x").power(2))
            >>> # Result: x_squared = [4, 9]
            >>> ds = ds.with_column("x_cubed", col("x").power(3))
            >>> # Result: x_cubed = [8, 27]
        )r   ry   power)ri   r   rP   rP   rQ   r   "  s   z
Expr.powerc                 C  r   )a  Compute the absolute value of the expression.

        Returns:
            A UDFExpr that computes the absolute value.

        Example:
            >>> from ray.data.expressions import col
            >>> import ray
            >>> ds = ray.data.from_items([{"x": 5}, {"x": -3}])
            >>> ds = ds.with_column("abs_x", col("x").abs())
            >>> # Result: abs_x = [5, 3]
        )r   ry   abs_checkedr   rP   rP   rQ   abs6  r   zExpr.abs'_ArrayNamespace'c                 C     ddl m} || S )z,Access array operations for this expression.r   r   ),ray.data.namespace_expressions.arr_namespacer   )ri   r   rP   rP   rQ   arrE     zExpr.arr'_ListNamespace'c                 C  r   )a  Access list operations for this expression.

        Returns:
            A _ListNamespace that provides list-specific operations for both
            PyArrow ``List`` and ``FixedSizeList`` columns.

        Example:
            >>> from ray.data.expressions import col
            >>> import ray
            >>> ds = ray.data.from_items([
            ...     {"items": [1, 2, 3]},
            ...     {"items": [4, 5]}
            ... ])
            >>> ds = ds.with_column("num_items", col("items").list.len())
            >>> ds = ds.with_column("first_item", col("items").list[0])
            >>> ds = ds.with_column("slice", col("items").list[1:3])
        r   r   )-ray.data.namespace_expressions.list_namespacer   )ri   r   rP   rP   rQ   r   L  s   z	Expr.list'_StringNamespace'c                 C  r   )a`  Access string operations for this expression.

        Returns:
            A _StringNamespace that provides string-specific operations.

        Example:
            >>> from ray.data.expressions import col
            >>> import ray
            >>> ds = ray.data.from_items([
            ...     {"name": "Alice"},
            ...     {"name": "Bob"}
            ... ])
            >>> ds = ds.with_column("upper_name", col("name").str.upper())
            >>> ds = ds.with_column("name_len", col("name").str.len())
            >>> ds = ds.with_column("starts_a", col("name").str.starts_with("A"))
        r   r   )/ray.data.namespace_expressions.string_namespacer   )ri   r   rP   rP   rQ   r   c  s   zExpr.str'_StructNamespace'c                 C  r   )a  Access struct operations for this expression.

        Returns:
            A _StructNamespace that provides struct-specific operations.

        Example:
            >>> from ray.data.expressions import col
            >>> import ray
            >>> import pyarrow as pa
            >>> ds = ray.data.from_arrow(pa.table({
            ...     "user": pa.array([
            ...         {"name": "Alice", "age": 30}
            ...     ], type=pa.struct([
            ...         pa.field("name", pa.string()),
            ...         pa.field("age", pa.int32())
            ...     ]))
            ... }))
            >>> ds = ds.with_column("age", col("user").struct["age"])  # doctest: +SKIP
        r   r   )/ray.data.namespace_expressions.struct_namespacer    )ri   r    rP   rP   rQ   structy  s   zExpr.struct'_DatetimeNamespace'c                 C  r   )z/Access datetime operations for this expression.r   r   )+ray.data.namespace_expressions.dt_namespacer   )ri   r   rP   rP   rQ   dt  r   zExpr.dtc                 C  s   | S rm   rP   r   rP   rP   rQ   _unalias  s   zExpr._unaliasN)rU   r   r   r	   rU   r   )rU   rw   rU   r   )r   r	   r   r%   rU   rT   )r   r	   rU   rT   )r   r	   rU   rT   )r   r   rU   rT   rz   r   rU   rT   )rU   rs   )r   r	   rU   rs   )rU   r   )rU   r   )rU   r   )rU   r   )rU   r   )>r9   r:   r;   r<   __annotations__propertyrz   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r5   r6   r   r8   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rP   rP   rP   rQ   r      s   
 	














































r   F)r   r0   reprc                   @  sT   e Zd ZU dZded< edd ddZded	< edddZdddZ	dddZ
dS )rW   a  Expression that references a column by name.

    This expression type represents a reference to an existing column
    in the dataset. When evaluated, it returns the values from the
    specified column.

    Args:
        name: The name of the column to reference

    Example:
        >>> from ray.data.expressions import col
        >>> # Reference the "age" column
        >>> age_expr = col("age") # Creates ColumnExpr(name="age")
    r   r   c                   C     t tS rm   r   objectrP   rP   rP   rQ   <lambda>      zColumnExpr.<lambda>Fdefault_factoryinitr   r   rU   c                 C     | j S )zGet the column name.r   r   rP   rP   rQ   rz        zColumnExpr.namerz   c                 C  r   )NT)r   r   r   rP   rP   rQ   _rename  s   zColumnExpr._renamer   r	   r   c                 C     t |to
| j|jkS rm   )rV   rW   rz   r   rP   rP   rQ   r     s   zColumnExpr.structurally_equalsNr   rz   r   r   )r9   r:   r;   r<   r   r   r   r   rz   r	  r   rP   rP   rP   rQ   rW     s   
 
rW   c                   @  s>   e Zd ZU dZded< eddZded< dd	 ZdddZdS )rY   aQ  Expression that represents a constant scalar value.

    This expression type represents a literal value that will be broadcast
    to all rows when evaluated. The value can be any Python object.

    Args:
        value: The constant value to represent

    Example:
        >>> from ray.data.expressions import lit
        >>> import numpy as np
        >>> # Create a literal value
        >>> five = lit(5) # Creates LiteralExpr(value=5)
        >>> name = lit("John") # Creates LiteralExpr(value="John")
        >>> numpy_val = lit(np.int32(42)) # Creates LiteralExpr with numpy type
    r	   r}   F)r  r   r   c                 C  s   t | j}t| d| d S )Nr   )r   infer_dtyper}   r   __setattr__)ri   inferred_dtyperP   rP   rQ   __post_init__  s   zLiteralExpr.__post_init__r   rU   r   c                 C  s*   t |to| j|jkot| jt|ju S rm   )rV   rY   r}   rg   r   rP   rP   rQ   r     s
   

zLiteralExpr.structurally_equalsNr   )	r9   r:   r;   r<   r   r   r   r  r   rP   rP   rP   rQ   rY     s   
 rY   c                   @  sL   e Zd ZU dZded< ded< ded< edd d	d
Zded< dddZdS )r[   a  Expression that represents a binary operation between two expressions.

    This expression type represents an operation with two operands (left and right).
    The operation is specified by the `op` field, which must be one of the
    supported operations from the Operation enum.

    Args:
        op: The operation to perform (from Operation enum)
        left: The left operand expression
        right: The right operand expression

    Example:
        >>> from ray.data.expressions import col, lit, Operation
        >>> # Manually create a binary expression (usually done via operators)
        >>> expr = BinaryExpr(Operation.ADD, col("x"), lit(5))
        >>> # This is equivalent to: col("x") + lit(5)
    r%   r   r   r   r   c                   C  r   rm   r   rP   rP   rP   rQ   r    r  zBinaryExpr.<lambda>Fr  r   r   r   r	   rU   r   c                 C  s2   t |to| j|ju o| j|jo| j|jS rm   )rV   r[   r   r   r   r   r   rP   rP   rQ   r     s   

zBinaryExpr.structurally_equalsNr   r9   r:   r;   r<   r   r   r   r   rP   rP   rP   rQ   r[     s   
 r[   c                   @  sD   e Zd ZU dZded< ded< edd dd	Zd
ed< dddZdS )r]   ag  Expression that represents a unary operation on a single expression.

    This expression type represents an operation with one operand.
    Common unary operations include logical NOT, IS NULL, IS NOT NULL, etc.

    Args:
        op: The operation to perform (from Operation enum)
        operand: The operand expression

    Example:
        >>> from ray.data.expressions import col
        >>> # Check if a column is null
        >>> expr = col("age").is_null()  # Creates UnaryExpr(IS_NULL, col("age"))
        >>> # Logical not
        >>> expr = ~(col("active"))  # Creates UnaryExpr(NOT, col("active"))
    r%   r   r   r   c                   C     t  S rm   )r   r   rP   rP   rP   rQ   r  "  r  zUnaryExpr.<lambda>Fr  r   r   r   r	   rU   r   c                 C  s$   t |to| j|ju o| j|jS rm   )rV   r]   r   r   r   r   rP   rP   rQ   r   $  s
   

zUnaryExpr.structurally_equalsNr   r  rP   rP   rP   rQ   r]     s   
 r]   c                   @  s`   e Zd ZU dZded< dZded< eedZded	< ed
dddZ	ded< dd Z
dddZd
S )_CallableClassSpeca  Specification for a callable class UDF.

    This dataclass captures the class type and constructor arguments needed
    to instantiate a callable class UDF on an actor. It consolidates the
    callable class metadata that was previously spread across multiple fields.

    Attributes:
        cls: The original callable class type
        args: Positional arguments for the constructor
        kwargs: Keyword arguments for the constructor
        _cached_key: Pre-computed key that survives serialization
    rg   clsrP   Tuple[Any, ...]args)r  Dict[str, Any]kwargsNF)defaultcomparer   zOptional[Tuple]_cached_keyc                 C  s   | j du rA| jj d| jj }z|| jtt| j f}t	| W n t
y7   |t| jt| jf}Y nw t| d| dS dS )zPre-compute and cache the key at construction time.

        This ensures the same key survives serialization, since the cached
        key tuple (containing the already-computed repr strings) gets pickled
        and unpickled as-is.
        Nr~   r  )r  r  r:   r;   r  tuplesortedr  itemshashrf   r   r   r  )ri   class_idkeyrP   rP   rQ   r  @  s   
z _CallableClassSpec.__post_init__rU   r   c                 C  r  )a  Return the pre-computed hashable key for UDF instance lookup.

        The key uniquely identifies a UDF by its class and constructor arguments.
        This ensures that the same class with different constructor args
        (e.g., Multiplier(2) vs Multiplier(3)) are treated as distinct UDFs.

        Returns:
            A hashable tuple that uniquely identifies this UDF configuration.
        )r  r   rP   rP   rQ   make_keyW  s   
z_CallableClassSpec.make_key)rU   r   )r9   r:   r;   r<   r   r  r   dictr  r  r  r!  rP   rP   rP   rQ   r  ,  s   
 r  c                   @  sJ   e Zd ZdZdd
dZedddZ edddZdddZd ddZdS )!_CallableClassUDFa  A wrapper that makes callable class UDFs appear as regular functions.

    This class wraps callable class UDFs for use in expressions. It provides
    an `init()` method that should be called at actor startup via `init_fn`
    to instantiate the underlying class before any blocks are processed.

    Key responsibilities:
    1. Store the callable class and constructor arguments
    2. Provide init() for actor startup initialization
    3. Handle async bridging for coroutine/async generator UDFs
    4. Reuse the same instance across all calls (actor semantics)

    Example:
        >>> @udf(return_dtype=DataType.int32())
        ... class AddOffset:
        ...     def __init__(self, offset=1):
        ...         self.offset = offset
        ...     def __call__(self, x):
        ...         return pc.add(x, self.offset)
        >>>
        >>> add_five = AddOffset(5)  # Creates _CallableClassUDF internally
        >>> expr = add_five(col("value"))  # Creates UDFExpr with fn=_CallableClassUDF
    r  rg   	ctor_argsr  ctor_kwargsr  r   r   c                 C  s2   || _ || _|| _|| _d| _t|||d| _dS )a!  Initialize the _CallableClassUDF wrapper.

        Args:
            cls: The original callable class
            ctor_args: Constructor positional arguments
            ctor_kwargs: Constructor keyword arguments
            return_dtype: The return data type for schema inference
        N)r  r  r  )_cls
_ctor_args_ctor_kwargs_return_dtype	_instancer  _callable_class_spec)ri   r  r$  r%  r   rP   rP   rQ   __init__}  s   z_CallableClassUDF.__init__rU   r   c                 C  s   | j jS )z2Return the original class name for error messages.)r&  r9   r   rP   rP   rQ   r9     s   z_CallableClassUDF.__name__r  c                 C  r  )zReturn the callable class spec for this UDF.

        Used for deduplication when the same UDF appears multiple times
        in an expression tree.
        )r+  r   rP   rP   rQ   callable_class_spec  s   z%_CallableClassUDF.callable_class_specNonec                 C  s(   | j du r| j| ji | j| _ dS dS )zInitialize the UDF instance. Called at actor startup via init_fn.

        This ensures the callable class is instantiated before any blocks
        are processed, matching the behavior of map_batches callable classes.
        N)r*  r&  r'  r(  r   rP   rP   rQ   r    s   
z_CallableClassUDF.initr  r	   r  c                 O  sB   | j du rtd| jj dddlm} || j g|R i |S )aM  Call the UDF instance.

        Args:
            *args: Evaluated expression arguments (PyArrow arrays, etc.)
            **kwargs: Evaluated expression keyword arguments

        Returns:
            The result of calling the UDF instance

        Raises:
            RuntimeError: If init() was not called before __call__
        Nz_CallableClassUDF 'zr' was not initialized. init() must be called before __call__. This typically happens via init_fn at actor startup.r   )$_call_udf_instance_with_async_bridge)r*  RuntimeErrorr&  r9   ray.data.util.expression_utilsr/  )ri   r  r  r/  rP   rP   rQ   __call__  s   
z_CallableClassUDF.__call__N)r  rg   r$  r  r%  r  r   r   r   )rU   r  )rU   r.  )r  r	   r  r	   rU   r	   )	r9   r:   r;   r<   r,  r   r-  r  r2  rP   rP   rP   rQ   r#  d  s    

	r#  c                   @  sB   e Zd ZU dZded< ded< ded< edd
dZdddZdS )r"   a  Expression that represents a user-defined function call.

    This expression type wraps a UDF with schema inference capabilities,
    allowing UDFs to be used seamlessly within the expression system.

    UDFs operate on batches of data, where each column argument is passed
    as a PyArrow Array containing multiple values from that column across the batch.

    Args:
        fn: The user-defined function to call. For callable classes, this is an
            _CallableClassUDF instance that handles lazy instantiation internally.
        args: List of argument expressions (positional arguments)
        kwargs: Dictionary of keyword argument expressions

    Example:
        >>> from ray.data.expressions import col, udf
        >>> import pyarrow as pa
        >>> import pyarrow.compute as pc
        >>> from ray.data.datatype import DataType
        >>>
        >>> @udf(return_dtype=DataType.int32())
        ... def add_one(x: pa.Array) -> pa.Array:
        ...     return pc.add(x, 1)
        >>>
        >>> # Use in expressions
        >>> expr = add_one(col("value"))

        >>> # Callable class example
        >>> @udf(return_dtype=DataType.int32())
        ... class AddOffset:
        ...     def __init__(self, offset=1):
        ...         self.offset = offset
        ...     def __call__(self, x: pa.Array) -> pa.Array:
        ...         return pc.add(x, self.offset)
        >>>
        >>> # Use callable class
        >>> add_five = AddOffset(5)
        >>> expr = add_five(col("value"))
    Callable[..., BatchColumn]fnz
List[Expr]r  zDict[str, Expr]r  rU   Optional[_CallableClassSpec]c                 C  s   t | jtr
| jjS dS )zReturn callable_class_spec if fn is an _CallableClassUDF, else None.

        This property maintains backward compatibility with code that checks
        for callable_class_spec.
        N)rV   r4  r#  r-  r   rP   rP   rQ   r-    s   zUDFExpr.callable_class_specr   r	   r   c                   s   t  tsdS t jtr t  jtsdS jj jjkrdS nj jkr(dS tjt jkoXtdd tj jD oXj	
  j	
 koXt fddj	
 D S )NFc                 s  s    | ]
\}}| |V  qd S rm   )r   ).0abrP   rP   rQ   	<genexpr>  s    z.UDFExpr.structurally_equals.<locals>.<genexpr>c                 3  s&    | ]}j |  j | V  qd S rm   )r  r   )r6  kr   ri   rP   rQ   r9    s
    
)rV   r"   r4  r#  r-  lenr  allzipr  keysr   rP   r;  rQ   r     s&   
zUDFExpr.structurally_equalsN)rU   r5  r   )r9   r:   r;   r<   r   r   r-  r   rP   rP   rP   rQ   r"     s   
 (
r"   r4  r3  r   r   rU   Callable[..., UDFExpr]c                   s&   d fdd}t |   |_|S )aw  Create a callable that generates UDFExpr when called with expressions.

    Args:
        fn: The user-defined function to wrap. Can be a regular function
            or an _CallableClassUDF instance (for callable classes).
        return_dtype: The return data type of the UDF

    Returns:
        A callable that creates UDFExpr instances when called with expressions
    rU   r"   c                    sx   g }| D ]}t |tr|| q|t| qi }| D ]\}}t |tr-|||< qt|||< qt ||dS )N)r4  r  r  r   )rV   r   appendrY   r  r"   )r  r  	expr_argsargexpr_kwargsr:  vr4  r   rP   rQ   udf_callable-  s    


z*_create_udf_callable.<locals>.udf_callableN)rU   r"   )	functoolsupdate_wrapper_original_fn)r4  r   rG  rP   rF  rQ   _create_udf_callable  s   rK  c                      d fdd}|S )	a
  
    Decorator to convert a UDF into an expression-compatible function.

    This decorator allows UDFs to be used seamlessly within the expression system,
    enabling schema inference and integration with other expressions.

    IMPORTANT: UDFs operate on batches of data, not individual rows. When your UDF
    is called, each column argument will be passed as a PyArrow Array containing
    multiple values from that column across the batch. Under the hood, when working
    with multiple columns, they get translated to PyArrow arrays (one array per column).

    Args:
        return_dtype: The data type of the return value of the UDF

    Returns:
        A callable that creates UDFExpr instances when called with expressions

    Example:
        >>> from ray.data.expressions import col, udf
        >>> import pyarrow as pa
        >>> import pyarrow.compute as pc
        >>> import ray
        >>>
        >>> # UDF that operates on a batch of values (PyArrow Array)
        >>> @udf(return_dtype=DataType.int32())
        ... def add_one(x: pa.Array) -> pa.Array:
        ...     return pc.add(x, 1)  # Vectorized operation on the entire Array
        >>>
        >>> # UDF that combines multiple columns (each as a PyArrow Array)
        >>> @udf(return_dtype=DataType.string())
        ... def format_name(first: pa.Array, last: pa.Array) -> pa.Array:
        ...     return pc.binary_join_element_wise(first, last, " ")  # Vectorized string concatenation
        >>>
        >>> # Callable class UDF
        >>> @udf(return_dtype=DataType.int32())
        ... class AddOffset:
        ...     def __init__(self, offset=1):
        ...         self.offset = offset
        ...     def __call__(self, x: pa.Array) -> pa.Array:
        ...         return pc.add(x, self.offset)
        >>>
        >>> # Use in dataset operations
        >>> ds = ray.data.from_items([
        ...     {"value": 5, "first": "John", "last": "Doe"},
        ...     {"value": 10, "first": "Jane", "last": "Smith"}
        ... ])
        >>>
        >>> # Single column transformation (operates on batches)
        >>> ds_incremented = ds.with_column("value_plus_one", add_one(col("value")))
        >>>
        >>> # Multi-column transformation (each column becomes a PyArrow Array)
        >>> ds_formatted = ds.with_column("full_name", format_name(col("first"), col("last")))
        >>>
        >>> # Callable class usage
        >>> add_five = AddOffset(5)
        >>> ds_with_offset = ds.with_column("value_plus_five", add_five(col("value")))
        >>>
        >>> # Can also be used in complex expressions
        >>> ds_complex = ds.with_column("doubled_plus_one", add_one(col("value")) * 2)
    func_or_class*Union[Callable[..., BatchColumn], Type[T]]rU   	Decoratedc                   sN   t  tr"t tr"G  fddd} j|_ j|_ j|_|S t S )Nc                      s*   e Zd ZdZ fddZfddZdS )z<udf.<locals>.decorator.<locals>.ExpressionAwareCallableClassa  Intercepts callable class instantiation to delay until actor execution.

                Allows natural syntax like:
                    add_five = AddOffset(5)
                    ds.with_column("result", add_five(col("x")))

                When instantiated, creates an _CallableClassUDF that is completely
                self-contained - it handles lazy instantiation and async bridging
                internally. From the planner's perspective, this is just a regular
                callable function.
                c                   s   t  ||d| _d S )N)r  r$  r%  r   )r#  	_expr_udf)ri   r  r  rM  r   rP   rQ   r,    s   zEudf.<locals>.decorator.<locals>.ExpressionAwareCallableClass.__init__c                   s   t | j |i |S rm   )rK  rP  )ri   	call_argscall_kwargsr   rP   rQ   r2    s   zEudf.<locals>.decorator.<locals>.ExpressionAwareCallableClass.__call__N)r9   r:   r;   r<   r,  r2  rP   rQ  rP   rQ   ExpressionAwareCallableClass  s    rT  )rV   rg   
issubclassr
   r9   r;   r:   rK  )rM  rT  r   )rM  rQ   	decorator  s   !
zudf.<locals>.decoratorN)rM  rN  rU   rO  rP   r   rV  rP   r   rQ   udfM  s   ?2rX  c                   s   t   fdd}|S )a  Wrap a PyArrow compute function to auto-convert inputs to PyArrow format.

    This wrapper ensures that pandas Series and numpy arrays are converted to
    PyArrow Arrays before being passed to the function, enabling PyArrow compute
    functions to work seamlessly with any block format.

    Args:
        fn: The PyArrow compute function to wrap

    Returns:
        A wrapped function that handles format conversion
    c                    s   dd l  dd ldd l fddfdd| D }fdd| D }dd |D }d	d | D }td
d |D pKtd
d | D }|i |}|rbt|jjfrb|	 }|S )Nr   c                   sX   t | jjfr| dfS t | jrj| dfS t |  jr(| dfS | dfS )z+Convert a value to PyArrow Array if needed.FT)rV   ArrayChunkedArraySeriesfrom_pandasndarrayr   )val)npr   pdrP   rQ   to_arrow  s   z@_create_pyarrow_wrapper.<locals>.arrow_wrapper.<locals>.to_arrowc                   s   g | ]} |qS rP   rP   )r6  rC  ra  rP   rQ   
<listcomp>      zB_create_pyarrow_wrapper.<locals>.arrow_wrapper.<locals>.<listcomp>c                   s   i | ]	\}}| |qS rP   rP   r6  r:  rE  rb  rP   rQ   
<dictcomp>      zB_create_pyarrow_wrapper.<locals>.arrow_wrapper.<locals>.<dictcomp>c                 S  s   g | ]}|d  qS r   rP   r6  rE  rP   rP   rQ   rc    rd  c                 S  s   i | ]	\}}||d  qS rh  rP   re  rP   rP   rQ   rf    rg  c                 s  s    | ]}|d  V  qdS )   NrP   ri  rP   rP   rQ   r9    s    zA_create_pyarrow_wrapper.<locals>.arrow_wrapper.<locals>.<genexpr>)
numpypandasr   r  anyr   rV   rY  rZ  	to_pandas)r  r  args_resultskwargs_resultsconverted_argsconverted_kwargsinput_was_pandasr   r4  )r_  r   r`  ra  rQ   arrow_wrapper  s   z._create_pyarrow_wrapper.<locals>.arrow_wrapper)rH  wraps)r4  ru  rP   rt  rQ   _create_pyarrow_wrapper  s   #rw  c                   rL  )	aE  Decorator for PyArrow compute functions with automatic format conversion.

    This decorator wraps PyArrow compute functions to automatically convert pandas
    Series and numpy arrays to PyArrow Arrays, ensuring the function works seamlessly
    regardless of the underlying block format (pandas, arrow, or items).

    Used internally by namespace methods (list, str, struct) that wrap PyArrow
    compute functions.

    Args:
        return_dtype: The data type of the return value

    Returns:
        A callable that creates UDFExpr instances with automatic conversion
    funcr3  rU   r@  c                   s   t | }t| S rm   )rw  rK  )rx  
wrapped_fnr   rP   rQ   rV  
  s   
zpyarrow_udf.<locals>.decoratorN)rx  r3  rU   r@  rP   rW  rP   r   rQ   pyarrow_udf  s   rz  pc_funcCallable[..., pyarrow.Array]DataType | NoneCallable[..., 'UDFExpr']c                   s   d fdd	}|S )z>Create an expression UDF backed by a PyArrow compute function.rS   rT   
positionalr	   r  rU   rs   c                   s*   t p| jdd fdd}|| S )Nr   r   pyarrow.ArrayrU   c                   s   | gR i  S rm   rP   )r   )r  r{  r  rP   rQ   rX    r   z9_create_pyarrow_compute_udf.<locals>.wrapper.<locals>.udf)r   r  rU   r  )rz  r   )rS   r  r  rX  r{  r   )r  r  rQ   wrapper  s   z,_create_pyarrow_compute_udf.<locals>.wrapperN)rS   rT   r  r	   r  r	   rU   rs   rP   )r{  r   r  rP   r  rQ   r     s   r   c                   @  sH   e Zd ZU dZded< dZded< edd d	d
Zded< dddZdS )rb   z0Expression that represents a download operation.r   uri_column_nameNz'pyarrow.fs.FileSystem'
filesystemc                   C  r  rm   )r   binaryrP   rP   rP   rQ   r  *  r  zDownloadExpr.<lambda>Fr  r   r   r   r	   rU   r   c                 C  r
  rm   )rV   rb   r  r   rP   rP   rQ   r   ,  s   

z DownloadExpr.structurally_equalsr   )	r9   r:   r;   r<   r   r  r   r   r   rP   rP   rP   rQ   rb   #  s   
 rb   c                   @  sV   e Zd ZU dZded< ded< ded< edd	d
ZdddZdddZdddZ	dS )r_   z6Expression that represents an alias for an expression.r   rS   r   r   r   r   rU   c                 C  r  )zGet the alias name.r  r   rP   rP   rQ   rz   <  r  zAliasExpr.namerz   rT   c                 C  s   t | jj| j|| jdS )N)r   r   )r_   rS   r   r   r   rP   rP   rQ   r   A  s   zAliasExpr.aliasc                 C  r  rm   )rS   r   rP   rP   rQ   r   G  s   zAliasExpr._unaliasr   r	   c                 C  s0   t |to| j|jo| j|jko| j| jkS rm   )rV   r_   rS   r   rz   r   r   rP   rP   rQ   r   J  s   


zAliasExpr.structurally_equalsNr   r   r   r   )
r9   r:   r;   r<   r   r   rz   r   r   r   rP   rP   rP   rQ   r_   3  s   
 

r_   c                   @  s4   e Zd ZU dZedd ddZded< dddZdS )rd   a  Expression that represents all columns from the input.

    This is a special expression used in projections to indicate that
    all existing columns should be preserved at this position in the output.
    It's typically used internally by operations like with_column() and
    rename_columns() to maintain existing columns.

    Example:
        When with_column("new_col", expr) is called, it creates:
        Project(exprs=[star(), expr.alias("new_col")])

        This means: keep all existing columns, then add/overwrite "new_col"
    c                   C  r   rm   r   rP   rP   rP   rQ   r  e  r  zStarExpr.<lambda>Fr  r   r   r   r	   rU   r   c                 C  s
   t |tS rm   )rV   rd   r   rP   rP   rQ   r   g  s   
zStarExpr.structurally_equalsNr   )r9   r:   r;   r<   r   r   r   r   rP   rP   rP   rQ   rd   S  s   
 rd   betarz   r   c                 C     t | S )a  
    Reference an existing column by name.

    This is the primary way to reference columns in expressions.
    The returned expression will extract values from the specified
    column when evaluated.

    Args:
        name: The name of the column to reference

    Returns:
        A ColumnExpr that references the specified column

    Example:
        >>> from ray.data.expressions import col
        >>> # Reference columns in an expression
        >>> expr = col("price") * col("quantity")
        >>>
        >>> # Use with Dataset.with_column()
        >>> import ray
        >>> ds = ray.data.from_items([{"price": 10, "quantity": 2}])
        >>> ds = ds.with_column("total", col("price") * col("quantity"))
    )rW   )rz   rP   rP   rQ   colk  s   r  r}   r	   c                 C  r  )a  
    Create a literal expression from a constant value.

    This creates an expression that represents a constant scalar value.
    The value will be broadcast to all rows when the expression is evaluated.

    Args:
        value: The constant value to represent. Can be any Python object
               (int, float, str, bool, etc.)

    Returns:
        A LiteralExpr containing the specified value

    Example:
        >>> from ray.data.expressions import col, lit
        >>> # Create literals of different types
        >>> five = lit(5)
        >>> pi = lit(3.14159)
        >>> name = lit("Alice")
        >>> flag = lit(True)
        >>>
        >>> # Use in expressions
        >>> expr = col("age") + lit(1) # Add 1 to age column
        >>>
        >>> # Use with Dataset.with_column()
        >>> import ray
        >>> ds = ray.data.from_items([{"age": 25}, {"age": 30}])
        >>> ds = ds.with_column("age_plus_one", col("age") + lit(1))
    )rY   )r}   rP   rP   rQ   lit  s   r  c                   C  s   t  S )a9  
    References all input columns from the input.

    This is a special expression used in projections to preserve all
    existing columns. It's typically used with operations that want to
    add or modify columns while keeping the rest.

    Returns:
        A StarExpr that represents all input columns.
    )rd   rP   rP   rP   rQ   star  s   r  )r  r  r  !Optional['pyarrow.fs.FileSystem']c                C  s   t | |dS )a  
    Create a download expression that downloads content from URIs.

    This creates an expression that will download bytes from URIs stored in
    a specified column. When evaluated, it will fetch the content from each URI
    and return the downloaded bytes.

    Args:
        uri_column_name: The name of the column containing URIs to download from
        filesystem: PyArrow filesystem to use for reading remote files.
            If None, the filesystem is auto-detected from the path scheme.
    Returns:
        A DownloadExpr that will download content from the specified URI column

    Example:
        >>> from ray.data.expressions import download
        >>> import ray
        >>> # Create dataset with URIs
        >>> ds = ray.data.from_items([
        ...     {"uri": "s3://bucket/file1.jpg", "id": "1"},
        ...     {"uri": "s3://bucket/file2.jpg", "id": "2"}
        ... ])
        >>> # Add downloaded bytes column
        >>> ds_with_bytes = ds.with_column("bytes", download("uri"))
    r  r  )rb   r  rP   rP   rQ   download  s   r  )r%   r   rW   rY   r[   r]   r"   rb   r_   rd   rz  rX  r  r  r  r  r   r   r   r    r   c                 C  s   | dkrddl m} |S | dkrddlm} |S | dkr$ddlm} |S | dkr0dd	lm} |S | d
kr<ddlm	} |S t
dtd| )z;Lazy import of namespace classes to avoid circular imports.r   r   r   r   r   r   r   r    r   r   r   zmodule z has no attribute )r   r   r   r   r   r   r   r    r   r   AttributeErrorr9   )rz   r   r   r   r    r   rP   rP   rQ   __getattr__  s    r  )r4  r3  r   r   rU   r@  )r   r   rU   r@  )r4  r3  rU   r3  rm   )r{  r|  r   r}  rU   r~  )rz   r   rU   rW   )r}   r	   rU   rY   )rU   rd   )r  r   r  r  rU   rb   r  )H
__future__r   rH  abcr   r   dataclassesr   r   enumr   typingr   r	   r
   r   r   r   r   r   r   r   r   r   pyarrow.computecomputery   ray.data.blockr   ray.data.datatyper   ray.util.annotationsr   r   r   r   r   r   r   r   r   r   r   r    r!   UDFCallablerO  r%   rR   rv   r   rW   rY   r[   r]   r  r#  r"   rK  rX  rw  rz  r   rb   r_   rd   r  r  r  r  __all__r  rP   rP   rP   rQ   <module>   s    408@   L$""7f
R/
s7"(