o
    $i14                     @  s   d Z ddlmZ ddlmZ ddlmZmZmZm	Z	 ddl
Z
ddlmZ ddlmZ ddlmZ er:ddlmZmZ dddZeG dd dZdS )zCString namespace for expression operations on string-typed columns.    )annotations)	dataclass)TYPE_CHECKINGAnyCallableLiteralN)DataTypepyarrow_udf)ExprUDFExprpc_funcCallable[..., pyarrow.Array]return_dtyper   returnCallable[..., 'UDFExpr']c                   s   d fdd	}|S )a  Helper to create a string UDF that wraps a PyArrow compute function.

    This helper handles all types of PyArrow compute operations:
    - Unary operations (no args): upper(), lower(), reverse()
    - Pattern operations (pattern + args): starts_with(), contains()
    - Multi-argument operations: replace(), replace_slice()

    Args:
        pc_func: PyArrow compute function that takes (array, *positional, **kwargs)
        return_dtype: The return data type

    Returns:
        A callable that creates UDFExpr instances
    exprr   
positionalr   kwargsr   	'UDFExpr'c                   s$   t dd fdd}|| S )Nr   arrpyarrow.Arrayr   c                   s   | gR i  S N r   )r   r   r   r   l/home/ubuntu/veenaModal/venv/lib/python3.10/site-packages/ray/data/namespace_expressions/string_namespace.pyudf%      z-_create_str_udf.<locals>.wrapper.<locals>.udfr   r   r   r   r	   )r   r   r   r   r   r   )r   r   r   wrapper$   s   z _create_str_udf.<locals>.wrapperN)r   r   r   r   r   r   r   r   r   )r   r   r!   r   r    r   _create_str_udf   s   r"   c                   @  s  e Zd ZU dZded< dnddZdndd	Zdnd
dZdnddZdnddZ	dnddZ
dnddZdnddZdnddZdnddZdnddZdnddZdnddZdnd d!Zdnd"d#Zdnd$d%Zdnd&d'Zdnd(d)Zdod/d0Zdod1d2Zdod3d4Zdod5d6Zdod7d8Zdod9d:Zdod;d<Zdod=d>Zdod?d@ZdndAdBZ dpdCdDZ!dqdFdGZ"dqdHdIZ#drdMdNZ$dodOdPZ%dodQdRZ&dpdSdTZ'dodUdVZ(dsdXdYZ)	Zdtdud]d^Z*dvdwdbdcZ+dvdwdddeZ,dvdwdfdgZ-	Z	hdxdydldmZ.d_S )z_StringNamespacea  Namespace for string operations on expression columns.

    This namespace provides methods for operating on string-typed columns using
    PyArrow compute functions.

    Example:
        >>> from ray.data.expressions import col
        >>> # Convert to uppercase
        >>> expr = col("name").str.upper()
        >>> # Get string length
        >>> expr = col("name").str.len()
        >>> # Check if string starts with a prefix
        >>> expr = col("name").str.starts_with("A")
    r   _exprr   r   c                 C     t tjt | jS )z,Get the length of each string in characters.)r"   pcutf8_lengthr   int32r$   selfr   r   r   lenB   r   z_StringNamespace.lenc                 C  r%   )z'Get the length of each string in bytes.)r"   r&   binary_lengthr   r(   r$   r)   r   r   r   byte_lenF   r   z_StringNamespace.byte_lenc                 C  r%   )zConvert strings to uppercase.)r"   r&   
utf8_upperr   stringr$   r)   r   r   r   upperK   r   z_StringNamespace.upperc                 C  r%   )zConvert strings to lowercase.)r"   r&   
utf8_lowerr   r/   r$   r)   r   r   r   lowerO   r   z_StringNamespace.lowerc                 C  r%   )z.Capitalize the first character of each string.)r"   r&   utf8_capitalizer   r/   r$   r)   r   r   r   
capitalizeS   r   z_StringNamespace.capitalizec                 C  r%   )zConvert strings to title case.)r"   r&   
utf8_titler   r/   r$   r)   r   r   r   titleW   r   z_StringNamespace.titlec                 C  r%   )z Swap the case of each character.)r"   r&   utf8_swapcaser   r/   r$   r)   r   r   r   swapcase[   r   z_StringNamespace.swapcasec                 C  r%   )z4Check if strings contain only alphabetic characters.)r"   r&   utf8_is_alphar   boolr$   r)   r   r   r   is_alpha`   r   z_StringNamespace.is_alphac                 C  r%   )z6Check if strings contain only alphanumeric characters.)r"   r&   utf8_is_alnumr   r:   r$   r)   r   r   r   is_alnumd   r   z_StringNamespace.is_alnumc                 C  r%   )z%Check if strings contain only digits.)r"   r&   utf8_is_digitr   r:   r$   r)   r   r   r   is_digith   r   z_StringNamespace.is_digitc                 C  r%   )z1Check if strings contain only decimal characters.)r"   r&   utf8_is_decimalr   r:   r$   r)   r   r   r   
is_decimall   r   z_StringNamespace.is_decimalc                 C  r%   )z1Check if strings contain only numeric characters.)r"   r&   utf8_is_numericr   r:   r$   r)   r   r   r   
is_numericp   r   z_StringNamespace.is_numericc                 C  r%   )z)Check if strings contain only whitespace.)r"   r&   utf8_is_spacer   r:   r$   r)   r   r   r   is_spacet   r   z_StringNamespace.is_spacec                 C  r%   )zCheck if strings are lowercase.)r"   r&   utf8_is_lowerr   r:   r$   r)   r   r   r   is_lowerx   r   z_StringNamespace.is_lowerc                 C  r%   )zCheck if strings are uppercase.)r"   r&   utf8_is_upperr   r:   r$   r)   r   r   r   is_upper|   r   z_StringNamespace.is_upperc                 C  r%   )z!Check if strings are title-cased.)r"   r&   utf8_is_titler   r:   r$   r)   r   r   r   is_title   r   z_StringNamespace.is_titlec                 C  r%   )z3Check if strings contain only printable characters.)r"   r&   utf8_is_printabler   r:   r$   r)   r   r   r   is_printable   r   z_StringNamespace.is_printablec                 C  r%   )z/Check if strings contain only ASCII characters.)r"   r&   string_is_asciir   r:   r$   r)   r   r   r   is_ascii   r   z_StringNamespace.is_asciipatternstrargsr   r   c                 O  &   t tjt | j|g|R i |S )z&Check if strings start with a pattern.)r"   r&   starts_withr   r:   r$   r*   rP   rR   r   r   r   r   rT         z_StringNamespace.starts_withc                 O  rS   )z$Check if strings end with a pattern.)r"   r&   	ends_withr   r:   r$   rU   r   r   r   rW      rV   z_StringNamespace.ends_withc                 O  rS   )z%Check if strings contain a substring.)r"   r&   match_substringr   r:   r$   rU   r   r   r   contains   rV   z_StringNamespace.containsc                 O  rS   )z)Match strings against a SQL LIKE pattern.)r"   r&   
match_liker   r:   r$   rU   r   r   r   match   rV   z_StringNamespace.matchc                 O  rS   )z)Find the first occurrence of a substring.)r"   r&   find_substringr   r(   r$   rU   r   r   r   find   rV   z_StringNamespace.findc                 O  rS   )z!Count occurrences of a substring.)r"   r&   count_substringr   r(   r$   rU   r   r   r   count   rV   z_StringNamespace.countc                 O  rS   )z3Find the first occurrence matching a regex pattern.)r"   r&   find_substring_regexr   r(   r$   rU   r   r   r   
find_regex   rV   z_StringNamespace.find_regexc                 O  rS   )z+Count occurrences matching a regex pattern.)r"   r&   count_substring_regexr   r(   r$   rU   r   r   r   count_regex   rV   z_StringNamespace.count_regexc                 O  rS   )z'Check if strings match a regex pattern.)r"   r&   match_substring_regexr   r:   r$   rU   r   r   r   match_regex   rV   z_StringNamespace.match_regexc                 C  r%   )zReverse each string.)r"   r&   utf8_reverser   r/   r$   r)   r   r   r   reverse   r   z_StringNamespace.reversec                 O  s$   t tjt | jg|R i |S )z"Slice strings by codeunit indices.)r"   r&   utf8_slice_codeunitsr   r/   r$   r*   rR   r   r   r   r   slice      z_StringNamespace.slicereplacementc                 O  (   t tjt | j||g|R i |S )z#Replace occurrences of a substring.)r"   r&   replace_substringr   r/   r$   r*   rP   rl   rR   r   r   r   r   replace      z_StringNamespace.replacec                 O  rm   )z-Replace occurrences matching a regex pattern.)r"   r&   replace_substring_regexr   r/   r$   ro   r   r   r   replace_regex   rq   z_StringNamespace.replace_regexstartintstopc                 O  s*   t tjt | j|||g|R i |S )zReplace a slice with a string.)r"   r&   binary_replace_slicer   r/   r$   )r*   rt   rv   rl   rR   r   r   r   r   replace_slice   s   
z_StringNamespace.replace_slicec                 O  &   t tjtt| j|g|R i |S )zSplit strings by a pattern.)r"   r&   split_patternr   objectr$   rU   r   r   r   split   rV   z_StringNamespace.splitc                 O  ry   )z!Split strings by a regex pattern.)r"   r&   split_pattern_regexr   r{   r$   rU   r   r   r   split_regex   rV   z_StringNamespace.split_regexc                 O  s$   t tjtt| jg|R i |S )zSplit strings on whitespace.)r"   r&   utf8_split_whitespacer   r{   r$   ri   r   r   r   split_whitespace   rk   z!_StringNamespace.split_whitespacec                 O  rS   )z-Extract a substring matching a regex pattern.)r"   r&   extract_regexr   r/   r$   rU   r   r   r   extract   rV   z_StringNamespace.extractnc                 O  rS   )zRepeat each string n times.)r"   r&   binary_repeatr   r/   r$   )r*   r   rR   r   r   r   r   repeat   rV   z_StringNamespace.repeat widthpaddingc                 O  rm   )z)Center strings in a field of given width.)r"   r&   utf8_centerr   r/   r$   )r*   r   r   rR   r   r   r   r   center  rq   z_StringNamespace.centerN
characters
str | Nonec                   &   t t dd fdd}|| jS )	zRemove leading and trailing whitespace or specified characters.

        Args:
            characters: Characters to remove. If None, removes whitespace.

        Returns:
            UDFExpr that strips characters from both ends.
        r   r   r   r   c                        d u r	t | S t j|  dS Nr   )r&   utf8_trim_whitespace	utf8_trimr   r   r   r   
_str_strip     
z*_StringNamespace.strip.<locals>._str_stripNr   r
   r   r/   r$   )r*   r   r   r   r   r   strip     

z_StringNamespace.stripc                   r   )	zRemove leading whitespace or specified characters.

        Args:
            characters: Characters to remove. If None, removes whitespace.

        Returns:
            UDFExpr that strips characters from the left.
        r   r   r   r   c                   r   r   )r&   utf8_ltrim_whitespace
utf8_ltrimr   r   r   r   _str_lstrip*  r   z,_StringNamespace.lstrip.<locals>._str_lstripNr   r   )r*   r   r   r   r   r   lstrip   r   z_StringNamespace.lstripc                   r   )	zRemove trailing whitespace or specified characters.

        Args:
            characters: Characters to remove. If None, removes whitespace.

        Returns:
            UDFExpr that strips characters from the right.
        r   r   r   r   c                   r   r   )r&   utf8_rtrim_whitespace
utf8_rtrimr   r   r   r   _str_rstrip=  r   z,_StringNamespace.rstrip.<locals>._str_rstripNr   r   )r*   r   r   r   r   r   rstrip3  r   z_StringNamespace.rstriprightfillcharside Literal['left', 'right', 'both']c                   s*   t t dd fdd}|| jS )	a  Pad strings to a specified width.

        Args:
            width: Target width.
            fillchar: Character to use for padding.
            side: "left", "right", or "both" for padding side.

        Returns:
            UDFExpr that pads strings.
        r   r   r   r   c                   sP   dkrt j|  dS dkrt j|  dS dkr$t j|  dS td)Nr   )r   r   leftbothz'side must be 'left', 'right', or 'both')r&   	utf8_rpad	utf8_lpadr   
ValueErrorr   r   r   r   r   r   _str_padX  s   z&_StringNamespace.pad.<locals>._str_padNr   r   )r*   r   r   r   r   r   r   r   padG  s   

z_StringNamespace.pad)r   r   )rP   rQ   rR   r   r   r   r   r   )rR   r   r   r   r   r   )
rP   rQ   rl   rQ   rR   r   r   r   r   r   )rt   ru   rv   ru   rl   rQ   rR   r   r   r   r   r   )r   ru   rR   r   r   r   r   r   )r   )
r   ru   r   rQ   rR   r   r   r   r   r   r   )r   r   r   r   )r   r   )r   ru   r   rQ   r   r   r   r   )/__name__
__module____qualname____doc____annotations__r+   r-   r0   r2   r4   r6   r8   r;   r=   r?   rA   rC   rE   rG   rI   rK   rM   rO   rT   rW   rY   r[   r]   r_   ra   rc   re   rg   rj   rp   rs   rx   r|   r~   r   r   r   r   r   r   r   r   r   r   r   r   r#   .   s`   
 




































	r#   )r   r   r   r   r   r   )r   
__future__r   dataclassesr   typingr   r   r   r   pyarrowpyarrow.computecomputer&   ray.data.datatyper   ray.data.expressionsr
   r   r   r"   r#   r   r   r   r   <module>   s    
