o
    Q
i;_                     @   sn  d dl mZmZmZmZmZmZmZmZm	Z	m
Z
mZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZm Z m!Z!m"Z"m#Z#m$Z$m%Z%m&Z&m'Z'm(Z(m)Z)m*Z*m+Z+m,Z,m-Z-m.Z.m/Z/m0Z0m1Z1m2Z2m3Z3m4Z4m5Z5m6Z6m7Z7m8Z8m9Z9m:Z:m;Z;m<Z<m=Z=m>Z>m?Z?m@Z@mAZAmBZBmCZCmDZDmEZEmFZFmGZGmHZHmIZImJZJmKZKmLZLmMZMmNZNmOZOmPZP d dlQmRZR d dlSZSd dlTmUZU d dlVZVd dlWZXd dlWmYZY d dlZm[Z[ dd Z\eRd	d
Z]dd Z^dd Z_dd Z`dd Zadd Zbdd Zcdd Zddd Zeee  ef d  ZgZhd3ddZid4dddd Zjd!dd"d#d$Zkd%d& Zld5ddd'd(Zmd5ddd)d*Znd+ddd,d-d.Zod/d0 Zpd1d2 ZqdS )6    )PFunctionFunctionOptionsFunctionRegistryHashAggregateFunctionHashAggregateKernelKernelScalarAggregateFunctionScalarAggregateKernelScalarFunctionScalarKernelVectorFunctionVectorKernelArraySortOptionsAssumeTimezoneOptionsCastOptionsCountOptionsCumulativeOptionsCumulativeSumOptionsDayOfWeekOptionsDictionaryEncodeOptionsRunEndEncodeOptionsElementWiseAggregateOptionsExtractRegexOptionsExtractRegexSpanOptionsFilterOptionsIndexOptionsInversePermutationOptionsJoinOptionsListSliceOptionsListFlattenOptionsMakeStructOptionsMapLookupOptionsMatchSubstringOptionsModeOptionsNullOptions
PadOptionsPairwiseOptionsPartitionNthOptionsPivotWiderOptionsQuantileOptionsRandomOptionsRankOptionsRankQuantileOptionsReplaceSliceOptionsReplaceSubstringOptionsRoundBinaryOptionsRoundOptionsRoundTemporalOptionsRoundToMultipleOptionsScalarAggregateOptionsScatterOptionsSelectKOptionsSetLookupOptionsSkewOptionsSliceOptionsSortOptionsSplitOptionsSplitPatternOptionsStrftimeOptionsStrptimeOptionsStructFieldOptionsTakeOptionsTDigestOptionsTrimOptionsUtf8NormalizeOptionsVarianceOptionsWeekOptionsWinsorizeOptionsZeroFillOptionscall_functionfunction_registryget_functionlist_functionscall_tabular_functionregister_scalar_functionregister_tabular_functionregister_aggregate_functionregister_vector_function
UdfContext
Expression)
namedtupleN)dedent)_compute_docstrings)	docscrapec                 C   s   | j jS N)_doc	arg_names)func rZ   C/home/ubuntu/.local/lib/python3.10/site-packages/pyarrow/compute.py_get_arg_namess   s   r\   _OptionsClassDoc)paramsc                 C   s"   | j sd S t| j }t|d S )N
Parameters)__doc__rU   NumpyDocStringr]   )options_classdocrZ   rZ   r[   _scrape_options_class_docz   s   rd   c                 C   s  |j }t|j|j|j|jd| _|| _|| _g }|j	}|s/|jdkr$dnd}d|jd| }|
| d |j}|rD|
| d tj|j}	|
td	 t|}
|
D ]}|jd
v rbd}nd}|
| d| d |
d qX|d urt|}|r|jD ]}|
|j d|j d |jD ]}|
d| d qqn,td|j dt t|}|j D ]}|
td|j d|j d|j d q|
td|j d |
td |	d urt|	d}|
d| d d|| _| S )N)namearityrb   options_required   	argumentsargumentzCall compute function z with the given z.

z

z.        Parameters
        ----------
        )vectorscalar_aggregatez
Array-likezArray-like or scalar-likez : 
z"    Argument to compute function.
z    zOptions class z does not have a docstringz                z. : optional
                    Parameter for z7 constructor. Either `options`
                    or `z@` can be passed, but not both at the same time.
                z&            options : pyarrow.compute.zK, optional
                Alternative way of passing options.
            z        memory_pool : pyarrow.MemoryPool, optional
            If not passed, will allocate memory from the default memory pool.
         ) rW   dictre   rf   rb   rg   __arrow_compute_function____name____qualname__summaryappenddescriptionrT   function_doc_additionsgetrS   r\   kindrd   r^   typedescwarningswarnRuntimeWarninginspect	signature
parametersvaluesstripjoinr`   )wrapperexposed_namerY   rb   cpp_doc
doc_piecesrs   arg_strru   doc_additionrX   arg_namearg_typeoptions_class_docpsoptions_sigstrippedrZ   rZ   r[   _decorate_compute_function   sr   



r   c                 C   sF   | j j}|sd S zt | W S  ty"   td| dt Y d S w )NzPython binding for z not exposed)rW   rb   globalsKeyErrorr{   r|   r}   )rY   
class_namerZ   rZ   r[   _get_options_class   s   r   c                 C   s~   |s|r|d urt d| d||i |S |d ur=t|tr'|di |S t||r.|S t d| d| dt| d S )Nz	Function z@ called with both an 'options' argument and additional argumentsz expected a z parameter, got rZ   )	TypeError
isinstancero   ry   )re   rb   optionsargskwargsrZ   rZ   r[   _handle_options   s"   


r   c                    s@   d u rd d fdd
}|S d d d fdd
}|S )Nmemory_poolc                    sb    t urt| krt d  dt| d|r*t|d tr*tt|S |d | S )N takes  positional argument(s), but  were givenr   )Ellipsislenr   r   rQ   _calllistcall)r   r   )rf   rY   	func_namerZ   r[   r      s   z&_make_generic_wrapper.<locals>.wrapper)r   r   c                    s    t ur&t| k rt d  dt| d| d  }|d   }nd}t|||}|rBt|d trBtt||S ||| S )Nr   r   r   rZ   r   )	r   r   r   r   r   rQ   r   r   r   )r   r   r   r   option_argsrf   rY   r   rb   rZ   r[   r      s    rZ   )r   rY   rb   rf   r   rZ   r   r[   _make_generic_wrapper   s
   r   c                 C   s   ddl m} g }| D ]}||||j q
|D ]}||||j q|d urYt |}|j D ]}|j|j	|j
fv s?J |rH|j|j
d}|| q2||d|j
d d ||d|j
d d t |S )Nr   )	Parameter)rx   r   )defaultr   )r~   r   rt   POSITIONAL_ONLYVAR_POSITIONALr   r   r   rx   POSITIONAL_OR_KEYWORDKEYWORD_ONLYreplace	Signature)rX   var_arg_namesrb   r   r^   re   r   r   rZ   rZ   r[   _make_signature  s,   


r   c                 C   sj   t |}t|}|o|d d}|r| dg}ng }t| |||jd}t||||_t	|| ||S )N*)rf   )
r   r\   
startswithpoplstripr   rf   r   __signature__r   )re   rY   rb   rX   
has_varargr   r   rZ   rZ   r[   _wrap_function*  s   
r   c                  C   s   t  } t }ddd}| D ]1}|||}||}|jdkr"q|jdkr-|jdkr-q|| vs5J |t|| | |< | |< qdS )z
    Make global functions wrapping each compute function.

    Note that some of the automatically-generated wrappers may be overridden
    by custom versions below.
    and_or_)andorhash_aggregaterl   r   N)r   rH   rJ   rw   rI   rx   rf   r   )gregrewritescpp_namere   rY   rZ   rZ   r[   _make_global_functions:  s   

r   utf8_zero_fillc                 C   sh   |dup|du}|r|durt d|du r,tjj|}|du r't|}nt|}td| g||S )a  
    Cast array values to another data type. Can also be invoked as an array
    instance method.

    Parameters
    ----------
    arr : Array-like
    target_type : DataType or str
        Type to cast to
    safe : bool, default True
        Check for overflows or other unsafe conversions
    options : CastOptions, default None
        Additional checks pass by CastOptions
    memory_pool : MemoryPool, optional
        memory pool to use for allocations during function execution.

    Examples
    --------
    >>> from datetime import datetime
    >>> import pyarrow as pa
    >>> arr = pa.array([datetime(2010, 1, 1), datetime(2015, 1, 1)])
    >>> arr.type
    TimestampType(timestamp[us])

    You can use ``pyarrow.DataType`` objects to specify the target type:

    >>> cast(arr, pa.timestamp('ms'))
    <pyarrow.lib.TimestampArray object at ...>
    [
      2010-01-01 00:00:00.000,
      2015-01-01 00:00:00.000
    ]

    >>> cast(arr, pa.timestamp('ms')).type
    TimestampType(timestamp[ms])

    Alternatively, it is also supported to use the string aliases for these
    types:

    >>> arr.cast('timestamp[ms]')
    <pyarrow.lib.TimestampArray object at ...>
    [
      2010-01-01 00:00:00.000,
      2015-01-01 00:00:00.000
    ]
    >>> arr.cast('timestamp[ms]').type
    TimestampType(timestamp[ms])

    Returns
    -------
    casted : Array
        The cast result as a new Array
    NzRMust either pass values for 'target_type' and 'safe' or pass a value for 'options'Fcast)	
ValueErrorpatypeslibensure_typer   unsafesaferG   )arrtarget_typer   r   r   safe_vars_passedrZ   rZ   r[   r   \  s   6
r   r   c                C   s   |dur|dur|  ||| } n|  |} n
|dur!|  d|} t|tjs0tj|| jd}n| j|jkr@tj| | jd}t|d}td| g||}|durd| dkrdtj| | t	 d}|S )a  
    Find the index of the first occurrence of a given value.

    Parameters
    ----------
    data : Array-like
    value : Scalar-like object
        The value to search for.
    start : int, optional
    end : int, optional
    memory_pool : MemoryPool, optional
        If not passed, will allocate memory from the default memory pool.

    Returns
    -------
    index : int
        the index, or -1 if not found

    Examples
    --------
    >>> import pyarrow as pa
    >>> import pyarrow.compute as pc
    >>> arr = pa.array(["Lorem", "ipsum", "dolor", "sit", "Lorem", "ipsum"])
    >>> pc.index(arr, "ipsum")
    <pyarrow.Int64Scalar: 1>
    >>> pc.index(arr, "ipsum", start=2)
    <pyarrow.Int64Scalar: 5>
    >>> pc.index(arr, "amet")
    <pyarrow.Int64Scalar: -1>
    Nr   ry   valueindex)
slicer   r   Scalarscalarry   as_pyr   rG   int64)datar   startendr   r   resultrZ   rZ   r[   r     s   
r   T)boundscheckr   c                C   s   t |d}td| |g||S )a  
    Select values (or records) from array- or table-like data given integer
    selection indices.

    The result will be of the same type(s) as the input, with elements taken
    from the input array (or record batch / table fields) at the given
    indices. If an index is null then the corresponding value in the output
    will be null.

    Parameters
    ----------
    data : Array, ChunkedArray, RecordBatch, or Table
    indices : Array, ChunkedArray
        Must be of integer type
    boundscheck : boolean, default True
        Whether to boundscheck the indices. If False and there is an out of
        bounds index, will likely cause the process to crash.
    memory_pool : MemoryPool, optional
        If not passed, will allocate memory from the default memory pool.

    Returns
    -------
    result : depends on inputs
        Selected values for the given indices

    Examples
    --------
    >>> import pyarrow as pa
    >>> arr = pa.array(["a", "b", "c", None, "e", "f"])
    >>> indices = pa.array([0, None, 4, 3])
    >>> arr.take(indices)
    <pyarrow.lib.StringArray object at ...>
    [
      "a",
      null,
      "e",
      null
    ]
    )r   take)r?   rG   )r   indicesr   r   r   rZ   rZ   r[   r     s   
(r   c                 C   sV   t |tjtjtjfstj|| jd}n| j|jkr$tj| | jd}td| |gS )ae  Replace each null element in values with a corresponding
    element from fill_value.

    If fill_value is scalar-like, then every null element in values
    will be replaced with fill_value. If fill_value is array-like,
    then the i-th element in values will be replaced with the i-th
    element in fill_value.

    The fill_value's type must be the same as that of values, or it
    must be able to be implicitly casted to the array's type.

    This is an alias for :func:`coalesce`.

    Parameters
    ----------
    values : Array, ChunkedArray, or Scalar-like object
        Each null element is replaced with the corresponding value
        from fill_value.
    fill_value : Array, ChunkedArray, or Scalar-like object
        If not same type as values, will attempt to cast.

    Returns
    -------
    result : depends on inputs
        Values with all null elements replaced

    Examples
    --------
    >>> import pyarrow as pa
    >>> arr = pa.array([1, 2, None, 3], type=pa.int8())
    >>> fill_value = pa.scalar(5, type=pa.int8())
    >>> arr.fill_null(fill_value)
    <pyarrow.lib.Int8Array object at ...>
    [
      1,
      2,
      5,
      3
    ]
    >>> arr = pa.array([1, 2, None, 4, None])
    >>> arr.fill_null(pa.array([10, 20, 30, 40, 50]))
    <pyarrow.lib.Int64Array object at ...>
    [
      1,
      2,
      30,
      4,
      50
    ]
    r   coalesce)	r   r   ArrayChunkedArrayr   r   ry   r   rG   )r   
fill_valuerZ   rZ   r[   	fill_null  s
   3r   c                C   R   |du rg }t | tjtjfr|d ntdd |}t||}td| g||S )a  
    Select the indices of the top-k ordered elements from array- or table-like
    data.

    This is a specialization for :func:`select_k_unstable`. Output is not
    guaranteed to be stable.

    Parameters
    ----------
    values : Array, ChunkedArray, RecordBatch, or Table
        Data to sort and get top indices from.
    k : int
        The number of `k` elements to keep.
    sort_keys : List-like
        Column key names to order by when input is table-like data.
    memory_pool : MemoryPool, optional
        If not passed, will allocate memory from the default memory pool.

    Returns
    -------
    result : Array
        Indices of the top-k ordered elements

    Examples
    --------
    >>> import pyarrow as pa
    >>> import pyarrow.compute as pc
    >>> arr = pa.array(["a", "b", "c", None, "e", "f"])
    >>> pc.top_k_unstable(arr, k=3)
    <pyarrow.lib.UInt64Array object at ...>
    [
      5,
      4,
      2
    ]
    N)dummy
descendingc                 S      | dfS )Nr   rZ   key_namerZ   rZ   r[   <lambda>d      z top_k_unstable.<locals>.<lambda>select_k_unstabler   r   r   r   rt   mapr5   rG   r   k	sort_keysr   r   rZ   rZ   r[   top_k_unstable:     %
r   c                C   r   )a  
    Select the indices of the bottom-k ordered elements from
    array- or table-like data.

    This is a specialization for :func:`select_k_unstable`. Output is not
    guaranteed to be stable.

    Parameters
    ----------
    values : Array, ChunkedArray, RecordBatch, or Table
        Data to sort and get bottom indices from.
    k : int
        The number of `k` elements to keep.
    sort_keys : List-like
        Column key names to order by when input is table-like data.
    memory_pool : MemoryPool, optional
        If not passed, will allocate memory from the default memory pool.

    Returns
    -------
    result : Array of indices
        Indices of the bottom-k ordered elements

    Examples
    --------
    >>> import pyarrow as pa
    >>> import pyarrow.compute as pc
    >>> arr = pa.array(["a", "b", "c", None, "e", "f"])
    >>> pc.bottom_k_unstable(arr, k=3)
    <pyarrow.lib.UInt64Array object at ...>
    [
      0,
      1,
      2
    ]
    N)r   	ascendingc                 S   r   )Nr   rZ   r   rZ   rZ   r[   r     r   z#bottom_k_unstable.<locals>.<lambda>r   r   r   rZ   rZ   r[   bottom_k_unstablei  r   r   system)initializerr   r   c                C   s   t |d}tdg ||| dS )aB  
    Generate numbers in the range [0, 1).

    Generated values are uniformly-distributed, double-precision
    in range [0, 1). Algorithm and seed can be changed via RandomOptions.

    Parameters
    ----------
    n : int
        Number of values to generate, must be greater than or equal to 0
    initializer : int or str
        How to initialize the underlying random generator.
        If an integer is given, it is used as a seed.
        If "system" is given, the random generator is initialized with
        a system-specific source of (hopefully true) randomness.
        Other values are invalid.
    options : pyarrow.compute.RandomOptions, optional
        Alternative way of passing options.
    memory_pool : pyarrow.MemoryPool, optional
        If not passed, will allocate memory from the default memory pool.
    )r   random)length)r*   rG   )nr   r   r   rZ   rZ   r[   r     s   
r   c                  G   sl   t | }|dkr1t| d ttfrt| d S t| d tr&t| d S tdt	| d  t| S )a  Reference a column of the dataset.

    Stores only the field's name. Type and other information is known only when
    the expression is bound to a dataset having an explicit scheme.

    Nested references are allowed by passing multiple names or a tuple of
    names. For example ``('foo', 'bar')`` references the field named "bar"
    inside the field named "foo".

    Parameters
    ----------
    *name_or_index : string, multiple strings, tuple or int
        The name or index of the (possibly nested) field the expression
        references to.

    Returns
    -------
    field_expr : Expression
        Reference to the given field

    Examples
    --------
    >>> import pyarrow.compute as pc
    >>> pc.field("a")
    <pyarrow.compute.Expression a>
    >>> pc.field(1)
    <pyarrow.compute.Expression FieldPath(1)>
    >>> pc.field(("a", "b"))
    <pyarrow.compute.Expression FieldRef.Nested(FieldRef.Name(a) ...
    >>> pc.field("a", "b")
    <pyarrow.compute.Expression FieldRef.Nested(FieldRef.Name(a) ...
    rh   r   zCfield reference should be str, multiple str, tuple or integer, got )
r   r   strintrQ   _fieldtuple_nested_fieldr   ry   )name_or_indexr   rZ   rZ   r[   field  s   !

r  c                 C   s
   t | S )a  Expression representing a scalar value.

    Creates an Expression object representing a scalar value that can be used
    in compute expressions and predicates.

    Parameters
    ----------
    value : bool, int, float or string
        Python value of the scalar. This function accepts any value that can be
        converted to a ``pyarrow.Scalar`` using ``pa.scalar()``.

    Notes
    -----
    This function differs from ``pyarrow.scalar()`` in the following way:

    * ``pyarrow.scalar()`` creates a ``pyarrow.Scalar`` object that represents
      a single value in Arrow's memory model.
    * ``pyarrow.compute.scalar()`` creates an ``Expression`` object representing
      a scalar value that can be used in compute expressions, predicates, and
      dataset filtering operations.

    Returns
    -------
    scalar_expr : Expression
        An Expression representing the scalar value
    )rQ   _scalarr   rZ   rZ   r[   r     s   
r   )NNNN)NNrV   )rpyarrow._computer   r   r   r   r   r   r   r	   r
   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r    r!   r"   r#   r$   r%   r&   r'   r(   r)   r*   r+   r,   r-   r.   r/   r0   r1   r2   r3   r4   r5   r6   r7   r8   r9   r:   r;   r<   r=   r>   r?   r@   rA   rB   rC   rD   rE   rF   rG   rH   rI   rJ   rK   rL   rM   rN   rO   rP   rQ   collectionsrR   r~   textwraprS   r{   pyarrowr   rT   pyarrow.vendoredrU   r\   r]   rd   r   r   r   r   r   r   r   r   
utf8_zfillr   r   r   r   r   r   r   r   r  r   rZ   rZ   rZ   r[   <module>   s<   J W
S
E2,;//1