o
    <i2                    @   s  d dl Z d dlmZmZ d dlmZ d dlmZ d dlm	Z	m
Z
mZmZ d dlmZ d dlmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZm Z m!Z!m"Z"m#Z#m$Z$m%Z%m&Z&m'Z'm(Z(m)Z) d dl*m+Z+m,Z,m-Z- d d	l.m/Z/m0Z0 d d
l1m2Z2 d dl3m4Z4m5Z5m6Z6m7Z7m8Z8 d dl9m:Z:m;Z;m<Z<m=Z=m>Z>m?Z?m@Z@mAZA d dlBmCZCmDZD edZEG dd de
eE eZFededeFeE deEfddZGeGHededeFeE deEfddZIeGHededeFeE deEfddZIeGHe'de'deFeE deEfddZIeGHededeFeE deEfddZIeGHe)de)deFeE deEfddZIeGHe"de"deFeE deEfddZIeGHe(de(deFeE deEfddZIde2ded eJdefd!d"ZKG d#d$ d$eFe ZLG d%d& d&eFeE eZMed'e"deFeE deEfd(d)ZNeNHed'edeMeE deEfd*dZIeNHed'edeMeE deEfd+dZIeNHed'edeMeE deEfd,dZIeNHed'edeMeE deEfd-dZIeNHed'edeMeE deEfd.dZIeNHe d'e deMeE deEfd/dZIeNHed'edeMeE deEfd0dZIeNHed'edeMeE deEfd1dZIeNHed'edeMeE deEfd2dZIeNHed'edeMeE deEfd3dZIeNHed'edeMeE deEfd4dZIeNHed'edeMeE deEfd5dZIeNHe$d'e$deMeE deEfd6dZIeNHe!d'e!deMeE deEfd7dZId'edefd8d9ZOG d:d; d;eFe ZPde2d<ed eJdee8geJf fd=d>ZQG d?d@ d@eMeJ ZRdAZSdAZTdBZUdBZVdCZWdDe<dEeXde	fdFdGZYG dHdI dIeMeJ ZZ	Ad|dJe0de2dKed eJdee,geJf f
dLdMZ[G dNdO dOeFe eZ\G dPdQ dQe\Z]	Ad|de2dRe0d eJdeegef fdSdTZ^G dUdV dVeFe Z_dAe4fd'edWe2d eJdXe`eae	f def
dYdZZbG d[d\ d\eFecea  Zdd'edecea fd]d^ZeG d_d` d`eFefedaf  Zgd'edefedaf fdbdcZhG ddde deeMeiefejeje	f   Zk	Bd}dfefedaf dgeJdeieiefejeje	f   fdhdiZlG djdk dkeMeJ eZmG dldm dmemZn	Ad|de2dRe0d eJdeegef fdndoZoG dpdq dqe\ZpG drds dsemZqG dtdu dueMe eZrG dvdw dwerZsG dxdy dyesZtdRe0d'ed eJde2desf
dzd{ZudS )~    N)ABCabstractmethod)Callable)singledispatch)AnyGenericSupportsFloatTypeVar)
from_bytes)AlwaysFalse
AlwaysTrueAndBooleanExpressionBoundEqualToBoundGreaterThanBoundGreaterThanOrEqualBoundIn
BoundIsNaNBoundIsNullBoundLessThanBoundLessThanOrEqualBoundLiteralPredicateBoundNotEqualTo
BoundNotInBoundNotNaNBoundNotNullBoundNotStartsWithBoundPredicateBoundSetPredicateBoundStartsWith	BoundTermBoundUnaryPredicateNotOrUnboundPredicate)DataFileManifestFilePartitionFieldSummary)UNPARTITIONED_PARTITION_SPECPartitionSpec)Schema)
EMPTY_DICTLLiteralValueRecordStructProtocol)
DoubleType	FloatTypeIcebergTypeNestedFieldPrimitiveType
StructTypeTimestampTypeTimestamptzType)micros_to_timestampmicros_to_timestamptzTc                   @   s   e Zd ZedefddZedefddZededefddZed	ed
edefddZed	ed
edefddZ	ede
defddZededefddZdS )BooleanExpressionVisitorreturnc                 C      dS )zVisit method for an AlwaysTrue boolean expression.

        Note: This visit method has no arguments since AlwaysTrue instances have no context.
        N selfr>   r>   [/home/ubuntu/veenaModal/venv/lib/python3.10/site-packages/pyiceberg/expressions/visitors.py
visit_trueM       z#BooleanExpressionVisitor.visit_truec                 C   r=   )zVisit method for an AlwaysFalse boolean expression.

        Note: This visit method has no arguments since AlwaysFalse instances have no context.
        Nr>   r?   r>   r>   rA   visit_falseT   rC   z$BooleanExpressionVisitor.visit_falsechild_resultc                 C   r=   )zVisit method for a Not boolean expression.

        Args:
            child_result (T): The result of visiting the child of the Not boolean expression.
        Nr>   r@   rE   r>   r>   rA   	visit_not[   rC   z"BooleanExpressionVisitor.visit_notleft_resultright_resultc                 C   r=   )zVisit method for an And boolean expression.

        Args:
            left_result (T): The result of visiting the left side of the expression.
            right_result (T): The result of visiting the right side of the expression.
        Nr>   r@   rH   rI   r>   r>   rA   	visit_andc   rC   z"BooleanExpressionVisitor.visit_andc                 C   r=   )zVisit method for an Or boolean expression.

        Args:
            left_result (T): The result of visiting the left side of the expression.
            right_result (T): The result of visiting the right side of the expression.
        Nr>   rJ   r>   r>   rA   visit_orl   rC   z!BooleanExpressionVisitor.visit_or	predicatec                 C   r=   )zVisit method for an unbound predicate in an expression tree.

        Args:
            predicate (UnboundPredicate): An instance of an UnboundPredicate.
        Nr>   r@   rM   r>   r>   rA   visit_unbound_predicateu   rC   z0BooleanExpressionVisitor.visit_unbound_predicatec                 C   r=   )zVisit method for a bound predicate in an expression tree.

        Args:
            predicate (BoundPredicate): An instance of a BoundPredicate.
        Nr>   rN   r>   r>   rA   visit_bound_predicate}   rC   z.BooleanExpressionVisitor.visit_bound_predicateN)__name__
__module____qualname__r   r:   rB   rD   rG   rK   rL   r$   rO   r   rP   r>   r>   r>   rA   r;   L   s    r;   objvisitorr<   c                 C      t d|  )a  Apply a boolean expression visitor to any point within an expression.

    The function traverses the expression in post-order fashion.

    Args:
        obj (BooleanExpression): An instance of a BooleanExpression.
        visitor (BooleanExpressionVisitor[T]): An instance of an implementation of the generic
            BooleanExpressionVisitor base class.

    Raises:
        NotImplementedError: If attempting to visit an unsupported expression.
    z%Cannot visit unsupported expression: )NotImplementedErrorrT   rU   r>   r>   rA   visit   s   rY   _c                 C      |  S )zPVisit an AlwaysTrue boolean expression with a concrete BooleanExpressionVisitor.)rB   rZ   rU   r>   r>   rA   rZ         c                 C   r[   )zQVisit an AlwaysFalse boolean expression with a concrete BooleanExpressionVisitor.)rD   r\   r>   r>   rA   rZ      r]   c                 C   s   t | j|d}|j|dS )zHVisit a Not boolean expression with a concrete BooleanExpressionVisitor.rU   )rE   )rY   childrG   )rT   rU   rE   r>   r>   rA   rZ      s   c                 C   *   t | j|d}t | j|d}|j||dS )zIVisit an And boolean expression with a concrete BooleanExpressionVisitor.r^   rH   rI   )rY   leftrightrK   rT   rU   rH   rI   r>   r>   rA   rZ         c                 C      |j | dS )zMVisit an unbound boolean expression with a concrete BooleanExpressionVisitor.rM   )rO   rX   r>   r>   rA   rZ         c                 C   rf   )zJVisit a bound boolean expression with a concrete BooleanExpressionVisitor.rg   rP   rX   r>   r>   rA   rZ      rh   c                 C   r`   )zHVisit an Or boolean expression with a concrete BooleanExpressionVisitor.r^   ra   )rY   rb   rc   rL   rd   r>   r>   rA   rZ      re   schema
expressioncase_sensitivec                 C   s   t |t| |S )a  Travers over an expression to bind the predicates to the schema.

    Args:
      schema (Schema): A schema to use when binding the expression.
      expression (BooleanExpression): An expression containing UnboundPredicates that can be bound.
      case_sensitive (bool): Whether to consider case when binding a reference to a field in a schema, defaults to True.

    Raises:
        TypeError: In the case a predicate is already bound.
    )rY   BindVisitor)rj   rk   rl   r>   r>   rA   bind   s   rn   c                   @   s   e Zd ZU dZeed< eed< dededdfddZdefdd	Z	defd
dZ
dedefddZdededefddZdededefddZdedefddZdedefddZdS )rm   a  Rewrites a boolean expression by replacing unbound references with references to fields in a struct schema.

    Args:
      schema (Schema): A schema to use when binding the expression.
      case_sensitive (bool): Whether to consider case when binding a reference to a field in a schema, defaults to True.

    Raises:
        TypeError: In the case a predicate is already bound.
    rj   rl   r<   Nc                 C   s   || _ || _d S N)rj   rl   )r@   rj   rl   r>   r>   rA   __init__   s   
zBindVisitor.__init__c                 C      t  S ro   r   r?   r>   r>   rA   rB         zBindVisitor.visit_truec                 C   rq   ro   r   r?   r>   r>   rA   rD      rs   zBindVisitor.visit_falserE   c                 C   
   t |dS N)r_   r"   rF   r>   r>   rA   rG         
zBindVisitor.visit_notrH   rI   c                 C      t ||dS N)rb   rc   r   rJ   r>   r>   rA   rK         zBindVisitor.visit_andc                 C   ry   rz   r#   rJ   r>   r>   rA   rL      r|   zBindVisitor.visit_orrM   c                 C      |j | j| jdS )Nrl   )rn   rj   rl   rN   r>   r>   rA   rO         z#BindVisitor.visit_unbound_predicatec                 C      t d| )NzFound already bound predicate: 	TypeErrorrN   r>   r>   rA   rP         z!BindVisitor.visit_bound_predicate)rQ   rR   rS   __doc__r*   __annotations__boolrp   r   rB   rD   rG   rK   rL   r$   rO   r   rP   r>   r>   r>   rA   rm      s   
 
rm   c                   @   s  e Zd Zededee defddZededee defddZ	ededefdd	Z
ededefd
dZededefddZededefddZedededefddZedededefddZedededefddZedededefddZedededefddZedededefddZedefddZedefdd Zed!edefd"d#Zed$ed%edefd&d'Zed$ed%edefd(d)Zedededefd*d+Zedededefd,d-Zd.edefd/d0Zd.edefd1d2Zd3S )4BoundBooleanExpressionVisitortermliteralsr<   c                 C   r=   )zVisit a bound In predicate.Nr>   r@   r   r   r>   r>   rA   visit_in   rC   z&BoundBooleanExpressionVisitor.visit_inc                 C   r=   )zVisit a bound NotIn predicate.Nr>   r   r>   r>   rA   visit_not_in  rC   z*BoundBooleanExpressionVisitor.visit_not_inc                 C   r=   )zVisit a bound IsNan predicate.Nr>   r@   r   r>   r>   rA   visit_is_nan  rC   z*BoundBooleanExpressionVisitor.visit_is_nanc                 C   r=   )zVisit a bound NotNan predicate.Nr>   r   r>   r>   rA   visit_not_nan	  rC   z+BoundBooleanExpressionVisitor.visit_not_nanc                 C   r=   )zVisit a bound IsNull predicate.Nr>   r   r>   r>   rA   visit_is_null  rC   z+BoundBooleanExpressionVisitor.visit_is_nullc                 C   r=   )z Visit a bound NotNull predicate.Nr>   r   r>   r>   rA   visit_not_null  rC   z,BoundBooleanExpressionVisitor.visit_not_nullliteralc                 C   r=   )zVisit a bound Equal predicate.Nr>   r@   r   r   r>   r>   rA   visit_equal  rC   z)BoundBooleanExpressionVisitor.visit_equalc                 C   r=   )z!Visit a bound NotEqual predicate.Nr>   r   r>   r>   rA   visit_not_equal  rC   z-BoundBooleanExpressionVisitor.visit_not_equalc                 C   r=   )+Visit a bound GreaterThanOrEqual predicate.Nr>   r   r>   r>   rA   visit_greater_than_or_equal  rC   z9BoundBooleanExpressionVisitor.visit_greater_than_or_equalc                 C   r=   )z$Visit a bound GreaterThan predicate.Nr>   r   r>   r>   rA   visit_greater_than!  rC   z0BoundBooleanExpressionVisitor.visit_greater_thanc                 C   r=   )z!Visit a bound LessThan predicate.Nr>   r   r>   r>   rA   visit_less_than%  rC   z-BoundBooleanExpressionVisitor.visit_less_thanc                 C   r=   )z(Visit a bound LessThanOrEqual predicate.Nr>   r   r>   r>   rA   visit_less_than_or_equal)  rC   z6BoundBooleanExpressionVisitor.visit_less_than_or_equalc                 C   r=   )zVisit a bound True predicate.Nr>   r?   r>   r>   rA   rB   -  rC   z(BoundBooleanExpressionVisitor.visit_truec                 C   r=   )zVisit a bound False predicate.Nr>   r?   r>   r>   rA   rD   1  rC   z)BoundBooleanExpressionVisitor.visit_falserE   c                 C   r=   )zVisit a bound Not predicate.Nr>   rF   r>   r>   rA   rG   5  rC   z'BoundBooleanExpressionVisitor.visit_notrH   rI   c                 C   r=   )zVisit a bound And predicate.Nr>   rJ   r>   r>   rA   rK   9  rC   z'BoundBooleanExpressionVisitor.visit_andc                 C   r=   )zVisit a bound Or predicate.Nr>   rJ   r>   r>   rA   rL   =  rC   z&BoundBooleanExpressionVisitor.visit_orc                 C   r=   )z!Visit bound StartsWith predicate.Nr>   r   r>   r>   rA   visit_starts_withA  rC   z/BoundBooleanExpressionVisitor.visit_starts_withc                 C   r=   )z$Visit bound NotStartsWith predicate.Nr>   r   r>   r>   rA   visit_not_starts_withE  rC   z3BoundBooleanExpressionVisitor.visit_not_starts_withrM   c                 C   r   )zVisit an unbound predicate.

        Args:
            predicate (UnboundPredicate): An unbound predicate.
        Raises:
            TypeError: This always raises since an unbound predicate is not expected in a bound boolean expression.
        zNot a bound predicate: r   rN   r>   r>   rA   rO   I  s   z5BoundBooleanExpressionVisitor.visit_unbound_predicatec                 C   s
   t || S )zkVisit a bound predicate.

        Args:
            predicate (BoundPredicate): A bound predicate.
        ri   rN   r>   r>   rA   rP   S  s   
z3BoundBooleanExpressionVisitor.visit_bound_predicateN) rQ   rR   rS   r   r    setr,   r:   r   r   r   r   r   r   r-   r   r   r   r   r   r   rB   rD   rG   rK   rL   r   r   r$   rO   r   rP   r>   r>   r>   rA   r      sR    
r   exprc                 C   rV   )NzUnknown predicate: r   )r   rZ   r>   r>   rA   rP   \     rP   c                 C   r~   N)r   r   )r   r   	value_setr   rU   r>   r>   rA   rZ   a     c                 C   r~   r   )r   r   r   r   r>   r>   rA   rZ   f  r   c                 C      |j | jdS N)r   )r   r   r   r>   r>   rA   rZ   k  r   c                 C   r   r   )r   r   r   r>   r>   rA   rZ   p  r   c                 C   r   r   )r   r   r   r>   r>   rA   rZ   u  r   c                 C   r   r   )r   r   r   r>   r>   rA   rZ   z  r   c                 C   r~   Nr   r   )r   r   r   r   r>   r>   rA   rZ     r   c                 C   r~   r   )r   r   r   r   r>   r>   rA   rZ     r   c                 C   r~   )r   r   )r   r   r   r   r>   r>   rA   rZ        c                 C   r~   r   )r   r   r   r   r>   r>   rA   rZ     r   c                 C   r~   r   )r   r   r   r   r>   r>   rA   rZ     r   c                 C   r~   r   )r   r   r   r   r>   r>   rA   rZ     r   c                 C   r~   r   )r   r   r   r   r>   r>   rA   rZ     r   c                 C   r~   r   )r   r   r   r   r>   r>   rA   rZ     r   c                 C      t | t S ro   )rY   _RewriteNotVisitorr   r>   r>   rA   rewrite_not  r|   r   c                   @   s   e Zd ZdZdefddZdefddZdedefdd	Zd
ededefddZd
ededefddZ	de
defddZdedefddZdS )r   zInverts the negations.r<   c                 C   rq   ro   rr   r?   r>   r>   rA   rB     rs   z_RewriteNotVisitor.visit_truec                 C   rq   ro   rt   r?   r>   r>   rA   rD     rs   z_RewriteNotVisitor.visit_falserE   c                 C   s   | S ro   r>   rF   r>   r>   rA   rG     rs   z_RewriteNotVisitor.visit_notrH   rI   c                 C   ry   rz   r{   rJ   r>   r>   rA   rK     r|   z_RewriteNotVisitor.visit_andc                 C   ry   rz   r}   rJ   r>   r>   rA   rL     r|   z_RewriteNotVisitor.visit_orrM   c                 C      |S ro   r>   rN   r>   r>   rA   rO        z*_RewriteNotVisitor.visit_unbound_predicatec                 C   r   ro   r>   rN   r>   r>   rA   rP     r   z(_RewriteNotVisitor.visit_bound_predicateN)rQ   rR   rS   r   r   rB   rD   rG   rK   rL   r$   rO   r   rP   r>   r>   r>   rA   r     s    r   unboundc                 C      t | ||jS ro   )_ExpressionEvaluatoreval)rj   r   rl   r>   r>   rA   expression_evaluator  r   r   c                   @   s  e Zd ZU eed< eed< dededefddZdedefd	d
Z	de
dee defddZde
dee defddZde
defddZde
defddZde
defddZde
defddZde
dedefddZde
dedefddZde
dedefddZde
dedefd d!Zde
dedefd"d#Zde
dedefd$d%Zde
dedefd&d'Zde
dedefd(d)Zdefd*d+Zdefd,d-Zd.edefd/d0Zd1ed2edefd3d4Zd1ed2edefd5d6Z d7S )8r   boundstructrj   r   rl   c                 C   s   t |||| _d S ro   )rn   r   )r@   rj   r   rl   r>   r>   rA   rp     r   z_ExpressionEvaluator.__init__r<   c                 C   s   || _ t| j| S ro   )r   rY   r   )r@   r   r>   r>   rA   r     s   z_ExpressionEvaluator.evalr   r   c                 C   s   | | j|v S ro   r   r   r   r>   r>   rA   r        z_ExpressionEvaluator.visit_inc                 C   s   | | j|vS ro   r   r   r>   r>   rA   r     r   z!_ExpressionEvaluator.visit_not_inc                 C   s   | | j}||kS ro   r   r@   r   valr>   r>   rA   r        z!_ExpressionEvaluator.visit_is_nanc                 C   s   | | j}||kS ro   r   r   r>   r>   rA   r     r   z"_ExpressionEvaluator.visit_not_nanc                 C   s   | | jd u S ro   r   r   r>   r>   rA   r     r   z"_ExpressionEvaluator.visit_is_nullc                 C   s   | | jd uS ro   r   r   r>   r>   rA   r     r   z#_ExpressionEvaluator.visit_not_nullr   c                 C   s   | | j|jkS ro   r   r   valuer   r>   r>   rA   r     r   z _ExpressionEvaluator.visit_equalc                 C   s   | | j|jkS ro   r   r   r>   r>   rA   r     r   z$_ExpressionEvaluator.visit_not_equalc                 C   s   | | j}|d uo||jkS ro   r   r@   r   r   r   r>   r>   rA   r        z0_ExpressionEvaluator.visit_greater_than_or_equalc                 C   s   | | j}|d uo||jkS ro   r   r   r>   r>   rA   r     r   z'_ExpressionEvaluator.visit_greater_thanc                 C   s   | | j}|d uo||jk S ro   r   r   r>   r>   rA   r     r   z$_ExpressionEvaluator.visit_less_thanc                 C   s   | | j}|d uo||jkS ro   r   r   r>   r>   rA   r     r   z-_ExpressionEvaluator.visit_less_than_or_equalc                 C   s(   | | j}|d uot|t|jS ro   )r   r   str
startswithr   r@   r   r   eval_resr>   r>   rA   r     s   z&_ExpressionEvaluator.visit_starts_withc                 C   s   |  || S ro   )r   r   r>   r>   rA   r     r   z*_ExpressionEvaluator.visit_not_starts_withc                 C   r=   )NTr>   r?   r>   r>   rA   rB     r   z_ExpressionEvaluator.visit_truec                 C   r=   NFr>   r?   r>   r>   rA   rD     r   z _ExpressionEvaluator.visit_falserE   c                 C      | S ro   r>   rF   r>   r>   rA   rG     rs   z_ExpressionEvaluator.visit_notrH   rI   c                 C      |o|S ro   r>   rJ   r>   r>   rA   rK        z_ExpressionEvaluator.visit_andc                 C      |p|S ro   r>   rJ   r>   r>   rA   rL     r   z_ExpressionEvaluator.visit_orN)!rQ   rR   rS   r   r   r/   r*   r   rp   r   r    r   r,   r   r   r   r   r   r   r-   r   r   r   r   r   r   r   r   rB   rD   rG   rK   rL   r>   r>   r>   rA   r     s0   
 r   TF   
field_typer   c                 C   s&   t | tstdt|  t| |S )NzExpected a PrimitiveType, got: )
isinstancer4   
ValueErrortyper
   )r   r   r>   r>   rA   _from_byte_buffer  s   

r   c                   @   s  e Zd ZU ee ed< eed< dedededdfddZ	d	e
defd
dZdedee defddZdedee defddZdedefddZdedefddZdedefddZdedefddZdededefddZdededefddZdededefdd Zdededefd!d"Zdededefd#d$Zdededefd%d&Zdededefd'd(Zdededefd)d*Zdefd+d,Zdefd-d.Zd/edefd0d1Z d2ed3edefd4d5Z!d2ed3edefd6d7Z"dS )8_ManifestEvalVisitorpartition_fieldspartition_filterpartition_struct_schemarl   r<   Nc                 C   s   t |t||| _d S ro   )rn   r   r   )r@   r   r   rl   r>   r>   rA   rp   &  s   z_ManifestEvalVisitor.__init__manifestc                 C   s    |j  }r|| _t| j| S tS ro   )
partitionsr   rY   r   ROWS_MIGHT_MATCH)r@   r   r   r>   r>   rA   r   )  s   
z_ManifestEvalVisitor.evalr   r   c                    s   |  jj}| j| }|jd u rtS t|tkrtS t	|  j
j|j t fdd|D r1tS |jd urMt	|  j
j|jtfdd|D rMtS tS )Nc                 3   s    | ]} |kV  qd S ro   r>   .0r   lowerr>   rA   	<genexpr>=      z0_ManifestEvalVisitor.visit_in.<locals>.<genexpr>c                 3   s    | ]} |k V  qd S ro   r>   r   upperr>   rA   r   B  r   )refaccessorpositionr   lower_boundROWS_CANNOT_MATCHlenIN_PREDICATE_LIMITr   r   fieldr   allupper_bound)r@   r   r   posr   r>   r   r   rA   r   1  s   


z_ManifestEvalVisitor.visit_inc                 C      t S ro   r   r   r>   r>   rA   r   G     z!_ManifestEvalVisitor.visit_not_inc                 C   s(   |  jj}| j| }|jdu rtS tS r   )r   r   r   r   contains_nanr   r   r@   r   r   r   r>   r>   rA   r   L  s
   

z!_ManifestEvalVisitor.visit_is_nanc                 C   s<   |  jj}| j| }|jdu r|jdu r|jd u rtS tS NTF)	r   r   r   r   r   contains_nullr   r   r   r   r>   r>   rA   r   U  s
   
z"_ManifestEvalVisitor.visit_not_nanc                 C   s$   |  jj}| j| jdu rtS tS r   )r   r   r   r   r   r   r   )r@   r   r   r>   r>   rA   r   ^     z"_ManifestEvalVisitor.visit_is_nullc                 C   sb   |  jj}| j| jdu o| j| jd u }|r+t|  jjt	t
fr+| j| jdu }|r/tS tS r   )r   r   r   r   r   r   r   r   r   r0   r1   r   r   r   )r@   r   r   all_nullr>   r>   rA   r   f  s    z#_ManifestEvalVisitor.visit_not_nullr   c                 C   sv   |  jj}| j| }|jd u s|jd u rtS t|  jj	|j}||j
kr(tS t|  jj	|j}|j
|kr9tS tS ro   )r   r   r   r   r   r   r   r   r   r   r   r   )r@   r   r   r   r   r   r   r>   r>   rA   r   w  s   


z _ManifestEvalVisitor.visit_equalc                 C   r   ro   r   r   r>   r>   rA   r     r   z$_ManifestEvalVisitor.visit_not_equalc                 C   sJ   |  jj}| j| }|jd u rtS t|  jj|j}|j	|kr#tS t
S ro   r   r   r   r   r   r   r   r   r   r   r   r@   r   r   r   r   r   r>   r>   rA   r        


z0_ManifestEvalVisitor.visit_greater_than_or_equalc                 C   sJ   |  jj}| j| }|jd u rtS t|  jj|j}|j	|kr#tS t
S ro   r   r   r>   r>   rA   r     r   z'_ManifestEvalVisitor.visit_greater_thanc                 C   sJ   |  jj}| j| }|jd u rtS t|  jj|j}|j	|kr#tS t
S ro   r   r   r   r   r   r   r   r   r   r   r   r@   r   r   r   r   r   r>   r>   rA   r     r   z$_ManifestEvalVisitor.visit_less_thanc                 C   sJ   |  jj}| j| }|jd u rtS t|  jj|j}|j	|k r#tS t
S ro   r   r   r>   r>   rA   r     r   z-_ManifestEvalVisitor.visit_less_than_or_equalc           	      C   s   |  jj}| j| }t|j}t|}|jd u rtS t	|  j
j|j}|d ur3|d | |kr3tS |jd u r:tS t	|  j
j|j}|d urR|d | |k rRtS tS ro   )r   r   r   r   r   r   r   r   r   r   r   r   r   r   	r@   r   r   r   r   prefix
len_prefixr   r   r>   r>   rA   r     s   



z&_ManifestEvalVisitor.visit_starts_withc           	      C   s   |  jj}| j| }t|j}t|}|js!|jd u s!|j	d u r#t
S t|  jj|j}t|  jj|j	}|d ura|d urat||k rGt
S |d | |krat||k rWt
S |d | |kratS t
S ro   )r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r>   r>   rA   r     s"   

z*_ManifestEvalVisitor.visit_not_starts_withc                 C   r   ro   r   r?   r>   r>   rA   rB     r   z_ManifestEvalVisitor.visit_truec                 C   r   ro   r   r?   r>   r>   rA   rD      r   z _ManifestEvalVisitor.visit_falserE   c                 C   r   ro   r>   rF   r>   r>   rA   rG     rs   z_ManifestEvalVisitor.visit_notrH   rI   c                 C   r   ro   r>   rJ   r>   r>   rA   rK     r   z_ManifestEvalVisitor.visit_andc                 C   r   ro   r>   rJ   r>   r>   rA   rL   	  r   z_ManifestEvalVisitor.visit_or)#rQ   rR   rS   listr'   r   r   r*   r   rp   r&   r   r    r   r,   r   r   r   r   r   r   r-   r   r   r   r   r   r   r   r   rB   rD   rG   rK   rL   r>   r>   r>   rA   r   "  s0   
 		r   partition_specr   c                 C   s&   |  |}t|j }t|||}|jS ro   )partition_typer*   fieldsr   r   )r   rj   r   rl   r   partition_schema	evaluatorr>   r>   rA   manifest_evaluator  s   

r  c                   @   s   e Zd ZU eed< eed< eed< dededefddZdedefdd	Z	defd
dZ
defddZdedefddZdededefddZdededefddZdedefddZdS )ProjectionEvaluatorrj   specrl   c                 C      || _ || _|| _d S ro   rj   r  rl   )r@   rj   r  rl   r>   r>   rA   rp        
zProjectionEvaluator.__init__r   r<   c                 C   s   t t| jt|| j| S ro   )rY   rn   rj   r   rl   )r@   r   r>   r>   rA   project   s   zProjectionEvaluator.projectc                 C   rq   ro   rr   r?   r>   r>   rA   rB   (  rs   zProjectionEvaluator.visit_truec                 C   rq   ro   rt   r?   r>   r>   rA   rD   +  rs   zProjectionEvaluator.visit_falserE   c                 C   r   )Nz4Cannot project not expression, should be rewritten: r   rF   r>   r>   rA   rG   .  r   zProjectionEvaluator.visit_notrH   rI   c                 C   
   t ||S ro   r{   rJ   r>   r>   rA   rK   1  rx   zProjectionEvaluator.visit_andc                 C   r
  ro   r}   rJ   r>   r>   rA   rL   4  rx   zProjectionEvaluator.visit_orrM   c                 C   r   )Nz"Cannot project unbound predicate: r	  rN   r>   r>   rA   rO   7  r   z+ProjectionEvaluator.visit_unbound_predicateN)rQ   rR   rS   r*   r   r)   r   rp   r   r  rB   rD   rG   rK   rL   r$   rO   r>   r>   r>   rA   r    s   
 r  c                   @      e Zd ZdedefddZdS )InclusiveProjectionrM   r<   c                 C   N   | j |j jj}t }|D ]}|jj|j	|d}|d ur$t
||}q|S N)namepred)r  fields_by_source_idr   r   r   field_idr   	transformr  r  r   )r@   rM   partsresultpartincl_projectionr>   r>   rA   rP   <  s   
z)InclusiveProjection.visit_bound_predicateNrQ   rR   rS   r   r   rP   r>   r>   r>   rA   r  ;      r  r  c                 C   r   ro   )r  r  r  r>   r>   rA   inclusive_projectionO     r  c                	   @   s   e Zd ZU dZeed< eed< eee	f ed< e
fdededeee	f ddfddZdefd	d
ZdefddZdedefddZdededefddZdededefddZdedefddZdedefddZdS )_ColumnNameTranslatora  Converts the column names with the ones in the actual file.

    Args:
      file_schema (Schema): The schema of the file.
      case_sensitive (bool): Whether to consider case when binding a reference to a field in a schema, defaults to True.
      projected_field_values (Dict[int, Any]): Values for projected fields not present in the data file.

    Raises:
        TypeError: In the case of an UnboundPredicate.
        ValueError: When a column name cannot be found.
    file_schemarl   projected_field_valuesr<   Nc                 C   r  ro   )r  rl   r  )r@   r  rl   r  r>   r>   rA   rp   f  r  z_ColumnNameTranslator.__init__c                 C   rq   ro   rr   r?   r>   r>   rA   rB   k  rs   z _ColumnNameTranslator.visit_truec                 C   rq   ro   rt   r?   r>   r>   rA   rD   n  rs   z!_ColumnNameTranslator.visit_falserE   c                 C   ru   rv   rw   rF   r>   r>   rA   rG   q  rx   z_ColumnNameTranslator.visit_notrH   rI   c                 C   ry   rz   r{   rJ   r>   r>   rA   rK   t  r|   z_ColumnNameTranslator.visit_andc                 C   ry   rz   r}   rJ   r>   r>   rA   rL   w  r|   z_ColumnNameTranslator.visit_orrM   c                 C   s   t d|j )NzExpected Bound Predicate, got: )r   r   rN   r>   r>   rA   rO   z  r   z-_ColumnNameTranslator.visit_unbound_predicatec                 C   s  |j  j}|j}| j|}|d u rdt|tr||j	}n#t|t
r-||j	|j}nt|tr;||j	|j}ntd| |j| jv rM| j| n|j}tt||| jdt|rat S t S t|trn||S t|t
rz|||jS t|tr|||jS td| )NzUnsupported predicate: r   )r   r   r   r  r  find_column_namer   r!   
as_unboundr  r   r   r   r   r   r  initial_defaultr   r*   rl   r.   r   r   )r@   rM   r   r  file_column_namer  field_valuer>   r>   rA   rP   }  s2   






z+_ColumnNameTranslator.visit_bound_predicate)rQ   rR   rS   r   r*   r   r   dictintr   r+   rp   r   rB   rD   rG   rK   rL   r$   rO   r   rP   r>   r>   r>   rA   r  U  s   
 &r  r  r  c                 C   s   t | t|||S ro   )rY   r  )r   r  rl   r  r>   r>   rA   translate_column_names  r   r&  c                   @   s   e Zd ZdZdee fddZdee fddZdee dee fdd	Zd
ee dee dee fddZ	d
ee dee dee fddZ
dedee fddZdedee fddZdS )_ExpressionFieldIDsz5Extracts the field IDs used in the BooleanExpression.r<   c                 C   rq   ro   r   r?   r>   r>   rA   rB     rs   z_ExpressionFieldIDs.visit_truec                 C   rq   ro   r(  r?   r>   r>   rA   rD     rs   z_ExpressionFieldIDs.visit_falserE   c                 C   r   ro   r>   rF   r>   r>   rA   rG     r   z_ExpressionFieldIDs.visit_notrH   rI   c                 C   
   | |S ro   unionrJ   r>   r>   rA   rK     rx   z_ExpressionFieldIDs.visit_andc                 C   r)  ro   r*  rJ   r>   r>   rA   rL     rx   z_ExpressionFieldIDs.visit_orrM   c                 C      t d)NzOnly works on bound recordsr	  rN   r>   r>   rA   rO     r   z+_ExpressionFieldIDs.visit_unbound_predicatec                 C   s   |j  jjhS ro   )r   r   r   r  rN   r>   r>   rA   rP     r   z)_ExpressionFieldIDs.visit_bound_predicateN)rQ   rR   rS   r   r   r%  rB   rD   rG   rK   rL   r$   rO   r   rP   r>   r>   r>   rA   r'    s    ""r'  c                 C   r   ro   )rY   r'  r   r>   r>   rA   extract_field_ids  r|   r-  c                   @   s   e Zd Zdeedf fddZdeedf fddZdeedf deedf fdd	Zd
eedf deedf deedf fddZd
eedf deedf deedf fddZ	de
deedf fddZdedeedf fddZdS )_RewriteToDNFr<   .c                 C      t  fS ro   rr   r?   r>   r>   rA   rB     r   z_RewriteToDNF.visit_truec                 C   r/  ro   rt   r?   r>   r>   rA   rD     r   z_RewriteToDNF.visit_falserE   c                 C   r   )Nz!Not expressions are not allowed: r	  rF   r>   r>   rA   rG     r   z_RewriteToDNF.visit_notrH   rI   c                    s   t  fdd|D S )Nc                 3   s$    | ]} D ]}t ||V  qqd S ro   r{   )r   lererI   r>   rA   r     s   " z*_RewriteToDNF.visit_and.<locals>.<genexpr>)tuplerJ   r>   r2  rA   rK     s   z_RewriteToDNF.visit_andc                 C      || S ro   r>   rJ   r>   r>   rA   rL     r]   z_RewriteToDNF.visit_orrM   c                 C      |fS ro   r>   rN   r>   r>   rA   rO     rs   z%_RewriteToDNF.visit_unbound_predicatec                 C   r5  ro   r>   rN   r>   r>   rA   rP     rs   z#_RewriteToDNF.visit_bound_predicateN)rQ   rR   rS   r3  r   rB   rD   rG   rK   rL   r$   rO   r   rP   r>   r>   r>   rA   r.    s(    "



	



r.  .c                 C   s   t | }t|t S ro   )r   rY   r.  )r   expr_without_notr>   r>   rA   rewrite_to_dnf  s   r7  c                
   @   s&  e Zd ZU eed< d6deddfddZdedeee B deee B fd	d
Z	de
dee deeeeef  fddZde
dee deeeeef  fddZde
deeeeef  fddZde
deeeeef  fddZde
deeeeef  fddZde
deeeeef  fddZde
dedeeeeef  fddZde
dedeeeeef  fddZde
dedeeeeef  fddZde
dedeeeeef  fdd Zde
dedeeeeef  fd!d"Zde
dedeeeeef  fd#d$Zde
dedeeeeef  fd%d&Zde
dedeeeeef  fd'd(Zdeeeeef  fd)d*Zdeeeeef  fd+d,Zd-eeeeef  deeeeef  fd.d/Z d0eeeeef  d1eeeeef  deeeeef  fd2d3Z!d0eeeeef  d1eeeeef  deeeeef  fd4d5Z"dS )7ExpressionToPlainFormatcast_int_to_dateFr<   Nc                 C   s
   || _ d S ro   )r9  )r@   r9  r>   r>   rA   rp     rx   z ExpressionToPlainFormat.__init__iceberg_typer   c                    sR   | j r't|}tttti}||v r'||  t|tr# fdd|D S  |S |S )Nc                    s   h | ]} |qS r>   r>   r   litconversion_functionr>   rA   	<setcomp>  s    z=ExpressionToPlainFormat._cast_if_necessary.<locals>.<setcomp>)r9  r   r6   r8   r7   r9   r   r   )r@   r:  r   iceberg_type_classconversionsr>   r=  rA   _cast_if_necessary  s   
z*ExpressionToPlainFormat._cast_if_necessaryr   r   c                 C   s(   |  j}|  jjd| |j|fgS )Ninr   r   r  rB  r   r@   r   r   r   r>   r>   rA   r      s   
z ExpressionToPlainFormat.visit_inc                 C   s"   |  j}|jd| |j|fgS )Nznot inrD  rE  r>   r>   rA   r     s   
z$ExpressionToPlainFormat.visit_not_inc                 C      |  jjdtdfgS )N==nanr   r   r  floatr   r>   r>   rA   r        z$ExpressionToPlainFormat.visit_is_nanc                 C   rF  )N!=rH  rI  r   r>   r>   rA   r     rK  z%ExpressionToPlainFormat.visit_not_nanc                 C      |  jjdd fgS NrG  r   r   r  r   r>   r>   rA   r        z%ExpressionToPlainFormat.visit_is_nullc                 C   rM  NrL  rO  r   r>   r>   rA   r     rP  z&ExpressionToPlainFormat.visit_not_nullc                 C   &   |  jjd| |  jj|jfgS rN  r   r   r  rB  r   r   r   r>   r>   rA   r        &z#ExpressionToPlainFormat.visit_equalc                 C   rR  rQ  rS  r   r>   r>   rA   r     rT  z'ExpressionToPlainFormat.visit_not_equalc                 C   rR  )Nz>=rS  r   r>   r>   rA   r     rT  z3ExpressionToPlainFormat.visit_greater_than_or_equalc                 C   rR  )N>rS  r   r>   r>   rA   r     rT  z*ExpressionToPlainFormat.visit_greater_thanc                 C   rR  )N<rS  r   r>   r>   rA   r      rT  z'ExpressionToPlainFormat.visit_less_thanc                 C   rR  )Nz<=rS  r   r>   r>   rA   r   #  rT  z0ExpressionToPlainFormat.visit_less_than_or_equalc                 C      g S ro   r>   r   r>   r>   rA   r   &  r   z)ExpressionToPlainFormat.visit_starts_withc                 C   rW  ro   r>   r   r>   r>   rA   r   )  r   z-ExpressionToPlainFormat.visit_not_starts_withc                 C   rW  ro   r>   r?   r>   r>   rA   rB   ,  r   z"ExpressionToPlainFormat.visit_truec                 C   r,  )NzNot supported: AlwaysFalser	  r?   r>   r>   rA   rD   /  r   z#ExpressionToPlainFormat.visit_falserE   c                 C   r   )NNot allowed: r	  rF   r>   r>   rA   rG   2  r   z!ExpressionToPlainFormat.visit_notrH   rI   c                 C   r4  ro   r>   rJ   r>   r>   rA   rK   5  r]   z!ExpressionToPlainFormat.visit_andc                 C   s   t d| d| )NrX  z || r	  rJ   r>   r>   rA   rL   :  s   z ExpressionToPlainFormat.visit_orF)#rQ   rR   rS   r   r   rp   r2   r,   r   rB  r    r   r3  r   r   r   r   r   r   r   r   r-   r   r   r   r   r   r   r   r   rB   rD   rG   rK   rL   r>   r>   r>   rA   r8    sF   
 &((    $$$$$$$$.
r8  expressionscast_int_to_datetimec                    s   t |  fdd| D S )a  Format a Disjunctive Normal Form expression.

    These are the formats that the expression can be fed into:

    - https://arrow.apache.org/docs/python/generated/pyarrow.parquet.read_table.html
    - https://docs.dask.org/en/stable/generated/dask.dataframe.read_parquet.html

    Contrary to normal DNF that may contain Not expressions, but here they should have
    been rewritten. This can be done using ``rewrite_not(...)``.

    Keep in mind that this is only used for page skipping, and still needs to filter
    on a row level.

    Args:
        expressions: Expression in Disjunctive Normal Form.

    Returns:
        Formatter filter compatible with Dask and PyArrow.
    c                    s   g | ]}t | qS r>   )rY   )r   rk   r^   r>   rA   
<listcomp>X  s    z.expression_to_plain_format.<locals>.<listcomp>)r8  )rZ  r[  r>   r^   rA   expression_to_plain_format@  s   r]  c                   @   s   e Zd ZU eeef ed< eeef ed< eeef ed< eeef ed< eeef ed< defddZdefd	d
Z	dedefddZ
dededefddZdededefddZdedefddZdedefddZdedefddZdS )_MetricsEvaluatorvalue_countsnull_counts
nan_countslower_boundsupper_boundsr<   c                 C   r   ro   r   r?   r>   r>   rA   rB   b     z_MetricsEvaluator.visit_truec                 C   r   ro   r   r?   r>   r>   rA   rD   f  rd  z_MetricsEvaluator.visit_falserE   c                 C   r   )NzNOT should be rewritten: r	  rF   r>   r>   rA   rG   j  r   z_MetricsEvaluator.visit_notrH   rI   c                 C   r   ro   r>   rJ   r>   r>   rA   rK   m  r   z_MetricsEvaluator.visit_andc                 C   r   ro   r>   rJ   r>   r>   rA   rL   p  r   z_MetricsEvaluator.visit_orr  c                 C   ,   | j | }r| j| }r||kS dS r   )r_  getr`  )r@   r  value_count
null_countr>   r>   rA   _contains_nulls_onlys      z&_MetricsEvaluator._contains_nulls_onlyc                 C   re  r   ra  rf  r_  r@   r  	nan_countrg  r>   r>   rA   _contains_nans_onlyx  rj  z%_MetricsEvaluator._contains_nans_onlyr   c                 C   s"   zt |W S  ty   Y dS w r   )mathisnanr   )r@   r   r>   r>   rA   _is_nan}  s
   z_MetricsEvaluator._is_nanN)rQ   rR   rS   r$  r%  r   bytesr   rB   rD   rG   rK   rL   ri  rn  r   rq  r>   r>   r>   rA   r^  [  s   
 r^  c                   @   s  e Zd ZU eed< eed< 	d3dededededd	f
d
dZde	defddZ
dedefddZdedefddZdedefddZdedefddZdedefddZdedefddZdededefddZdededefd d!Zdededefd"d#Zdededefd$d%Zdededefd&d'Zdededefd(d)Zded*ee defd+d,Zded*ee defd-d.Zdededefd/d0Zdededefd1d2Zd	S )4_InclusiveMetricsEvaluatorr   r   TFrj   rl   include_empty_filesr<   Nc                 C   &   |  | _|| _t|t||| _d S ro   	as_structr   rt  rn   r   r   r@   rj   r   rl   rt  r>   r>   rA   rp        
z#_InclusiveMetricsEvaluator.__init__filec                 C   sj   | j s
|jdkr
tS |jdk rtS |jpt| _|jpt| _|jp!t| _	|j
p't| _
|jp-t| _t| j| S )zDTest whether the file may contain records that match the expression.r   )rt  record_countr   r   r_  r+   null_value_countsr`  nan_value_countsra  rb  rc  rY   r   r@   rz  r>   r>   rA   r     s   
z_InclusiveMetricsEvaluator.evalr  c                 C   s$   | j d u p|| j v o| j |d uS ro   r`  rf  )r@   r  r>   r>   rA   _may_contain_null  s   $z,_InclusiveMetricsEvaluator._may_contain_nullc                 C   re  r   rk  rl  r>   r>   rA   rn    rj  z._InclusiveMetricsEvaluator._contains_nans_onlyr   c                 C   s$   |  jj}| j|dkrtS tS Nr   )r   r   r  r`  rf  r   r   r@   r   r  r>   r>   rA   r     r   z(_InclusiveMetricsEvaluator.visit_is_nullc                 C      |  jj}| |rtS tS ro   )r   r   r  ri  r   r   r  r>   r>   rA   r        
z)_InclusiveMetricsEvaluator.visit_not_nullc                 C   s2   |  jj}| j|dkrtS | |rtS tS r  )r   r   r  ra  rf  r   ri  r   r  r>   r>   rA   r     s   
z'_InclusiveMetricsEvaluator.visit_is_nanc                 C   r  ro   )r   r   r  rn  r   r   r  r>   r>   rA   r        
z(_InclusiveMetricsEvaluator.visit_not_nanr   c                 C   s   |  j}|j}| |s| |rtS t|jts"t	d|j | j
| }r>t|j|}| |r7tS ||jkr>tS tS NExpected PrimitiveType: r   r   r  ri  rn  r   r   r   r4   r   rb  rf  r
   rq  r   r   r@   r   r   r   r  lower_bound_bytesr   r>   r>   rA   r     s   


z*_InclusiveMetricsEvaluator.visit_less_thanc                 C   s   |  j}|j}| |s| |rtS t|jts"t	d|j | j
| }r>t|j|}| |r7tS ||jkr>tS tS r  r  r  r>   r>   rA   r     s   


z3_InclusiveMetricsEvaluator.visit_less_than_or_equalc                 C   s   |  j}|j}| |s| |rtS t|jts"t	d|j | j
| }r>t|j|}||jkr>| |r<tS tS tS r  r   r   r  ri  rn  r   r   r   r4   r   rc  rf  r
   r   rq  r   r@   r   r   r   r  upper_bound_bytesr   r>   r>   rA   r        


z-_InclusiveMetricsEvaluator.visit_greater_thanc                 C   s   |  j}|j}| |s| |rtS t|jts"t	d|j | j
| }r>t|j|}||jk r>| |r<tS tS tS r  r  r  r>   r>   rA   r     r  z6_InclusiveMetricsEvaluator.visit_greater_than_or_equalc           	      C   s   |  j}|j}| |s| |rtS t|jts"t	d|j | j
| }r>t|j|}| |r7tS ||jkr>tS | j| }rZt|j|}| |rStS ||jk rZtS tS r  )r   r   r  ri  rn  r   r   r   r4   r   rb  rf  r
   rq  r   r   rc  )	r@   r   r   r   r  r  r   r  r   r>   r>   rA   r   '  s&   




z&_InclusiveMetricsEvaluator.visit_equalc                 C   r   ro   r   r   r>   r>   rA   r   E  r   z*_InclusiveMetricsEvaluator.visit_not_equalr   c                    s   |  j}|j}| |s| |rtS t|tkrtS t	|j
ts*td|j
 | j| }rPt|j
| |  r?tS  fdd|D }t|dkrPtS | j| }rvt|j
|| retS fdd|D }t|dkrvtS tS )Nr  c                       h | ]} |kr|qS r>   r>   r;  )r   r>   rA   r?  \      z6_InclusiveMetricsEvaluator.visit_in.<locals>.<setcomp>r   c                       h | ]} |kr|qS r>   r>   r;  )r   r>   rA   r?  f  r  )r   r   r  ri  rn  r   r   r   r   r   r   r4   r   rb  rf  r
   rq  rc  )r@   r   r   r   r  r  r  r>   )r   r   rA   r   H  s.   


z#_InclusiveMetricsEvaluator.visit_inc                 C   r   ro   r   r   r>   r>   rA   r   l  r   z'_InclusiveMetricsEvaluator.visit_not_inc                 C   s   |  j}|j}| |rtS t|jtstd|j t	|j
}t|}| j| }rBt	t|j|}|rB|d | |krBtS | j| }	r`t	t|j|	}
|
d ur`|
d | |k r`tS tS r  )r   r   r  ri  r   r   r   r4   r   r   r   r   rb  rf  r
   rc  r   )r@   r   r   r   r  r   r   r  r   r  r   r>   r>   rA   r   q  s"   


z,_InclusiveMetricsEvaluator.visit_starts_withc                 C   s   |  j}|j}| |rtS t|jtstd|j t	|j
}t|}| j| }rh| j| }rht	t|j|}	t	t|j|}
t|	|k rNtS |	d | |krht|
|k r^tS |
d | |krhtS tS r  )r   r   r  r  r   r   r   r4   r   r   r   r   rb  rf  rc  r
   r   )r@   r   r   r   r  r   r   r  r  r   r   r>   r>   rA   r     s&   


 z0_InclusiveMetricsEvaluator.visit_not_starts_withTF) rQ   rR   rS   r5   r   r   r*   r   rp   r%   r   r%  r  rn  r    r   r   r   r   r-   r   r   r   r   r   r   r   r,   r   r   r   r   r>   r>   r>   rA   rs    s@   
 

$rs  c                 C   r   ro   )StrictProjectionr  r  r>   r>   rA   strict_projection  r  r  c                   @   r  )r  rM   r<   c                 C   r  r  )r  r  r   r   r   r  r   r  strict_projectr  r#   )r@   rM   r  r  r  r  r>   r>   rA   rP     s   
z&StrictProjection.visit_bound_predicateNr  r>   r>   r>   rA   r    r  r  c                   @   s  e Zd ZU eed< eed< 	d5dededededd	f
d
dZde	defddZ
dedefddZdedefddZdedefddZdedefddZdededefddZdededefddZdededefddZdededefdd Zdededefd!d"Zdededefd#d$Zded%ee defd&d'Zded%ee defd(d)Zdededefd*d+Zdededefd,d-Zd.edefd/d0Zd.edefd1d2Z d.edefd3d4Z!d	S )6_StrictMetricsEvaluatorr   r   TFrj   rl   rt  r<   Nc                 C   ru  ro   rv  rx  r>   r>   rA   rp     ry  z _StrictMetricsEvaluator.__init__rz  c                 C   sV   |j dkrtS |jpt| _|jpt| _|jpt| _|jpt| _|j	p#t| _	t
| j| S )zTest whether all records within the file match the expression.

        Args:
            file: A data file

        Returns: false if the file may contain any row that doesn't match
                    the expression, true otherwise.
        r   )r{  ROWS_MUST_MATCHr_  r+   r|  r`  r}  ra  rb  rc  rY   r   r~  r>   r>   rA   r     s   
	z_StrictMetricsEvaluator.evalr   c                 C   r  ro   )r   r   r  ri  r  ROWS_MIGHT_NOT_MATCHr  r>   r>   rA   r     r  z%_StrictMetricsEvaluator.visit_is_nullc                 C   s0   |  jj}| j| }d ur|dkrtS tS r  )r   r   r  r`  rf  r  r  )r@   r   r  rh  r>   r>   rA   r     s   z&_StrictMetricsEvaluator.visit_not_nullc                 C   r  ro   )r   r   r  rn  r  r  r  r>   r>   rA   r     r  z$_StrictMetricsEvaluator.visit_is_nanc                 C   s>   |  jj}| j| }d ur|dkrtS | |rtS tS r  )r   r   r  ra  rf  r  ri  r  )r@   r   r  rm  r>   r>   rA   r     s   
z%_StrictMetricsEvaluator.visit_not_nanr   c                 C   s\   |  jj}| |s| |rtS | j| }r,| |}t	|j
|}||jk r,tS tS ro   r   r   r  _can_contain_nulls_can_contain_nansr  rc  rf  
_get_fieldr   r   r   r  r@   r   r   r  upper_bytesr   r   r>   r>   rA   r        

z'_StrictMetricsEvaluator.visit_less_thanc                 C   s\   |  jj}| |s| |rtS | j| }r,| |}t	|j
|}||jkr,tS tS ro   r  r  r>   r>   rA   r   "  r  z0_StrictMetricsEvaluator.visit_less_than_or_equalc                 C   sj   |  jj}| |s| |rtS | j| }r3| |}t	|j
|}| |r,tS ||jkr3tS tS ro   r   r   r  r  r  r  rb  rf  r  r   r   rq  r   r  r@   r   r   r  lower_bytesr   r   r>   r>   rA   r   3  s   


z*_StrictMetricsEvaluator.visit_greater_thanc                 C   sj   |  jj}| |s| |rtS | j| }r3| |}t	|j
|}| |r,tS ||jkr3tS tS ro   r  r  r>   r>   rA   r   I  s   


z3_StrictMetricsEvaluator.visit_greater_than_or_equalc           	      C   s   |  jj}| |s| |rtS | j| }rA| j| }rA| 	|}t
|j|}t
|j|}||jks=||jkr?tS tS tS ro   )r   r   r  r  r  r  rb  rf  rc  r  r   r   r   r  )	r@   r   r   r  r  r  r   r   r   r>   r>   rA   r   ^  s    
z#_StrictMetricsEvaluator.visit_equalc           	      C   s   |  jj}| |s| |rtS | |}| j| }r3t	|j
|}| |r,tS ||jkr3tS | j| }rHt	|j
|}||jk rHtS tS ro   )r   r   r  r  r  r  r  rb  rf  r   r   rq  r  r   rc  )	r@   r   r   r  r   r  r   r  r   r>   r>   rA   r   q  s   



z'_StrictMetricsEvaluator.visit_not_equalr   c           	      C   s   |  jj}| |s| |rtS | |}| j| }rG| j	| }rGt
|j|}||vr3tS t
|j|}||vr?tS ||krEtS tS tS ro   )r   r   r  r  r  r  r  rb  rf  rc  r   r   r  )	r@   r   r   r  r   r  r  r   r   r>   r>   rA   r     s   
 z _StrictMetricsEvaluator.visit_inc                    s   |  jj}| |s| |rtS | |}| j| }r=t	|j
| |  r,tS  fdd|D }t|dkr=tS | j| }r\t	|j
|fdd|D }t|dkr\tS tS )Nc                    r  r>   r>   r   r   r>   rA   r?    r  z7_StrictMetricsEvaluator.visit_not_in.<locals>.<setcomp>r   c                    r  r>   r>   r   r   r>   rA   r?    r  )r   r   r  r  r  r  r  rb  rf  r   r   rq  r  r   rc  )r@   r   r   r  r   r  r  r>   r   rA   r     s"   

z$_StrictMetricsEvaluator.visit_not_inc                 C   r   ro   r  r   r>   r>   rA   r     r   z)_StrictMetricsEvaluator.visit_starts_withc                 C   r   ro   r  r   r>   r>   rA   r     r   z-_StrictMetricsEvaluator.visit_not_starts_withr  c                 C   s(   | j j|d}|d u rtd| |S )N)r  z/Cannot find field, might be nested or missing: )r   r   r   )r@   r  r   r>   r>   rA   r    s   z"_StrictMetricsEvaluator._get_fieldc                 C      | j | }d uo|dkS r  r  )r@   r  rh  r>   r>   rA   r       z*_StrictMetricsEvaluator._can_contain_nullsc                 C   r  r  )ra  rf  )r@   r  rm  r>   r>   rA   r    r  z)_StrictMetricsEvaluator._can_contain_nansr  )"rQ   rR   rS   r5   r   r   r*   r   rp   r%   r   r    r   r   r   r   r-   r   r   r   r   r   r   r   r,   r   r   r   r   r%  r3   r  r  r  r>   r>   r>   rA   r    sB   
 


r  c                
       s  e Zd ZU dZeed< eed< eed< eed< dededededdf
dd	Z	d
e
defddZdefddZdefddZdedefddZdededefddZdededefddZdedefddZdedefddZdedefdd Zdedefd!d"Zded#edefd$d%Zded#edefd&d'Zded#edefd(d)Zded#edefd*d+Zded#edefd,d-Zded#edefd.d/Zded0ee defd1d2Zded0ee defd3d4Z ded#edefd5d6Z!ded#edefd7d8Z"d9e#def fd:d;Z$d9e%defd<d=Z&  Z'S )>ResidualVisitora+  Finds the residuals for an Expression the partitions in the given PartitionSpec.

    A residual expression is made by partially evaluating an expression using partition values.
    For example, if a table is partitioned by day(utc_timestamp) and is read with a filter expression
    utc_timestamp > a and utc_timestamp < b, then there are 4 possible residuals expressions
    for the partition data, d:


    1. If d > day(a) and d &lt; day(b), the residual is always true
    2. If d == day(a) and d != day(b), the residual is utc_timestamp > a
    3. if d == day(b) and d != day(a), the residual is utc_timestamp < b
    4. If d == day(a) == day(b), the residual is utc_timestamp > a and utc_timestamp < b
    Partition data is passed using StructLike. Residuals are returned by residualFor(StructLike).
    rj   r  rl   r   r<   Nc                 C   s   || _ || _|| _|| _d S ro   )rj   r  rl   r   )r@   rj   r  rl   r   r>   r>   rA   rp     s   
zResidualVisitor.__init__partition_datac                 C   s   || _ t| j| dS )Nr^   )r   rY   r   r@   r  r>   r>   rA   r     s   zResidualVisitor.evalc                 C   rq   ro   rr   r?   r>   r>   rA   rB     rs   zResidualVisitor.visit_truec                 C   rq   ro   rt   r?   r>   r>   rA   rD     rs   zResidualVisitor.visit_falserE   c                 C   s   t |S ro   rw   rF   r>   r>   rA   rG     r   zResidualVisitor.visit_notrH   rI   c                 C   r
  ro   r{   rJ   r>   r>   rA   rK     rx   zResidualVisitor.visit_andc                 C   r
  ro   r}   rJ   r>   r>   rA   rL     rx   zResidualVisitor.visit_orr   c                 C   s   | | jd u rt S t S ro   r   r   r   r   r   r>   r>   rA   r   
     zResidualVisitor.visit_is_nullc                 C   s   | | jd urt S t S ro   r  r   r>   r>   rA   r     r  zResidualVisitor.visit_not_nullc                 C   s0   | | j}t|trt|r|  S |  S ro   r   r   r   r   ro  rp  rB   rD   r   r>   r>   rA   r        zResidualVisitor.visit_is_nanc                 C   s0   | | j}t|trt|s|  S |  S ro   r  r   r>   r>   rA   r     r  zResidualVisitor.visit_not_nanr   c                 C   s"   | | j|jk r|  S |  S ro   r   r   r   rB   rD   r   r>   r>   rA   r   $     zResidualVisitor.visit_less_thanc                 C   s"   | | j|jkr|  S |  S ro   r  r   r>   r>   rA   r   *  r  z(ResidualVisitor.visit_less_than_or_equalc                 C   s"   | | j|jkr|  S |  S ro   r  r   r>   r>   rA   r   0  r  z"ResidualVisitor.visit_greater_thanc                 C   s"   | | j|jkr|  S |  S ro   r  r   r>   r>   rA   r   6  r  z+ResidualVisitor.visit_greater_than_or_equalc                 C   s"   | | j|jkr|  S |  S ro   r  r   r>   r>   rA   r   <  r  zResidualVisitor.visit_equalc                 C   s"   | | j|jkr|  S |  S ro   r  r   r>   r>   rA   r   B  r  zResidualVisitor.visit_not_equalr   c                 C   s    | | j|v r|  S |  S ro   r   r   rB   rD   r   r>   r>   rA   r   H     zResidualVisitor.visit_inc                 C   s    | | j|vr|  S |  S ro   r  r   r>   r>   rA   r   N  r  zResidualVisitor.visit_not_inc                 C   s4   | | j}|d urt|t|jrt S t S ro   )r   r   r   r   r   r   r   r   r>   r>   rA   r   T  s   z!ResidualVisitor.visit_starts_withc                 C   s   |  ||s	t S t S ro   )r   r   r   r   r>   r>   rA   r   [  s   z%ResidualVisitor.visit_not_starts_withrM   c                    s  | j |j jj}|g kr|S dtdtfdd}|D ]l}|j	|j
|}d}|durI|j|| j | j| jd}t|trGt |}n|}t|trSt   S |j|j
|}d}	|dur~|j|| j | j| jd}
t|
tr|t |
}	n|
}	t|	trt   S q|S )a  
        If there is no strict projection or if it evaluates to false, then return the predicate.

        Get the strict projection and inclusive projection of this predicate in partition data,
        then use them to determine whether to return the original predicate. The strict projection
        returns true iff the original predicate would have returned true, so the predicate can be
        eliminated if the strict projection evaluates to true. Similarly the inclusive projection
        returns false iff the original predicate would have returned false, so the predicate can
        also be eliminated if the inclusive projection evaluates to false.

        r   r<   c                 S   s
   t | j S ro   )r*   r   )r   r>   r>   rA   struct_to_schemaq  rx   z?ResidualVisitor.visit_bound_predicate.<locals>.struct_to_schemaNr   )r  r  r   r   r   r  r5   r*   r  r  r  rn   r   rj   rl   r   r   superrP   r   r  r   )r@   rM   r  r  r  r  strict_resultr   r  inclusive_resultbound_inclusive	__class__r>   rA   rP   a  s:   





z%ResidualVisitor.visit_bound_predicatec                 C   sB   |j | j| jd}t|tr| j|d}t|ttfs|S |S |S )Nr   rg   )rn   rj   rl   r   r   rP   r   r   )r@   rM   r   bound_residualr>   r>   rA   rO     s   
z'ResidualVisitor.visit_unbound_predicate)(rQ   rR   rS   r   r*   r   r)   r   r   rp   r.   r   rB   rD   rG   rK   rL   r    r   r   r   r   r-   r   r   r   r   r   r   r   r,   r   r   r   r   r   rP   r$   rO   __classcell__r>   r>   r  rA   r    s:   
 6r  c                   @   r  )ResidualEvaluatorr  r<   c                 C   s
   |  |S ro   )r   r  r>   r>   rA   residual_for  rx   zResidualEvaluator.residual_forN)rQ   rR   rS   r.   r   r  r>   r>   r>   rA   r    r  r  c                       s8   e Zd Zdedef fddZdedefddZ  ZS )	UnpartitionedResidualEvaluatorrj   r   c                    s   t  j|t|dd || _d S )NF)rj   r  r   rl   )r  rp   r(   r   )r@   rj   r   r  r>   rA   rp     s   
z'UnpartitionedResidualEvaluator.__init__r  r<   c                 C   s   | j S ro   r   r  r>   r>   rA   r    rs   z+UnpartitionedResidualEvaluator.residual_for)	rQ   rR   rS   r*   r   rp   r.   r  r  r>   r>   r  rA   r    s    r  c                 C   s$   |   r
t||dS t| |||dS )N)rj   r   )r  r   rj   rl   )is_unpartitionedr  r  )r  r   rl   rj   r>   r>   rA   residual_evaluator_of  s
   r  )TrY  )vro  abcr   r   collections.abcr   	functoolsr   typingr   r   r   r	   pyiceberg.conversionsr
   pyiceberg.expressionsr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r    r!   r"   r#   r$   pyiceberg.manifestr%   r&   r'   pyiceberg.partitioningr(   r)   pyiceberg.schemar*   pyiceberg.typedefr+   r,   r-   r.   r/   pyiceberg.typesr0   r1   r2   r3   r4   r5   r6   r7   pyiceberg.utils.datetimer8   r9   r:   r;   rY   registerrZ   r   rn   rm   r   rP   r   r   r   r   r   r  r   r  r   rr  r   r   r  r  r  r  r  r$  r%  r&  r   r'  r-  r3  r.  r7  r   r   r8  r]  r^  rs  r  r  r  r  r  r  r  r>   r>   r>   rA   <module>   s>  p(
:(`$L m
	%
Q

"S

*  .
   L
