o
    ˳i#                     @   s  U d dl mZ d dlmZ d dlmZ d dlmZmZ d dl	m
Z
mZmZmZ d dlmZ d dlmZ d dlmZmZmZ d d	lmZ ejejejhZee ed
< ejejhZee ed< dedededee de
de e!e ee" f fddZ#dedee" dedB de$e"ee f dB dedB dede%fddZ&dedededB de$e"ee f dB dedB dee fdd Z'dedededB deddf
d!d"Z(dedededB de$e"ee f dB dedB dee fd#d$Z)dedededB dedB ddf
d%d&Z*dS )'    )Iterator)ValidationException)BooleanExpression)ROWS_CANNOT_MATCH_InclusiveMetricsEvaluator)ManifestContentManifestEntryManifestEntryStatusManifestFile)Schema)Table)	OperationSnapshotancestors_between)Record$VALIDATE_DATA_FILES_EXIST_OPERATIONS$VALIDATE_ADDED_DATA_FILES_OPERATIONStablefrom_snapshotto_snapshotmatching_operationsmanifest_content_filterreturnc           	         s   g }t  }d}t||| jD ]0}j}|du r!td d|j|vr'q|j | fdd	| j
D  q|durM|j|jkrMtd||fS )a<  Return newly added manifests and snapshot IDs between the starting snapshot and parent snapshot.

    Args:
        table: Table to get the history from
        from_snapshot: Parent snapshot to get the history from
        to_snapshot: Starting snapshot
        matching_operations: Operations to match on
        manifest_content_filter: Manifest content type to filter

    Raises:
        ValidationException: If no matching snapshot is found or only one snapshot is found

    Returns:
        List of manifest files and set of snapshots ID's matching conditions
    NzNo summary found for snapshot !c                    s&   g | ]}|j jkr|j kr|qS  )added_snapshot_idsnapshot_idcontent).0manifestr   snapshotr   S/home/ubuntu/.local/lib/python3.10/site-packages/pyiceberg/table/update/validate.py
<listcomp>E   s
    z'_validation_history.<locals>.<listcomp>zNo matching snapshot found.)setr   metadatasummaryr   	operationaddr   extend	manifestsio)	r   r   r   r   r   manifests_files	snapshotslast_snapshotr&   r   r    r"   _validation_history    s&   

r/   entrysnapshot_idsdata_filterNpartition_setentry_statusschemac           	      C   s~   | j |vrdS |dur| j|krdS |dur%t||}|| jtu r%dS |dur=| jj}| jj}||vs;||| vr=dS dS )a  Filter manifest entries based on data filter and partition set.

    Args:
        entry: Manifest entry to filter
        snapshot_ids: set of snapshot ids to match data files
        data_filter: Optional filter to match data files
        partition_set: Optional set of partitions to match data files
        entry_status: Optional status to match data files
        schema: schema for filtering

    Returns:
        True if the entry should be included, False otherwise
    FNT)r   statusr   eval	data_filer   	partitionspec_id)	r0   r1   r2   r3   r4   r5   	evaluatorr9   r:   r   r   r"   _filter_manifest_entriesR   s   

r<   starting_snapshotparent_snapshotc           	   
   c   sf    |du rdS t | ||ttj\}}|D ]}|j| jddD ]}t||||tj| 	 r/|V  qqdS )a  Find deleted data files matching a filter since a starting snapshot.

    Args:
        table: Table to validate
        starting_snapshot: Snapshot current at the start of the operation
        data_filter: Expression used to find deleted data files
        partition_set: dict of {spec_id: set[partition]} to filter on
        parent_snapshot: Ending snapshot on the branch being validated

    Returns:
        List of conflicting manifest-entries
    NF)discard_deleted)
r/   r   r   DATAfetch_manifest_entryr+   r<   r	   DELETEDr5   	r   r=   r2   r3   r>   r*   r1   r   r0   r   r   r"   _deleted_data_files{   s&   rD   c                 C   :   t | ||d|}t|rdd |D }td| ddS )ag  Validate that no files matching a filter have been deleted from the table since a starting snapshot.

    Args:
        table: Table to validate
        starting_snapshot: Snapshot current at the start of the operation
        data_filter: Expression used to find deleted data files
        parent_snapshot: Ending snapshot on the branch being validated

    Nc                 S   s   h | ]}|j qS r   r   r   r0   r   r   r"   	<setcomp>   s    z/_validate_deleted_data_files.<locals>.<setcomp>z@Deleted data files were found matching the filter for snapshots r   )rD   anyr   r   r=   r2   r>   conflicting_entriesconflicting_snapshotsr   r   r"   _validate_deleted_data_files   
   rM   c           	   
   c   s`    |du rdS t | ||ttj\}}|D ]}|| jD ]}t||||d|  r,|V  qqdS )a  Return manifest entries for data files added between the starting snapshot and parent snapshot.

    Args:
        table: Table to get the history from
        starting_snapshot: Starting snapshot to get the history from
        data_filter: Optional filter to match data files
        partition_set: Optional set of partitions to match data files
        parent_snapshot: Parent snapshot to get the history from

    Returns:
        Iterator of manifest entries for added data files matching the conditions
    N)r/   r   r   r@   rA   r+   r<   r5   rC   r   r   r"   _added_data_files   s"   rO   c                 C   rE   )aa  Validate that no files matching a filter have been added to the table since a starting snapshot.

    Args:
        table: Table to validate
        starting_snapshot: Snapshot current at the start of the operation
        data_filter: Expression used to find added data files
        parent_snapshot: Ending snapshot on the branch being validated

    Nc                 S   s   h | ]
}|j d ur|j qS )NrF   rG   r   r   r"   rH      s    z-_validate_added_data_files.<locals>.<setcomp>z>Added data files were found matching the filter for snapshots r   )rO   rI   r   rJ   r   r   r"   _validate_added_data_files   rN   rP   )+collections.abcr   pyiceberg.exceptionsr   pyiceberg.expressionsr   pyiceberg.expressions.visitorsr   r   pyiceberg.manifestr   r   r	   r
   pyiceberg.schemar   pyiceberg.tabler   pyiceberg.table.snapshotsr   r   r   pyiceberg.typedefr   	OVERWRITEREPLACEDELETEr   r$   __annotations__APPENDr   tuplelistintr/   dictboolr<   rD   rM   rO   rP   r   r   r   r"   <module>   s   
2
)
'

$