o
    <ieK                     @  s  d dl mZ d dlZd dlZd dlmZ d dlmZmZ d dl	m
Z
 d dlmZmZ d dlmZmZmZ d dlmZ d d	lmZmZmZmZ d d
lmZmZ d dlmZ er\d dlmZ d dl m!Z! dZ"dZ#dZ$dZ%dZ&dZ'dZ(dZ)dZ*dZ+dZ,dZ-dZ.dZ/dZ0dZ1dZ2dZ3d Z4d!Z5d"Z6d#Z7d$Z8d%Z9d&Z:d'Z;d Z<G d(d) d)e
Z=G d*d+ d+Z>G d,d- d-e!ee?e?f Z@G d.d/ d/e!ZAG d0d1 d1e!ZBG d2d3 d3e!ZCG d4d5 d5ZDdSdTd:d;ZEdUdCdDZFdVdJdKZGdWdNdOZHdXdQdRZIdS )Y    )annotationsN)defaultdict)IterableMapping)Enum)TYPE_CHECKINGAny)FieldPrivateAttrmodel_serializer)FileIO)DataFileDataFileContentManifestFile
_manifests)UNPARTITIONED_PARTITION_SPECPartitionSpec)Schema)TableMetadata)IcebergBaseModelzadded-data-fileszadded-delete-fileszadded-equality-deleteszadded-files-sizezadded-position-deleteszadded-position-delete-fileszadded-recordszdeleted-data-fileszdeleted-recordszadded-equality-delete-fileszremoved-delete-fileszremoved-equality-deleteszremoved-equality-delete-fileszremoved-files-sizezremoved-position-deleteszremoved-position-delete-filesztotal-equality-deletesztotal-position-deletesztotal-data-filesztotal-delete-filesztotal-recordsztotal-files-sizezchanged-partition-countzpartitions.zpartition-summaries-included	operationc                   @  s*   e Zd ZdZdZdZdZdZddd	Zd
S )	Operationa+  Describes the operation.

    Possible operation values are:
        - append: Only data files were added and no files were removed.
        - replace: Data and delete files were added and removed without changing table data;
            i.e., compaction, changing the data file format, or relocating data files.
        - overwrite: Data and delete files were added and removed in a logical overwrite operation.
        - delete: Data files were removed and their contents logically deleted and/or delete files
            were added to delete rows.
    appendreplace	overwritedeletereturnstrc                 C  s   d| j  S )z8Return the string representation of the Operation class.z
Operation.)nameself r!   V/home/ubuntu/veenaModal/venv/lib/python3.10/site-packages/pyiceberg/table/snapshots.py__repr__T   s   zOperation.__repr__Nr   r   )	__name__
__module____qualname____doc__APPENDREPLACE	OVERWRITEDELETEr#   r!   r!   r!   r"   r   C   s    r   c                   @  s   e Zd ZU ded< ded< ded< ded< ded< ded< ded< ded	< ded
< ded< ded< ded< ded< ded< ded< ded< d ddZd!ddZd!ddZd"ddZdS )#UpdateMetricsintadded_file_sizeremoved_file_sizeadded_data_filesremoved_data_filesadded_eq_delete_filesremoved_eq_delete_filesadded_pos_delete_filesremoved_pos_delete_filesadded_delete_filesremoved_delete_filesadded_recordsdeleted_recordsadded_pos_deletesremoved_pos_deletesadded_eq_deletesremoved_eq_deletesr   Nonec                 C  sd   d| _ d| _d| _d| _d| _d| _d| _d| _d| _d| _	d| _
d| _d| _d| _d| _d| _d S Nr   )r/   r0   r1   r2   r3   r4   r5   r6   r7   r8   r9   r:   r;   r<   r=   r>   r   r!   r!   r"   __init__k   s    
zUpdateMetrics.__init__	data_filer   c                 C     |  j |j7  _ |jtjkr|  jd7  _|  j|j7  _d S |jtjkr=|  j	d7  _	|  j
d7  _
|  j|j7  _d S |jtjkr[|  j	d7  _	|  jd7  _|  j|j7  _d S td|j N   zUnknown data file content: )r/   file_size_in_bytescontentr   DATAr1   r9   record_countPOSITION_DELETESr7   r5   r;   EQUALITY_DELETESr3   r=   
ValueErrorr    rB   r!   r!   r"   add_file}      zUpdateMetrics.add_filec                 C  rC   rD   )r0   rF   rG   r   rH   r2   r:   rI   rJ   r8   r6   r<   rK   r4   r>   rL   rM   r!   r!   r"   remove_file   rO   zUpdateMetrics.remove_filedict[str, str]c                 C  s   i }t || jt t || jt t || jt t || jt t || j	t
 t || jt t || jt t || jt t || jt t || jt t || jt t || jt t || jt t || jt t || jt t || jt  |S N)!set_when_positiver/   ADDED_FILE_SIZEr0   REMOVED_FILE_SIZEr1   ADDED_DATA_FILESr2   DELETED_DATA_FILESr3   ADDED_EQUALITY_DELETE_FILESr4   REMOVED_EQUALITY_DELETE_FILESr5   ADDED_POSITION_DELETE_FILESr6   REMOVED_POSITION_DELETE_FILESr7   ADDED_DELETE_FILESr8   REMOVED_DELETE_FILESr9   ADDED_RECORDSr:   DELETED_RECORDSr;   ADDED_POSITION_DELETESr<   REMOVED_POSITION_DELETESr=   ADDED_EQUALITY_DELETESr>   REMOVED_EQUALITY_DELETES)r    
propertiesr!   r!   r"   to_dict   s$   zUpdateMetrics.to_dictN)r   r?   )rB   r   r   r?   r   rQ   )r%   r&   r'   __annotations__rA   rN   rP   re   r!   r!   r!   r"   r-   Y   s*   
 


r-   c                      s   e Zd ZU dZe Zded< e Zded< d$d% fddZ	d&ddZ
d'ddZd(ddZed)ddZed)ddZd*ddZd+d"d#Z  ZS ),Summaryu   A class that stores the summary information for a Snapshot.

    The snapshot summary’s operation field is used by some operations,
    like snapshot expiration, to skip processing certain snapshots.
    r   r   rQ   _additional_propertiesNOperation | Nonedatar   r   r?   c                   s<   |d u rt jddd tj}t jdd|i| || _d S )NzSEncountered invalid snapshot summary: operation is missing, defaulting to overwrite   )
stacklevelr   r!   )warningswarnr   r+   superrA   ri   )r    r   rk   	__class__r!   r"   rA      s
   
zSummary.__init___Summary__keyr   
Any | Nonec                 C  s   |  dkr	| jS | j|S )zReturn a key as it is a map.r   )lowerr   ri   get)r    rs   r!   r!   r"   __getitem__   s   zSummary.__getitem__keyvaluec                 C  s$   |  dkr|| _dS || j|< dS )zSet a key as it is a map.r   N)ru   r   ri   )r    rx   ry   r!   r!   r"   __setitem__   s   
zSummary.__setitem__r.   c                 C  s   dt | j S )z)Return the number of keys in the summary.rE   )lenri   r   r!   r!   r"   __len__   s   zSummary.__len__c                 C  s   dt | jji| jS )Nr   )r   r   ry   ri   r   r!   r!   r"   	ser_model   s   zSummary.ser_modelc                 C  s   | j S rR   )ri   r   r!   r!   r"   additional_properties   s   zSummary.additional_propertiesc                 C  s0   | j rdt| j  nd}dt| j | dS )z6Return the string representation of the Summary class.z, ** zSummary())ri   reprr   )r    repr_propertiesr!   r!   r"   r#      s   zSummary.__repr__otherboolc                 C  s&   t |tr| j|jko| j|jkS dS )z3Compare if the summary is equal to another summary.F)
isinstancerh   r   r~   )r    r   r!   r!   r"   __eq__   s
   zSummary.__eq__rR   )r   rj   rk   r   r   r?   )rs   r   r   rt   )rx   r   ry   r   r   r?   )r   r.   rf   r$   )r   r   r   r   )r%   r&   r'   r(   r	   r   rg   r
   ri   rA   rw   rz   r|   r   r}   propertyr~   r#   r   __classcell__r!   r!   rq   r"   rh      s   
 



rh   c                   @  s   e Zd ZU eddZded< edddZded	< ed
edZded< eddd dZ	ded< edddZ
ded< eddZded< edddZded< eddddZded< eddd dZded!< d,d#d$Zd,d%d&Zd-d*d+ZdS ).Snapshotsnapshot-idaliasr.   snapshot_idzparent-snapshot-idN)r   defaultz
int | Noneparent_snapshot_idzsequence-numbersequence_numbertimestamp-msc                   C  s   t t d S )Ni  )r.   timer!   r!   r!   r"   <lambda>   s    zSnapshot.<lambda>)r   default_factorytimestamp_mszmanifest-listz-Location of the snapshot's manifest list file)r   descriptionr   manifest_list)r   zSummary | Nonesummaryz	schema-id	schema_idzfirst-row-idzFassigned to the first row in the first data file in the first manifest)r   r   r   first_row_idz
added-rowsz;The upper bound of the number of rows with assigned row IDs
added_rowsr   c                 C  sd   | j r
| j j dnd}| jrd| j nd}| jdur"d| j nd}| d| j | | }|S )7Return the string representation of the Snapshot class.z: r   z, parent_id=Nz, schema_id=zid=)r   r   r   r   r   )r    r   	parent_idr   
result_strr!   r!   r"   __str__   s
   zSnapshot.__str__c              
   C  s   d| j  d| j d| j d| j d| j d| jr%dt| j nd| jdur1d	| j nd| jdur=d
| j nd| j	durId| j	 ndg	}dd |D }dd
| dS )r   zsnapshot_id=zparent_snapshot_id=zsequence_number=ztimestamp_ms=zmanifest_list=''zsummary=Nz
schema_id=zfirst_row_id=zadded_rows=c                 S  s   g | ]}|d ur|qS rR   r!   ).0fieldr!   r!   r"   
<listcomp>  s    z%Snapshot.__repr__.<locals>.<listcomp>z	Snapshot(z, r   )r   r   r   r   r   r   r   r   r   r   join)r    fieldsfiltered_fieldsr!   r!   r"   r#     s   



zSnapshot.__repr__ior   list[ManifestFile]c                 C  s   t t|| jS )z,Return the manifests for the given snapshot.)listr   r   )r    r   r!   r!   r"   	manifests  s   zSnapshot.manifestsr$   )r   r   r   r   )r%   r&   r'   r	   r   rg   r   INITIAL_SEQUENCE_NUMBERr   r   r   r   r   r   r   r   r#   r   r!   r!   r!   r"   r      s"   
 

r   c                   @  s2   e Zd ZU eddZded< eddZded< dS )	MetadataLogEntryzmetadata-filer   r   metadata_filer   r.   r   N)r%   r&   r'   r	   r   rg   r   r!   r!   r!   r"   r        
 r   c                   @  s2   e Zd ZU eddZded< eddZded< dS )SnapshotLogEntryr   r   r.   r   r   r   N)r%   r&   r'   r	   r   rg   r   r!   r!   r!   r"   r   "  r   r   c                   @  sv   e Zd ZU ded< ded< ded< d'd(ddZd)ddZefd*ddZefd*ddZd+ddZ	d,d d!Z
d-d$d%Zd&S ).SnapshotSummaryCollectorr-   metricszdefaultdict[str, UpdateMetrics]partition_metricsr.   $max_changed_partitions_for_summariesr   partition_summary_limitr   r?   c                 C  s   t  | _tt | _|| _d S rR   )r-   r   r   r   r   )r    r   r!   r!   r"   rA   ,  s   

z!SnapshotSummaryCollector.__init__limitc                 C  s
   || _ d S rR   )r   )r    r   r!   r!   r"   set_partition_summary_limit1  s   
z4SnapshotSummaryCollector.set_partition_summary_limitrB   r   schemar   partition_specr   c                 C  4   | j | t|jdkr| j||d|d d S d S )Nr   Tr   fileis_add_filer   )r   rN   r{   	partitionupdate_partition_metricsr    rB   r   r   r!   r!   r"   rN   4  s   z!SnapshotSummaryCollector.add_filec                 C  r   )Nr   Fr   )r   rP   r{   r   r   r   r!   r!   r"   rP   9  s   z$SnapshotSummaryCollector.remove_filer   r   r   c                 C  s8   | |j|}| j| }|r|| d S || d S rR   )partition_to_pathr   r   rN   rP   )r    r   r   r   r   partition_pathr   r!   r!   r"   r   @  s
   
z1SnapshotSummaryCollector.update_partition_metricsrQ   c                 C  sx   | j  }t| j}t||t || jkr:|dkrd|t< | j D ]\}}| 	| }r9t|dkr9||t
| < q"|S )Nr   true)r   re   r{   r   rS   CHANGED_PARTITION_COUNT_PROPr   PARTITION_SUMMARY_PROPitems_partition_summaryCHANGED_PARTITION_PREFIX)r    rd   changed_partitions_sizer   update_metrics_partitionr   r!   r!   r"   buildI  s   


zSnapshotSummaryCollector.buildupdate_metricsr   c                 C  s   d dd |  D S )N,c                 S  s   g | ]\}}| d | qS )=r!   )r   propvalr!   r!   r"   r   W  s    z?SnapshotSummaryCollector._partition_summary.<locals>.<listcomp>)r   re   r   )r    r   r!   r!   r"   r   V  s   z+SnapshotSummaryCollector._partition_summaryN)r   )r   r.   r   r?   )r   r.   r   r?   )rB   r   r   r   r   r   r   r?   )
r   r   r   r   r   r   r   r   r   r?   rf   )r   r-   r   r   )r%   r&   r'   rg   rA   r   r   rN   rP   r   r   r   r!   r!   r!   r"   r   '  s   
 


	r   r   previous_summaryMapping[str, str] | Noner   c                   s   j tjtjtjhvrtdj   s#tdtdtdt	dt
dtdi d fd	d
}|tttd |tttd |tttd |t	ttd |t
ttd |tttd S )NzOperation not implemented: 0total_propertyr   added_propertyremoved_propertyr   r?   c              
     s     |  }rSz(t|}|dkr | }r|t|7 }|dkr. | }r.|t|8 }W n tyF } ztd|  d| |d }~ww |dkrUt|| < d S d S d S )Nr   z!Could not parse summary property z to an int: )rv   r.   rL   r   )r   r   r   previous_total_str	new_totaladdedremoveder   r   r!   r"   _update_totalsh  s    
z1update_snapshot_summaries.<locals>._update_totals)r   r   r   )r   r   r   r   r   r   r   r?   )r   r   r)   r+   r,   rL   TOTAL_DATA_FILESTOTAL_DELETE_FILESTOTAL_RECORDSTOTAL_FILE_SIZETOTAL_POSITION_DELETESTOTAL_EQUALITY_DELETESrV   rW   r\   r]   r^   r_   rT   rU   r`   ra   rb   rc   )r   r   r   r!   r   r"   update_snapshot_summariesZ  sT   	r   rd   rQ   numr.   property_namer   r?   c                 C  s   |dkrt || |< d S d S r@   )r   )rd   r   r   r!   r!   r"   rS     s   rS   current_snapshotSnapshot | Nonetable_metadatar   Iterable[Snapshot]c                 c  s>    | }|dur|V  |j du rdS ||j }|dusdS dS )z6Get the ancestors of and including the given snapshot.N)r   snapshot_by_id)r   r   snapshotr!   r!   r"   ancestors_of  s   
r   from_snapshotto_snapshotc                 c  sF    | durt ||D ]}|V  || kr dS q
dS t ||E dH  dS )zXGet the ancestors of and including the given snapshot between the to and from snapshots.N)r   )r   r   r   r   r!   r!   r"   ancestors_between  s   r   r   c                 C  sD   d}d}t |  | D ]}||j  kr|krn q|}|j}q|S )aC  Find the latest ancestor snapshot whose timestamp is before the provided timestamp.

    Args:
        table_metadata: The table metadata for a table
        timestamp_ms: lookup snapshots strictly before this timestamp

    Returns:
        The latest ancestor snapshot older than the timestamp, or None if not found.
    Nr   )r   r   r   )r   r   resultresult_timestampancestorr!   r!   r"    latest_ancestor_before_timestamp  s   
r   rR   )r   rh   r   r   r   rh   )rd   rQ   r   r.   r   r   r   r?   )r   r   r   r   r   r   )r   r   r   r   r   r   r   r   )r   r   r   r.   r   r   )J
__future__r   r   rn   collectionsr   collections.abcr   r   enumr   typingr   r   pydanticr	   r
   r   pyiceberg.ior   pyiceberg.manifestr   r   r   r   pyiceberg.partitioningr   r   pyiceberg.schemar   pyiceberg.table.metadatar   pyiceberg.typedefr   rV   r\   rb   rT   r`   rZ   r^   rW   r_   rX   r]   rc   rY   rU   ra   r[   r   r   r   r   r   r   r   r   r   	OPERATIONr   r   r-   r   rh   r   r   r   r   r   rS   r   r   r   r!   r!   r!   r"   <module>   sl   [=,3
>


