o
    uyi$J                     @  s  d dl mZ d dlZd dlZd dlmZ d dlmZ d dlm	Z	m
Z
mZmZmZmZmZmZ d dlmZmZmZ d dlmZ d dlmZmZmZmZ d d	lmZmZ d d
lm Z  d dl!m"Z" e	rfd dl#m$Z$ d dl%m&Z& dZ'dZ(dZ)dZ*dZ+dZ,dZ-dZ.dZ/dZ0dZ1dZ2dZ3dZ4dZ5dZ6dZ7dZ8d Z9d!Z:d"Z;d#Z<d$Z=d%Z>d&Z?d'Z@d ZAG d(d) d)eZBG d*d+ d+ZCG d,d- d-e&eeDeDf ZEG d.d/ d/e&ZFG d0d1 d1e&ZGG d2d3 d3e&ZHG d4d5 d5ZIdVd:d;ZJ	<dWdXd@dAZKdYdIdJZLdZdPdQZMd[dTdUZNdS )\    )annotationsN)defaultdict)Enum)TYPE_CHECKINGAnyDefaultDictDictIterableListMappingOptional)FieldPrivateAttrmodel_serializer)FileIO)DataFileDataFileContentManifestFile
_manifests)UNPARTITIONED_PARTITION_SPECPartitionSpec)Schema)deprecation_message)TableMetadata)IcebergBaseModelzadded-data-fileszadded-delete-fileszadded-equality-deleteszadded-files-sizezadded-position-deleteszadded-position-delete-fileszadded-recordszdeleted-data-fileszdeleted-recordszadded-equality-delete-fileszremoved-delete-fileszremoved-equality-deleteszremoved-equality-delete-fileszremoved-files-sizezremoved-position-deleteszremoved-position-delete-filesztotal-equality-deletesztotal-position-deletesztotal-data-filesztotal-delete-filesztotal-recordsztotal-files-sizezchanged-partition-countzpartitions.zpartition-summaries-included	operationc                   @  s*   e Zd ZdZdZdZdZdZddd	Zd
S )	Operationa  Describes the operation.

    Possible operation values are:
        - append: Only data files were added and no files were removed.
        - replace: Data and delete files were added and removed without changing table data; i.e., compaction, changing the data file format, or relocating data files.
        - overwrite: Data and delete files were added and removed in a logical overwrite operation.
        - delete: Data files were removed and their contents logically deleted and/or delete files were added to delete rows.
    appendreplace	overwritedeletereturnstrc                 C  s   d| j  S )z8Return the string representation of the Operation class.z
Operation.)nameself r&   \/home/ubuntu/maya3_transcribe/venv/lib/python3.10/site-packages/pyiceberg/table/snapshots.py__repr__R   s   zOperation.__repr__Nr!   r"   )	__name__
__module____qualname____doc__APPENDREPLACE	OVERWRITEDELETEr(   r&   r&   r&   r'   r   C   s    	r   c                   @  s   e Zd ZU ded< ded< ded< ded< ded< ded< ded< ded	< ded
< ded< ded< ded< ded< ded< ded< ded< d ddZd!ddZd!ddZd"ddZdS )#UpdateMetricsintadded_file_sizeremoved_file_sizeadded_data_filesremoved_data_filesadded_eq_delete_filesremoved_eq_delete_filesadded_pos_delete_filesremoved_pos_delete_filesadded_delete_filesremoved_delete_filesadded_recordsdeleted_recordsadded_pos_deletesremoved_pos_deletesadded_eq_deletesremoved_eq_deletesr!   Nonec                 C  sd   d| _ d| _d| _d| _d| _d| _d| _d| _d| _d| _	d| _
d| _d| _d| _d| _d| _d S Nr   )r4   r5   r6   r7   r8   r9   r:   r;   r<   r=   r>   r?   r@   rA   rB   rC   r$   r&   r&   r'   __init__i   s    
zUpdateMetrics.__init__	data_filer   c                 C     |  j |j7  _ |jtjkr|  jd7  _|  j|j7  _d S |jtjkr=|  j	d7  _	|  j
d7  _
|  j|j7  _d S |jtjkr[|  j	d7  _	|  jd7  _|  j|j7  _d S td|j N   zUnknown data file content: )r4   file_size_in_bytescontentr   DATAr6   r>   record_countPOSITION_DELETESr<   r:   r@   EQUALITY_DELETESr8   rB   
ValueErrorr%   rG   r&   r&   r'   add_file{      zUpdateMetrics.add_filec                 C  rH   rI   )r5   rK   rL   r   rM   r7   r?   rN   rO   r=   r;   rA   rP   r9   rC   rQ   rR   r&   r&   r'   remove_file   rT   zUpdateMetrics.remove_fileDict[str, str]c                 C  s   i }t || jt t || jt t || jt t || jt t || j	t
 t || jt t || jt t || jt t || jt t || jt t || jt t || jt t || jt t || jt t || jt t || jt  |S N)!set_when_positiver4   ADDED_FILE_SIZEr5   REMOVED_FILE_SIZEr6   ADDED_DATA_FILESr7   DELETED_DATA_FILESr8   ADDED_EQUALITY_DELETE_FILESr9   REMOVED_EQUALITY_DELETE_FILESr:   ADDED_POSITION_DELETE_FILESr;   REMOVED_POSITION_DELETE_FILESr<   ADDED_DELETE_FILESr=   REMOVED_DELETE_FILESr>   ADDED_RECORDSr?   DELETED_RECORDSr@   ADDED_POSITION_DELETESrA   REMOVED_POSITION_DELETESrB   ADDED_EQUALITY_DELETESrC   REMOVED_EQUALITY_DELETES)r%   
propertiesr&   r&   r'   to_dict   s$   zUpdateMetrics.to_dictN)r!   rD   )rG   r   r!   rD   r!   rV   )r*   r+   r,   __annotations__rF   rS   rU   rj   r&   r&   r&   r'   r2   W   s*   
 


r2   c                      s   e Zd ZU dZe Zded< e Zded< d$d% fddZ	d&ddZ
d'ddZd(ddZed)ddZed)ddZd*ddZd+d"d#Z  ZS ),Summaryu   A class that stores the summary information for a Snapshot.

    The snapshot summary’s operation field is used by some operations,
    like snapshot expiration, to skip processing certain snapshots.
    r   r   rV   _additional_propertiesNOptional[Operation]datar   r!   rD   c                   s8   |d u rt d tj}t jdd|i| || _d S )NzSEncountered invalid snapshot summary: operation is missing, defaulting to overwriter   r&   )warningswarnr   r0   superrF   rn   )r%   r   rp   	__class__r&   r'   rF      s
   

zSummary.__init___Summary__keyr"   Optional[Any]c                 C  s   |  dkr	| jS | j|S )zReturn a key as it is a map.r   )lowerr   rn   get)r%   rv   r&   r&   r'   __getitem__   s   zSummary.__getitem__keyvaluec                 C  s$   |  dkr|| _dS || j|< dS )zSet a key as it is a map.r   N)rx   r   rn   )r%   r{   r|   r&   r&   r'   __setitem__   s   
zSummary.__setitem__r3   c                 C  s   dt | j S )z)Return the number of keys in the summary.rJ   )lenrn   r$   r&   r&   r'   __len__   s   zSummary.__len__c                 C  s   dt | jji| jS )Nr   )r"   r   r|   rn   r$   r&   r&   r'   	ser_model   s   zSummary.ser_modelc                 C  s   | j S rW   )rn   r$   r&   r&   r'   additional_properties   s   zSummary.additional_propertiesc                 C  s0   | j rdt| j  nd}dt| j | dS )z6Return the string representation of the Summary class.z, ** zSummary())rn   reprr   )r%   repr_propertiesr&   r&   r'   r(      s   zSummary.__repr__otherboolc                 C  s&   t |tr| j|jko| j|jkS dS )z3Compare if the summary is equal to another summary.F)
isinstancerm   r   r   )r%   r   r&   r&   r'   __eq__   s
   zSummary.__eq__rW   )r   ro   rp   r   r!   rD   )rv   r"   r!   rw   )r{   r"   r|   r   r!   rD   )r!   r3   rk   r)   )r   r   r!   r   )r*   r+   r,   r-   r   r   rl   r   rn   rF   rz   r}   r   r   r   propertyr   r(   r   __classcell__r&   r&   rt   r'   rm      s   
 



rm   c                   @  s   e Zd ZU eddZded< edddZded	< ed
edZded< eddd dZ	ded< edddZ
ded< eddZded< edddZded< d#ddZd$d!d"ZdS )%Snapshotsnapshot-idaliasr3   snapshot_idzparent-snapshot-idN)r   defaultzOptional[int]parent_snapshot_idzsequence-numbersequence_numbertimestamp-msc                   C  s   t t d S )Ni  )r3   timer&   r&   r&   r'   <lambda>   s    zSnapshot.<lambda>)r   default_factorytimestamp_mszmanifest-listz-Location of the snapshot's manifest list file)r   descriptionr"   manifest_list)r   zOptional[Summary]summaryz	schema-id	schema_idr!   c                 C  sd   | j r
| j j dnd}| jrd| j nd}| jdur"d| j nd}| d| j | | }|S )z7Return the string representation of the Snapshot class.z: r   z, parent_id=Nz, schema_id=zid=)r   r   r   r   r   )r%   r   	parent_idr   
result_strr&   r&   r'   __str__   s
   zSnapshot.__str__ior   List[ManifestFile]c                 C  s   t t|| jS )z,Return the manifests for the given snapshot.)listr   r   )r%   r   r&   r&   r'   	manifests   s   zSnapshot.manifestsr)   )r   r   r!   r   )r*   r+   r,   r   r   rl   r   INITIAL_SEQUENCE_NUMBERr   r   r   r   r   r   r   r&   r&   r&   r'   r      s   
 
r   c                   @  s2   e Zd ZU eddZded< eddZded< dS )	MetadataLogEntryzmetadata-filer   r"   metadata_filer   r3   r   N)r*   r+   r,   r   r   rl   r   r&   r&   r&   r'   r        
 r   c                   @  s2   e Zd ZU eddZded< eddZded< dS )SnapshotLogEntryr   r   r3   r   r   r   N)r*   r+   r,   r   r   rl   r   r&   r&   r&   r'   r   
  r   r   c                   @  sv   e Zd ZU ded< ded< ded< d'd(ddZd)ddZefd*ddZefd*ddZd+ddZ	d,d d!Z
d-d$d%Zd&S ).SnapshotSummaryCollectorr2   metricszDefaultDict[str, UpdateMetrics]partition_metricsr3   $max_changed_partitions_for_summariesr   partition_summary_limitr!   rD   c                 C  s   t  | _tt | _|| _d S rW   )r2   r   r   r   r   )r%   r   r&   r&   r'   rF     s   

z!SnapshotSummaryCollector.__init__limitc                 C  s
   || _ d S rW   )r   )r%   r   r&   r&   r'   set_partition_summary_limit  s   
z4SnapshotSummaryCollector.set_partition_summary_limitrG   r   schemar   partition_specr   c                 C  4   | j | t|jdkr| j||d|d d S d S )Nr   Tr   fileis_add_filer   )r   rS   r~   	partitionupdate_partition_metricsr%   rG   r   r   r&   r&   r'   rS     s   z!SnapshotSummaryCollector.add_filec                 C  r   )Nr   Fr   )r   rU   r~   r   r   r   r&   r&   r'   rU   !  s   z$SnapshotSummaryCollector.remove_filer   r   r   c                 C  s8   | |j|}| j| }|r|| d S || d S rW   )partition_to_pathr   r   rS   rU   )r%   r   r   r   r   partition_pathr   r&   r&   r'   r   (  s
   
z1SnapshotSummaryCollector.update_partition_metricsrV   c                 C  sx   | j  }t| j}t||t || jkr:|dkrd|t< | j D ]\}}| 	| }r9t|dkr9||t
| < q"|S )Nr   true)r   rj   r~   r   rX   CHANGED_PARTITION_COUNT_PROPr   PARTITION_SUMMARY_PROPitems_partition_summaryCHANGED_PARTITION_PREFIX)r%   ri   changed_partitions_sizer   update_metrics_partitionr   r&   r&   r'   build1  s   


zSnapshotSummaryCollector.buildupdate_metricsr"   c                 C  s   d dd |  D S )N,c                 S  s   g | ]\}}| d | qS )=r&   ).0propvalr&   r&   r'   
<listcomp>?  s    z?SnapshotSummaryCollector._partition_summary.<locals>.<listcomp>)joinrj   r   )r%   r   r&   r&   r'   r   >  s   z+SnapshotSummaryCollector._partition_summaryN)r   )r   r3   r!   rD   )r   r3   r!   rD   )rG   r   r   r   r   r   r!   rD   )
r   r   r   r   r   r   r   r   r!   rD   rk   )r   r2   r!   r"   )r*   r+   r,   rl   rF   r   r   rS   rU   r   r   r   r&   r&   r&   r'   r     s   
 


	r   r   previous_summaryMapping[str, str]r!   c                   s   t ttttthD ]}d| |< qd fdd}|t  }r"t|| t< |t }r.t|| t< |t }r:t|| t	< |t }rFt|| t
< |t }rRt|| t< |t }r^t|| t< | S )	N0r   r"   r!   r3   c              
     sH     | pd}zt|W S  ty# } ztd|  d| |d }~ww )Nr   !Could not parse summary property  to an int: )ry   r3   rQ   )r   r|   er   r&   r'   get_propM  s   
z)_truncate_table_summary.<locals>.get_prop)r   r"   r!   r3   )TOTAL_DATA_FILESTOTAL_DELETE_FILESTOTAL_RECORDSTOTAL_FILE_SIZETOTAL_POSITION_DELETESTOTAL_EQUALITY_DELETESr"   r\   rb   rd   rZ   rf   rh   )r   r   r   r   r|   r&   r   r'   _truncate_table_summaryB  s,   
r   FOptional[Mapping[str, str]]truncate_full_tabler   c                   s   j tjtjtjhvrtdj  |r+j tjkr+ d ur+tdddd t  s;tdt	dt
dtdtdtdi d fdd}|tttd |t	ttd |t
ttd |tttd |tttd |tttd S )NzOperation not implemented: z0.10.0z0.11.0z*The truncate-full-table shouldn't be used.)deprecated_in
removed_inhelp_messager   total_propertyr"   added_propertyremoved_propertyr!   rD   c              
     s     |  }rSz(t|}|dkr | }r|t|7 }|dkr. | }r.|t|8 }W n tyF } ztd|  d| |d }~ww |dkrUt|| < d S d S d S )Nr   r   r   )ry   r3   rQ   r"   )r   r   r   previous_total_str	new_totaladdedremovedr   r   r   r&   r'   _update_totals|  s    
z1update_snapshot_summaries.<locals>._update_totals)r   r   r   )r   r"   r   r"   r   r"   r!   rD   )r   r   r.   r0   r1   rQ   r   r   r   r   r   r   r   r   r[   r\   ra   rb   rc   rd   rY   rZ   re   rf   rg   rh   )r   r   r   r   r&   r   r'   update_snapshot_summariesd  sb   
	r   ri   rV   numr3   property_namer"   rD   c                 C  s   |dkrt || |< d S d S rE   )r"   )ri   r   r   r&   r&   r'   rX     s   rX   current_snapshotOptional[Snapshot]table_metadatar   Iterable[Snapshot]c                 c  s>    | }|dur|V  |j du rdS ||j }|dusdS dS )z6Get the ancestors of and including the given snapshot.N)r   snapshot_by_id)r   r   snapshotr&   r&   r'   ancestors_of  s   
r   from_snapshotto_snapshotc                 c  sF    | durt ||D ]}|V  || kr dS q
dS t ||E dH  dS )zXGet the ancestors of and including the given snapshot between the to and from snapshots.N)r   )r   r   r   r   r&   r&   r'   ancestors_between  s   r   )r   rm   r   r   r!   rm   )NF)r   rm   r   r   r   r   r!   rm   )ri   rV   r   r3   r   r"   r!   rD   )r   r   r   r   r!   r   )r   r   r   r   r   r   r!   r   )O
__future__r   r   rq   collectionsr   enumr   typingr   r   r   r   r	   r
   r   r   pydanticr   r   r   pyiceberg.ior   pyiceberg.manifestr   r   r   r   pyiceberg.partitioningr   r   pyiceberg.schemar   pyiceberg.utils.deprecatedr   pyiceberg.table.metadatar   pyiceberg.typedefr   r[   ra   rg   rY   re   r_   rc   r\   rd   r]   rb   rh   r^   rZ   rf   r`   r   r   r   r   r   r   r   r   r   	OPERATIONr   r   r2   r"   rm   r   r   r   r   r   r   rX   r   r   r&   r&   r&   r'   <module>   sn   ([=
3#
H

