o
    ˳i)                     @  s  U d dl mZ d dlZd dlZd dlmZmZ d dlmZ d dl	m	Z	 d dl
mZ d dlmZ d dlmZmZ d d	lmZ d d
lmZ d dlmZmZ d dlmZmZ d dlmZ d dlmZ d dlm Z m!Z!m"Z" d dl#m$Z$ d dl%m&Z& d dl'm(Z(m)Z) d dl*m+Z+m,Z,m-Z-m.Z.m/Z/m0Z0m1Z1m2Z2m3Z3m4Z4 dZ5dZ6dZ7de8d< d Z9G dd de:eZ;G dd de:eZ<G dd de:eZ=G dd  d e>eZ?e4e1d!d"e3 d#d$d%e1d&d'e3 d#d(d%e1d)d*e4 d#d+d%e1d,d-e/ d#d.d%e1d/d0e/ d#d1d%e1d2d3e/ d#d4e6d5e1d6d7e0d8e- d9e/ d:d;d<d%e1d=d>e0d?e- d@e/ d:d;dAd%e1dBdCe0dDe- dEe/ d:d;dFd%e1dGdHe0dIe- dJe/ d:d;dKd%e1dLdMe0dNe- dOe+ d:d;dPd%e1dQdRe0dSe- dTe+ d:d;dUd%e1dVdWe+ d;dXd%e1dYdZe.d[e/ d#d\d;d]d%e1d^d_e- d;d`d%e4e1dadbe- d#d(e;j@dce1d!d"e3 d#d$d%e1d&d'e3 d#d(d%e1d)d*e4 d#d+d%e1d,d-e/ d#d.d%e1d/d0e/ d#d1d%e1d6d7e0d8e- d9e/ d:d;d<d%e1d=d>e0d?e- d@e/ d:d;dAd%e1dBdCe0dDe- dEe/ d:d;dFd%e1dGdHe0dIe- dJe/ d:d;dKd%e1dLdMe0dNe- dOe+ d:d;dPd%e1dQdRe0dSe- dTe+ d:d;dUd%e1dVdWe+ d;dXd%e1dYdZe.d[e/ d#d\d;d]d%e1dddee.dfe/ d#d\d;dgd%e1d^d_e- d;dhd%e4e1dadbe- d#d(e;j@dce1d!d"e3 d#d$d%e1d&d'e3 d#d(d%e1d)d*e4 d#d+d%e1d,d-e/ d#d.d%e1d/d0e/ d#d1d%e1d6d7e0d8e- d9e/ d:d;d<d%e1d=d>e0d?e- d@e/ d:d;dAd%e1dBdCe0dDe- dEe/ d:d;dFd%e1dGdHe0dIe- dJe/ d:d;dKd%e1dLdMe0dNe- dOe+ d:d;dPd%e1dQdRe0dSe- dTe+ d:d;dUd%e1dVdWe+ d;dXd%e1dYdZe.d[e/ d#d\d;d]d%e1dddee.dfe/ d#d\d;dgd%e1d^d_e- d;dhd%e1didje/ d;dkd%e1dldme3 d;dnd%e1dodpe/ d;dqd%e1drdse/ d;dtd%duZAdve8dw< dd}d~ZBG dd de(ZCe&e1d de- d#de1dde/ d#de1ddeAd d#de&e1d de- d#de1dde/ d;de1dde/ d;de1dde/ d;de1ddeAd d#de&e1d de- d#de1dde/ d;de1dde/ d;de1dde/ d;de1ddeAd d#dduZDdd eDE D ZFdddZGG dd de(ZHe4e1dde, d#de1dde, d;de1dde+ d;de1dde+ d;dZIG dd de(ZJG dd dZKdddZLe&e1dde3 d#d$de1dde/ d#de1dde- d#de1dde/ d#de1dde- d;de1dde- d;de1dde- d;de1dde/ d;de1dde/ d;de1dde/ d;de1dde.deId#dd;de1ddWe+ d;de&e1dde3 d#d$de1dde/ d#de1dde- d#de1ddbe- d#e<j@de1dde/ d#d de1dde/ d#d de1dde/ d#de1dde- d#de1dde- d#de1dde- d#de1dde/ d#de1dde/ d#de1dde/ d#de1dde.deId#dd;de1ddWe+ d;de&e1dde3 d#d$de1dde/ d#de1dde- d#de1ddbe- d#e<j@de1dde/ d#d de1dde/ d#d de1dde/ d#de1dde- d#de1dde- d#de1dde- d#de1dde/ d#de1dde/ d#de1dde/ d#de1dde.deId#dd;de1ddWe+ d;de1ddje/ d;dduZMde8d< dd eME D ZNe&e1dd"e3 e1dde- ZOG dd˄ de(ZPedQd̍ZQde8d< eR ZSdddՄZTdddڄZUdddބZVG dd deZWG dd deWZXG dd deWZYdddZZG dd deZ[G dd de[Z\G dd de[Z]dddZ^dS )    )annotationsN)ABCabstractmethod)Iterator)copy)Enum)TracebackType)AnyLiteral)LRUCache)to_json)AVRO_CODEC_KEYAvroCompressionCodec)AvroFileAvroOutputFile)to_bytes)ValidationError)FileIO	InputFile
OutputFile)PartitionSpec)Schema)RecordTableVersion)

BinaryTypeBooleanTypeIntegerTypeListTypeLongTypeMapTypeNestedFieldPrimitiveType
StringType
StructTypei      z
Literal[2]DEFAULT_READ_VERSIONc                   @  s0   e Zd ZdZdZdZdddZedd	d
ZdS )DataFileContentr      r%   returnstrc                 C     d| j  S )z>Return the string representation of the DataFileContent class.zDataFileContent.nameself r0   F/home/ubuntu/.local/lib/python3.10/site-packages/pyiceberg/manifest.py__repr__C      zDataFileContent.__repr__content_typec                 C  s0   t jt jt jd}| |vrtd|  ||  S )a  Convert REST API content type string to DataFileContent.

        Args:
            content_type: REST API content type.

        Returns:
            The corresponding DataFileContent enum value.

        Raises:
            ValueError: If the content type is unknown.
        )datazposition-deleteszequality-deleteszInvalid file content value: )r'   DATAPOSITION_DELETESEQUALITY_DELETES
ValueError)r4   mappingr0   r0   r1   from_rest_typeG   s   zDataFileContent.from_rest_typeNr)   r*   )r4   r*   r)   r'   )	__name__
__module____qualname__r6   r7   r8   r2   staticmethodr;   r0   r0   r0   r1   r'   >   s    
r'   c                   @  s   e Zd ZdZdZdddZdS )	ManifestContentr   r(   r)   r*   c                 C  r+   )z>Return the string representation of the ManifestContent class.zManifestContent.r,   r.   r0   r0   r1   r2   b   r3   zManifestContent.__repr__Nr<   )r=   r>   r?   r6   DELETESr2   r0   r0   r0   r1   rA   ^   s    rA   c                   @  s"   e Zd ZdZdZdZd	ddZdS )
ManifestEntryStatusr   r(   r%   r)   r*   c                 C  r+   )zBReturn the string representation of the ManifestEntryStatus class.zManifestEntryStatus.r,   r.   r0   r0   r1   r2   l   r3   zManifestEntryStatus.__repr__Nr<   )r=   r>   r?   EXISTINGADDEDDELETEDr2   r0   r0   r0   r1   rC   g   s
    rC   c                   @  s4   e Zd ZdZdZdZdZedd	d
ZdddZ	dS )
FileFormatAVROPARQUETORCPUFFINvalueobjectr)   
None | strc                 C  s(   | D ]}|j t| kr|  S qd S N)rL   r*   upper)clsrL   memberr0   r0   r1   	_missing_w   s
   zFileFormat._missing_r*   c                 C  r+   )z9Return the string representation of the FileFormat class.zFileFormat.r,   r.   r0   r0   r1   r2   ~   r3   zFileFormat.__repr__N)rL   rM   r)   rN   r<   )
r=   r>   r?   rH   rI   rJ   rK   classmethodrS   r2   r0   r0   r0   r1   rG   q   s    rG   d   	file_pathTzLocation URI with FS schemefield_idr-   
field_typerequireddoce   file_formatz'File format name: avro, orc, or parquetf   	partition8Partition data tuple, schema based on the partition specg   record_countzNumber of records in the fileh   file_size_in_byteszTotal file size in bytesi   block_size_in_bytesz=Deprecated. Always write a default in v1. Do not write in v2.)rX   r-   rY   rZ   r[   write_defaultl   column_sizesu   v   )key_idkey_typevalue_id
value_typeFz&Map of column id to total size on diskm   value_countsw   x   z7Map of column id to total count, including null and NaNn   null_value_countsy   z   z$Map of column id to null value count   nan_value_counts      z6Map of column id to number of NaN values in the column}   lower_bounds~      zMap of column id to lower bound   upper_bounds      zMap of column id to upper bound   key_metadatazEncryption key metadata blob   split_offsets   )
element_idelement_typeelement_requiredzSplittable offsets   sort_order_idzSort order ID   content)rX   r-   rY   rZ   r[   initial_default   equality_ids   zBField ids used to determine row equality in equality delete files.z(ID representing sort order for this file   first_row_idz/The _row_id for the first row in the data file.   referenced_data_filezWFully qualified location (URI with FS scheme) of a data file that all deletes reference   content_offsetz0The offset in the file where the content starts.   content_size_in_bytesz\The length of a referenced content stored in the file; required if content_offset is present)r(   r%      zdict[int, StructType]DATA_FILE_TYPEpartition_typer#   format_versionr   r)   c                   s0   t dd | jD   t  fddt| jD  S )Nc                 S  s$   g | ]}t |j|j|j|jd qS ))rX   r-   rY   rZ   )r    rX   r-   rY   rZ   .0fieldr0   r0   r1   
<listcomp>  s    z,data_file_with_partition.<locals>.<listcomp>c              	     s,   g | ]}|j d krtd d dddn|qS )r^   r_   Tr`   rW   rX   r    r   data_file_partition_typer0   r1   r     s    

)r#   fieldsr   )r   r   r0   r   r1   data_file_with_partition  s   

r   c                      sP  e Zd ZU eefdA fddZedBd	d
ZedCddZedDddZ	edEddZ
edFddZedFddZedGddZedGddZedGddZedGd d!ZedHd#d$ZedHd%d&ZedId(d)ZedJd+d,ZedJd-d.ZedKd0d1Zded2< edFd3d4ZejdLd7d4ZdM fd9d:ZdFd;d<ZdNd?d@Z  ZS )ODataFile_table_format_versionr   	argumentsr	   r)   c                   s   t | }t j|fi |S rO   )r   super_bind)rQ   r   r   struct	__class__r0   r1   	from_args  s   zDataFile.from_argsr'   c                 C  
   | j d S Nr   _datar.   r0   r0   r1   r        
zDataFile.contentr*   c                 C  r   Nr(   r   r.   r0   r0   r1   rV     r   zDataFile.file_pathrG   c                 C  r   Nr%   r   r.   r0   r0   r1   r]     r   zDataFile.file_formatr   c                 C  r   Nr   r   r.   r0   r0   r1   r_     r   zDataFile.partitionintc                 C  r   N   r   r.   r0   r0   r1   rb     r   zDataFile.record_countc                 C  r   N   r   r.   r0   r0   r1   rd     r   zDataFile.file_size_in_bytesdict[int, int]c                 C  r   N   r   r.   r0   r0   r1   ri     r   zDataFile.column_sizesc                 C  r   N   r   r.   r0   r0   r1   rq     r   zDataFile.value_countsc                 C  r   N   r   r.   r0   r0   r1   ru     r   zDataFile.null_value_countsc                 C  r   N	   r   r.   r0   r0   r1   ry     r   zDataFile.nan_value_countsdict[int, bytes]c                 C  r   N
   r   r.   r0   r0   r1   r}     r   zDataFile.lower_boundsc                 C  r   N   r   r.   r0   r0   r1   r     r   zDataFile.upper_boundsbytes | Nonec                 C  r   N   r   r.   r0   r0   r1   r     r   zDataFile.key_metadatalist[int] | Nonec                 C  r   N   r   r.   r0   r0   r1   r   
  r   zDataFile.split_offsetsc                 C  r   N   r   r.   r0   r0   r1   r     r   zDataFile.equality_ids
int | Nonec                 C  r   )N   r   r.   r0   r0   r1   r     r   zDataFile.sort_order_id_spec_idc                 C  s   | j S rO   r   r.   r0   r0   r1   spec_id  s   zDataFile.spec_idrL   Nonec                 C  s
   || _ d S rO   r   r/   rL   r0   r0   r1   r     r   r-   c                   s"   |dkrt | }t || dS )z!Assign a key/value to a DataFile.r]   N)rG   r   __setattr__)r/   r-   rL   r   r0   r1   r   !  s   zDataFile.__setattr__c                 C  
   t | jS )z!Return the hash of the file path.)hashrV   r.   r0   r0   r1   __hash__(  r   zDataFile.__hash__otherboolc                 C     t |tr| j|jkS dS )zxCompare the datafile with another object.

        If it is a datafile, it will compare based on the file_path.
        F)
isinstancer   rV   r/   r   r0   r0   r1   __eq__,  s   zDataFile.__eq__)r   r   r   r	   r)   r   )r)   r'   r<   )r)   rG   )r)   r   r)   r   )r)   r   )r)   r   r)   r   )r)   r   r)   r   rL   r   r)   r   )r-   r*   rL   r	   r)   r   r   r	   r)   r   )r=   r>   r?   rT   r&   r   propertyr   rV   r]   r_   rb   rd   ri   rq   ru   ry   r}   r   r   r   r   r   __annotations__r   setterr   r   r   __classcell__r0   r0   r   r1   r     sV   
 
r   statusrZ   r(   snapshot_id	data_filer   sequence_numberr   file_sequence_numberc                 C     i | ]	\}}||  qS r0   	as_structr   r   schemar0   r0   r1   
<dictcomp>J      r   r   c                   s   t  fddt|  jD  S )Nc                   s*   g | ]}|j d krtd d ddn|qS )r%   r   Tr   r   r   r   r0   r1   r   O  s    z8manifest_entry_schema_with_data_file.<locals>.<listcomp>)r   MANIFEST_ENTRY_SCHEMASr   r   r   r0   r   r1   $manifest_entry_schema_with_data_fileM  s
   
r   c                      s   e Zd Zeefd fddZedd	d
Zejddd
Zed ddZ	e	jd!ddZ	ed ddZ
e
jd!ddZ
ed ddZejd!ddZed"ddZejd#ddZ  ZS )$ManifestEntryr   r   r   r	   r)   c                      t  jdi |dt| iS Nr   r0   )r   r   MANIFEST_ENTRY_SCHEMAS_STRUCTrQ   r   r   r   r0   r1   r   W     zManifestEntry.from_argsrC   c                 C  r   r   r   r.   r0   r0   r1   r   [  r   zManifestEntry.statusrL   r   c                 C     || j d< d S r   r   r   r0   r0   r1   r   _     r   c                 C  r   r   r   r.   r0   r0   r1   r   c  r   zManifestEntry.snapshot_idr   c                 C  r  r   r   r   r0   r0   r1   r   g  r  c                 C  r   r   r   r.   r0   r0   r1   r   k  r   zManifestEntry.sequence_numberc                 C  r  r   r   r   r0   r0   r1   r   o  r  c                 C  r   r   r   r.   r0   r0   r1   r   s  r   z"ManifestEntry.file_sequence_numberc                 C  r  r   r   r   r0   r0   r1   r   w  r  r   c                 C  r   r   r   r.   r0   r0   r1   r   {  r   zManifestEntry.data_filec                 C  r  r   r   r   r0   r0   r1   r     r  )r   r   r   r	   r)   r   )r)   rC   )rL   rC   r)   r   r   r   )r)   r   )rL   r   r)   r   )r=   r>   r?   rT   r&   r   r   r   r   r   r   r   r   r   r0   r0   r   r1   r   V  s.    r   i  contains_nulli  contains_nani  lower_boundi  upper_boundc                      sZ   e Zd Zed fddZedddZedd
dZedddZedddZ	  Z
S )PartitionFieldSummaryr   r	   r)   c                   s   t  jdi |dtiS r  )r   r   PARTITION_FIELD_SUMMARY_TYPE)rQ   r   r   r0   r1   r        zPartitionFieldSummary.from_argsr   c                 C  r   r   r   r.   r0   r0   r1   r    r   z#PartitionFieldSummary.contains_nullbool | Nonec                 C  r   r   r   r.   r0   r0   r1   r	    r   z"PartitionFieldSummary.contains_nanr   c                 C  r   r   r   r.   r0   r0   r1   r
    r   z!PartitionFieldSummary.lower_boundc                 C  r   r   r   r.   r0   r0   r1   r    r   z!PartitionFieldSummary.upper_bound)r   r	   r)   r  r)   r   )r)   r  r   )r=   r>   r?   rT   r   r   r  r	  r
  r  r   r0   r0   r   r1   r    s    r  c                   @  sT   e Zd ZU ded< ded< ded< ded< ded< dddZdddZdddZdS )PartitionFieldStatsr!   _typer   _contains_null_contains_nanz
Any | None_min_maxiceberg_typer)   r   c                 C  s"   || _ d| _d| _d | _d | _d S )NF)r  r  r  r  r  )r/   r  r0   r0   r1   __init__  s
   
zPartitionFieldStats.__init__r  c                 C  sD   t | j| j| jd urt| j| jnd | jd urt| j| jS d S rO   )r  r  r  r  r   r  r  r.   r0   r0   r1   
to_summary  s   zPartitionFieldStats.to_summaryrL   r	   c                 C  sj   |d u r	d| _ d S t|trt|rd| _d S | jd u r%|| _|| _d S t| j|| _t	| j|| _d S )NT)
r  r   floatmathisnanr  r  r  maxminr   r0   r0   r1   update  s   



zPartitionFieldStats.updateN)r  r!   r)   r   )r)   r  )rL   r	   r)   r   )r=   r>   r?   r   r  r  r  r0   r0   r0   r1   r    s   
 

r  specr   r   
partitionslist[Record]list[PartitionFieldSummary]c           	      C  s|   dd |  |jD }dd |D }|D ]"}t|D ]\}}t|ts*td| || }|| | qqdd |D S )Nc                 S  s   g | ]}|j qS r0   )rY   r   r0   r0   r1   r     s    z1construct_partition_summaries.<locals>.<listcomp>c                 S  s   g | ]}t |qS r0   )r  )r   rY   r0   r0   r1   r         z7Expected a primitive type for the partition field, got c                 S  s   g | ]}|  qS r0   )r  r   r0   r0   r1   r     r$  )r   r   	enumerater   r!   r9   r  )	r   r   r!  typesfield_statspartition_keysirY   partition_keyr0   r0   r1   construct_partition_summaries  s   
r+  i  manifest_path)rZ   r[   i  manifest_lengthi  partition_spec_idi  added_snapshot_idi  added_files_counti  existing_files_counti  deleted_files_counti   added_rows_counti  existing_rows_counti  deleted_rows_counti    )r   i    )rZ   r   i  i  min_sequence_numberi  zdict[int, Schema]MANIFEST_LIST_FILE_SCHEMASc                 C  r   r0   r   r   r0   r0   r1   r     r   iiposc                      sL  e Zd ZeefdA fddZedBd	d
ZedCddZedCddZ	edDddZ
edCddZejdEddZedCddZejdEddZedFddZedFddZedFd d!ZedFd"d#ZedFd$d%ZedFd&d'ZedFd(d)ZedGd+d,ZedHd.d/ZdId1d2ZdId3d4ZdJdKd:d;ZdLd=d>ZdCd?d@Z  ZS )MManifestFiler   r   r   r	   r)   c                   r  r  )r   r   r9  r  r   r0   r1   r     r  zManifestFile.from_argsr*   c                 C  r   r   r   r.   r0   r0   r1   r,    r   zManifestFile.manifest_pathr   c                 C  r   r   r   r.   r0   r0   r1   r-    r   zManifestFile.manifest_lengthc                 C  r   r   r   r.   r0   r0   r1   r.    r   zManifestFile.partition_spec_idrA   c                 C  r   r   r   r.   r0   r0   r1   r     r   zManifestFile.contentc                 C  r   r   r   r.   r0   r0   r1   r   #  r   zManifestFile.sequence_numberrL   r   c                 C  r  r   r   r   r0   r0   r1   r   '  r  c                 C  r   r   r   r.   r0   r0   r1   r8  +  r   z ManifestFile.min_sequence_numberc                 C  r  r   r   r   r0   r0   r1   r8  /  r  r   c                 C  r   r   r   r.   r0   r0   r1   r/  3  r   zManifestFile.added_snapshot_idc                 C  r   r   r   r.   r0   r0   r1   r0  7  r   zManifestFile.added_files_countc                 C  r   r   r   r.   r0   r0   r1   r1  ;  r   z!ManifestFile.existing_files_countc                 C  r   r   r   r.   r0   r0   r1   r2  ?  r   z ManifestFile.deleted_files_countc                 C  r   r   r   r.   r0   r0   r1   r3  C  r   zManifestFile.added_rows_countc                 C  r   r   r   r.   r0   r0   r1   r4  G  r   z ManifestFile.existing_rows_countc                 C  r   r   r   r.   r0   r0   r1   r5  K  r   zManifestFile.deleted_rows_count"list[PartitionFieldSummary] | Nonec                 C  r   r   r   r.   r0   r0   r1   r!  O  r   zManifestFile.partitionsr   c                 C  r   r   r   r.   r0   r0   r1   r   S  r   zManifestFile.key_metadatar   c                 C     | j d u p	| j dkS r   )r0  r.   r0   r0   r1   has_added_filesW     zManifestFile.has_added_filesc                 C  r=  r   )r1  r.   r0   r0   r1   has_existing_filesZ  r?  zManifestFile.has_existing_filesTior   discard_deletedlist[ManifestEntry]c                   sj   | j}tt |tt ttdttt	dd} fdd|D W  d   S 1 s.w   Y  dS )z
        Read the manifest entries from the manifest file.

        Args:
            io: The FileIO to fetch the file.
            discard_deleted: Filter on live entries.

        Returns:
            An Iterator of manifest entries.
        )r$   r%   )r   r\   r   
read_types
read_enumsc                   s&   g | ]} r|j tjkrt|qS r0   )r   rC   rF   _inherit_from_manifest)r   entryrB  r/   r0   r1   r   o  s    z5ManifestFile.fetch_manifest_entry.<locals>.<listcomp>N)
	new_inputr,  r   r   r   r&   r   rC   rG   r'   )r/   rA  rB  
input_filereaderr0   rI  r1   fetch_manifest_entry]  s   
$z!ManifestFile.fetch_manifest_entryr   c                 C  r   )z?Return the equality of two instances of the ManifestFile class.F)r   r;  r,  r   r0   r0   r1   r   u  r  zManifestFile.__eq__c                 C  r   )z!Return the hash of manifest_path.)r   r,  r.   r0   r0   r1   r   y  r   zManifestFile.__hash__)r   r   r   r	   r)   r;  r<   r   r)   rA   r   r   )r)   r<  r   r  )T)rA  r   rB  r   r)   rC  r   )r=   r>   r?   rT   r&   r   r   r,  r-  r.  r   r   r   r8  r/  r0  r1  r2  r3  r4  r5  r!  r   r>  r@  rM  r   r   r   r0   r0   r   r1   r;    sT    


r;  )maxsizezLRUCache[str, ManifestFile]_manifest_cacherA  r   manifest_listr*   tuple[ManifestFile, ...]c                 C  s   |  |}tt|}g }t( |D ]}|j}|tv r#|t|  q|t|< || qW d   t|S 1 s:w   Y  t|S )u  Read manifests from a manifest list, deduplicating ManifestFile objects via cache.

    Caches individual ManifestFile objects by manifest_path. This is memory-efficient
    because consecutive manifest lists typically share most of their manifests:

        ManifestList1: [ManifestFile1]
        ManifestList2: [ManifestFile1, ManifestFile2]
        ManifestList3: [ManifestFile1, ManifestFile2, ManifestFile3]

    With per-ManifestFile caching, each ManifestFile is stored once and reused.

    Note: The manifest list file is re-read on each call. This is intentional to
    keep the implementation simple and avoid O(N²) memory growth from caching
    overlapping manifest list tuples. Re-reading is cheap since manifest lists
    are small metadata files.

    Args:
        io: FileIO instance for reading the manifest list.
        manifest_list: Path to the manifest list file.

    Returns:
        A tuple of ManifestFile objects.
    N)rJ  listread_manifest_list_manifest_cache_lockr,  rP  appendtuple)rA  rQ  filemanifest_filesresultmanifest_filer,  r0   r0   r1   
_manifests  s   

		r\  rK  r   Iterator[ManifestFile]c                 c  sT    t t | tt ttddtid}|E dH  W d   dS 1 s#w   Y  dS )z
    Read the manifests from the manifest list.

    Args:
        input_file: The input file where the stream can be read from.

    Returns:
        An iterator of ManifestFiles that are part of the list.
    )r$   r6  r7  rD  N)r   r;  r9  r&   r  rA   )rK  rL  r0   r0   r1   rT    s   
"rT  rH  manifestc                 C  sz   | j du r|jdur|j| _ | jdu r"|jdks| jtjkr"|j| _| jdu r6|jdks2| jtjkr6|j| _|j| j_	| S )a  
    Inherits properties from manifest file.

    The properties that will be inherited are:
    - sequence numbers
    - partition spec id.

    More information about inheriting sequence numbers: https://iceberg.apache.org/spec/#sequence-number-inheritance

    Args:
        entry: The manifest entry.
        manifest: The manifest file.

    Returns:
        The manifest entry with properties inherited.
    Nr   )
r   r/  r   r   rC   rE   r   r.  r   r   )rH  r^  r0   r0   r1   rG    s     
rG  c                   @  s&  e Zd ZU ded< ded< ded< ded< d	ed
< ded< ded< ded< ded< ded< ded< ded< ded< ded< ded< dJd d!ZdKd"d#ZdLd*d+ZedMd-d.Ze	edNd0d1Z
e	dOd3d4ZdPd6d7ZdQd8d9ZedRd<d=ZdSd?d@ZdTdAdBZdTdCdDZdTdEdFZdTdGdHZdIS )UManifestWriterr   closedr   _specr   _schemar   _output_fileAvroOutputFile[ManifestEntry]_writerr   _snapshot_id_added_files_added_rows_existing_files_existing_rows_deleted_files_deleted_rowsr   _min_sequence_numberr"  _partitionsr   _compressionr   r   output_filer   avro_compressionr)   r   c                 C  sX   d| _ || _|| _|| _|| _d| _d| _d| _d| _d| _	d| _
d | _g | _|| _d S )NFr   )r`  ra  rb  rc  rf  rg  rh  ri  rj  rk  rl  rm  rn  ro  r/   r   r   rp  r   rq  r0   r0   r1   r    s   
zManifestWriter.__init__c                 C  s   |   | _| j  | S )zOpen the writer.)
new_writerre  	__enter__r.   r0   r0   r1   rt    s   

zManifestWriter.__enter__exc_typetype[BaseException] | None	exc_valueBaseException | None	tracebackTracebackType | Nonec                 C  s8   | j | j | j dkrtdd| _| j||| dS )Close the writer.r   z'An empty manifest file has been writtenTN)rg  ri  rk  r9   r`  re  __exit__r/   ru  rw  ry  r0   r0   r1   r|    s   zManifestWriter.__exit__rA   c                 C     d S rO   r0   r.   r0   r0   r1   r   &     zManifestWriter.contentr   c                 C  r~  rO   r0   r.   r0   r0   r1   version)     zManifestWriter.versiondict[str, str]c              
   C  s<   d| j  dt| jjddt| jjdt| jt	| j
iS )Nr   zpartition-speczutf-8zpartition-spec-idformat-version)rb  model_dump_jsonr   ra  r   decoder*   r   r  r   ro  r.   r0   r0   r1   _meta-  s   

zManifestWriter._metar   c                 C  s"   t || j| jd}t||dS )N)r   r   r   )r   ra  r   rb  r   )r/   r   data_file_typer0   r0   r1   _with_partition7  s   zManifestWriter._with_partitionc                 C  s(   t t | j| | j| td| jdS )Nmanifest_entry)rp  file_schemarecord_schemaschema_namemetadata)r   r   rc  r  r  r&   r  r.   r0   r0   r1   rs  =  s   
zManifestWriter.new_writerrH  r   c                 C  r~  rO   r0   r/   rH  r0   r0   r1   prepare_entryF  r  zManifestWriter.prepare_entryr;  c                 C  sh   d| _ | jpt}tj| jjt| jj	| j
j|  t|| j| j| j| j| j| j| jt| j
| j| jddS )zReturn the manifest file.TN)r,  r-  r.  r   r   r8  r/  r0  r1  r2  r3  r4  r5  r!  r   )r`  rm  UNASSIGNED_SEQr;  r   rc  locationlenre  rp  ra  r   r   rf  rg  ri  rk  rh  rj  rl  r+  rb  rn  )r/   r8  r0   r0   r1   to_manifest_fileI  s&   

zManifestWriter.to_manifest_filec                 C  s  | j rtd|jtjkr|  jd7  _|  j|jj7  _n6|jtj	kr5|  j
d7  _
|  j|jj7  _n|jtjkrL|  jd7  _|  j|jj7  _ntd|j | j|jj |jtjksh|jtj	kr||jd ur|| jd u sx|j| jk r||j| _| j| |g | S )Nz*Cannot add entry to closed manifest writerr(   zUnknown entry: )r`  RuntimeErrorr   rC   rE   rg  rh  r   rb   rD   ri  rj  rF   rk  rl  r9   rn  rV  r_   r   rm  re  write_blockr  r  r0   r0   r1   	add_entry`  s&   
zManifestWriter.add_entryc                 C  s2   |  tjtj| j|jtkr|jnd |jd | S )N)r   r   r   r   )	r  r   r   rC   rE   rf  r   r  r   r  r0   r0   r1   add{  s   	zManifestWriter.addc              	   C  s(   |  tjtj| j|j|j|jd | S N)r   r   r   r   r   )	r  r   r   rC   rF   rf  r   r   r   r  r0   r0   r1   delete     	zManifestWriter.deletec              	   C  s(   |  tjtj|j|j|j|jd | S r  )	r  r   r   rC   rD   r   r   r   r   r  r0   r0   r1   existing  r  zManifestWriter.existingN)r   r   r   r   rp  r   r   r   rq  r   r)   r   )r)   r_  ru  rv  rw  rx  ry  rz  r)   r   rN  r)   r   r)   r  )r   r   r)   r   )r)   rd  rH  r   r)   r   )r)   r;  )rH  r   r)   r_  )r=   r>   r?   r   r  rt  r|  r   r   r   r  r  r  rs  r  r  r  r  r  r  r0   r0   r0   r1   r_    sF   
 



	
	



r_  c                      s@   e Zd Zd fddZdddZedddZdddZ  ZS )ManifestWriterV1r   r   r   r   rp  r   r   r   rq  r   c                      t  ||||| d S rO   r   r  rr  r   r0   r1   r       zManifestWriterV1.__init__r)   rA   c                 C     t jS rO   rA   r6   r.   r0   r0   r1   r        zManifestWriterV1.contentr   c                 C     dS r   r0   r.   r0   r0   r1   r    r  zManifestWriterV1.versionrH  r   c                 C  s   |S rO   r0   r  r0   r0   r1   r    r  zManifestWriterV1.prepare_entry
r   r   r   r   rp  r   r   r   rq  r   rN  r  r  )	r=   r>   r?   r  r   r   r  r  r   r0   r0   r   r1   r    s    

r  c                      sR   e Zd Zd fddZdddZedddZed fddZdddZ  Z	S ) ManifestWriterV2r   r   r   r   rp  r   r   r   rq  r   c                   r  rO   r  rr  r   r0   r1   r    r  zManifestWriterV2.__init__r)   rA   c                 C  r  rO   r  r.   r0   r0   r1   r     r  zManifestWriterV2.contentr   c                 C  r  r   r0   r.   r0   r0   r1   r    r  zManifestWriterV2.versionr  c                   s   i t  jddiS )Nr   r5   )r   r  r.   r   r0   r1   r    s
   zManifestWriterV2._metarH  r   c                 C  sH   |j d u r"|jd ur|j| jkrtd|j |jtjkr"td|S )Nz=Found unassigned sequence number for an entry from snapshot: z<Only entries with status ADDED can have null sequence number)r   r   rf  r9   r   rC   rE   r  r0   r0   r1   r    s   
zManifestWriterV2.prepare_entryr  rN  r  r  r  )
r=   r>   r?   r  r   r   r  r  r  r   r0   r0   r   r1   r    s    

r  rp  r   r   rq  r   c                 C  s>   | dkrt |||||S | dkrt|||||S td|  )Nr(   r%   z)Cannot write manifest for table version: )r  r  r9   )r   r   r   rp  r   rq  r0   r0   r1   write_manifest  s
   r  c                   @  st   e Zd ZU ded< ded< ded< ded< d	ed
< ded< d'ddZd(ddZd)ddZed*d!d"Zd+d$d%Z	d&S ),ManifestListWriterr   _format_versionr   rc  r  r  list[ManifestFile]_manifest_filesr   _commit_snapshot_idzAvroOutputFile[ManifestFile]re  r   rp  metadict[str, Any]c                 C  s   || _ || _|| _g | _d S rO   )r  rc  r  r  )r/   r   rp  r  r0   r0   r1   r    s   
zManifestListWriter.__init__r)   c                 C  s4   t t | jtt t| j d| jd| _| j  | S )zOpen the writer for writing.r[  )rp  r  r  r  r  )	r   r;  rc  r9  r&   r  r  re  rt  r.   r0   r0   r1   rt    s   
zManifestListWriter.__enter__ru  rv  rw  rx  ry  rz  r   c                 C  s   | j ||| dS )r{  N)re  r|  r}  r0   r0   r1   r|    s   zManifestListWriter.__exit__r[  r;  c                 C  r~  rO   r0   r/   r[  r0   r0   r1   prepare_manifest  r  z#ManifestListWriter.prepare_manifestrY  c                   s    j  fdd|D   S )Nc                   s   g | ]}  |qS r0   )r  )r   r[  r.   r0   r1   r     s    z4ManifestListWriter.add_manifests.<locals>.<listcomp>)re  r  )r/   rY  r0   r.   r1   add_manifests  s   z ManifestListWriter.add_manifestsN)r   r   rp  r   r  r  )r)   r  r  r[  r;  r)   r;  )rY  r  r)   r  )
r=   r>   r?   r   r  rt  r|  r   r  r  r0   r0   r0   r1   r    s   
 



r  c                      s(   e Zd Zd fd	d
ZdddZ  ZS )ManifestListWriterV1rp  r   r   r   parent_snapshot_idr   compressionr   c                   s:   t  jd|dt|d|d urt|ndddt|id d S )Nr(   snapshot-idparent-snapshot-idnullr  1r   rp  r  )r   r  r*   r   )r/   rp  r   r  r  r   r0   r1   r    s   
zManifestListWriterV1.__init__r[  r;  r)   c                 C  s   |j tjkr
td|S )Nz+Cannot store delete manifests in a v1 table)r   rA   r6   r   r  r0   r0   r1   r  '  s   z%ManifestListWriterV1.prepare_manifest)rp  r   r   r   r  r   r  r   r  )r=   r>   r?   r  r  r   r0   r0   r   r1   r    s    r  c                      s:   e Zd ZU ded< ded< d fddZdddZ  ZS )ManifestListWriterV2r   r  _sequence_numberrp  r   r   r  r   r   r  r   c                   sN   t  jd|dt|d|d urt|nddt|ddt|id || _|| _d S )	Nr%   r  r  r  zsequence-numberr  2r  )r   r  r*   r   r  r  )r/   rp  r   r  r   r  r   r0   r1   r  1  s   
zManifestListWriterV2.__init__r[  r;  r)   c                 C  sp   t |}|jtkr| j|jkrtd| j d|j | j|_|jtkr6| j|jkr2td|j | j|_|S )Nz?Found unassigned sequence number for a manifest from snapshot: z != )r   r   r  r  r/  r9   r  r8  )r/   r[  wrapped_manifest_filer0   r0   r1   r  G  s$   


z%ManifestListWriterV2.prepare_manifest)
rp  r   r   r   r  r   r   r   r  r   r  )r=   r>   r?   r   r  r  r   r0   r0   r   r1   r  -  s
   
 r  r  r   c                 C  sR   | dkrt ||||S | dkr"|d u rtd| t|||||S td|  )Nr(   r%   z+Sequence-number is required for V2 tables: z.Cannot write manifest list for table version: )r  r9   r  )r   rp  r   r  r   rq  r0   r0   r1   write_manifest_list_  s   r  )r   r#   r   r   r)   r#   )r   r   r   r#   r)   r   )r   r   r   r   r!  r"  r)   r#  )rA  r   rQ  r*   r)   rR  )rK  r   r)   r]  )rH  r   r^  r;  r)   r   )r   r   r   r   r   r   rp  r   r   r   rq  r   r)   r_  )r   r   rp  r   r   r   r  r   r   r   rq  r   r)   r  )_
__future__r   r  	threadingabcr   r   collections.abcr   r   enumr   r&  r   typingr	   r
   
cachetoolsr   pydantic_corer   pyiceberg.avro.codecsr   r   pyiceberg.avro.filer   r   pyiceberg.conversionsr   pyiceberg.exceptionsr   pyiceberg.ior   r   r   pyiceberg.partitioningr   pyiceberg.schemar   pyiceberg.typedefr   r   pyiceberg.typesr   r   r   r   r   r   r    r!   r"   r#   r  DEFAULT_BLOCK_SIZEr&   r   INITIAL_SEQUENCE_NUMBERr   r'   rA   rC   r*   rG   r6   r   r   r   r   itemsr  r   r   r  r  r  r+  r9  MANIFEST_LIST_FILE_STRUCTSPOSITIONAL_DELETE_SCHEMAr;  rP  RLockrU  r\  rT  rG  r_  r  r  r  r  r  r  r  r0   r0   r0   r1   <module>   sf  0 	
R_   
2e
	.
$4s

(
' 8
",2