o
    uyi                     @  s
  U d dl mZ d dlZd dlmZmZ d dlmZ d dlmZ d dl	m
Z
 d dlmZmZmZmZmZmZmZmZmZ d dlmZmZ d d	lmZ d d
lmZ d dlmZmZ d dlm Z m!Z! d dl"m#Z# d dl$m%Z% d dl&m'Z'm(Z(m)Z) d dl*m+Z+ d dl,m-Z- d dl.m/Z/m0Z0 d dl1m2Z2m3Z3m4Z4m5Z5m6Z6m7Z7m8Z8m9Z9m:Z:m;Z; dZ<dZ=dZ>de?d< d Z@G dd deAeZBG dd deAeZCG dd deAeZDG dd  d eEeZFe;e8d!d"e: d#d$d%e8d&d'e: d#d(d%e8d)d*e; d#d+d%e8d,d-e6 d#d.d%e8d/d0e6 d#d1d%e8d2d3e6 d#d4e=d5e8d6d7e7d8e4 d9e6 d:d;d<d%e8d=d>e7d?e4 d@e6 d:d;dAd%e8dBdCe7dDe4 dEe6 d:d;dFd%e8dGdHe7dIe4 dJe6 d:d;dKd%e8dLdMe7dNe4 dOe2 d:d;dPd%e8dQdRe7dSe4 dTe2 d:d;dUd%e8dVdWe2 d;dXd%e8dYdZe5d[e6 d#d\d;d]d%e8d^d_e4 d;d`d%e;e8dadbe4 d#d(eBjGdce8d!d"e: d#d$d%e8d&d'e: d#d(d%e8d)d*e; d#d+d%e8d,d-e6 d#d.d%e8d/d0e6 d#d1d%e8d6d7e7d8e4 d9e6 d:d;d<d%e8d=d>e7d?e4 d@e6 d:d;dAd%e8dBdCe7dDe4 dEe6 d:d;dFd%e8dGdHe7dIe4 dJe6 d:d;dKd%e8dLdMe7dNe4 dOe2 d:d;dPd%e8dQdRe7dSe4 dTe2 d:d;dUd%e8dVdWe2 d;dXd%e8dYdZe5d[e6 d#d\d;d]d%e8dddee5dfe6 d#d\d;dgd%e8d^d_e4 d;dhd%e;e8dadbe4 d#d(eBjGdce8d!d"e: d#d$d%e8d&d'e: d#d(d%e8d)d*e; d#d+d%e8d,d-e6 d#d.d%e8d/d0e6 d#d1d%e8d6d7e7d8e4 d9e6 d:d;d<d%e8d=d>e7d?e4 d@e6 d:d;dAd%e8dBdCe7dDe4 dEe6 d:d;dFd%e8dGdHe7dIe4 dJe6 d:d;dKd%e8dLdMe7dNe4 dOe2 d:d;dPd%e8dQdRe7dSe4 dTe2 d:d;dUd%e8dVdWe2 d;dXd%e8dYdZe5d[e6 d#d\d;d]d%e8dddee5dfe6 d#d\d;dgd%e8d^d_e4 d;dhd%e8didje6 d;dkd%e8dldme: d;dnd%e8dodpe6 d;dqd%e8drdse6 d;dtd%duZHdve?dw< dd}d~ZIG dd de/ZJe-e8d de4 d#de8dde6 d#de8ddeHd d#de-e8d de4 d#de8dde6 d;de8dde6 d;de8dde6 d;de8ddeHd d#de-e8d de4 d#de8dde6 d;de8dde6 d;de8dde6 d;de8ddeHd d#dduZKdd eKL D ZMdddZNG dd de/ZOe;e8dde3 d#de8dde3 d;de8dde2 d;de8dde2 d;dZPG dd de/ZQG dd dZRdddZSe-e8dde: d#d$de8dde6 d#de8dde4 d#de8dde6 d#de8dde4 d;de8dde4 d;de8dde4 d;de8dde6 d;de8dde6 d;de8dde6 d;de8dde5dePd#dd;de8ddWe2 d;de-e8dde: d#d$de8dde6 d#de8dde4 d#de8ddbe4 d#eCjGde8dde6 d#d de8dde6 d#d de8dde6 d#de8dde4 d#de8dde4 d#de8dde4 d#de8dde6 d#de8dde6 d#de8dde6 d#de8dde5dePd#dd;de8ddWe2 d;de-e8dde: d#d$de8dde6 d#de8dde4 d#de8ddbe4 d#eCjGde8dde6 d#d de8dde6 d#d de8dde6 d#de8dde4 d#de8dde4 d#de8dde4 d#de8dde6 d#de8dde6 d#de8dde6 d#de8dde5dePd#dd;de8ddWe2 d;de8ddje6 d;dduZTde?d< dd eTL D ZUe-e8dd"e: e8dde4 ZVG dd˄ de/ZWeedQd̍dd΄ dύdddքZXdddۄZYddd߄ZZG dd deZ[G dd de[Z\G dd de[Z]dddZ^G dd deZ_G dd de_Z`G dd de_ZadddZbdS )    )annotationsN)ABCabstractmethod)copy)Enum)TracebackType)	AnyDictIteratorListLiteralOptionalTupleTypeUnion)LRUCachecachedhashkey)to_json)AVRO_CODEC_KEYAvroCompressionCodec)AvroFileAvroOutputFile)to_bytes)ValidationError)FileIO	InputFile
OutputFile)PartitionSpec)Schema)RecordTableVersion)

BinaryTypeBooleanTypeIntegerTypeListTypeLongTypeMapTypeNestedFieldPrimitiveType
StringType
StructTypei      z
Literal[2]DEFAULT_READ_VERSIONc                   @  "   e Zd ZdZdZdZd	ddZdS )
DataFileContentr      r.   returnstrc                 C     d| j  S )z>Return the string representation of the DataFileContent class.zDataFileContent.nameself r:   U/home/ubuntu/maya3_transcribe/venv/lib/python3.10/site-packages/pyiceberg/manifest.py__repr__I      zDataFileContent.__repr__Nr3   r4   )__name__
__module____qualname__DATAPOSITION_DELETESEQUALITY_DELETESr<   r:   r:   r:   r;   r1   D   
    r1   c                   @  s   e Zd ZdZdZdddZdS )	ManifestContentr   r2   r3   r4   c                 C  r5   )z>Return the string representation of the ManifestContent class.zManifestContent.r6   r8   r:   r:   r;   r<   R   r=   zManifestContent.__repr__Nr>   )r?   r@   rA   rB   DELETESr<   r:   r:   r:   r;   rF   N   s    rF   c                   @  r0   )
ManifestEntryStatusr   r2   r.   r3   r4   c                 C  r5   )zBReturn the string representation of the ManifestEntryStatus class.zManifestEntryStatus.r6   r8   r:   r:   r;   r<   \   r=   zManifestEntryStatus.__repr__Nr>   )r?   r@   rA   EXISTINGADDEDDELETEDr<   r:   r:   r:   r;   rH   W   rE   rH   c                   @  s4   e Zd ZdZdZdZdZedd	d
ZdddZ	dS )
FileFormatAVROPARQUETORCPUFFINvalueobjectr3   Union[None, str]c                 C  s(   | D ]}|j t| kr|  S qd S N)rQ   r4   upper)clsrQ   memberr:   r:   r;   	_missing_g   s
   zFileFormat._missing_r4   c                 C  r5   )z9Return the string representation of the FileFormat class.zFileFormat.r6   r8   r:   r:   r;   r<   n   r=   zFileFormat.__repr__N)rQ   rR   r3   rS   r>   )
r?   r@   rA   rM   rN   rO   rP   classmethodrX   r<   r:   r:   r:   r;   rL   a   s    rL   d   	file_pathTzLocation URI with FS schemefield_idr7   
field_typerequireddoce   file_formatz'File format name: avro, orc, or parquetf   	partition8Partition data tuple, schema based on the partition specg   record_countzNumber of records in the fileh   file_size_in_byteszTotal file size in bytesi   block_size_in_bytesz=Deprecated. Always write a default in v1. Do not write in v2.)r]   r7   r^   r_   r`   write_defaultl   column_sizesu   v   )key_idkey_typevalue_id
value_typeFz&Map of column id to total size on diskm   value_countsw   x   z7Map of column id to total count, including null and NaNn   null_value_countsy   z   z$Map of column id to null value count   nan_value_counts      z6Map of column id to number of NaN values in the column}   lower_bounds~      zMap of column id to lower bound   upper_bounds      zMap of column id to upper bound   key_metadatazEncryption key metadata blob   split_offsets   )
element_idelement_typeelement_requiredzSplittable offsets   sort_order_idzSort order ID   content)r]   r7   r^   r_   r`   initial_default   equality_ids   zBField ids used to determine row equality in equality delete files.z(ID representing sort order for this file   first_row_idz/The _row_id for the first row in the data file.   referenced_data_filezWFully qualified location (URI with FS scheme) of a data file that all deletes reference   content_offsetz0The offset in the file where the content starts.   content_size_in_bytesz\The length of a referenced content stored in the file; required if content_offset is present)r2   r.      zDict[int, StructType]DATA_FILE_TYPEpartition_typer,   format_versionr"   r3   c                   s0   t dd | jD   t  fddt| jD  S )Nc                 S  s$   g | ]}t |j|j|j|jd qS ))r]   r7   r^   r_   )r)   r]   r7   r^   r_   .0fieldr:   r:   r;   
<listcomp>  s    z,data_file_with_partition.<locals>.<listcomp>c              	     s,   g | ]}|j d krtd d dddn|qS )rc   rd   Tre   r\   r]   r)   r   data_file_partition_typer:   r;   r     s    

)r,   fieldsr   )r   r   r:   r   r;   data_file_with_partition  s   

r   c                      sP  e Zd ZU eefdA fddZedBd	d
ZedCddZedDddZ	edEddZ
edFddZedFddZedGddZedGddZedGddZedGd d!ZedHd#d$ZedHd%d&ZedId(d)ZedJd+d,ZedJd-d.ZedKd0d1Zded2< edFd3d4ZejdLd7d4ZdM fd9d:ZdFd;d<ZdNd?d@Z  ZS )ODataFile_table_format_versionr"   	argumentsr   r3   c                   s   t | }t j|fi |S rT   )r   super_bind)rV   r   r   struct	__class__r:   r;   	from_args  s   zDataFile.from_argsr1   c                 C  
   | j d S Nr   _datar8   r:   r:   r;   r        
zDataFile.contentr4   c                 C  r   Nr2   r   r8   r:   r:   r;   r[     r   zDataFile.file_pathrL   c                 C  r   Nr.   r   r8   r:   r:   r;   rb     r   zDataFile.file_formatr!   c                 C  r   Nr   r   r8   r:   r:   r;   rd     r   zDataFile.partitionintc                 C  r   N   r   r8   r:   r:   r;   rg     r   zDataFile.record_countc                 C  r   N   r   r8   r:   r:   r;   ri     r   zDataFile.file_size_in_bytesDict[int, int]c                 C  r   N   r   r8   r:   r:   r;   rn     r   zDataFile.column_sizesc                 C  r   N   r   r8   r:   r:   r;   rv     r   zDataFile.value_countsc                 C  r   N   r   r8   r:   r:   r;   rz     r   zDataFile.null_value_countsc                 C  r   N	   r   r8   r:   r:   r;   r~     r   zDataFile.nan_value_countsDict[int, bytes]c                 C  r   N
   r   r8   r:   r:   r;   r     r   zDataFile.lower_boundsc                 C  r   N   r   r8   r:   r:   r;   r     r   zDataFile.upper_boundsOptional[bytes]c                 C  r   N   r   r8   r:   r:   r;   r     r   zDataFile.key_metadataOptional[List[int]]c                 C  r   N   r   r8   r:   r:   r;   r     r   zDataFile.split_offsetsc                 C  r   N   r   r8   r:   r:   r;   r     r   zDataFile.equality_idsOptional[int]c                 C  r   )N   r   r8   r:   r:   r;   r     r   zDataFile.sort_order_id_spec_idc                 C  s   | j S rT   r   r8   r:   r:   r;   spec_id	  s   zDataFile.spec_idrQ   Nonec                 C  s
   || _ d S rT   r   r9   rQ   r:   r:   r;   r     r   r7   c                   s"   |dkrt | }t || dS )z!Assign a key/value to a DataFile.rb   N)rL   r   __setattr__)r9   r7   rQ   r   r:   r;   r     s   zDataFile.__setattr__c                 C  
   t | jS )z!Return the hash of the file path.)hashr[   r8   r:   r:   r;   __hash__  r   zDataFile.__hash__otherboolc                 C     t |tr| j|jkS dS )zxCompare the datafile with another object.

        If it is a datafile, it will compare based on the file_path.
        F)
isinstancer   r[   r9   r   r:   r:   r;   __eq__  s   zDataFile.__eq__)r   r"   r   r   r3   r   )r3   r1   r>   )r3   rL   )r3   r!   r3   r   )r3   r   )r3   r   r3   r   )r3   r   r3   r   rQ   r   r3   r   )r7   r4   rQ   r   r3   r   r   r   r3   r   )r?   r@   rA   rY   r/   r   propertyr   r[   rb   rd   rg   ri   rn   rv   rz   r~   r   r   r   r   r   r   __annotations__r   setterr   r   r   __classcell__r:   r:   r   r;   r     sV   
 
r   statusr_   r2   snapshot_id	data_filer   sequence_numberr   file_sequence_numberc                 C     i | ]	\}}||  qS r:   	as_structr   r   schemar:   r:   r;   
<dictcomp>:      r   r    c                   s   t  fddt|  jD  S )Nc                   s*   g | ]}|j d krtd d ddn|qS )r.   r   Tr   r   r   r   r:   r;   r   ?  s    z8manifest_entry_schema_with_data_file.<locals>.<listcomp>)r    MANIFEST_ENTRY_SCHEMASr   r   r   r:   r  r;   $manifest_entry_schema_with_data_file=  s
   
r  c                      s   e Zd Zeefd fddZedd	d
Zejddd
Zed ddZ	e	jd!ddZ	ed ddZ
e
jd!ddZ
ed ddZejd!ddZed"ddZejd#ddZ  ZS )$ManifestEntryr   r"   r   r   r3   c                      t  jdi |dt| iS Nr   r:   )r   r   MANIFEST_ENTRY_SCHEMAS_STRUCTrV   r   r   r   r:   r;   r   G     zManifestEntry.from_argsrH   c                 C  r   r   r   r8   r:   r:   r;   r   K  r   zManifestEntry.statusrQ   r   c                 C     || j d< d S r   r   r   r:   r:   r;   r   O     r   c                 C  r   r   r   r8   r:   r:   r;   r   S  r   zManifestEntry.snapshot_idr   c                 C  r  r   r   r   r:   r:   r;   r   W  r  c                 C  r   r   r   r8   r:   r:   r;   r   [  r   zManifestEntry.sequence_numberc                 C  r  r   r   r   r:   r:   r;   r   _  r  c                 C  r   r   r   r8   r:   r:   r;   r   c  r   z"ManifestEntry.file_sequence_numberc                 C  r  r   r   r   r:   r:   r;   r   g  r  r   c                 C  r   r   r   r8   r:   r:   r;   r   k  r   zManifestEntry.data_filec                 C  r  r   r   r   r:   r:   r;   r   o  r  )r   r"   r   r   r3   r  )r3   rH   )rQ   rH   r3   r   r   r   )r3   r   )rQ   r   r3   r   )r?   r@   rA   rY   r/   r   r   r   r   r   r   r   r   r   r:   r:   r   r;   r  F  s.    r  i  contains_nulli  contains_nani  lower_boundi  upper_boundc                      sZ   e Zd Zed fddZedddZedd
dZedddZedddZ	  Z
S )PartitionFieldSummaryr   r   r3   c                   s   t  jdi |dtiS r  )r   r   PARTITION_FIELD_SUMMARY_TYPE)rV   r   r   r:   r;   r   }     zPartitionFieldSummary.from_argsr   c                 C  r   r   r   r8   r:   r:   r;   r    r   z#PartitionFieldSummary.contains_nullOptional[bool]c                 C  r   r   r   r8   r:   r:   r;   r    r   z"PartitionFieldSummary.contains_nanr   c                 C  r   r   r   r8   r:   r:   r;   r    r   z!PartitionFieldSummary.lower_boundc                 C  r   r   r   r8   r:   r:   r;   r    r   z!PartitionFieldSummary.upper_bound)r   r   r3   r  r3   r   )r3   r  r   )r?   r@   rA   rY   r   r   r  r  r  r  r   r:   r:   r   r;   r  |  s    r  c                   @  sT   e Zd ZU ded< ded< ded< ded< ded< dddZdddZdddZdS )PartitionFieldStatsr*   _typer   _contains_null_contains_nanzOptional[Any]_min_maxiceberg_typer3   r   c                 C  s"   || _ d| _d| _d | _d | _d S )NF)r  r  r  r  r  )r9   r  r:   r:   r;   __init__  s
   
zPartitionFieldStats.__init__r  c                 C  sD   t | j| j| jd urt| j| jnd | jd urt| j| jS d S rT   )r  r  r  r  r   r  r  r8   r:   r:   r;   
to_summary  s   zPartitionFieldStats.to_summaryrQ   r   c                 C  sj   |d u r	d| _ d S t|trt|rd| _d S | jd u r%|| _|| _d S t| j|| _t	| j|| _d S )NT)
r  r   floatmathisnanr  r  r  maxminr   r:   r:   r;   update  s   



zPartitionFieldStats.updateN)r  r*   r3   r   )r3   r  )rQ   r   r3   r   )r?   r@   rA   r   r  r  r$  r:   r:   r:   r;   r    s   
 

r  specr   r   
partitionsList[Record]List[PartitionFieldSummary]c           	      C  s|   dd |  |jD }dd |D }|D ]"}t|D ]\}}t|ts*td| || }|| | qqdd |D S )Nc                 S  s   g | ]}|j qS r:   )r^   r   r:   r:   r;   r     s    z1construct_partition_summaries.<locals>.<listcomp>c                 S  s   g | ]}t |qS r:   )r  )r   r^   r:   r:   r;   r         z7Expected a primitive type for the partition field, got c                 S  s   g | ]}|  qS r:   )r  r   r:   r:   r;   r     r)  )r   r   	enumerater   r*   
ValueErrorr$  )	r%  r   r&  typesfield_statspartition_keysir^   partition_keyr:   r:   r;   construct_partition_summaries  s   
r1  i  manifest_path)r_   r`   i  manifest_lengthi  partition_spec_idi  added_snapshot_idi  added_files_counti  existing_files_counti  deleted_files_counti   added_rows_counti  existing_rows_counti  deleted_rows_counti    )r   i    )r_   r   i  i  min_sequence_numberi  zDict[int, Schema]MANIFEST_LIST_FILE_SCHEMASc                 C  r   r:   r   r   r:   r:   r;   r     r   iiposc                      sL  e Zd ZeefdA fddZedBd	d
ZedCddZedCddZ	edDddZ
edCddZejdEddZedCddZejdEddZedFddZedFddZedFd d!ZedFd"d#ZedFd$d%ZedFd&d'ZedFd(d)ZedGd+d,ZedHd.d/ZdId1d2ZdId3d4ZdJdKd:d;ZdLd=d>ZdCd?d@Z  ZS )MManifestFiler   r"   r   r   r3   c                   r  r  )r   r   r?  r	  r   r:   r;   r     r
  zManifestFile.from_argsr4   c                 C  r   r   r   r8   r:   r:   r;   r2    r   zManifestFile.manifest_pathr   c                 C  r   r   r   r8   r:   r:   r;   r3    r   zManifestFile.manifest_lengthc                 C  r   r   r   r8   r:   r:   r;   r4    r   zManifestFile.partition_spec_idrF   c                 C  r   r   r   r8   r:   r:   r;   r     r   zManifestFile.contentc                 C  r   r   r   r8   r:   r:   r;   r     r   zManifestFile.sequence_numberrQ   r   c                 C  r  r   r   r   r:   r:   r;   r     r  c                 C  r   r   r   r8   r:   r:   r;   r>    r   z ManifestFile.min_sequence_numberc                 C  r  r   r   r   r:   r:   r;   r>    r  r   c                 C  r   r   r   r8   r:   r:   r;   r5  #  r   zManifestFile.added_snapshot_idc                 C  r   r   r   r8   r:   r:   r;   r6  '  r   zManifestFile.added_files_countc                 C  r   r   r   r8   r:   r:   r;   r7  +  r   z!ManifestFile.existing_files_countc                 C  r   r   r   r8   r:   r:   r;   r8  /  r   z ManifestFile.deleted_files_countc                 C  r   r   r   r8   r:   r:   r;   r9  3  r   zManifestFile.added_rows_countc                 C  r   r   r   r8   r:   r:   r;   r:  7  r   z ManifestFile.existing_rows_countc                 C  r   r   r   r8   r:   r:   r;   r;  ;  r   zManifestFile.deleted_rows_count%Optional[List[PartitionFieldSummary]]c                 C  r   r   r   r8   r:   r:   r;   r&  ?  r   zManifestFile.partitionsr   c                 C  r   r   r   r8   r:   r:   r;   r   C  r   zManifestFile.key_metadatar   c                 C     | j d u p	| j dkS r   )r6  r8   r:   r:   r;   has_added_filesG     zManifestFile.has_added_filesc                 C  rC  r   )r7  r8   r:   r:   r;   has_existing_filesJ  rE  zManifestFile.has_existing_filesTior   discard_deletedList[ManifestEntry]c                   sj   | j}tt |tt ttdttt	dd} fdd|D W  d   S 1 s.w   Y  dS )z
        Read the manifest entries from the manifest file.

        Args:
            io: The FileIO to fetch the file.
            discard_deleted: Filter on live entries.

        Returns:
            An Iterator of manifest entries.
        )r-   r.   )r   ra   r   
read_types
read_enumsc                   s&   g | ]} r|j tjkrt|qS r:   )r   rH   rK   _inherit_from_manifest)r   entryrH  r9   r:   r;   r   _  s    z5ManifestFile.fetch_manifest_entry.<locals>.<listcomp>N)
	new_inputr2  r   r  r  r/   r   rH   rL   r1   )r9   rG  rH  
input_filereaderr:   rO  r;   fetch_manifest_entryM  s   
$z!ManifestFile.fetch_manifest_entryr   c                 C  r   )z?Return the equality of two instances of the ManifestFile class.F)r   rA  r2  r   r:   r:   r;   r   e  r  zManifestFile.__eq__c                 C  r   )z!Return the hash of manifest_path.)r   r2  r8   r:   r:   r;   r   i  r   zManifestFile.__hash__)r   r"   r   r   r3   rA  r>   r   r3   rF   r   r   )r3   rB  r   r  )T)rG  r   rH  r   r3   rI  r   )r?   r@   rA   rY   r/   r   r   r2  r3  r4  r   r   r   r>  r5  r6  r7  r8  r9  r:  r;  r&  r   rD  rF  rS  r   r   r   r:   r:   r   r;   rA    sT    


rA  )maxsizec                 C  s   t |S rT   r   )rG  manifest_listr:   r:   r;   <lambda>n  s    rW  )cachekeyrG  r   rV  r4   Tuple[ManifestFile, ...]c                 C  s   |  |}tt|S )zaRead and cache manifests from the given manifest list, returning a tuple to prevent modification.)rP  tupleread_manifest_list)rG  rV  filer:   r:   r;   
_manifestsn  s   
r^  rQ  r   Iterator[ManifestFile]c                 c  sT    t t | tt ttddtid}|E dH  W d   dS 1 s#w   Y  dS )z
    Read the manifests from the manifest list.

    Args:
        input_file: The input file where the stream can be read from.

    Returns:
        An iterator of ManifestFiles that are part of the list.
    )r-   r<  r=  rJ  N)r   rA  r?  r/   r  rF   )rQ  rR  r:   r:   r;   r\  u  s   
"r\  rN  manifestc                 C  sz   | j du r|jdur|j| _ | jdu r"|jdks| jtjkr"|j| _| jdu r6|jdks2| jtjkr6|j| _|j| j_	| S )a  
    Inherits properties from manifest file.

    The properties that will be inherited are:
    - sequence numbers
    - partition spec id.

    More information about inheriting sequence numbers: https://iceberg.apache.org/spec/#sequence-number-inheritance

    Args:
        entry: The manifest entry.
        manifest: The manifest file.

    Returns:
        The manifest entry with properties inherited.
    Nr   )
r   r5  r   r   rH   rJ   r   r4  r   r   )rN  r`  r:   r:   r;   rM    s     
rM  c                   @  s&  e Zd ZU ded< ded< ded< ded< d	ed
< ded< ded< ded< ded< ded< ded< ded< ded< ded< ded< dJd d!ZdKd"d#ZdLd*d+ZedMd-d.Ze	edNd0d1Z
e	dOd3d4ZdPd6d7ZdQd8d9ZedRd<d=ZdSd?d@ZdTdAdBZdTdCdDZdTdEdFZdTdGdHZdIS )UManifestWriterr   closedr   _specr    _schemar   _output_fileAvroOutputFile[ManifestEntry]_writerr   _snapshot_id_added_files_added_rows_existing_files_existing_rows_deleted_files_deleted_rowsr   _min_sequence_numberr'  _partitionsr   _compressionr%  r   output_filer   avro_compressionr3   r   c                 C  sX   d| _ || _|| _|| _|| _d| _d| _d| _d| _d| _	d| _
d | _g | _|| _d S )NFr   )rb  rc  rd  re  rh  ri  rj  rk  rl  rm  rn  ro  rp  rq  r9   r%  r   rr  r   rs  r:   r:   r;   r    s   
zManifestWriter.__init__c                 C  s   |   | _| j  | S )zOpen the writer.)
new_writerrg  	__enter__r8   r:   r:   r;   rv    s   

zManifestWriter.__enter__exc_typeOptional[Type[BaseException]]	exc_valueOptional[BaseException]	tracebackOptional[TracebackType]c                 C  s8   | j | j | j dkrtdd| _| j||| dS )Close the writer.r   z'An empty manifest file has been writtenTN)ri  rk  rm  r+  rb  rg  __exit__r9   rw  ry  r{  r:   r:   r;   r~    s   zManifestWriter.__exit__rF   c                 C     d S rT   r:   r8   r:   r:   r;   r        zManifestWriter.contentr"   c                 C  r  rT   r:   r8   r:   r:   r;   version     zManifestWriter.versionDict[str, str]c              
   C  s<   d| j  dt| jjddt| jjdt| jt	| j
iS )Nr   zpartition-speczutf-8zpartition-spec-idformat-version)rd  model_dump_jsonr   rc  r   decoder4   r   r  r   rq  r8   r:   r:   r;   _meta  s   

zManifestWriter._metar   c                 C  s"   t || j| jd}t||dS )N)r   r   r  )r   rc  r   rd  r  )r9   r   data_file_typer:   r:   r;   _with_partition  s   zManifestWriter._with_partitionc                 C  s(   t t | j| | j| td| jdS )Nmanifest_entry)rr  file_schemarecord_schemaschema_namemetadata)r   r  re  r  r  r/   r  r8   r:   r:   r;   ru    s   
zManifestWriter.new_writerrN  r  c                 C  r  rT   r:   r9   rN  r:   r:   r;   prepare_entry  r  zManifestWriter.prepare_entryrA  c                 C  sh   d| _ | jpt}tj| jjt| jj	| j
j|  t|| j| j| j| j| j| j| jt| j
| j| jddS )zReturn the manifest file.TN)r2  r3  r4  r   r   r>  r5  r6  r7  r8  r9  r:  r;  r&  r   )rb  ro  UNASSIGNED_SEQrA  r   re  locationlenrg  rr  rc  r   r   rh  ri  rk  rm  rj  rl  rn  r1  rd  rp  )r9   r>  r:   r:   r;   to_manifest_file  s&   

zManifestWriter.to_manifest_filec                 C  s  | j rtd|jtjkr|  jd7  _|  j|jj7  _n6|jtj	kr5|  j
d7  _
|  j|jj7  _n|jtjkrL|  jd7  _|  j|jj7  _ntd|j | j|jj |jtjksh|jtj	kr||jd ur|| jd u sx|j| jk r||j| _| j| |g | S )Nz*Cannot add entry to closed manifest writerr2   zUnknown entry: )rb  RuntimeErrorr   rH   rJ   ri  rj  r   rg   rI   rk  rl  rK   rm  rn  r+  rp  appendrd   r   ro  rg  write_blockr  r  r:   r:   r;   	add_entry&  s&   
zManifestWriter.add_entryc                 C  s2   |  tjtj| j|jtkr|jnd |jd | S )N)r   r   r   r   )	r  r  r   rH   rJ   rh  r   r  r   r  r:   r:   r;   addA  s   	zManifestWriter.addc              	   C  s(   |  tjtj| j|j|j|jd | S N)r   r   r   r   r   )	r  r  r   rH   rK   rh  r   r   r   r  r:   r:   r;   deleteM     	zManifestWriter.deletec              	   C  s(   |  tjtj|j|j|j|jd | S r  )	r  r  r   rH   rI   r   r   r   r   r  r:   r:   r;   existingY  r  zManifestWriter.existingN)r%  r   r   r    rr  r   r   r   rs  r   r3   r   )r3   ra  rw  rx  ry  rz  r{  r|  r3   r   rT  r3   r"   r3   r  )r   r"   r3   r    )r3   rf  rN  r  r3   r  )r3   rA  )rN  r  r3   ra  )r?   r@   rA   r   r  rv  r~  r   r   r   r  r  r  ru  r  r  r  r  r  r  r:   r:   r:   r;   ra    sF   
 



	
	



ra  c                      s@   e Zd Zd fddZdddZedddZdddZ  ZS )ManifestWriterV1r%  r   r   r    rr  r   r   r   rs  r   c                      t  ||||| d S rT   r   r  rt  r   r:   r;   r  g     zManifestWriterV1.__init__r3   rF   c                 C     t jS rT   rF   rB   r8   r:   r:   r;   r   q     zManifestWriterV1.contentr"   c                 C     dS r   r:   r8   r:   r:   r;   r  t  r  zManifestWriterV1.versionrN  r  c                 C  s   |S rT   r:   r  r:   r:   r;   r  x  r  zManifestWriterV1.prepare_entry
r%  r   r   r    rr  r   r   r   rs  r   rT  r  r  )	r?   r@   rA   r  r   r   r  r  r   r:   r:   r   r;   r  f  s    

r  c                      sR   e Zd Zd fddZdddZedddZed fddZdddZ  Z	S ) ManifestWriterV2r%  r   r   r    rr  r   r   r   rs  r   c                   r  rT   r  rt  r   r:   r;   r  }  r  zManifestWriterV2.__init__r3   rF   c                 C  r  rT   r  r8   r:   r:   r;   r     r  zManifestWriterV2.contentr"   c                 C  r  r   r:   r8   r:   r:   r;   r    r  zManifestWriterV2.versionr  c                   s   i t  jddiS )Nr   data)r   r  r8   r   r:   r;   r    s
   zManifestWriterV2._metarN  r  c                 C  sH   |j d u r"|jd ur|j| jkrtd|j |jtjkr"td|S )Nz=Found unassigned sequence number for an entry from snapshot: z<Only entries with status ADDED can have null sequence number)r   r   rh  r+  r   rH   rJ   r  r:   r:   r;   r    s   
zManifestWriterV2.prepare_entryr  rT  r  r  r  )
r?   r@   rA   r  r   r   r  r  r  r   r:   r:   r   r;   r  |  s    

r  rr  r   r   rs  r   c                 C  s>   | dkrt |||||S | dkrt|||||S td|  )Nr2   r.   z)Cannot write manifest for table version: )r  r  r+  )r   r%  r   rr  r   rs  r:   r:   r;   write_manifest  s
   r  c                   @  st   e Zd ZU ded< ded< ded< ded< d	ed
< ded< d'ddZd(ddZd)ddZed*d!d"Zd+d$d%Z	d&S ),ManifestListWriterr"   _format_versionr   re  r  r  List[ManifestFile]_manifest_filesr   _commit_snapshot_idzAvroOutputFile[ManifestFile]rg  r   rr  metaDict[str, Any]c                 C  s   || _ || _|| _g | _d S rT   )r  re  r  r  )r9   r   rr  r  r:   r:   r;   r    s   
zManifestListWriter.__init__r3   c                 C  s4   t t | jtt t| j d| jd| _| j  | S )zOpen the writer for writing.manifest_file)rr  r  r  r  r  )	r   rA  re  r?  r/   r  r  rg  rv  r8   r:   r:   r;   rv    s   
zManifestListWriter.__enter__rw  rx  ry  rz  r{  r|  r   c                 C  s   | j ||| dS )r}  N)rg  r~  r  r:   r:   r;   r~    s   zManifestListWriter.__exit__r  rA  c                 C  r  rT   r:   r9   r  r:   r:   r;   prepare_manifest  r  z#ManifestListWriter.prepare_manifestmanifest_filesc                   s    j  fdd|D   S )Nc                   s   g | ]}  |qS r:   )r  )r   r  r8   r:   r;   r     s    z4ManifestListWriter.add_manifests.<locals>.<listcomp>)rg  r  )r9   r  r:   r8   r;   add_manifests  s   z ManifestListWriter.add_manifestsN)r   r"   rr  r   r  r  )r3   r  r  r  rA  r3   rA  )r  r  r3   r  )
r?   r@   rA   r   r  rv  r~  r   r  r  r:   r:   r:   r;   r    s   
 



r  c                      s(   e Zd Zd fd	d
ZdddZ  ZS )ManifestListWriterV1rr  r   r   r   parent_snapshot_idr   compressionr   c                   s:   t  jd|dt|d|d urt|ndddt|id d S )Nr2   snapshot-idparent-snapshot-idnullr  1r   rr  r  )r   r  r4   r   )r9   rr  r   r  r  r   r:   r;   r    s   
zManifestListWriterV1.__init__r  rA  r3   c                 C  s   |j tjkr
td|S )Nz+Cannot store delete manifests in a v1 table)r   rF   rB   r   r  r:   r:   r;   r    s   z%ManifestListWriterV1.prepare_manifest)rr  r   r   r   r  r   r  r   r  )r?   r@   rA   r  r  r   r:   r:   r   r;   r    s    r  c                      s:   e Zd ZU ded< ded< d fddZdddZ  ZS )ManifestListWriterV2r   r  _sequence_numberrr  r   r   r  r   r   r  r   c                   sN   t  jd|dt|d|d urt|nddt|ddt|id || _|| _d S )	Nr.   r  r  r  zsequence-numberr  2r  )r   r  r4   r   r  r  )r9   rr  r   r  r   r  r   r:   r;   r    s   
zManifestListWriterV2.__init__r  rA  r3   c                 C  sp   t |}|jtkr| j|jkrtd| j d|j | j|_|jtkr6| j|jkr2td|j | j|_|S )Nz?Found unassigned sequence number for a manifest from snapshot: z != )r   r   r  r  r5  r+  r  r>  )r9   r  wrapped_manifest_filer:   r:   r;   r    s   


z%ManifestListWriterV2.prepare_manifest)
rr  r   r   r   r  r   r   r   r  r   r  )r?   r@   rA   r   r  r  r   r:   r:   r   r;   r    s
   
 r  r  r   c                 C  sR   | dkrt ||||S | dkr"|d u rtd| t|||||S td|  )Nr2   r.   z+Sequence-number is required for V2 tables: z.Cannot write manifest list for table version: )r  r+  r  )r   rr  r   r  r   rs  r:   r:   r;   write_manifest_list$  s   r  )r   r,   r   r"   r3   r,   )r   r"   r   r,   r3   r    )r%  r   r   r    r&  r'  r3   r(  )rG  r   rV  r4   r3   rZ  )rQ  r   r3   r_  )rN  r  r`  rA  r3   r  )r   r"   r%  r   r   r    rr  r   r   r   rs  r   r3   ra  )r   r"   rr  r   r   r   r  r   r   r   rs  r   r3   r  )c
__future__r   r   abcr   r   r   enumr   r,  r   typingr   r	   r
   r   r   r   r   r   r   
cachetoolsr   r   cachetools.keysr   pydantic_corer   pyiceberg.avro.codecsr   r   pyiceberg.avro.filer   r   pyiceberg.conversionsr   pyiceberg.exceptionsr   pyiceberg.ior   r   r   pyiceberg.partitioningr   pyiceberg.schemar    pyiceberg.typedefr!   r"   pyiceberg.typesr#   r$   r%   r&   r'   r(   r)   r*   r+   r,   r  DEFAULT_BLOCK_SIZEr/   r   INITIAL_SEQUENCE_NUMBERr   r1   rF   rH   r4   rL   rB   r   r   r   r  itemsr  r  r  r  r  r  r1  r?  MANIFEST_LIST_FILE_STRUCTSPOSITIONAL_DELETE_SCHEMArA  r^  r\  rM  ra  r  r  r  r  r  r  r  r:   r:   r:   r;   <module>   sb  ,0
	
R_   
2e
	.
$4p

' 8
",1