o
    <it                     @  sV  d dl mZ d dlZd dlZd dlmZ d dlmZ d dlmZm	Z	m
Z
 d dlmZmZmZmZ d dlmZ d dlmZ d dlmZmZmZ d d	lmZmZ d d
lmZmZ d dlmZmZm Z  d dl!m"Z"m#Z#m$Z$ d dl%m&Z&m'Z'm(Z(m)Z) d dl*m+Z+m,Z, d dl-m.Z.m/Z/m0Z0m1Z1 d dl2m3Z3m4Z4m5Z5 d dl6m7Z7 d dl8m9Z9 dZ:dZ;dZ<dZ=dZ>dZ?dZ@dZAdZBdZCdZDdZEdZFd ZGd ZHd ZId ZJdOd$d%ZKdPd(d)ZLdPd*d+ZMdPd,d-ZNdPd.d/ZOG d0d1 d1e/ZPdQd3d4ZQG d5d6 d6ePe/ZRG d7d8 d8ePe/ZSG d9d: d:ePe/ZTeeReSB eTB ed;d<f ZUe.dfdRdIdJZVG dKdL dLe0eU ZWG dMdN dNZXdS )S    )annotationsN)Iterable)copy)	AnnotatedAnyLiteral)Fieldfield_serializerfield_validatormodel_validator)ValidationError)PARTITION_FIELD_ID_STARTPartitionSpecassign_fresh_partition_spec_ids)Schemaassign_fresh_schema_ids)NameMappingparse_mapping_from_json)MAIN_BRANCHSnapshotRefSnapshotRefType)MetadataLogEntrySnapshotSnapshotLogEntry)UNSORTED_SORT_ORDERUNSORTED_SORT_ORDER_ID	SortOrderassign_fresh_sort_order_ids)PartitionStatisticsFileStatisticsFile)
EMPTY_DICTIcebergBaseModelIcebergRootModel
Properties)NestedField
StructTypetransform_dict_value_to_str)Config)datetime_to_milliscurrent-snapshot-idcurrent-schema-idschemasdefault-spec-idpartition-specpartition-specssort-orderslast-partition-idzlast-assigned-field-idrefszspec-idfield-idfields   datadict[str, Any]returnc                 C  s    t | v r| t  dkrd| t < | S )zRun before validation.N)CURRENT_SNAPSHOT_ID)r5    r:   U/home/ubuntu/veenaModal/venv/lib/python3.10/site-packages/pyiceberg/table/metadata.pycleanup_snapshot_idH   s   r<   table_metadataTableMetadatac                 C  s4   | j }| jD ]}|j|kr|   S qtd| d)z>Check if the current-schema-id is actually present in schemas.zcurrent-schema-id z can't be found in the schemas)current_schema_idr+   	schema_idr   )r=   r?   schemar:   r:   r;   check_schemasQ   s   

rB   c                 C  s8   | j }| j}|D ]}|j|kr|   S qtd| d)z;Check if the default-spec-id is present in partition-specs.zdefault-spec-id z can't be found)default_spec_idpartition_specsspec_idr   )r=   rC   rD   specr:   r:   r;   check_partition_specs\   s   
rG   c                 C  sH   | j }|tkr"| j}|D ]}|j|kr|   S qtd| d| | S )z=Check if the default_sort_order_id is present in sort-orders.zdefault-sort-order-id z can't be found in )default_sort_order_idr   sort_ordersorder_idr   )r=   rH   rI   
sort_orderr:   r:   r;   check_sort_ordersh   s   
rL   c                 C  s.   | j durt| jvrt| j tjd| jt< | S )zSet the main branch if missing.N)snapshot_idsnapshot_ref_type)current_snapshot_idr   r1   r   r   BRANCHr=   r:   r:   r;   construct_refsv   s   

rR   c                   @  sf  e Zd ZU dZe Zded< 	 edejdZ	ded< 	 edd	d
 dZ
ded< 	 eddZded< 	 eedZded< 	 ededZded< 	 ededZded< 	 ededZded< 	 edddZded< 	 eedZded < 	 ed!ddZded"< 	 eedZd#ed$< 	 ed%edZd&ed'< 	 ed(edZd)ed*< 	 ed+edZd,ed-< 	 ed.edZded/< 	 eedZd0ed1< 	 eedZd2ed3< 	 ed4edZd5ed6< 	 e d d7d8dkd;d<Z!dld?d@Z"dmdCdDZ#dndFdGZ$dodIdJZ%dpdLdMZ&dqdOdPZ'drdsdTdUZ(dtdVdWZ)dudZd[Z*dvd\d]Z+dtd^d_Z,dwdadbZ-dxdedfZ.e/d"dydgdhZ0e/d$dzdidjZ1dS ){TableMetadataCommonFieldszMetadata for an Iceberg table as specified in the Apache Iceberg spec.

    https://iceberg.apache.org/spec/#iceberg-table-spec
    strlocationz
table-uuidaliasdefault_factoryz	uuid.UUID
table_uuidzlast-updated-msc                   C  s   t tj  S N)r(   datetimenow
astimezoner:   r:   r:   r;   <lambda>   s    z"TableMetadataCommonFields.<lambda>intlast_updated_mszlast-column-idrW   last_column_id)rX   zlist[Schema]r+   r*   rW   defaultr?   r.   zlist[PartitionSpec]rD   r,   rC   r0   N
int | Nonelast_partition_iddict[str, str]
propertiesr)   rO   list[Snapshot]	snapshotszsnapshot-logzlist[SnapshotLogEntry]snapshot_logzmetadata-logzlist[MetadataLogEntry]metadata_logr/   zlist[SortOrder]rI   zdefault-sort-order-idrH   zdict[str, SnapshotRef]r1   zlist[StatisticsFile]
statisticszpartition-statisticszlist[PartitionStatisticsFile]partition_statisticsbeforemoder#   r7   c                 C     t |S rZ   )r&   )clsrh   r:   r:   r;   &transform_properties_dict_value_to_str      z@TableMetadataCommonFields.transform_properties_dict_value_to_strrM   Snapshot | Nonec                      t  fdd| jD dS )z Get the snapshot by snapshot_id.c                 3      | ]
}|j  kr|V  qd S rZ   rM   .0snapshotry   r:   r;   	<genexpr>       z;TableMetadataCommonFields.snapshot_by_id.<locals>.<genexpr>N)nextrj   selfrM   r:   ry   r;   snapshot_by_id      z(TableMetadataCommonFields.snapshot_by_idr@   Schema | Nonec                   rw   )zGet the schema by schema_id.c                 3  rx   rZ   r@   r{   rA   r   r:   r;   r}      r~   z9TableMetadataCommonFields.schema_by_id.<locals>.<genexpr>Nr   r+   )r   r@   r:   r   r;   schema_by_id   r   z&TableMetadataCommonFields.schema_by_idr   c                      t  fdd jD S )z!Return the schema for this table.c                 3       | ]}|j  jkr|V  qd S rZ   )r@   r?   r   r   r:   r;   r}          z3TableMetadataCommonFields.schema.<locals>.<genexpr>r   r   r:   r   r;   rA         z TableMetadataCommonFields.schemaNameMapping | Nonec                 C  s   | j d }rt|S dS )z(Return the table's field-id NameMapping.zschema.name-mapping.defaultN)rh   getr   )r   name_mapping_jsonr:   r:   r;   name_mapping   s   z&TableMetadataCommonFields.name_mappingr   c                   r   )z(Return the partition spec of this table.c                 3  r   rZ   )rE   rC   r{   rF   r   r:   r;   r}     r   z1TableMetadataCommonFields.spec.<locals>.<genexpr>)r   rD   r   r:   r   r;   rF     r   zTableMetadataCommonFields.specdict[int, PartitionSpec]c                 C  s   dd | j D S )z-Return a dict the partition specs this table.c                 S  s   i | ]}|j |qS r:   )rE   r   r:   r:   r;   
<dictcomp>      z3TableMetadataCommonFields.specs.<locals>.<dictcomp>)rD   r   r:   r:   r;   specs     zTableMetadataCommonFields.specsspec_idsIterable[int] | Noner%   c           
   	     s   |    |du r  n fdd|D }dd |D }|  }g }t|D ]}|| }||j}|j|}	|t	|j
|j|	dd q&t| S )a  Produce a struct of the combined PartitionSpecs.

        The partition fields should be optional: Partition fields may be added later,
        in which case not all files would have the result field, and it may be null.

        Args:
            spec_ids: Optional iterable of spec IDs to include. When not provided,
                all table specs are used.

        :return: A StructType that represents the combined PartitionSpecs of the table
        Nc                   s   g | ]
}| v r | qS r:   r:   )r{   rE   r   r:   r;   
<listcomp>      z:TableMetadataCommonFields.specs_struct.<locals>.<listcomp>c                 S  s    i | ]}|j D ]}|j|qqS r:   )r3   field_idr{   rF   fieldr:   r:   r;   r     s     z:TableMetadataCommonFields.specs_struct.<locals>.<dictcomp>F)r   nametyperequired)r   valuesrA   sorted	find_type	source_id	transformresult_typeappendr$   r   r   r%   )
r   r   selected_specsstruct_fieldsrA   nested_fieldsr   r   source_typer   r:   r   r;   specs_struct
  s   "z&TableMetadataCommonFields.specs_structc                 C  s,   t  }| |durt  }| |dus
|S )z-Generate a new snapshot-id that's not in use.N)_generate_snapshot_idr   r   r:   r:   r;   new_snapshot_id(  s
   z)TableMetadataCommonFields.new_snapshot_idr   
str | Nonec                 C  s,   |du rt }| j| }r| |jS dS )zUReturn the snapshot referenced by the given name or null if no such reference exists.N)r   r1   r   r   rM   )r   r   refr:   r:   r;   snapshot_by_name0  s
   z*TableMetadataCommonFields.snapshot_by_namec                 C  s   | j dur| | j S dS )zQGet the current snapshot for this table, or None if there is no current snapshot.N)rO   r   r   r:   r:   r;   current_snapshot8  s   
z*TableMetadataCommonFields.current_snapshotc                 C  s   | j dkr
| jd S tS )N   )format_versionlast_sequence_numberINITIAL_SEQUENCE_NUMBERr   r:   r:   r;   next_sequence_number>  s   z.TableMetadataCommonFields.next_sequence_numberr   c                 C  s   |  | jptS )z\Get the current sort order for this table, or UNSORTED_SORT_ORDER if there is no sort order.)sort_order_by_idrH   r   r   r:   r:   r;   rK   A  r   z$TableMetadataCommonFields.sort_ordersort_order_idSortOrder | Nonec                   rw   )z$Get the sort order by sort_order_id.c                 3  rx   rZ   )rJ   )r{   rK   r   r:   r;   r}   G  r~   z=TableMetadataCommonFields.sort_order_by_id.<locals>.<genexpr>N)r   rI   )r   r   r:   r   r;   r   E  r   z*TableMetadataCommonFields.sort_order_by_idc                 C  s   |d u rt  drdS |S )Nzlegacy-current-snapshot-idr8   )r'   get_bool)r   rO   r:   r:   r;   serialize_current_snapshot_idI  s   z7TableMetadataCommonFields.serialize_current_snapshot_idc                 C  s   | j dkrdd |D S |S )Nr   c                 S  s   g | ]
}|j d didqS )sequence_numberN)update)
model_copyrz   r:   r:   r;   r   S  r   zATableMetadataCommonFields.serialize_snapshots.<locals>.<listcomp>)r   )r   rj   r:   r:   r;   serialize_snapshotsO  s   
z-TableMetadataCommonFields.serialize_snapshots)rh   r#   r7   rg   )rM   r_   r7   rv   )r@   r_   r7   r   )r7   r   )r7   r   )r7   r   )r7   r   rZ   )r   r   r7   r%   r7   r_   )r   r   r7   rv   )r7   rv   )r7   r   )r   r_   r7   r   )rO   re   r7   re   )rj   ri   r7   ri   )2__name__
__module____qualname____doc__r   rU   __annotations__uuiduuid4rY   r`   rb   listr+   DEFAULT_SCHEMA_IDr?   rD   INITIAL_SPEC_IDrC   rf   dictrh   rO   rj   rk   rl   rI   r   rH   r1   rm   rn   r
   rt   r   r   rA   r   rF   r   r   r   r   r   r   rK   r   r	   r   r   r:   r:   r:   r;   rS      sz   
 












rS   r_   c                  C  s`   t  } tjtdd t| jdd | jdd ddD ddd	}|dkr*|}|S |d
 }|S )zIGenerate a new Snapshot ID from a UUID.

    Returns: An 64 bit long
    c                 s  s    | ]	\}}||A V  qd S rZ   r:   )r{   lhsrhsr:   r:   r;   r}   ^  s    z(_generate_snapshot_id.<locals>.<genexpr>r         T)strictlittle)	byteordersignedr8   )r   r   r_   
from_bytesbyteszip)rnd_uuidrM   r:   r:   r;   r   W  s   .r   c                   @  s   e Zd ZU dZeddd%ddZed	dd&d
dZeddd%ddZeddd%ddZeddd%ddZ	eddd%ddZ
d'ddZedddZded< 	 eddZded< 	 ed ed!Zd"ed#< d$S )(TableMetadataV1zRepresents version 1 of the Table Metadata.

    More information about the specification:
    https://iceberg.apache.org/spec/#version-1-analytic-data-tables
    ro   rp   r5   r6   r7   c                 C  rr   rZ   r<   rs   r5   r:   r:   r;   r<   u  ru   z#TableMetadataV1.cleanup_snapshot_idafterc                 C     t | S rZ   rR   r   r:   r:   r;   rR   y  ru   zTableMetadataV1.construct_refsc                 C  s0   | d}t|trd|vrd|vrt|d< |S )zSet default values to be compatible with the format v2.

        Args:
            data: The raw arguments when initializing a V1 TableMetadata.

        Returns:
            The TableMetadata with the defaults applied.
        rA   r@   z	schema-id)r   
isinstancer   r   rs   r5   rA   r:   r:   r;   set_v2_compatible_defaults}  s
   

z*TableMetadataV1.set_v2_compatible_defaultsc                 C  s    | ds|d }|g|d< |S )a  Convert the schema into schemas.

        For V1 schemas is optional, and if they aren't set, we'll set them
        in this validator. This was we can always use the schemas when reading
        table metadata, and we don't have to worry if it is a v1 or v2 format.

        Args:
            data: The raw data after validation, meaning that the aliases are applied.

        Returns:
            The TableMetadata with the schemas set, if not provided.
        r+   rA   )r   r   r:   r:   r;   construct_schemas  s   

z!TableMetadataV1.construct_schemasc                 C  s   | ts?| tdur|t }ttt|ig|t< t|t< n!| ddur7|d }ttt|ig|t< t|t< ndddg|t< tdd |t D td d	|t	< |S )
a  Convert the partition_spec into partition_specs.

        For V1 partition_specs is optional, and if they aren't set, we'll set them
        in this validator. This was we can always use the partition_specs when reading
        table metadata, and we don't have to worry if it is a v1 or v2 format.

        Args:
            data: The raw data after validation, meaning that the aliases are applied.

        Returns:
            The TableMetadata with the partition_specs set, if not provided.
        Npartition_specr   r:   )r2   r3   c                 S  s$   g | ]}|t  D ]}|tqqS r:   )FIELDSr   FIELD_IDr   r:   r:   r;   r     s   $ z=TableMetadataV1.construct_partition_specs.<locals>.<listcomp>r   )rd   )
r   PARTITION_SPECSPARTITION_SPECSPEC_IDr   r   DEFAULT_SPEC_IDmaxr   LAST_PARTITION_ID)rs   r5   r3   r:   r:   r;   construct_partition_specs  s   



z)TableMetadataV1.construct_partition_specsc                 C  s"   | ts| dstg|t< |S )aY  Set the sort_orders if not provided.

        For V1 sort_orders is optional, and if they aren't set, we'll set them
        in this validator.

        Args:
            data: The raw data after validation, meaning that the aliases are applied.

        Returns:
            The TableMetadata with the sort_orders set, if not provided.
        rI   )r   SORT_ORDERSr   r   r:   r:   r;   set_sort_orders  s   
zTableMetadataV1.set_sort_ordersTableMetadataV2c                 C  s   t |  }d|d< t|S )Nr4   format-version)r   
model_dumpr   model_validate)r   metadatar:   r:   r;   to_v2  s   
zTableMetadataV1.to_v2r   r   rc   z
Literal[1]r   rA   ra   r   schema_r-   rV   zlist[dict[str, Any]]r   Nr5   r6   r7   r6   )r7   r   )r7   r   )r   r   r   r   r   r<   rR   r   r   r   r   r   r   r   r   r   r   r   r:   r:   r:   r;   r   g  s*   
 "
r   c                   @  s   e Zd ZU dZedddddZed	ddddZed	ddddZed	ddddZed	ddddZ	e
dddZded< 	 e
dedZded< dS )r   a>  Represents version 2 of the Table Metadata.

    This extends Version 1 with row-level deletes, and adds some additional
    information to the schema, such as all the historical schemas, partition-specs,
    sort-orders.

    For more information:
    https://iceberg.apache.org/spec/#version-2-row-level-deletes
    ro   rp   r5   r6   r7   c                 C  rr   rZ   r   r   r:   r:   r;   r<     ru   z#TableMetadataV2.cleanup_snapshot_idr   r>   c                 C  r   rZ   rB   r   r:   r:   r;   rB     ru   zTableMetadataV2.check_schemasc                 C  r   rZ   rG   r   r:   r:   r;   rG     ru   z%TableMetadataV2.check_partition_specsc                 C  r   rZ   rL   r   r:   r:   r;   rL     ru   z!TableMetadataV2.check_sort_ordersc                 C  r   rZ   r   r   r:   r:   r;   rR     ru   zTableMetadataV2.construct_refsr   r4   rc   z
Literal[2]r   last-sequence-numberr_   r   Nr   r7   r>   )r   r   r   r   r   r<   rB   rG   rL   rR   r   r   r   r   r   r:   r:   r:   r;   r     s    
 
r   c                   @  s   e Zd ZU dZeddd*ddZed	dd+ddZed	dd+ddZed	dd+ddZed	dd+ddZ	e
dddZded< 	 e
dedZded< 	 e
dddZded< 	 d,d-d(d)ZdS ).TableMetadataV3a  Represents version 3 of the Table Metadata.

    Version 3 of the Iceberg spec extends data types and existing metadata structures to add new capabilities:

        - New data types: nanosecond timestamp(tz), unknown
        - Default value support for columns
        - Multi-argument transforms for partitioning and sorting
        - Row Lineage tracking
        - Binary deletion vectors

    For more information:
    https://iceberg.apache.org/spec/?column-projection#version-3-extended-types-and-capabilities
    ro   rp   r5   r6   r7   c                 C  rr   rZ   r   r   r:   r:   r;   r<   "  ru   z#TableMetadataV3.cleanup_snapshot_idr   r>   c                 C  r   rZ   r   r   r:   r:   r;   rB   &  ru   zTableMetadataV3.check_schemasc                 C  r   rZ   r   r   r:   r:   r;   rG   *  ru   z%TableMetadataV3.check_partition_specsc                 C  r   rZ   r   r   r:   r:   r;   rL   .  ru   z!TableMetadataV3.check_sort_ordersc                 C  r   rZ   r   r   r:   r:   r;   rR   2  ru   zTableMetadataV3.construct_refsr      rc   z
Literal[3]r   r   r_   r   znext-row-idNre   next_row_idTexclude_noneboolexclude
Any | Noneby_aliaskwargsr   rT   c                 K  s   t d)NzZWriting V3 is not yet supported, see: https://github.com/apache/iceberg-python/issues/1551)NotImplementedError)r   r  r  r  r  r:   r:   r;   model_dump_jsonA  s   zTableMetadataV3.model_dump_jsonr   r   )TNT)
r  r  r  r  r  r  r  r   r7   rT   )r   r   r   r   r   r<   rB   rG   rL   rR   r   r   r   r   r   r  r	  r:   r:   r:   r;   r     s&   
 r   r   )discriminatorrA   r   r   r   rK   r   rU   rT   rh   r#   rY   uuid.UUID | Nonec                 C  s  ddl m} t||j|j}| | t| }t|| |}	t	|| |}
|d u r-t
 }|dkrNt||j|j|dd |	jD |	g|	j|
g|
j||	j|dS |dkrit||g|j|j|	g|	j|
g|
j||	j|dS |d	krt||g|j|j|	g|	j|
g|
j||	j|dS td
| )Nr   )TablePropertiesr   c                 S  s   g | ]}|  qS r:   )r   )r{   r   r:   r:   r;   r   d  r   z&new_table_metadata.<locals>.<listcomp>)rU   rb   r?   rA   r   rD   rC   rI   rH   rh   rf   rY   r4   )rU   r+   rb   r?   rD   rC   rI   rH   rh   rf   rY   r   Unknown format version: )pyiceberg.tabler  r_   popFORMAT_VERSIONDEFAULT_FORMAT_VERSION"check_format_version_compatibilityr   r   r   r   r   r   highest_field_idr@   r3   rE   rJ   last_assigned_field_idr   r   r   )rA   r   rK   rU   rh   rY   r  r   fresh_schemafresh_partition_specfresh_sort_orderr:   r:   r;   new_table_metadataH  sh   
r  c                   @  s   e Zd ZU ded< dS )TableMetadataWrapperr>   rootN)r   r   r   r   r:   r:   r:   r;   r    s   
 r  c                   @  s:   e Zd ZdZedddZedd	d
ZedddZdS )TableMetadataUtilz'Helper class for parsing TableMetadata.r5   rT   r7   r>   c              
   C  s2   zt | jW S  ty } zt||d }~ww rZ   )r  model_validate_jsonr  PydanticValidationErrorr   )r5   er:   r:   r;   	parse_raw  s   
zTableMetadataUtil.parse_rawr6   c                 C  sn   d| vrt d|  | d }|dkrtdi | S |dkr%tdi | S |dkr0tdi | S t d| )Nr   )Missing format-version in TableMetadata: r   r4   r   r  r:   )r   r   r   r   )r5   r   r:   r:   r;   	parse_obj  s   zTableMetadataUtil.parse_objr=   c                 C  s   | j du rtd|  | j dkrtjdi t| S | j dkr*tjdi t| S | j dkr9tjdi t| S td| j  )zConstruct table metadata from an existing table without performing validation.

        This method is useful during a sequence of table updates when the model needs to be
        re-constructed but is not yet ready for validation.
        Nr   r   r4   r   r  r:   )r   r   r   model_constructr   r   r   rQ   r:   r:   r;   _construct_without_validation  s   



z/TableMetadataUtil._construct_without_validationN)r5   rT   r7   r>   )r5   r6   r7   r>   r=   r>   r7   r>   )r   r   r   r   staticmethodr  r!  r#  r:   r:   r:   r;   r    s    r  r   r$  r   )rA   r   r   r   rK   r   rU   rT   rh   r#   rY   r  r7   r>   )Y
__future__r   r[   r   collections.abcr   r   typingr   r   r   pydanticr   r	   r
   r   r   r  pyiceberg.exceptionspyiceberg.partitioningr   r   r   pyiceberg.schemar   r   pyiceberg.table.name_mappingr   r   pyiceberg.table.refsr   r   r   pyiceberg.table.snapshotsr   r   r   pyiceberg.table.sortingr   r   r   r   pyiceberg.table.statisticsr   r   pyiceberg.typedefr    r!   r"   r#   pyiceberg.typesr$   r%   r&   pyiceberg.utils.configr'   pyiceberg.utils.datetimer(   r9   CURRENT_SCHEMA_IDSCHEMASr   r   r   r   r   LAST_ASSIGNED_FIELD_IDREFSr   r   r   r   r   r   SUPPORTED_TABLE_FORMAT_VERSIONr<   rB   rG   rL   rR   rS   r   r   r   r   r>   r  r  r  r:   r:   r:   r;   <module>   sn   

	



 
X (2E