o
    uyi                    @  sD  d dl mZ d dlZd dlmZmZ d dlmZ d dlm	Z	m
Z
mZ d dlmZmZmZmZmZmZmZmZmZmZmZmZ d dlmZmZmZ d dlmZ d d	lm Z m!Z!m"Z" d d
l#m$Z$m%Z%m&Z&m'Z'm(Z(m)Z)m*Z*m+Z+m,Z,m-Z-m.Z.m/Z/m0Z0m1Z1m2Z2m3Z3m4Z4m5Z5m6Z6m7Z7m8Z8m9Z9m:Z: erd dl;Z<d dl=m>Z> edZ?edZ@d ZAdZBdZCG dd de!ZDG dd dee? eZEG dd dee? eZFG dd dee@e?f eZGG dd deGe@e?f ZHG dd dee@ eZIedd$d%ZJeJKeDdd(d)ZLeJKe3dd,d)ZLeJKe-dd/d)ZLeJKe/dd2d)ZLeJKe1dd5d)ZLG d6d7 d7eEe? eZMed8d8d8d9G d:d; d;ZNedd>d?ZOeOKeDdd@d)ZLeOKe3ddAd)ZLeOKe-ddBd)ZLeOKe/ddCd)ZLeOKe1ddDd)ZLeddFdGZPePKeDddHd)ZLePKe3ddId)ZLePKe-ddJd)ZLePKe/ddKd)ZLePKe1ddLd)ZLG dMdN dNeEeeQe0f  ZRddPdQZSG dRdS dSeEeeQeQf  ZTddUdVZUG dWdX dXeEeeVeQf  ZWddZd[ZXdd]d^ZYeQZZG d_d` d`eEeeZeNf  Z[ddbdcZ\dddfdgZ]G dhdi dieFe+ Z^ddldmZ_ddodpZ`ddqdrZaddtduZbddvdwZcG dxdy dyeEee+  Zdddd}d~ZeG dd deEee+  ZfedddZgegKe,ddd)ZLegKe*ddd)ZLegKe2ddd)ZLegKe$ddd)ZLegKe'ddd)ZLegKe)ddd)ZLegKe9ddd)ZLdddZhG dd deFei ZjdS )    )annotationsN)ABCabstractmethod)	dataclass)cached_propertypartialsingledispatch)TYPE_CHECKINGAnyCallableDictGenericListLiteralOptionalSetTupleTypeVarUnion)FieldPrivateAttrmodel_validatorResolveError)
EMPTY_DICTIcebergBaseModelStructProtocol)
BinaryTypeBooleanTypeDateTypeDecimalType
DoubleType	FixedType	FloatTypeIcebergTypeIntegerTypeListTypeLongTypeMapTypeNestedFieldPrimitiveType
StringType
StructTypeTimestampNanoTypeTimestampTypeTimestamptzNanoTypeTimestamptzTypeTimeTypeUnknownTypeUUIDType)NameMappingTPzfield-idziceberg-field-namec                      s  e Zd ZU dZdZded< eedZded< ede	d	Z
d
ed< ededZded< e Zded< df fddZdgddZdgddZdhddZdid!d"Zed#d$djd%d&Zedkd'd(Zedld*d+Zedmd-d.Zednd/d0Zedod2d3Zedpd5d6Zdqd8d9Zdrd;d<ZdsdtdAdBZ dsdudDdEZ!edhdFdGZ"edvdIdJZ#dwdMdNZ$edxdPdQZ%dydTdUZ&dzdWdXZ'd=dYd{d[d\Z(ed|d^d_Z)d}dadbZ*d~dddeZ+  Z,S )SchemazsA table Schema.

    Example:
        >>> from pyiceberg import schema
        >>> from pyiceberg import types
    structzLiteral['struct']type)default_factoryTuple[NestedField, ...]fieldsz	schema-id)aliasdefaultint	schema_idzidentifier-field-ids)r=   r:   z	List[int]identifier_field_idsDict[str, int]_name_to_idr)   datar
   c                   s,   |r||d< t  jdi | t| | _d S )Nr<    )super__init__index_by_namerC   )selfr<   rD   	__class__rE   S/home/ubuntu/maya3_transcribe/venv/lib/python3.10/site-packages/pyiceberg/schema.pyrG   d   s   zSchema.__init__returnstrc                 C  s   dd dd | jD  d S )5Return the string representation of the Schema class.ztable {

c                 S  s   g | ]}d t | qS )z  )rN   .0fieldrE   rE   rL   
<listcomp>l       z"Schema.__str__.<locals>.<listcomp>z
})joincolumnsrI   rE   rE   rL   __str__j   s   zSchema.__str__c                 C  s.   dd dd | jD  d| j d| j dS )rO   zSchema(z, c                 s  s    | ]}t |V  qd S N)repr)rR   columnrE   rE   rL   	<genexpr>p   s    z"Schema.__repr__.<locals>.<genexpr>z, schema_id=z, identifier_field_ids=))rV   rW   r@   rA   rX   rE   rE   rL   __repr__n   s   .zSchema.__repr__c                 C  s
   t | jS )z6Return the length of an instance of the Literal class.)lenr<   rX   rE   rE   rL   __len__r      
zSchema.__len__otherboolc                 C  s^   |sdS t |tsdS t| jt|jkrdS | j|jk}tdd t| j|jD }|o.|S )z9Return the equality of two instances of the Schema class.Fc                 s  s    | ]	\}}||kV  qd S rZ   rE   )rR   lhsrhsrE   rE   rL   r]      s    z Schema.__eq__.<locals>.<genexpr>)
isinstancer7   r`   rW   rA   allzip)rI   rc   identifier_field_ids_is_equalschema_is_equalrE   rE   rL   __eq__v   s   
zSchema.__eq__after)modec                 C  s    | j r| j D ]}| | q| S rZ   )rA   _validate_identifier_fieldrI   field_idrE   rE   rL   check_schema   s   
zSchema.check_schemac                 C     | j S )z A tuple of the top-level fields.)r<   rX   rE   rE   rL   rW      s   zSchema.columnsDict[int, NestedField]c                 C     t | S )zReturn an index of field ID to NestedField instance.

        This is calculated once when called for the first time. Subsequent calls to this method will use a cached index.
        )index_by_idrX   rE   rE   rL   _lazy_id_to_field      zSchema._lazy_id_to_fieldDict[int, int]c                 C  ru   )zReturns an index of field ID to parent field IDs.

        This is calculated once when called for the first time. Subsequent calls to this method will use a cached index.
        )_index_parentsrX   rE   rE   rL   _lazy_id_to_parent   rx   zSchema._lazy_id_to_parentc                 C  s   dd | j  D S )zReturn an index of lower-case field names to field IDs.

        This is calculated once when called for the first time. Subsequent calls to this method will use a cached index.
        c                 S  s   i | ]	\}}|  |qS rE   )lower)rR   namerq   rE   rE   rL   
<dictcomp>       z1Schema._lazy_name_to_id_lower.<locals>.<dictcomp>)rC   itemsrX   rE   rE   rL   _lazy_name_to_id_lower   s   zSchema._lazy_name_to_id_lowerDict[int, str]c                 C  ru   )zReturn an index of field ID to full name.

        This is calculated once when called for the first time. Subsequent calls to this method will use a cached index.
        )index_name_by_idrX   rE   rE   rL   _lazy_id_to_name   rx   zSchema._lazy_id_to_nameDict[int, Accessor]c                 C  ru   )zReturn an index of field ID to accessor.

        This is calculated once when called for the first time. Subsequent calls to this method will use a cached index.
        )build_position_accessorsrX   rE   rE   rL   _lazy_id_to_accessor   rx   zSchema._lazy_id_to_accessorr,   c                 C  s
   t | j S )zReturn the schema as a struct.)r,   r<   rX   rE   rE   rL   	as_struct   rb   zSchema.as_struct'pa.Schema'c                 C     ddl m} || S )z%Return the schema as an Arrow schema.r   )schema_to_pyarrow)pyiceberg.io.pyarrowr   )rI   r   rE   rE   rL   as_arrow      zSchema.as_arrowT
name_or_idUnion[str, int]case_sensitivec                 C  st   t |tr|| jvrtd| | j| S |r| j|}n| j| }|du r5td| d| | j| S )a  Find a field using a field name or field ID.

        Args:
            name_or_id (Union[str, int]): Either a field name or a field ID.
            case_sensitive (bool, optional): Whether to perform a case-sensitive lookup using a field name. Defaults to True.

        Raises:
            ValueError: When the value cannot be found.

        Returns:
            NestedField: The matched NestedField.
        zCould not find field with id: NzCould not find field with name , case_sensitive=)rg   r?   rw   
ValueErrorrC   getr   r|   )rI   r   r   rq   rE   rE   rL   
find_field   s   



zSchema.find_fieldr$   c                 C  s,   | j ||d}|std| d| |jS )ad  Find a field type using a field name or field ID.

        Args:
            name_or_id (Union[str, int]): Either a field name or a field ID.
            case_sensitive (bool, optional): Whether to perform a case-sensitive lookup using a field name. Defaults to True.

        Returns:
            NestedField: The type of the matched NestedField.
        )r   r   z%Could not find field with name or id r   )r   r   
field_type)rI   r   r   rS   rE   rE   rL   	find_type   s   
zSchema.find_typec                 C  s   t | j ddS )Nr   )r>   )maxr   keysrX   rE   rE   rL   highest_field_id   s   zSchema.highest_field_idr4   c                 C  r   )Nr   )create_mapping_from_schema)pyiceberg.table.name_mappingr   )rI   r   rE   rE   rL   name_mapping   r   zSchema.name_mapping	column_idOptional[str]c                 C  s   | j |S )zFind a column name given a column ID.

        Args:
            column_id (int): The ID of the column.

        Returns:
            str: The column name (or None if the column ID cannot be found).
        )r   r   )rI   r   rE   rE   rL   find_column_name      	zSchema.find_column_name	List[str]c                 C     t | j S )z
        Return a list of all the column names, including nested fields.

        Excludes short names.

        Returns:
            List[str]: The column names.
        )listr   valuesrX   rE   rE   rL   column_names  s   
zSchema.column_namesrq   Accessorc                 C  s"   || j vrtd| | j | S )a  Find a schema position accessor given a field ID.

        Args:
            field_id (int): The ID of the field.

        Raises:
            ValueError: When the value cannot be found.

        Returns:
            Accessor: An accessor for the given field ID.
        z+Could not find accessor for field with id: )r   r   rp   rE   rE   rL   accessor_for_field  s   

zSchema.accessor_for_fieldSet[str]c                 C  s@   t  }| jD ]}| |}|du rtd| || q|S )zwReturn the names of the identifier fields.

        Returns:
            Set of names of the identifier fields
        Nz%Could not find identifier column id: )setrA   r   r   add)rI   idsrq   column_namerE   rE   rL   identifier_field_names"  s   

zSchema.identifier_field_names)r   namesc             
     sb   z|r fdd|D }n	 fdd|D }W n t y+ } ztd| |d}~ww t |S )a  Return a new schema instance pruned to a subset of columns.

        Args:
            names (List[str]): A list of column names.
            case_sensitive (bool, optional): Whether to perform a case-sensitive lookup for each column name. Defaults to True.

        Returns:
            Schema: A new schema with pruned columns.

        Raises:
            ValueError: If a column is selected that doesn't exist.
        c                   s   h | ]} j | qS rE   )rC   rR   r}   rX   rE   rL   	<setcomp>@      z Schema.select.<locals>.<setcomp>c                   s   h | ]	} j |  qS rE   )r   r|   r   rX   rE   rL   r   B  r   zCould not find column: N)KeyErrorr   prune_columns)rI   r   r   r   erE   rX   rL   select1  s   
zSchema.selectSet[int]c                 C  r   )z%Return the IDs of the current schema.)r   rC   r   rX   rE   rE   rL   	field_idsH  s   zSchema.field_idsNonec                 C  s   |  |}|jjstd| d|jstd| dt|jttfr,td| d| j	|j
}g }|durH|| | j	|}|dus9|rr|  | }|jjs`td|j d| |jsntd|j d| |sJdS dS )	zValidate that the field with the given ID is a valid identifier field.

        Args:
          field_id: The ID of the field to validate.

        Raises:
          ValueError: If the field is not valid.
        zIdentifier field z$ invalid: not a primitive type fieldz invalid: not a required fieldz+ invalid: must not be float or double fieldNzCannot add field z/ as an identifier field: must not be nested in zA as an identifier field: must not be nested in an optional field )r   r   is_primitiver   requiredrg   r!   r#   r{   r   rq   appendpop	is_structr}   )rI   rq   rS   	parent_idr<   parentrE   rE   rL   ro   M  s,   
	
z!Schema._validate_identifier_fieldformat_versionc                 C  sB   | j  D ]}||j k rt|j d|j  d| qdS )zCheck that the schema is compatible for the given table format version.

        Args:
          format_version: The Iceberg table format version.

        Raises:
          ValueError: If the schema is not compatible for the format version.
        z is only supported in z' or higher. Current format version is: N)rw   r   r   minimum_format_versionr   )rI   r   rS   rE   rE   rL   "check_format_version_compatibilityr  s   	z)Schema.check_format_version_compatibility)r<   r)   rD   r
   rM   rN   )rM   r?   )rc   r
   rM   rd   )rM   r7   )rM   r;   )rM   rt   )rM   ry   rM   rB   rM   r   )rM   r   )rM   r,   )rM   r   T)r   r   r   rd   rM   r)   )r   r   r   rd   rM   r$   )rM   r4   )r   r?   rM   r   )rM   r   )rq   r?   rM   r   )rM   r   )r   rN   r   rd   rM   r7   )rM   r   )rq   r?   rM   r   )r   r?   rM   r   )-__name__
__module____qualname____doc__r9   __annotations__r   tupler<   INITIAL_SCHEMA_IDr@   r   rA   r   rC   rG   rY   r_   ra   rl   r   rr   propertyrW   r   rw   r{   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   ro   r   __classcell__rE   rE   rJ   rL   r7   U   sX   
 









%r7   c                   @  s   e Zd Zd7ddZd7ddZd8d
dZd8ddZd9ddZd9ddZd:ddZ	d:ddZ
ed;ddZed<d"d#Zed=d%d&Zed>d*d+Zed?d0d1Zed@d4d5Zd6S )ASchemaVisitorrS   r)   rM   r   c                 C     dS zNOverride this method to perform an action immediately before visiting a field.NrE   rI   rS   rE   rE   rL   before_field      zSchemaVisitor.before_fieldc                 C  r   zMOverride this method to perform an action immediately after visiting a field.NrE   r   rE   rE   rL   after_field  r   zSchemaVisitor.after_fieldelementc                 C     |  | dS zcOverride this method to perform an action immediately before visiting an element within a ListType.Nr   rI   r   rE   rE   rL   before_list_element     z!SchemaVisitor.before_list_elementc                 C  r   zbOverride this method to perform an action immediately after visiting an element within a ListType.Nr   r   rE   rE   rL   after_list_element  r   z SchemaVisitor.after_list_elementkeyc                 C  r   z]Override this method to perform an action immediately before visiting a key within a MapType.Nr   rI   r   rE   rE   rL   before_map_key  r   zSchemaVisitor.before_map_keyc                 C  r   z\Override this method to perform an action immediately after visiting a key within a MapType.Nr   r   rE   rE   rL   after_map_key  r   zSchemaVisitor.after_map_keyvaluec                 C  r   z_Override this method to perform an action immediately before visiting a value within a MapType.Nr   rI   r   rE   rE   rL   before_map_value  r   zSchemaVisitor.before_map_valuec                 C  r   z^Override this method to perform an action immediately after visiting a value within a MapType.Nr   r   rE   rE   rL   after_map_value  r   zSchemaVisitor.after_map_valueschemar7   struct_resultr5   c                 C  r   zVisit a Schema.NrE   rI   r   r   rE   rE   rL   r     r   zSchemaVisitor.schemar8   r,   field_resultsList[T]c                 C  r   zVisit a StructType.NrE   rI   r8   r   rE   rE   rL   r8     r   zSchemaVisitor.structfield_resultc                 C  r   zVisit a NestedField.NrE   rI   rS   r   rE   rE   rL   rS     r   zSchemaVisitor.field	list_typer&   element_resultc                 C  r   zVisit a ListType.NrE   rI   r   r   rE   rE   rL   r     r   zSchemaVisitor.listmap_typer(   
key_resultvalue_resultc                 C  r   zVisit a MapType.NrE   rI   r   r   r   rE   rE   rL   map  r   zSchemaVisitor.map	primitiver*   c                 C  r   Visit a PrimitiveType.NrE   rI   r  rE   rE   rL   r    r   zSchemaVisitor.primitiveNrS   r)   rM   r   r   r)   rM   r   )r   r)   rM   r   r   r)   rM   r   )r   r7   r   r5   rM   r5   )r8   r,   r   r   rM   r5   )rS   r)   r   r5   rM   r5   )r   r&   r   r5   rM   r5   )r   r(   r   r5   r   r5   rM   r5   r  r*   rM   r5   r   r   r   r   r   r   r   r   r   r   r   r   r   r8   rS   r   r  r  rE   rE   rE   rL   r     *    







r   c                   @  s`   e Zd Zed$ddZed%ddZed&ddZed'ddZed(ddZed)d!d"Z	d#S )*PreOrderSchemaVisitorr   r7   r   Callable[[], T]rM   r5   c                 C  r   r   rE   r   rE   rE   rL   r     r   zPreOrderSchemaVisitor.schemar8   r,   r   List[Callable[[], T]]c                 C  r   r   rE   r   rE   rE   rL   r8     r   zPreOrderSchemaVisitor.structrS   r)   r   c                 C  r   r   rE   r   rE   rE   rL   rS     r   zPreOrderSchemaVisitor.fieldr   r&   r   c                 C  r   r   rE   r   rE   rE   rL   r     r   zPreOrderSchemaVisitor.listr   r(   r   r   c                 C  r   r  rE   r  rE   rE   rL   r    r   zPreOrderSchemaVisitor.mapr  r*   c                 C  r   r  rE   r  rE   rE   rL   r    r   zPreOrderSchemaVisitor.primitiveN)r   r7   r   r  rM   r5   )r8   r,   r   r  rM   r5   )rS   r)   r   r  rM   r5   )r   r&   r   r  rM   r5   )r   r(   r   r  r   r  rM   r5   r  )
r   r   r   r   r   r8   rS   r   r  r  rE   rE   rE   rL   r    s    r  c                   @  s   e Zd ZdAddZdAd	d
ZdBddZdBddZdCddZdCddZdDddZ	dDddZ
edEd"d#ZedFd)d*ZedGd,d-ZedHd2d3ZedId9d:ZedJd>d?Zd@S )KSchemaWithPartnerVisitorrS   r)   field_partnerOptional[P]rM   r   c                 C  r   r   rE   rI   rS   r  rE   rE   rL   r     r   z%SchemaWithPartnerVisitor.before_fieldc                 C  r   r   rE   r  rE   rE   rL   r     r   z$SchemaWithPartnerVisitor.after_fieldr   element_partnerc                 C     |  || dS r   r   rI   r   r  rE   rE   rL   r        z,SchemaWithPartnerVisitor.before_list_elementc                 C  r  r   r   r  rE   rE   rL   r     r  z+SchemaWithPartnerVisitor.after_list_elementr   key_partnerc                 C  r  r   r   rI   r   r  rE   rE   rL   r     r  z'SchemaWithPartnerVisitor.before_map_keyc                 C  r  r   r   r  rE   rE   rL   r     r  z&SchemaWithPartnerVisitor.after_map_keyr   value_partnerc                 C  r  r   r   rI   r   r  rE   rE   rL   r     r  z)SchemaWithPartnerVisitor.before_map_valuec                 C  r  r   r   r  rE   rE   rL   r     r  z(SchemaWithPartnerVisitor.after_map_valuer   r7   schema_partnerr   r5   c                 C  r   )zVisit a schema with a partner.NrE   )rI   r   r  r   rE   rE   rL   r     r   zSchemaWithPartnerVisitor.schemar8   r,   struct_partnerr   r   c                 C  r   )z#Visit a struct type with a partner.NrE   )rI   r8   r  r   rE   rE   rL   r8     r   zSchemaWithPartnerVisitor.structr   c                 C  r   )z$Visit a nested field with a partner.NrE   )rI   rS   r  r   rE   rE   rL   rS     r   zSchemaWithPartnerVisitor.fieldr   r&   list_partnerr   c                 C  r   )z!Visit a list type with a partner.NrE   )rI   r   r  r   rE   rE   rL   r     r   zSchemaWithPartnerVisitor.listr   r(   map_partnerr   r   c                 C  r   )z Visit a map type with a partner.NrE   )rI   r   r   r   r   rE   rE   rL   r    r   zSchemaWithPartnerVisitor.mapr  r*   primitive_partnerc                 C  r   )z&Visit a primitive type with a partner.NrE   rI   r  r!  rE   rE   rL   r    r   z"SchemaWithPartnerVisitor.primitiveN)rS   r)   r  r  rM   r   )r   r)   r  r  rM   r   )r   r)   r  r  rM   r   )r   r)   r  r  rM   r   )r   r7   r  r  r   r5   rM   r5   )r8   r,   r  r  r   r   rM   r5   )rS   r)   r  r  r   r5   rM   r5   )r   r&   r  r  r   r5   rM   r5   )
r   r(   r   r  r   r5   r   r5   rM   r5   r  r*   r!  r  rM   r5   r  rE   rE   rE   rL   r    r  r  c                   @  s  e Zd ZdOddZedPddZedQddZedRddZedSddZedTddZ	edUd d!Z
edVd$d%ZedWd(d)ZedXd,d-ZedYd0d1ZedZd4d5Zed[d8d9Zed\d<d=Zed]d@dAZed^dDdEZed_dHdIZed`dLdMZdNS )aPrimitiveWithPartnerVisitorr  r*   r!  r  rM   r5   c                 C  s  t |tr| ||S t |tr| ||S t |tr!| ||S t |tr,| ||S t |t	r7| 
||S t |trB| ||S t |trM| ||S t |trX| ||S t |trc| ||S t |trn| ||S t |try| ||S t |tr| ||S t |tr| ||S t |tr| ||S t |tr| ||S t |tr|  ||S t |t!r| "||S t#d| r  zType not recognized: )$rg   r   visit_booleanr%   visit_integerr'   
visit_longr#   visit_floatr!   visit_doubler    visit_decimalr   
visit_dater1   
visit_timer.   visit_timestampr-   visit_timestamp_nsr0   visit_timestamptzr/   visit_timestamptz_nsr+   visit_stringr3   
visit_uuidr"   visit_fixedr   visit_binaryr2   visit_unknownr   r"  rE   rE   rL   r    sF   
















z%PrimitiveWithPartnerVisitor.primitiveboolean_typer   partnerc                 C  r   zVisit a BooleanType.NrE   )rI   r7  r8  rE   rE   rL   r&  4  r   z)PrimitiveWithPartnerVisitor.visit_booleaninteger_typer%   c                 C  r   zVisit a IntegerType.NrE   )rI   r:  r8  rE   rE   rL   r'  8  r   z)PrimitiveWithPartnerVisitor.visit_integer	long_typer'   c                 C  r   zVisit a LongType.NrE   )rI   r<  r8  rE   rE   rL   r(  <  r   z&PrimitiveWithPartnerVisitor.visit_long
float_typer#   c                 C  r   zVisit a FloatType.NrE   )rI   r>  r8  rE   rE   rL   r)  @  r   z'PrimitiveWithPartnerVisitor.visit_floatdouble_typer!   c                 C  r   zVisit a DoubleType.NrE   )rI   r@  r8  rE   rE   rL   r*  D  r   z(PrimitiveWithPartnerVisitor.visit_doubledecimal_typer    c                 C  r   zVisit a DecimalType.NrE   )rI   rB  r8  rE   rE   rL   r+  H  r   z)PrimitiveWithPartnerVisitor.visit_decimal	date_typer   c                 C  r   rC  rE   )rI   rD  r8  rE   rE   rL   r,  L  r   z&PrimitiveWithPartnerVisitor.visit_date	time_typer1   c                 C  r   rC  rE   )rI   rE  r8  rE   rE   rL   r-  P  r   z&PrimitiveWithPartnerVisitor.visit_timetimestamp_typer.   c                 C  r   zVisit a TimestampType.NrE   )rI   rF  r8  rE   rE   rL   r.  T  r   z+PrimitiveWithPartnerVisitor.visit_timestamptimestamp_ns_typer-   c                 C  r   zVisit a TimestampNanoType.NrE   )rI   rH  r8  rE   rE   rL   r/  X  r   z.PrimitiveWithPartnerVisitor.visit_timestamp_nstimestamptz_typer0   c                 C  r   zVisit a TimestamptzType.NrE   )rI   rJ  r8  rE   rE   rL   r0  \  r   z-PrimitiveWithPartnerVisitor.visit_timestamptztimestamptz_ns_typer/   c                 C  r   zVisit a TimestamptzNanoType.NrE   )rI   rL  r8  rE   rE   rL   r1  `  r   z0PrimitiveWithPartnerVisitor.visit_timestamptz_nsstring_typer+   c                 C  r   zVisit a StringType.NrE   )rI   rN  r8  rE   rE   rL   r2  d  r   z(PrimitiveWithPartnerVisitor.visit_string	uuid_typer3   c                 C  r   zVisit a UUIDType.NrE   )rI   rP  r8  rE   rE   rL   r3  h  r   z&PrimitiveWithPartnerVisitor.visit_uuid
fixed_typer"   c                 C  r   zVisit a FixedType.NrE   )rI   rR  r8  rE   rE   rL   r4  l  r   z'PrimitiveWithPartnerVisitor.visit_fixedbinary_typer   c                 C  r   zVisit a BinaryType.NrE   )rI   rT  r8  rE   rE   rL   r5  p  r   z(PrimitiveWithPartnerVisitor.visit_binaryunknown_typer2   c                 C  r   zVisit a UnknownType.NrE   )rI   rV  r8  rE   rE   rL   r6  t  r   z)PrimitiveWithPartnerVisitor.visit_unknownNr#  )r7  r   r8  r  rM   r5   )r:  r%   r8  r  rM   r5   )r<  r'   r8  r  rM   r5   )r>  r#   r8  r  rM   r5   )r@  r!   r8  r  rM   r5   )rB  r    r8  r  rM   r5   )rD  r   r8  r  rM   r5   )rE  r1   r8  r  rM   r5   )rF  r.   r8  r  rM   r5   )rH  r-   r8  r  rM   r5   )rJ  r0   r8  r  rM   r5   )rL  r/   r8  r  rM   r5   )rN  r+   r8  r  rM   r5   )rP  r3   r8  r  rM   r5   )rR  r"   r8  r  rM   r5   )rT  r   r8  r  rM   r5   )rV  r2   r8  r  rM   r5   )r   r   r   r  r   r&  r'  r(  r)  r*  r+  r,  r-  r.  r/  r0  r1  r2  r3  r4  r5  r6  rE   rE   rE   rL   r$    H    
'r$  c                   @  sR   e Zd ZedddZedddZedddZedddZedddZdS )PartnerAccessorr8  r  rM   c                 C  r   )z0Return the equivalent of the schema as a struct.NrE   )rI   r8  rE   rE   rL   r  z  r   zPartnerAccessor.schema_partnerpartner_structrq   r?   
field_namerN   c                 C  r   )zGReturn the equivalent struct field by name or id in the partner struct.NrE   )rI   rZ  rq   r[  rE   rE   rL   r  ~  r   zPartnerAccessor.field_partnerpartner_listc                 C  r   )z7Return the equivalent list element in the partner list.NrE   )rI   r\  rE   rE   rL   list_element_partner  r   z$PartnerAccessor.list_element_partnerpartner_mapc                 C  r   )z1Return the equivalent map key in the partner map.NrE   rI   r^  rE   rE   rL   map_key_partner  r   zPartnerAccessor.map_key_partnerc                 C  r   )z3Return the equivalent map value in the partner map.NrE   r_  rE   rE   rL   map_value_partner  r   z!PartnerAccessor.map_value_partnerN)r8  r  rM   r  )rZ  r  rq   r?   r[  rN   rM   r  )r\  r  rM   r  )r^  r  rM   r  )	r   r   r   r   r  r  r]  r`  ra  rE   rE   rE   rL   rY  y  s    rY  schema_or_typeUnion[Schema, IcebergType]r8  visitorSchemaWithPartnerVisitor[T, P]accessorPartnerAccessor[P]rM   c                 C     t d|  )NzUnsupported type: )r   )rb  r8  rd  rf  rE   rE   rL   visit_with_partner  s   ri  r   SchemaWithPartnerVisitor[P, T]c              	   C  s&   | |}|| |t|  |||S rZ   )r  r   ri  r   )r   r8  rd  rf  r  rE   rE   rL   _  s   
rk  r8   r,   c                 C  s~   g }| j D ]2}|||j|j}||| zt|j|||}||||| W |	|| q|	|| w |
| ||S rZ   )r<   r  rq   r}   r   ri  r   r   rS   r   r8   )r8   r8  rd  rf  r   rS   r  r   rE   rE   rL   rk    s   
r   r&   c              
   C  sZ   | |}|| j| zt| j|||}W || j| n|| j| w || ||S rZ   )r]  r   element_fieldri  element_typer   r   )r   r8  rd  rf  r  r   rE   rE   rL   rk    s   
 r   r(   c              
   C  s   | |}|| j| zt| j|||}W || j| n|| j| w ||}|| j| zt| j	|||}W |
| j| n|
| j| w || |||S rZ   )r`  r   	key_fieldri  key_typer   ra  r   value_field
value_typer   r  )r   r8  rd  rf  r  r   r  r   rE   rE   rL   rk    s   
 
 r  r*   c                 C  s   | | |S rZ   r  )r  r8  rd  rk  rE   rE   rL   rk    s   c                   @  s  e Zd ZdKddZedLd	d
ZedMddZedNddZedOddZedPddZ	edQddZ
edRd!d"ZedSd%d&ZedTd)d*ZedUd-d.ZedVd0d1ZedWd4d5ZedXd8d9ZedYd<d=ZedZd@dAZed[dDdEZed\dHdIZdJS )]SchemaVisitorPerPrimitiveTyper  r*   rM   r5   c                 C  sb  t |tr
| |S t |tr| |S t |tr| |S t |tr(| |S t |t	r2| 
|S t |tr<| |S t |trF| |S t |trP| |S t |trZ| |S t |trd| |S t |trn| |S t |trx| |S t |tr| |S t |tr| |S t |tr| |S t |tr|  |S t |t!r| "|S t#d| r%  )$rg   r"   r4  r    r+  r   r&  r%   r'  r'   r(  r#   r)  r!   r*  r   r,  r1   r-  r.   r.  r-   r/  r0   r0  r/   r1  r+   r2  r3   r3  r   r5  r2   r6  r   r  rE   rE   rL   r    sF   

































z'SchemaVisitorPerPrimitiveType.primitiverR  r"   c                 C  r   rS  rE   )rI   rR  rE   rE   rL   r4    r   z)SchemaVisitorPerPrimitiveType.visit_fixedrB  r    c                 C  r   rC  rE   )rI   rB  rE   rE   rL   r+    r   z+SchemaVisitorPerPrimitiveType.visit_decimalr7  r   c                 C  r   r9  rE   )rI   r7  rE   rE   rL   r&    r   z+SchemaVisitorPerPrimitiveType.visit_booleanr:  r%   c                 C  r   r;  rE   )rI   r:  rE   rE   rL   r'    r   z+SchemaVisitorPerPrimitiveType.visit_integerr<  r'   c                 C  r   r=  rE   )rI   r<  rE   rE   rL   r(    r   z(SchemaVisitorPerPrimitiveType.visit_longr>  r#   c                 C  r   r?  rE   )rI   r>  rE   rE   rL   r)  
  r   z)SchemaVisitorPerPrimitiveType.visit_floatr@  r!   c                 C  r   rA  rE   )rI   r@  rE   rE   rL   r*    r   z*SchemaVisitorPerPrimitiveType.visit_doublerD  r   c                 C  r   rC  rE   )rI   rD  rE   rE   rL   r,    r   z(SchemaVisitorPerPrimitiveType.visit_daterE  r1   c                 C  r   rC  rE   )rI   rE  rE   rE   rL   r-    r   z(SchemaVisitorPerPrimitiveType.visit_timerF  r.   c                 C  r   rG  rE   rI   rF  rE   rE   rL   r.    r   z-SchemaVisitorPerPrimitiveType.visit_timestampr-   c                 C  r   rI  rE   rt  rE   rE   rL   r/    r   z0SchemaVisitorPerPrimitiveType.visit_timestamp_nsrJ  r0   c                 C  r   rK  rE   )rI   rJ  rE   rE   rL   r0  "  r   z/SchemaVisitorPerPrimitiveType.visit_timestamptzrL  r/   c                 C  r   rM  rE   )rI   rL  rE   rE   rL   r1  &  r   z2SchemaVisitorPerPrimitiveType.visit_timestamptz_nsrN  r+   c                 C  r   rO  rE   )rI   rN  rE   rE   rL   r2  *  r   z*SchemaVisitorPerPrimitiveType.visit_stringrP  r3   c                 C  r   rQ  rE   )rI   rP  rE   rE   rL   r3  .  r   z(SchemaVisitorPerPrimitiveType.visit_uuidrT  r   c                 C  r   rU  rE   )rI   rT  rE   rE   rL   r5  2  r   z*SchemaVisitorPerPrimitiveType.visit_binaryrV  r2   c                 C  r   rW  rE   )rI   rV  rE   rE   rL   r6  6  r   z+SchemaVisitorPerPrimitiveType.visit_unknownNr  )rR  r"   rM   r5   )rB  r    rM   r5   )r7  r   rM   r5   )r:  r%   rM   r5   )r<  r'   rM   r5   )r>  r#   rM   r5   )r@  r!   rM   r5   )rD  r   rM   r5   )rE  r1   rM   r5   )rF  r.   rM   r5   )rF  r-   rM   r5   )rJ  r0   rM   r5   )rL  r/   rM   r5   )rN  r+   rM   r5   )rP  r3   rM   r5   )rT  r   rM   r5   )rV  r2   rM   r5   )r   r   r   r  r   r4  r+  r&  r'  r(  r)  r*  r,  r-  r.  r/  r0  r1  r2  r3  r5  r6  rE   rE   rE   rL   rs    rX  rs  T)initeqfrozenc                   @  sD   e Zd ZU dZded< dZded< dd	d
ZdddZdddZdS )r   zVAn accessor for a specific position in a container that implements the StructProtocol.r?   positionNzOptional[Accessor]innerrM   rN   c                 C  s   d| j  d| j dS )7Return the string representation of the Accessor class.zAccessor(position=z,inner=r^   rx  ry  rX   rE   rE   rL   rY   B  s   zAccessor.__str__c                 C  s   |   S )rz  )rY   rX   rE   rE   rL   r_   F  s   zAccessor.__repr__	containerr   r
   c                 C  s2   | j }|| }| }|jr|j}||j  }|js|S )zReturn the value at self.position in `container`.

        Args:
            container (StructProtocol): A container to access at position `self.position`.

        Returns:
            Any: The value at position `self.position` in the container.
        r{  )rI   r|  posvalry  rE   rE   rL   r   J  s   	
zAccessor.getr   )r|  r   rM   r
   )	r   r   r   r   r   ry  rY   r_   r   rE   rE   rE   rL   r   ;  s   
 

r   objSchemaVisitor[T]c                 C  rh  )a  Apply a schema visitor to any point within a schema.

    The function traverses the schema in post-order fashion.

    Args:
        obj (Union[Schema, IcebergType]): An instance of a Schema or an IcebergType.
        visitor (SchemaVisitor[T]): An instance of an implementation of the generic SchemaVisitor base class.

    Raises:
        NotImplementedError: If attempting to visit an unrecognized object type.
    Cannot visit non-type: NotImplementedErrorr  rd  rE   rE   rL   visit]  s   r  c                 C  s   | | t|  |S )z-Visit a Schema with a concrete SchemaVisitor.)r   r  r   r  rE   rE   rL   rk  m     c                 C  sN   g }| j D ]}|| t|j|}|| |||| q|| |S )z1Visit a StructType with a concrete SchemaVisitor.)r<   r   r  r   r   r   rS   r8   )r  rd  resultsrS   resultrE   rE   rL   rk  s  s   


c                 C  s0   | | j t| j|}|| j || |S )z/Visit a ListType with a concrete SchemaVisitor.)r   rl  r  rm  r   r   )r  rd  r  rE   rE   rL   rk    s   c                 C  sV   | | j t| j|}|| j || j t| j|}|| j |	| ||S )z.Visit a MapType with a concrete SchemaVisitor.)
r   rn  r  ro  r   r   rp  rq  r   r  )r  rd  r   r   rE   rE   rL   rk    s   c                 C  
   | | S )z4Visit a PrimitiveType with a concrete SchemaVisitor.rr  r  rE   rE   rL   rk       
PreOrderSchemaVisitor[T]c                 C  rh  )a\  Apply a schema visitor to any point within a schema.

    The function traverses the schema in pre-order fashion. This is a slimmed down version
    compared to the post-order traversal (missing before and after methods), mostly
    because we don't use the pre-order traversal much.

    Args:
        obj (Union[Schema, IcebergType]): An instance of a Schema or an IcebergType.
        visitor (PreOrderSchemaVisitor[T]): An instance of an implementation of the generic PreOrderSchemaVisitor base class.

    Raises:
        NotImplementedError: If attempting to visit an unrecognized object type.
    r  r  r  rE   rE   rL   pre_order_visit  s   r  c                         fddS )z5Visit a Schema with a concrete PreOrderSchemaVisitor.c                     s   t   S rZ   )r  r   rE   r  rE   rL   <lambda>  s    _.<locals>.<lambda>)r   r  rE   r  rL   rk    r  c                   s     |  fdd| jD S )z9Visit a StructType with a concrete PreOrderSchemaVisitor.c                   s   g | ]}t  fd d|qS )c                   s     | t fdd| S )Nc                   s   t | j S rZ   )r  r   rS   rd  rE   rL   r        z0_.<locals>.<listcomp>.<lambda>.<locals>.<lambda>)rS   r   r  r  rE   rL   r    r   z_.<locals>.<listcomp>.<lambda>)r   rQ   r  rE   rL   rT     s    
z_.<locals>.<listcomp>)r8   r<   r  rE   r  rL   rk    s   
c                   r  )z7Visit a ListType with a concrete PreOrderSchemaVisitor.c                        t  jS rZ   )r  rm  rE   r  rE   rL   r    r  r  )r   r  rE   r  rL   rk    r  c                   s"      fdd fddS )z6Visit a MapType with a concrete PreOrderSchemaVisitor.c                     r  rZ   )r  ro  rE   r  rE   rL   r    r  r  c                     r  rZ   )r  rq  rE   r  rE   rL   r    r  )r  r  rE   r  rL   rk    s   "c                 C  r  )z<Visit a PrimitiveType with a concrete PreOrderSchemaVisitor.rr  r  rE   rE   rL   rk    r  c                   @  sV   e Zd ZdZd'ddZd(d
dZd)ddZd*ddZd+ddZd,d d!Z	d-d$d%Z
d&S ).
_IndexByIdz@A schema visitor for generating a field ID to NestedField index.rM   r   c                 C  s
   i | _ d S rZ   _indexrX   rE   rE   rL   rG     s   
z_IndexById.__init__r   r7   r   rt   c                 C  rs   rZ   r  r   rE   rE   rL   r        z_IndexById.schemar8   r,   r   List[Dict[int, NestedField]]c                 C  rs   rZ   r  r   rE   rE   rL   r8     r  z_IndexById.structrS   r)   r   c                 C  s   || j |j< | j S )zAdd the field ID to the index.)r  rq   r   rE   rE   rL   rS     s   z_IndexById.fieldr   r&   r   c                 C  s   |j | j|j j< | jS )z%Add the list element ID to the index.)rl  r  rq   r   rE   rE   rL   r        z_IndexById.listr   r(   r   r   c                 C  s&   |j | j|j j< |j| j|jj< | jS )z=Add the key ID and value ID as individual items in the index.)rn  r  rq   rp  r  rE   rE   rL   r    s   z_IndexById.mapr  r*   c                 C  rs   rZ   r  r  rE   rE   rL   r    r  z_IndexById.primitiveNrM   r   )r   r7   r   rt   rM   rt   )r8   r,   r   r  rM   rt   )rS   r)   r   rt   rM   rt   )r   r&   r   rt   rM   rt   )r   r(   r   rt   r   rt   rM   rt   )r  r*   rM   rt   )r   r   r   r   rG   r   r8   rS   r   r  r  rE   rE   rE   rL   r    s    





r  rt   c                 C     t | t S )zGenerate an index of field IDs to NestedField instances.

    Args:
        schema_or_type (Union[Schema, IcebergType]): A schema or type to index.

    Returns:
        Dict[int, NestedField]: An index of field IDs to NestedField instances.
    )r  r  rb  rE   rE   rL   rv     r   rv   c                   @  sf   e Zd Zd*ddZd+ddZd+d	d
Zd,ddZd-ddZd.ddZd/ddZ	d0d#d$Z
d1d'd(Zd)S )2_IndexParentsrM   r   c                 C  s   i | _ g | _d S rZ   )id_to_parentid_stackrX   rE   rE   rL   rG        
z_IndexParents.__init__rS   r)   c                 C  s   | j |j d S rZ   )r  r   rq   r   rE   rE   rL   r        z_IndexParents.before_fieldc                 C  s   | j   d S rZ   )r  r   r   rE   rE   rL   r     s   z_IndexParents.after_fieldr   r7   r   ry   c                 C  rs   rZ   r  r   rE   rE   rL   r     r  z_IndexParents.schemar8   r,   r   List[Dict[int, int]]c                 C  s:   |j D ]}| jr| jd nd }|d ur|| j|j< q| jS N)r<   r  r  rq   )rI   r8   r   rS   r   rE   rE   rL   r8     s   
z_IndexParents.structr   c                 C  rs   rZ   r  r   rE   rE   rL   rS     r  z_IndexParents.fieldr   r&   r   c                 C  s   | j d | j|j< | jS r  )r  r  
element_idr   rE   rE   rL   r      s   z_IndexParents.listr   r(   r   r   c                 C  s*   | j d | j|j< | j d | j|j< | jS r  )r  r  key_idvalue_idr  rE   rE   rL   r  $  s   z_IndexParents.mapr  r*   c                 C  rs   rZ   r  r  rE   rE   rL   r  )  r  z_IndexParents.primitiveNr  r  )r   r7   r   ry   rM   ry   )r8   r,   r   r  rM   ry   )rS   r)   r   ry   rM   ry   )r   r&   r   ry   rM   ry   )r   r(   r   ry   r   ry   rM   ry   )r  r*   rM   ry   )r   r   r   rG   r   r   r   r8   rS   r   r  r  rE   rE   rE   rL   r    s    





	

r  ry   c                 C  r  )zGenerate an index of field IDs to their parent field IDs.

    Args:
        schema_or_type (Union[Schema, IcebergType]): A schema or type to index.

    Returns:
        Dict[int, int]: An index of field IDs to their parent field IDs.
    )r  r  r  rE   rE   rL   rz   -  r   rz   c                   @  s   e Zd ZdZd@ddZdAdd	ZdAd
dZdBddZdBddZdCddZ	dCddZ
dDddZdEd d!ZdFd#d$ZdGd(d)ZdHd.d/ZdId4d5ZdJd8d9ZdKd:d;ZdLd=d>Zd?S )M_IndexByNamez?A schema visitor for generating a field name to field ID index.rM   r   c                 C  s"   i | _ i | _i | _g | _g | _d S rZ   )r  _short_name_to_id_combined_index_field_names_short_field_namesrX   rE   rE   rL   rG   <  s
   
z_IndexByName.__init__r   r)   c                 C  s,   t |jts| j|j | j|j d S rZ   rg   r   r,   r  r   r}   r  r   rE   rE   rL   r   C  s   z_IndexByName.before_map_valuec                 C  $   t |jts| j  | j  d S rZ   rg   r   r,   r  r   r  r   rE   rE   rL   r   H     
z_IndexByName.after_map_valuer   c                 C  s,   t |jts| j|j | j|j dS )z@Short field names omit element when the element is a StructType.Nr  r   rE   rE   rL   r   M  s   z _IndexByName.before_list_elementc                 C  r  rZ   r  r   rE   rE   rL   r   S  r  z_IndexByName.after_list_elementrS   c                 C  s    | j |j | j|j dS )zStore the field name.N)r  r   r}   r  r   rE   rE   rL   r   X  s   z_IndexByName.before_fieldc                 C  s   | j   | j  dS )z"Remove the last field name stored.N)r  r   r  r   rE   rE   rL   r   ]  s   
z_IndexByName.after_fieldr   r7   r   rB   c                 C  rs   rZ   r  r   rE   rE   rL   r   b  r  z_IndexByName.schemar8   r,   r   List[Dict[str, int]]c                 C  rs   rZ   r  r   rE   rE   rL   r8   e  r  z_IndexByName.structr   c                 C  s   |  |j|j | jS )z Add the field name to the index.)
_add_fieldr}   rq   r  r   rE   rE   rL   rS   h  r  z_IndexByName.fieldr   r&   r   c                 C  s   |  |jj|jj | jS )z'Add the list element name to the index.)r  rl  r}   rq   r  r   rE   rE   rL   r   m  s   z_IndexByName.listr   r(   r   r   c                 C  s.   |  |jj|jj |  |jj|jj | jS )zAAdd the key name and value name as individual items in the index.)r  rn  r}   rq   rp  r  r  rE   rE   rL   r  r  s   z_IndexByName.mapr}   rN   rq   r?   c                 C  s   |}| j rdd| j |g}|| jv r%td| d| j|  d| || j|< | jr?dd| j|g}|| j|< dS dS )a  Add a field name to the index, mapping its full name to its field ID.

        Args:
            name (str): The field name.
            field_id (int): The field ID.

        Raises:
            ValueError: If the field name is already contained in the index.
        .z)Invalid schema, multiple fields for name z: z and N)r  rV   r  r   r  r  )rI   r}   rq   	full_name
short_namerE   rE   rL   r  x  s   

 
z_IndexByName._add_fieldr  r*   c                 C  rs   rZ   r  r  rE   rE   rL   r    r  z_IndexByName.primitivec                 C  s   | j  }|| j |S )zReturn an index of combined full and short names.

        Note: Only short names that do not conflict with full names are included.
        )r  copyupdater  )rI   combined_indexrE   rE   rL   by_name  s   
z_IndexByName.by_namer   c                 C  s   dd | j  D }|S )z$Return an index of ID to full names.c                 S  s   i | ]\}}||qS rE   rE   )rR   r   r   rE   rE   rL   r~     r   z&_IndexByName.by_id.<locals>.<dictcomp>)r  r   )rI   id_to_full_namerE   rE   rL   by_id  s   z_IndexByName.by_idNr  r
  r	  r  )r   r7   r   rB   rM   rB   )r8   r,   r   r  rM   rB   )rS   r)   r   rB   rM   rB   )r   r&   r   rB   rM   rB   )r   r(   r   rB   r   rB   rM   rB   )r}   rN   rq   r?   rM   r   )r  r*   rM   rB   r   r   )r   r   r   r   rG   r   r   r   r   r   r   r   r8   rS   r   r  r  r  r  r  rE   rE   rE   rL   r  9  s$    














	r  rB   c                 C  s*   t | jdkrt }t| | | S tS )zGenerate an index of field names to field IDs.

    Args:
        schema_or_type (Union[Schema, IcebergType]): A schema or type to index.

    Returns:
        Dict[str, int]: An index of field names to field IDs.
    r   )r`   r<   r  r  r  r   rb  indexerrE   rE   rL   rH     s
   	
rH   r   c                 C  s   t  }t| | | S )zGenerate an index of field IDs full field names.

    Args:
        schema_or_type (Union[Schema, IcebergType]): A schema or type to index.

    Returns:
        Dict[str, int]: An index of field IDs to full names.
    )r  r  r  r  rE   rE   rL   r     s   	
r   c                   @  sL   e Zd ZdZd$ddZd%ddZd&ddZd'ddZd(ddZd)d!d"Z	d#S )*_BuildPositionAccessorsaP  A schema visitor for generating a field ID to accessor index.

    Example:
        >>> from pyiceberg.schema import Schema
        >>> from pyiceberg.types import *
        >>> schema = Schema(
        ...     NestedField(field_id=2, name="id", field_type=IntegerType(), required=False),
        ...     NestedField(field_id=1, name="data", field_type=StringType(), required=True),
        ...     NestedField(
        ...         field_id=3,
        ...         name="location",
        ...         field_type=StructType(
        ...             NestedField(field_id=5, name="latitude", field_type=FloatType(), required=False),
        ...             NestedField(field_id=6, name="longitude", field_type=FloatType(), required=False),
        ...         ),
        ...         required=True,
        ...     ),
        ...     schema_id=1,
        ...     identifier_field_ids=[1],
        ... )
        >>> result = build_position_accessors(schema)
        >>> expected = {
        ...     2: Accessor(position=0, inner=None),
        ...     1: Accessor(position=1, inner=None),
        ...     5: Accessor(position=2, inner=Accessor(position=0, inner=None)),
        ...     6: Accessor(position=2, inner=Accessor(position=1, inner=None))
        ...     3: Accessor(position=2, inner=None),
        ... }
        >>> result == expected
        True
    r   r7   r   Dict[Position, Accessor]rM   c                 C     |S rZ   rE   r   rE   rE   rL   r        z_BuildPositionAccessors.schemar8   r,   r   List[Dict[Position, Accessor]]c                 C  sX   i }t |jD ]"\}}|| r"||  D ]\}}t||d||< qt|||j< q|S )N)ry  )	enumerater<   r   r   rq   )rI   r8   r   r  rx  rS   inner_field_idaccrE   rE   rL   r8     s   z_BuildPositionAccessors.structrS   r)   r   c                 C  r  rZ   rE   r   rE   rE   rL   rS     r  z_BuildPositionAccessors.fieldr   r&   r   c                 C     i S rZ   rE   r   rE   rE   rL   r     r  z_BuildPositionAccessors.listr   r(   r   r   c                 C  r  rZ   rE   r  rE   rE   rL   r    s   z_BuildPositionAccessors.mapr  r*   c                 C  r  rZ   rE   r  rE   rE   rL   r    r  z!_BuildPositionAccessors.primitiveN)r   r7   r   r  rM   r  )r8   r,   r   r  rM   r  )rS   r)   r   r  rM   r  )r   r&   r   r  rM   r  )r   r(   r   r  r   r  rM   r  )r  r*   rM   r  )
r   r   r   r   r   r8   rS   r   r  r  rE   rE   rE   rL   r    s    
 



r  r   c                 C  r  )zGenerate an index of field IDs to schema position accessors.

    Args:
        schema_or_type (Union[Schema, IcebergType]): A schema or type to index.

    Returns:
        Dict[int, Accessor]: An index of field IDs to accessors.
    )r  r  r  rE   rE   rL   r     r   r   next_idOptional[Callable[[], int]]c                 C  s   t | t|dS )z'Traverses the schema, and sets new IDs.)next_id_func)r  _SetFreshIDs)rb  r  rE   rE   rL   assign_fresh_schema_ids  r  r  c                   @  sl   e Zd ZU dZded< d1d2d	d
Zd3ddZd4ddZd5ddZd6d d!Z	d7d%d&Z
d8d+d,Zd9d/d0ZdS ):r  z>Traverses the schema and assigns monotonically increasing ids.ry   old_id_to_new_idNr  r  rM   r   c                   s4   i | _ td |d ur|| _d S  fdd| _d S )N   c                     s   t  S rZ   )nextrE   counterrE   rL   r    s    z'_SetFreshIDs.__init__.<locals>.<lambda>)r  	itertoolscountr  )rI   r  rE   r  rL   rG     s   
$z_SetFreshIDs.__init__
current_idr?   c                 C  s   |   }|| j|< |S rZ   )r  r  )rI   r  new_idrE   rE   rL   _get_and_increment  s   
z_SetFreshIDs._get_and_incrementr   r7   r   Callable[[], StructType]c                   s"   t | jd fdd|jD iS )NrA   c                   s   g | ]} j | qS rE   )r  )rR   rq   rX   rE   rL   rT   $  r   z'_SetFreshIDs.schema.<locals>.<listcomp>)r7   r<   rA   r   rE   rX   rL   r   !  s
   z_SetFreshIDs.schemar8   r,   r   List[Callable[[], IcebergType]]c              
     sZ    fdd|j D }g }t||j |D ]\}}}|t||j| |j|jd qt| S )Nc                   s   g | ]}  |jqS rE   )r  rq   rQ   rX   rE   rL   rT   (  rU   z'_SetFreshIDs.struct.<locals>.<listcomp>)rq   r}   r   r   doc)r<   ri   r   r)   r}   r   r  r,   )rI   r8   r   new_ids
new_fieldsrq   rS   r   rE   rX   rL   r8   '  s   	z_SetFreshIDs.structrS   r)   r   Callable[[], IcebergType]r$   c                 C  s   | S rZ   rE   r   rE   rE   rL   rS   6  r  z_SetFreshIDs.fieldr   r&   r   c                 C  s   |  |j}t|| |jdS )N)r  r   element_required)r  r  r&   r  )rI   r   r   r  rE   rE   rL   r   9  s   z_SetFreshIDs.listr   r(   r   r   c                 C  s0   |  |j}|  |j}t|| || |jdS )N)r  ro  r  rq  value_required)r  r  r  r(   r  )rI   r   r   r   r  r  rE   rE   rL   r  A  s   z_SetFreshIDs.mapr  r*   c                 C     |S rZ   rE   r  rE   rE   rL   r  L  r  z_SetFreshIDs.primitiverZ   )r  r  rM   r   )r  r?   rM   r?   )r   r7   r   r  rM   r7   )r8   r,   r   r  rM   r,   )rS   r)   r   r  rM   r$   )r   r&   r   r  rM   r&   )r   r(   r   r  r   r  rM   r(   )r  r*   rM   r*   )r   r   r   r   r   rG   r  r   r8   rS   r   r  r  rE   rE   rE   rL   r    s   
 





r  r}   rN   c                 C  s   t | st| S | S )a  Make a field name compatible with Avro specification.

    This function sanitizes field names to comply with Avro naming rules:
    - Names must start with [A-Za-z_]
    - Subsequent characters must be [A-Za-z0-9_]

    Invalid characters are replaced with _xHHHH where HHHH is the hex code.
    Names starting with digits get a leading underscore.

    Args:
        name: The original field name

    Returns:
        A sanitized name that complies with Avro specification
    )_valid_avro_name_sanitize_name)r}   rE   rE   rL   make_compatible_nameQ  s   r  rd   c                 C  sX   t | std| d }| s|dksdS | dd  D ]}| s)|dks) dS qdS )Nz Can not validate empty avro namer   rk  Fr  T)r`   r   isalphaisalnum)r}   first	characterrE   rE   rL   r  f  s   r  c                 C  s|   g }| d }|  s|dks|t| n|| | dd  D ]}| s3|dks3|t| q!|| q!d|S )Nr   rk  r   )r  r   _sanitize_charr  rV   )r}   sbr  r  rE   rE   rL   r  s  s   

r  r  c                 C  s,   |   rd|  S dtt| dd    S )Nrk  _x   )isdigithexordupper)r  rE   rE   rL   r    s   r  c                 C  s,   t |  t }t|pt j| j| jdS )a_  Sanitize column names to make them compatible with Avro.

    The column name should be starting with '_' or digit followed by a string only contains '_', digit or alphabet,
    otherwise it will be sanitized to conform the avro naming convention.

    Args:
        schema: The schema to be sanitized.

    Returns:
        The sanitized schema.
    r@   rA   )r  r   _SanitizeColumnsVisitorr7   r,   r<   r@   rA   )r   r  rE   rE   rL   sanitize_column_names  s   
r  c                   @  sH   e Zd Zd#ddZd$ddZd%ddZd&ddZd'ddZd(d d!Zd"S ))r  r   r7   r   Optional[IcebergType]rM   c                 C  r  rZ   rE   r   rE   rE   rL   r     r  z_SanitizeColumnsVisitor.schemarS   r)   r   c                 C  s   t |jt|j||j|jdS )Nrq   r}   r   r  r   )r)   rq   r  r}   r  r   r   rE   rE   rL   rS     s   z_SanitizeColumnsVisitor.fieldr8   r,   r   List[Optional[IcebergType]]c                 C  s   t dd |D  S )Nc                 S  s   g | ]}|d ur|qS rZ   rE   rQ   rE   rE   rL   rT     rU   z2_SanitizeColumnsVisitor.struct.<locals>.<listcomp>)r,   r   rE   rE   rL   r8     r  z_SanitizeColumnsVisitor.structr   r&   r   c                 C  s   t |j||jdS N)r  rm  r  )r&   r  r  r   rE   rE   rL   r     r  z_SanitizeColumnsVisitor.listr   r(   r   r   c                 C  s   t |j|j|||jdS N)r  r  ro  rq  r  )r(   r  r  r  r  rE   rE   rL   r    s   z_SanitizeColumnsVisitor.mapr  r*   c                 C  r  rZ   rE   r  rE   rE   rL   r    r  z!_SanitizeColumnsVisitor.primitiveNr   r7   r   r  rM   r  rS   r)   r   r  rM   r  r8   r,   r   r  rM   r  r   r&   r   r  rM   r  r   r(   r   r  r   r  rM   r  r  r*   rM   r  )	r   r   r   r   rS   r8   r   r  r  rE   rE   rE   rL   r    s    


	

r  selectedr   select_full_typesc                 C  s:   t |  t||}t|pt j| jt|| j	dS )a  Prunes a column by only selecting a set of field-ids.

    Args:
        schema: The schema to be pruned.
        selected: The field-ids to be included.
        select_full_types: Return the full struct when a subset is recorded

    Returns:
        The pruned schema.
    r  )
r  r   _PruneColumnsVisitorr7   r,   r<   r@   r   intersectionrA   )r   r  r  r  rE   rE   rL   r     s   
r   c                   @  s   e Zd ZU ded< ded< d1ddZd2ddZd3ddZd4ddZd5ddZd6d"d#Z	d7d&d'Z
ed8d)d*Zed9d,d-Zed:d.d/Zd0S );r   r   r  rd   r  c                 C  s   || _ || _d S rZ   )r  r  )rI   r  r  rE   rE   rL   rG     r  z_PruneColumnsVisitor.__init__r   r7   r   r  rM   c                 C  r  rZ   rE   r   rE   rE   rL   r     r  z_PruneColumnsVisitor.schemar8   r,   r   r  c           	   
   C  s   |j }g }d}t|D ])\}}|| }|j|kr|| q|d ur4d}|t|j|j||j|jd q|rIt	|t	|krE|du rE|S t
| S d S )NTFr  )r<   r  r   r   r)   rq   r}   r  r   r`   r,   )	rI   r8   r   r<   selected_fields	same_typeidxprojected_typerS   rE   rE   rL   r8     s0   

z_PruneColumnsVisitor.structrS   r)   r   c                 C  sj   |j | jv r-| jr|jS |jjr| |S |jjs*td|j  d|j d|j d|jS |d ur3|S d S )N-Cannot explicitly project List or Map types, :	 of type  was selected)	rq   r  r  r   r   _project_selected_structr   r   r}   r   rE   rE   rL   rS     s   
z_PruneColumnsVisitor.fieldr   r&   r   c                 C  sx   |j | jv r0| jr|S |jr|jjr| |}| ||S |jjs.td|j  d|j d|S |d ur:| ||S d S )Nr  r  r	  )	r  r  r  rm  r   r
  _project_listr   r   )rI   r   r   projected_structrE   rE   rL   r     s   
z_PruneColumnsVisitor.listr   r(   r   r   c                 C  s   |j | jv r0| jr|S |jr|jjr| |}| ||S |jjs.td|j  d|j d|S |d ur:| ||S |j	| jv rB|S d S )Nz7Cannot explicitly project List or Map types, Map value r  r	  )
r  r  r  rq  r   r
  _project_mapr   r   r  )rI   r   r   r   r  rE   rE   rL   r     s    
z_PruneColumnsVisitor.mapr  r*   c                 C  s   d S rZ   rE   r  rE   rE   rL   r  4  r  z_PruneColumnsVisitor.primitiveprojected_fieldc                 C  s(   | rt | tstd| d u rt S | S )NzExpected a struct)rg   r,   r   )r  rE   rE   rL   r
  7  s
   z-_PruneColumnsVisitor._project_selected_structr$   c                 C  s    | j |kr| S t| j|| jdS r  )rm  r&   r  r  )r   r   rE   rE   rL   r  A  s
   

z"_PruneColumnsVisitor._project_listc                 C  s(   | j |kr| S t| j| j| j|| jdS r  )rq  r(   r  r  ro  r  )r   r   rE   rE   rL   r  J  s   
z!_PruneColumnsVisitor._project_mapN)r  r   r  rd   r  r  r  r  r  r  )r  r  rM   r,   )r   r&   r   r$   rM   r&   )r   r(   r   r$   rM   r(   )r   r   r   r   rG   r   r8   rS   r   r  r  staticmethodr
  r  r  rE   rE   rE   rL   r     s    
 






	r   	file_typer$   	read_typec                 C  s    | |kr| S t d|  d| )a  Promotes reading a file type to a read type.

    Args:
        file_type (IcebergType): The type of the Avro file.
        read_type (IcebergType): The requested read type.

    Raises:
        ResolveError: If attempting to resolve an unrecognized object type.
    Cannot promote  to r   r  r  rE   rE   rL   promoteX  s   r  r%   c                 C     t |tr|S td| )NzCannot promote an int to )rg   r'   r   r  rE   rE   rL   rk  i     
r#   c                 C  r  )NzCannot promote an float to )rg   r!   r   r  rE   rE   rL   rk  r  r  r+   c                 C  r  )NzCannot promote an string to )rg   r   r   r  rE   rE   rL   rk  {     
r   c                 C  r  )NzCannot promote an binary to )rg   r+   r   r  rE   rE   rL   rk    r  r    c                 C  sH   t |tr| j|jkr| j| jkr|S td|  d| td| )NzCannot reduce precision from r  zCannot promote an decimal to )rg   r    	precisionscaler   r  rE   rE   rL   rk    s
   
r"   c                 C  s.   t |trt| dkr|S td|  d| )N   r  r  )rg   r3   r`   r   r  rE   rE   rL   rk    s   r2   c                 C  s"   t |tr|S td|  d| )Nr  r  )rg   r*   r   r  rE   rE   rL   rk    s   
requested_schemaprovided_schemar   c                 C  s   t | t| dS )aZ  
    Check if the `provided_schema` is compatible with `requested_schema`.

    Both Schemas must have valid IDs and share the same ID for the same field names.

    Two schemas are considered compatible when:
    1. All `required` fields in `requested_schema` are present and are also `required` in the `provided_schema`
    2. Field Types are consistent for fields that are present in both schemas. I.e. the field type
       in the `provided_schema` can be promoted to the field type of the same field ID in `requested_schema`

    Raises:
        ValueError: If the schemas are not compatible.
    N)r  _SchemaCompatibilityVisitor)r  r  rE   rE   rL   _check_schema_compatible  s   r  c                   @  sf   e Zd ZU ded< d*ddZd+d	d
Zd,ddZd-ddZd.ddZd/ddZ	d0d#d$Z
d1d'd(Zd)S )2r  r7   r  c                 C  s`   ddl m} ddlm} || _|ddd| _| jd | jd | jd	 |dd
| _d S )Nr   )Console)TableTbold)show_headerheader_styler  zTable fieldzDataframe field)record)rich.consoler   
rich.tabler!  r  
rich_table
add_columnconsole)rI   r  r   	RichTablerE   rE   rL   rG     s   z$_SchemaCompatibilityVisitor.__init__re   r)   rM   rd   c              
     s  z	| j  jW n$ ty-    jr | jdt d Y dS | jdt d Y dS w  jrBjsB| jdt t dS  jjkrV| jdt t dS t	 fddt
tthD rs| jdt t dS ztj j | jdt t W dS  ty   tjtrt jtsd j d	}nd
 j d}| jdt t d|  Y dS | jdt t Y dS w )Nu   ❌MissingFu   ✅Tc                 3  s(    | ]}t  j|ot j|V  qd S rZ   )rg   r   )rR   container_typere   rf   rE   rL   r]     s
    
zC_SchemaCompatibilityVisitor._is_field_compatible.<locals>.<genexpr>zANull type (UnknownType) cannot be promoted to non-primitive type za. UnknownType can only be promoted to primitive types (string, int, boolean, etc.) in V3+ tables.z.Null type (UnknownType) cannot be promoted to zi. This may be due to table format version limitations (V1/V2 tables don't support UnknownType promotion).z - )r  r   rq   r   r   r(  add_rowrN   r   anyr,   r(   r&   r  r   rg   r2   r*   )rI   re   	error_msgrE   r.  rL   _is_field_compatible  sD   "z0_SchemaCompatibilityVisitor._is_field_compatibler   r   Callable[[], bool]c                 C  s0   |  }s| j | j td| j   |S )NzMismatch in fields:
)r*  printr(  r   export_text)rI   r   r   r  rE   rE   rL   r     s   
z"_SchemaCompatibilityVisitor.schemar8   r,   r   List[Callable[[], bool]]c                 C  s   dd |D }t |S )Nc                 S  s   g | ]}| qS rE   rE   )rR   r  rE   rE   rL   rT     s    z6_SchemaCompatibilityVisitor.struct.<locals>.<listcomp>)rh   )rI   r8   r   r  rE   rE   rL   r8     s   z"_SchemaCompatibilityVisitor.structrS   r   c                 C  s   |  |o| S rZ   )r2  r   rE   rE   rL   rS     s   z!_SchemaCompatibilityVisitor.fieldr   r&   r   c                 C  s   |  |jo| S rZ   )r2  rl  r   rE   rE   rL   r     r  z _SchemaCompatibilityVisitor.listr   r(   r   r   c                 C  s$   t | |j| |j| | gS rZ   )rh   r2  rn  rp  r  rE   rE   rL   r    s   

z_SchemaCompatibilityVisitor.mapr  r*   c                 C  r   )NTrE   r  rE   rE   rL   r    r  z%_SchemaCompatibilityVisitor.primitiveN)r  r7   )re   r)   rM   rd   )r   r7   r   r3  rM   rd   )r8   r,   r   r6  rM   rd   )rS   r)   r   r3  rM   rd   )r   r&   r   r3  rM   rd   )r   r(   r   r3  r   r3  rM   rd   )r  r*   rM   rd   )r   r   r   r   rG   r2  r   r8   rS   r   r  r  rE   rE   rE   rL   r    s   
 


1




r  )
rb  rc  r8  r6   rd  re  rf  rg  rM   r5   )
r   r7   r8  r6   rd  rj  rf  rg  rM   r5   )
r8   r,   r8  r6   rd  rj  rf  rg  rM   r5   )
r   r&   r8  r6   rd  rj  rf  rg  rM   r5   )
r   r(   r8  r6   rd  rj  rf  rg  rM   r5   )
r  r*   r8  r6   rd  rj  rk  rg  rM   r5   )r  rc  rd  r  rM   r5   )r  r7   rd  r  rM   r5   )r  r,   rd  r  rM   r5   )r  r&   rd  r  rM   r5   )r  r(   rd  r  rM   r5   )r  r*   rd  r  rM   r5   )r  rc  rd  r  rM   r5   )r  r7   rd  r  rM   r5   )r  r,   rd  r  rM   r5   )r  r&   rd  r  rM   r5   )r  r(   rd  r  rM   r5   )r  r*   rd  r  rM   r5   )rb  rc  rM   rt   )rb  rc  rM   ry   )rb  rc  rM   rB   )rb  rc  rM   r   )rb  rc  rM   r   rZ   )rb  rc  r  r  rM   r7   )r}   rN   rM   rN   )r}   rN   rM   rd   )r  rN   rM   rN   )r   r7   rM   r7   r   )r   r7   r  r   r  rd   rM   r7   )r  r$   r  r$   rM   r$   )r  r%   r  r$   rM   r$   )r  r#   r  r$   rM   r$   )r  r+   r  r$   rM   r$   )r  r   r  r$   rM   r$   )r  r    r  r$   rM   r$   )r  r"   r  r$   rM   r$   )r  r2   r  r$   rM   r$   )r  r7   r  r7   rM   r   )k
__future__r   r  abcr   r   dataclassesr   	functoolsr   r   r   typingr	   r
   r   r   r   r   r   r   r   r   r   r   pydanticr   r   r   pyiceberg.exceptionsr   pyiceberg.typedefr   r   r   pyiceberg.typesr   r   r   r    r!   r"   r#   r$   r%   r&   r'   r(   r)   r*   r+   r,   r-   r.   r/   r0   r1   r2   r3   pyarrowpar   r4   r5   r6   r   FIELD_ID_PROPICEBERG_FIELD_NAME_PROPr7   r   r  r  r$  rY  ri  registerrk  rs  r   r  r  r?   r  rv   r  rz   rN   r  rH   r   Positionr  r   r  r  r  r  r  r  r  r  r   r   r  r  rd   r  rE   rE   rE   rL   <module>   s   8d  /88mm!	
"
'
h

>
?



" 

