o
    <i$                    @  s<  d dl mZ d dlZd dlZd dlmZmZ d dlmZ d dl	m
Z
 d dlmZmZmZ d dlmZmZmZmZmZ d dlmZmZmZ d d	lmZ d d
lmZmZmZ d dlm Z m!Z!m"Z"m#Z#m$Z$m%Z%m&Z&m'Z'm(Z(m)Z)m*Z*m+Z+m,Z,m-Z-m.Z.m/Z/m0Z0m1Z1m2Z2m3Z3m4Z4m5Z5m6Z6 erd dl7Z8d dl9m:Z: edZ;edZ<d Z=dZ>dZ?G dd deZ@G dd dee; eZAG dd dee; eZBG dd dee<e;f eZCG dd deCe<e;f ZDG dd dee< eZEedd%d&ZFeFGe@dd)d*ZHeFGe/dd-d*ZHeFGe)dd0d*ZHeFGe+dd3d*ZHeFGe-dd6d*ZHG d7d8 d8eAe; eZIe
d9d9d9d:G d;d< d<ZJedd?d@ZKeKGe@ddAd*ZHeKGe/ddBd*ZHeKGe)ddCd*ZHeKGe+ddDd*ZHeKGe-ddEd*ZHeddGdHZLeLGe@ddId*ZHeLGe/ddJd*ZHeLGe)ddKd*ZHeLGe+ddLd*ZHeLGe-ddMd*ZHG dNdO dOeAeMeNe,f  ZOddQdRZPG dSdT dTeAeMeNeNf  ZQddVdWZRG dXdY dYeAeMeSeNf  ZTdd[d\ZUdd^d_ZVeNZWG d`da daeAeMeWeJf  ZXddcddZYdddgdhZZG didj djeBe' Z[ddmdnZ\ddpdqZ]ddrdsZ^ddudvZ_ddwdxZ`G dydz dzeAe'dB  Zaddd~dZbG dd deAe'dB  ZcedddZdedGe(ddd*ZHedGe&ddd*ZHedGe.ddd*ZHedGe ddd*ZHedGe#ddd*ZHedGe%ddd*ZHedGe5ddd*ZHdddZeG dd deBef ZgdS )    )annotationsN)ABCabstractmethod)Callable)	dataclass)cached_propertypartialsingledispatch)TYPE_CHECKINGAnyGenericLiteralTypeVar)FieldPrivateAttrmodel_validatorResolveError)
EMPTY_DICTIcebergBaseModelStructProtocol)
BinaryTypeBooleanTypeDateTypeDecimalType
DoubleType	FixedType	FloatTypeIcebergTypeIntegerTypeListTypeLongTypeMapTypeNestedFieldPrimitiveType
StringType
StructTypeTimestampNanoTypeTimestampTypeTimestamptzNanoTypeTimestamptzTypeTimeTypeUnknownTypeUUIDType)NameMappingTPzfield-idziceberg-field-namec                      s  e Zd ZU dZdZded< eedZded< ede	d	Z
d
ed< ededZded< e Zded< df fddZdgddZdgddZdhddZdid!d"Zed#d$djd%d&Zedkd'd(Zedld*d+Zedmd-d.Zednd/d0Zedod2d3Zedpd5d6Zdqd8d9Zdrd;d<ZdsdtdAdBZ dsdudDdEZ!edhdFdGZ"edvdIdJZ#dwdMdNZ$edxdPdQZ%dydTdUZ&dzdWdXZ'd=dYd{d[d\Z(ed|d^d_Z)d}dadbZ*d~dddeZ+  Z,S )SchemazsA table Schema.

    Example:
        >>> from pyiceberg import schema
        >>> from pyiceberg import types
    structzLiteral['struct']type)default_factorytuple[NestedField, ...]fieldsz	schema-id)aliasdefaultint	schema_idzidentifier-field-ids)r7   r4   z	list[int]identifier_field_idsdict[str, int]_name_to_idr#   datar   c                   s,   |r||d< t  jdi | t| | _d S )Nr6    )super__init__index_by_namer=   )selfr6   r>   	__class__r?   M/home/ubuntu/veenaModal/venv/lib/python3.10/site-packages/pyiceberg/schema.pyrA   _   s   zSchema.__init__returnstrc                 C  s   dd dd | jD  d S )5Return the string representation of the Schema class.ztable {

c                 S  s   g | ]}d t | qS )z  )rH   .0fieldr?   r?   rF   
<listcomp>g       z"Schema.__str__.<locals>.<listcomp>z
})joincolumnsrC   r?   r?   rF   __str__e   s   zSchema.__str__c                 C  s2   d dd | jD }d| d| j d| j dS )rI   z, c                 s  s    | ]}t |V  qd S N)repr)rL   columnr?   r?   rF   	<genexpr>k   s    z"Schema.__repr__.<locals>.<genexpr>zSchema(z, schema_id=z, identifier_field_ids=))rP   rQ   r:   r;   )rC   columns_reprr?   r?   rF   __repr__i   s   zSchema.__repr__c                 C  s
   t | jS )z6Return the length of an instance of the Literal class.)lenr6   rR   r?   r?   rF   __len__n      
zSchema.__len__otherboolc                 C  sb   |sdS t |tsdS t| jt|jkrdS | j|jk}tdd t| j|jddD }|o0|S )z9Return the equality of two instances of the Schema class.Fc                 s  s    | ]	\}}||kV  qd S rT   r?   )rL   lhsrhsr?   r?   rF   rW   ~   s    z Schema.__eq__.<locals>.<genexpr>Tstrict)
isinstancer1   r[   rQ   r;   allzip)rC   r^   identifier_field_ids_is_equalschema_is_equalr?   r?   rF   __eq__r   s   
 zSchema.__eq__after)modec                 C  s    | j r| j D ]}| | q| S rT   )r;   _validate_identifier_fieldrC   field_idr?   r?   rF   check_schema   s   
zSchema.check_schemac                 C     | j S )z A tuple of the top-level fields.)r6   rR   r?   r?   rF   rQ      s   zSchema.columnsdict[int, NestedField]c                 C     t | S )zReturn an index of field ID to NestedField instance.

        This is calculated once when called for the first time. Subsequent calls to this method will use a cached index.
        )index_by_idrR   r?   r?   rF   _lazy_id_to_field      zSchema._lazy_id_to_fielddict[int, int]c                 C  rr   )zReturns an index of field ID to parent field IDs.

        This is calculated once when called for the first time. Subsequent calls to this method will use a cached index.
        )_index_parentsrR   r?   r?   rF   _lazy_id_to_parent   ru   zSchema._lazy_id_to_parentc                 C  s   dd | j  D S )zReturn an index of lower-case field names to field IDs.

        This is calculated once when called for the first time. Subsequent calls to this method will use a cached index.
        c                 S  s   i | ]	\}}|  |qS r?   )lower)rL   namern   r?   r?   rF   
<dictcomp>       z1Schema._lazy_name_to_id_lower.<locals>.<dictcomp>)r=   itemsrR   r?   r?   rF   _lazy_name_to_id_lower   s   zSchema._lazy_name_to_id_lowerdict[int, str]c                 C  rr   )zReturn an index of field ID to full name.

        This is calculated once when called for the first time. Subsequent calls to this method will use a cached index.
        )index_name_by_idrR   r?   r?   rF   _lazy_id_to_name   ru   zSchema._lazy_id_to_namedict[int, Accessor]c                 C  rr   )zReturn an index of field ID to accessor.

        This is calculated once when called for the first time. Subsequent calls to this method will use a cached index.
        )build_position_accessorsrR   r?   r?   rF   _lazy_id_to_accessor   ru   zSchema._lazy_id_to_accessorr&   c                 C  s
   t | j S )zReturn the schema as a struct.)r&   r6   rR   r?   r?   rF   	as_struct   r]   zSchema.as_struct	pa.Schemac                 C     ddl m} || S )z%Return the schema as an Arrow schema.r   )schema_to_pyarrow)pyiceberg.io.pyarrowr   )rC   r   r?   r?   rF   as_arrow      zSchema.as_arrowT
name_or_id	str | intcase_sensitivec                 C  st   t |tr|| jvrtd| | j| S |r| j|}n| j| }|du r5td| d| | j| S )a  Find a field using a field name or field ID.

        Args:
            name_or_id (Union[str, int]): Either a field name or a field ID.
            case_sensitive (bool, optional): Whether to perform a case-sensitive lookup using a field name. Defaults to True.

        Raises:
            ValueError: When the value cannot be found.

        Returns:
            NestedField: The matched NestedField.
        zCould not find field with id: NzCould not find field with name , case_sensitive=)rd   r9   rt   
ValueErrorr=   getr~   ry   )rC   r   r   rn   r?   r?   rF   
find_field   s   



zSchema.find_fieldr   c                 C  s,   | j ||d}|std| d| |jS )ad  Find a field type using a field name or field ID.

        Args:
            name_or_id (Union[str, int]): Either a field name or a field ID.
            case_sensitive (bool, optional): Whether to perform a case-sensitive lookup using a field name. Defaults to True.

        Returns:
            NestedField: The type of the matched NestedField.
        )r   r   z%Could not find field with name or id r   )r   r   
field_type)rC   r   r   rM   r?   r?   rF   	find_type   s   
zSchema.find_typec                 C  s   t | j ddS )Nr   )r8   )maxr   keysrR   r?   r?   rF   highest_field_id   s   zSchema.highest_field_idr.   c                 C  r   )Nr   )create_mapping_from_schema)pyiceberg.table.name_mappingr   )rC   r   r?   r?   rF   name_mapping   r   zSchema.name_mapping	column_id
str | Nonec                 C  s   | j |S )zFind a column name given a column ID.

        Args:
            column_id (int): The ID of the column.

        Returns:
            str: The column name (or None if the column ID cannot be found).
        )r   r   )rC   r   r?   r?   rF   find_column_name      	zSchema.find_column_name	list[str]c                 C     t | j S )z
        Return a list of all the column names, including nested fields.

        Excludes short names.

        Returns:
            List[str]: The column names.
        )listr   valuesrR   r?   r?   rF   column_names  s   
zSchema.column_namesrn   Accessorc                 C  s"   || j vrtd| | j | S )a  Find a schema position accessor given a field ID.

        Args:
            field_id (int): The ID of the field.

        Raises:
            ValueError: When the value cannot be found.

        Returns:
            Accessor: An accessor for the given field ID.
        z+Could not find accessor for field with id: )r   r   rm   r?   r?   rF   accessor_for_field  s   

zSchema.accessor_for_fieldset[str]c                 C  s@   t  }| jD ]}| |}|du rtd| || q|S )zwReturn the names of the identifier fields.

        Returns:
            Set of names of the identifier fields
        Nz%Could not find identifier column id: )setr;   r   r   add)rC   idsrn   column_namer?   r?   rF   identifier_field_names  s   

zSchema.identifier_field_names)r   namesc             
     sb   z|r fdd|D }n	 fdd|D }W n t y+ } ztd| |d}~ww t |S )a  Return a new schema instance pruned to a subset of columns.

        Args:
            names (List[str]): A list of column names.
            case_sensitive (bool, optional): Whether to perform a case-sensitive lookup for each column name. Defaults to True.

        Returns:
            Schema: A new schema with pruned columns.

        Raises:
            ValueError: If a column is selected that doesn't exist.
        c                   s   h | ]} j | qS r?   )r=   rL   rz   rR   r?   rF   	<setcomp><      z Schema.select.<locals>.<setcomp>c                   s   h | ]	} j |  qS r?   )r~   ry   r   rR   r?   rF   r   >  r|   zCould not find column: N)KeyErrorr   prune_columns)rC   r   r   r   er?   rR   rF   select-  s   
zSchema.selectset[int]c                 C  r   )z%Return the IDs of the current schema.)r   r=   r   rR   r?   r?   rF   	field_idsD  s   zSchema.field_idsNonec                 C  s   |  |}|jjstd| d|jstd| dt|jttfr,td| d| j	|j
}g }|durH|| | j	|}|dus9|rr|  | }|jjs`td|j d| |jsntd|j d| |sJdS dS )	zValidate that the field with the given ID is a valid identifier field.

        Args:
          field_id: The ID of the field to validate.

        Raises:
          ValueError: If the field is not valid.
        zIdentifier field z$ invalid: not a primitive type fieldz invalid: not a required fieldz+ invalid: must not be float or double fieldNzCannot add field z/ as an identifier field: must not be nested in zA as an identifier field: must not be nested in an optional field )r   r   is_primitiver   requiredrd   r   r   rx   r   rn   appendpop	is_structrz   )rC   rn   rM   	parent_idr6   parentr?   r?   rF   rl   I  s,   
	
z!Schema._validate_identifier_fieldformat_versionc                 C  sB   | j  D ]}||j k rt|j d|j  d| qdS )zCheck that the schema is compatible for the given table format version.

        Args:
          format_version: The Iceberg table format version.

        Raises:
          ValueError: If the schema is not compatible for the format version.
        z is only supported in z' or higher. Current format version is: N)rt   r   r   minimum_format_versionr   )rC   r   rM   r?   r?   rF   "check_format_version_compatibilityn  s   	z)Schema.check_format_version_compatibility)r6   r#   r>   r   rG   rH   )rG   r9   )r^   r   rG   r_   )rG   r1   )rG   r5   )rG   rq   )rG   rv   rG   r<   rG   r   )rG   r   )rG   r&   )rG   r   T)r   r   r   r_   rG   r#   )r   r   r   r_   rG   r   )rG   r.   )r   r9   rG   r   )rG   r   )rn   r9   rG   r   )rG   r   )r   rH   r   r_   rG   r1   )rG   r   )rn   r9   rG   r   )r   r9   rG   r   )-__name__
__module____qualname____doc__r3   __annotations__r   tupler6   INITIAL_SCHEMA_IDr:   r   r;   r   r=   rA   rS   rZ   r\   ri   r   ro   propertyrQ   r   rt   rx   r~   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rl   r   __classcell__r?   r?   rD   rF   r1   P   sX   
 









%r1   c                   @  s   e Zd Zd7ddZd7ddZd8d
dZd8ddZd9ddZd9ddZd:ddZ	d:ddZ
ed;ddZed<d"d#Zed=d%d&Zed>d*d+Zed?d0d1Zed@d4d5Zd6S )ASchemaVisitorrM   r#   rG   r   c                 C     dS zNOverride this method to perform an action immediately before visiting a field.Nr?   rC   rM   r?   r?   rF   before_field      zSchemaVisitor.before_fieldc                 C  r   zMOverride this method to perform an action immediately after visiting a field.Nr?   r   r?   r?   rF   after_field  r   zSchemaVisitor.after_fieldelementc                 C     |  | dS zcOverride this method to perform an action immediately before visiting an element within a ListType.Nr   rC   r   r?   r?   rF   before_list_element     z!SchemaVisitor.before_list_elementc                 C  r   zbOverride this method to perform an action immediately after visiting an element within a ListType.Nr   r   r?   r?   rF   after_list_element  r   z SchemaVisitor.after_list_elementkeyc                 C  r   z]Override this method to perform an action immediately before visiting a key within a MapType.Nr   rC   r   r?   r?   rF   before_map_key  r   zSchemaVisitor.before_map_keyc                 C  r   z\Override this method to perform an action immediately after visiting a key within a MapType.Nr   r   r?   r?   rF   after_map_key  r   zSchemaVisitor.after_map_keyvaluec                 C  r   z_Override this method to perform an action immediately before visiting a value within a MapType.Nr   rC   r   r?   r?   rF   before_map_value  r   zSchemaVisitor.before_map_valuec                 C  r   z^Override this method to perform an action immediately after visiting a value within a MapType.Nr   r   r?   r?   rF   after_map_value  r   zSchemaVisitor.after_map_valueschemar1   struct_resultr/   c                 C  r   zVisit a Schema.Nr?   rC   r   r   r?   r?   rF   r     r   zSchemaVisitor.schemar2   r&   field_resultsbuiltins.list[T]c                 C  r   zVisit a StructType.Nr?   rC   r2   r   r?   r?   rF   r2     r   zSchemaVisitor.structfield_resultc                 C  r   zVisit a NestedField.Nr?   rC   rM   r   r?   r?   rF   rM     r   zSchemaVisitor.field	list_typer    element_resultc                 C  r   zVisit a ListType.Nr?   rC   r   r   r?   r?   rF   r     r   zSchemaVisitor.listmap_typer"   
key_resultvalue_resultc                 C  r   zVisit a MapType.Nr?   rC   r   r   r   r?   r?   rF   map  r   zSchemaVisitor.map	primitiver$   c                 C  r   Visit a PrimitiveType.Nr?   rC   r  r?   r?   rF   r    r   zSchemaVisitor.primitiveNrM   r#   rG   r   r   r#   rG   r   )r   r#   rG   r   r   r#   rG   r   )r   r1   r   r/   rG   r/   )r2   r&   r   r   rG   r/   )rM   r#   r   r/   rG   r/   )r   r    r   r/   rG   r/   )r   r"   r   r/   r   r/   rG   r/   r  r$   rG   r/   r   r   r   r   r   r   r   r   r   r   r   r   r   r2   rM   r   r   r  r?   r?   r?   rF   r     *    







r   c                   @  s`   e Zd Zed$ddZed%ddZed&ddZed'ddZed(ddZed)d!d"Z	d#S )*PreOrderSchemaVisitorr   r1   r   Callable[[], T]rG   r/   c                 C  r   r   r?   r   r?   r?   rF   r     r   zPreOrderSchemaVisitor.schemar2   r&   r   builtins.list[Callable[[], T]]c                 C  r   r   r?   r   r?   r?   rF   r2     r   zPreOrderSchemaVisitor.structrM   r#   r   c                 C  r   r   r?   r   r?   r?   rF   rM     r   zPreOrderSchemaVisitor.fieldr   r    r   c                 C  r   r   r?   r   r?   r?   rF   r     r   zPreOrderSchemaVisitor.listr   r"   r   r   c                 C  r   r   r?   r   r?   r?   rF   r     r   zPreOrderSchemaVisitor.mapr  r$   c                 C  r   r  r?   r  r?   r?   rF   r    r   zPreOrderSchemaVisitor.primitiveN)r   r1   r   r  rG   r/   )r2   r&   r   r  rG   r/   )rM   r#   r   r  rG   r/   )r   r    r   r  rG   r/   )r   r"   r   r  r   r  rG   r/   r  )
r   r   r   r   r   r2   rM   r   r   r  r?   r?   r?   rF   r    s    r  c                   @  s   e Zd ZdAddZdAd	d
ZdBddZdBddZdCddZdCddZdDddZ	dDddZ
edEd"d#ZedFd)d*ZedGd,d-ZedHd2d3ZedId9d:ZedJd>d?Zd@S )KSchemaWithPartnerVisitorrM   r#   field_partnerP | NonerG   r   c                 C  r   r   r?   rC   rM   r  r?   r?   rF   r     r   z%SchemaWithPartnerVisitor.before_fieldc                 C  r   r   r?   r  r?   r?   rF   r     r   z$SchemaWithPartnerVisitor.after_fieldr   element_partnerc                 C     |  || dS r   r   rC   r   r  r?   r?   rF   r        z,SchemaWithPartnerVisitor.before_list_elementc                 C  r  r   r   r  r?   r?   rF   r     r  z+SchemaWithPartnerVisitor.after_list_elementr   key_partnerc                 C  r  r   r   rC   r   r  r?   r?   rF   r     r  z'SchemaWithPartnerVisitor.before_map_keyc                 C  r  r   r   r  r?   r?   rF   r     r  z&SchemaWithPartnerVisitor.after_map_keyr   value_partnerc                 C  r  r   r   rC   r   r  r?   r?   rF   r     r  z)SchemaWithPartnerVisitor.before_map_valuec                 C  r  r   r   r  r?   r?   rF   r     r  z(SchemaWithPartnerVisitor.after_map_valuer   r1   schema_partnerr   r/   c                 C  r   )zVisit a schema with a partner.Nr?   )rC   r   r  r   r?   r?   rF   r     r   zSchemaWithPartnerVisitor.schemar2   r&   struct_partnerr   r   c                 C  r   )z#Visit a struct type with a partner.Nr?   )rC   r2   r  r   r?   r?   rF   r2     r   zSchemaWithPartnerVisitor.structr   c                 C  r   )z$Visit a nested field with a partner.Nr?   )rC   rM   r  r   r?   r?   rF   rM     r   zSchemaWithPartnerVisitor.fieldr   r    list_partnerr   c                 C  r   )z!Visit a list type with a partner.Nr?   )rC   r   r  r   r?   r?   rF   r     r   zSchemaWithPartnerVisitor.listr   r"   map_partnerr   r   c                 C  r   )z Visit a map type with a partner.Nr?   )rC   r   r  r   r   r?   r?   rF   r      r   zSchemaWithPartnerVisitor.mapr  r$   primitive_partnerc                 C  r   )z&Visit a primitive type with a partner.Nr?   rC   r  r  r?   r?   rF   r    r   z"SchemaWithPartnerVisitor.primitiveN)rM   r#   r  r  rG   r   )r   r#   r  r  rG   r   )r   r#   r  r  rG   r   )r   r#   r  r  rG   r   )r   r1   r  r  r   r/   rG   r/   )r2   r&   r  r  r   r   rG   r/   )rM   r#   r  r  r   r/   rG   r/   )r   r    r  r  r   r/   rG   r/   )
r   r"   r  r  r   r/   r   r/   rG   r/   r  r$   r  r  rG   r/   r	  r?   r?   r?   rF   r    r
  r  c                   @  s  e Zd ZdOddZedPddZedQddZedRddZedSddZedTddZ	edUd d!Z
edVd$d%ZedWd(d)ZedXd,d-ZedYd0d1ZedZd4d5Zed[d8d9Zed\d<d=Zed]d@dAZed^dDdEZed_dHdIZed`dLdMZdNS )aPrimitiveWithPartnerVisitorr  r$   r  r  rG   r/   c                 C  s  t |tr| ||S t |tr| ||S t |tr!| ||S t |tr,| ||S t |t	r7| 
||S t |trB| ||S t |trM| ||S t |trX| ||S t |trc| ||S t |trn| ||S t |try| ||S t |tr| ||S t |tr| ||S t |tr| ||S t |tr| ||S t |tr|  ||S t |t!r| "||S t#d| r  zType not recognized: )$rd   r   visit_booleanr   visit_integerr!   
visit_longr   visit_floatr   visit_doubler   visit_decimalr   
visit_dater+   
visit_timer(   visit_timestampr'   visit_timestamp_nsr*   visit_timestamptzr)   visit_timestamptz_nsr%   visit_stringr-   
visit_uuidr   visit_fixedr   visit_binaryr,   visit_unknownr   r  r?   r?   rF   r  
  sF   
















z%PrimitiveWithPartnerVisitor.primitiveboolean_typer   partnerc                 C  r   zVisit a BooleanType.Nr?   )rC   r4  r5  r?   r?   rF   r#  1  r   z)PrimitiveWithPartnerVisitor.visit_booleaninteger_typer   c                 C  r   zVisit a IntegerType.Nr?   )rC   r7  r5  r?   r?   rF   r$  5  r   z)PrimitiveWithPartnerVisitor.visit_integer	long_typer!   c                 C  r   zVisit a LongType.Nr?   )rC   r9  r5  r?   r?   rF   r%  9  r   z&PrimitiveWithPartnerVisitor.visit_long
float_typer   c                 C  r   zVisit a FloatType.Nr?   )rC   r;  r5  r?   r?   rF   r&  =  r   z'PrimitiveWithPartnerVisitor.visit_floatdouble_typer   c                 C  r   zVisit a DoubleType.Nr?   )rC   r=  r5  r?   r?   rF   r'  A  r   z(PrimitiveWithPartnerVisitor.visit_doubledecimal_typer   c                 C  r   zVisit a DecimalType.Nr?   )rC   r?  r5  r?   r?   rF   r(  E  r   z)PrimitiveWithPartnerVisitor.visit_decimal	date_typer   c                 C  r   r@  r?   )rC   rA  r5  r?   r?   rF   r)  I  r   z&PrimitiveWithPartnerVisitor.visit_date	time_typer+   c                 C  r   r@  r?   )rC   rB  r5  r?   r?   rF   r*  M  r   z&PrimitiveWithPartnerVisitor.visit_timetimestamp_typer(   c                 C  r   zVisit a TimestampType.Nr?   )rC   rC  r5  r?   r?   rF   r+  Q  r   z+PrimitiveWithPartnerVisitor.visit_timestamptimestamp_ns_typer'   c                 C  r   zVisit a TimestampNanoType.Nr?   )rC   rE  r5  r?   r?   rF   r,  U  r   z.PrimitiveWithPartnerVisitor.visit_timestamp_nstimestamptz_typer*   c                 C  r   zVisit a TimestamptzType.Nr?   )rC   rG  r5  r?   r?   rF   r-  Y  r   z-PrimitiveWithPartnerVisitor.visit_timestamptztimestamptz_ns_typer)   c                 C  r   zVisit a TimestamptzNanoType.Nr?   )rC   rI  r5  r?   r?   rF   r.  ]  r   z0PrimitiveWithPartnerVisitor.visit_timestamptz_nsstring_typer%   c                 C  r   zVisit a StringType.Nr?   )rC   rK  r5  r?   r?   rF   r/  a  r   z(PrimitiveWithPartnerVisitor.visit_string	uuid_typer-   c                 C  r   zVisit a UUIDType.Nr?   )rC   rM  r5  r?   r?   rF   r0  e  r   z&PrimitiveWithPartnerVisitor.visit_uuid
fixed_typer   c                 C  r   zVisit a FixedType.Nr?   )rC   rO  r5  r?   r?   rF   r1  i  r   z'PrimitiveWithPartnerVisitor.visit_fixedbinary_typer   c                 C  r   zVisit a BinaryType.Nr?   )rC   rQ  r5  r?   r?   rF   r2  m  r   z(PrimitiveWithPartnerVisitor.visit_binaryunknown_typer,   c                 C  r   zVisit a UnknownType.Nr?   )rC   rS  r5  r?   r?   rF   r3  q  r   z)PrimitiveWithPartnerVisitor.visit_unknownNr   )r4  r   r5  r  rG   r/   )r7  r   r5  r  rG   r/   )r9  r!   r5  r  rG   r/   )r;  r   r5  r  rG   r/   )r=  r   r5  r  rG   r/   )r?  r   r5  r  rG   r/   )rA  r   r5  r  rG   r/   )rB  r+   r5  r  rG   r/   )rC  r(   r5  r  rG   r/   )rE  r'   r5  r  rG   r/   )rG  r*   r5  r  rG   r/   )rI  r)   r5  r  rG   r/   )rK  r%   r5  r  rG   r/   )rM  r-   r5  r  rG   r/   )rO  r   r5  r  rG   r/   )rQ  r   r5  r  rG   r/   )rS  r,   r5  r  rG   r/   )r   r   r   r  r   r#  r$  r%  r&  r'  r(  r)  r*  r+  r,  r-  r.  r/  r0  r1  r2  r3  r?   r?   r?   rF   r!  	  H    
'r!  c                   @  sR   e Zd ZedddZedddZedddZedddZedddZdS )PartnerAccessorr5  r  rG   c                 C  r   )z0Return the equivalent of the schema as a struct.Nr?   )rC   r5  r?   r?   rF   r  w  r   zPartnerAccessor.schema_partnerpartner_structrn   r9   
field_namerH   c                 C  r   )zGReturn the equivalent struct field by name or id in the partner struct.Nr?   )rC   rW  rn   rX  r?   r?   rF   r  {  r   zPartnerAccessor.field_partnerpartner_listc                 C  r   )z7Return the equivalent list element in the partner list.Nr?   )rC   rY  r?   r?   rF   list_element_partner  r   z$PartnerAccessor.list_element_partnerpartner_mapc                 C  r   )z1Return the equivalent map key in the partner map.Nr?   rC   r[  r?   r?   rF   map_key_partner  r   zPartnerAccessor.map_key_partnerc                 C  r   )z3Return the equivalent map value in the partner map.Nr?   r\  r?   r?   rF   map_value_partner  r   z!PartnerAccessor.map_value_partnerN)r5  r  rG   r  )rW  r  rn   r9   rX  rH   rG   r  )rY  r  rG   r  )r[  r  rG   r  )	r   r   r   r   r  r  rZ  r]  r^  r?   r?   r?   rF   rV  v  s    rV  schema_or_typeSchema | IcebergTyper5  visitorSchemaWithPartnerVisitor[T, P]accessorPartnerAccessor[P]rG   c                 C     t d|  )NzUnsupported type: )r   )r_  r5  ra  rc  r?   r?   rF   visit_with_partner  s   rf  r   SchemaWithPartnerVisitor[P, T]c              	   C  s&   | |}|| |t|  |||S rT   )r  r   rf  r   )r   r5  ra  rc  r  r?   r?   rF   _  s   
rh  r2   r&   c                 C  s~   g }| j D ]2}|||j|j}||| zt|j|||}||||| W |	|| q|	|| w |
| ||S rT   )r6   r  rn   rz   r   rf  r   r   rM   r   r2   )r2   r5  ra  rc  r   rM   r  r   r?   r?   rF   rh    s   
r   r    c              
   C  sZ   | |}|| j| zt| j|||}W || j| n|| j| w || ||S rT   )rZ  r   element_fieldrf  element_typer   r   )r   r5  ra  rc  r  r   r?   r?   rF   rh    s   
 r   r"   c              
   C  s   | |}|| j| zt| j|||}W || j| n|| j| w ||}|| j| zt| j	|||}W |
| j| n|
| j| w || |||S rT   )r]  r   	key_fieldrf  key_typer   r^  r   value_field
value_typer   r   )r   r5  ra  rc  r  r   r  r   r?   r?   rF   rh    s   
 
 r  r$   c                 C  s   | | |S rT   r  )r  r5  ra  rh  r?   r?   rF   rh    s   c                   @  s  e Zd ZdKddZedLd	d
ZedMddZedNddZedOddZedPddZ	edQddZ
edRd!d"ZedSd%d&ZedTd)d*ZedUd-d.ZedVd0d1ZedWd4d5ZedXd8d9ZedYd<d=ZedZd@dAZed[dDdEZed\dHdIZdJS )]SchemaVisitorPerPrimitiveTyper  r$   rG   r/   c                 C  sb  t |tr
| |S t |tr| |S t |tr| |S t |tr(| |S t |t	r2| 
|S t |tr<| |S t |trF| |S t |trP| |S t |trZ| |S t |trd| |S t |trn| |S t |trx| |S t |tr| |S t |tr| |S t |tr| |S t |tr|  |S t |t!r| "|S t#d| r"  )$rd   r   r1  r   r(  r   r#  r   r$  r!   r%  r   r&  r   r'  r   r)  r+   r*  r(   r+  r'   r,  r*   r-  r)   r.  r%   r/  r-   r0  r   r2  r,   r3  r   r  r?   r?   rF   r    sF   

































z'SchemaVisitorPerPrimitiveType.primitiverO  r   c                 C  r   rP  r?   )rC   rO  r?   r?   rF   r1    r   z)SchemaVisitorPerPrimitiveType.visit_fixedr?  r   c                 C  r   r@  r?   )rC   r?  r?   r?   rF   r(    r   z+SchemaVisitorPerPrimitiveType.visit_decimalr4  r   c                 C  r   r6  r?   )rC   r4  r?   r?   rF   r#    r   z+SchemaVisitorPerPrimitiveType.visit_booleanr7  r   c                 C  r   r8  r?   )rC   r7  r?   r?   rF   r$    r   z+SchemaVisitorPerPrimitiveType.visit_integerr9  r!   c                 C  r   r:  r?   )rC   r9  r?   r?   rF   r%    r   z(SchemaVisitorPerPrimitiveType.visit_longr;  r   c                 C  r   r<  r?   )rC   r;  r?   r?   rF   r&    r   z)SchemaVisitorPerPrimitiveType.visit_floatr=  r   c                 C  r   r>  r?   )rC   r=  r?   r?   rF   r'    r   z*SchemaVisitorPerPrimitiveType.visit_doublerA  r   c                 C  r   r@  r?   )rC   rA  r?   r?   rF   r)    r   z(SchemaVisitorPerPrimitiveType.visit_daterB  r+   c                 C  r   r@  r?   )rC   rB  r?   r?   rF   r*    r   z(SchemaVisitorPerPrimitiveType.visit_timerC  r(   c                 C  r   rD  r?   rC   rC  r?   r?   rF   r+    r   z-SchemaVisitorPerPrimitiveType.visit_timestampr'   c                 C  r   rF  r?   rq  r?   r?   rF   r,    r   z0SchemaVisitorPerPrimitiveType.visit_timestamp_nsrG  r*   c                 C  r   rH  r?   )rC   rG  r?   r?   rF   r-    r   z/SchemaVisitorPerPrimitiveType.visit_timestamptzrI  r)   c                 C  r   rJ  r?   )rC   rI  r?   r?   rF   r.  #  r   z2SchemaVisitorPerPrimitiveType.visit_timestamptz_nsrK  r%   c                 C  r   rL  r?   )rC   rK  r?   r?   rF   r/  '  r   z*SchemaVisitorPerPrimitiveType.visit_stringrM  r-   c                 C  r   rN  r?   )rC   rM  r?   r?   rF   r0  +  r   z(SchemaVisitorPerPrimitiveType.visit_uuidrQ  r   c                 C  r   rR  r?   )rC   rQ  r?   r?   rF   r2  /  r   z*SchemaVisitorPerPrimitiveType.visit_binaryrS  r,   c                 C  r   rT  r?   )rC   rS  r?   r?   rF   r3  3  r   z+SchemaVisitorPerPrimitiveType.visit_unknownNr  )rO  r   rG   r/   )r?  r   rG   r/   )r4  r   rG   r/   )r7  r   rG   r/   )r9  r!   rG   r/   )r;  r   rG   r/   )r=  r   rG   r/   )rA  r   rG   r/   )rB  r+   rG   r/   )rC  r(   rG   r/   )rC  r'   rG   r/   )rG  r*   rG   r/   )rI  r)   rG   r/   )rK  r%   rG   r/   )rM  r-   rG   r/   )rQ  r   rG   r/   )rS  r,   rG   r/   )r   r   r   r  r   r1  r(  r#  r$  r%  r&  r'  r)  r*  r+  r,  r-  r.  r/  r0  r2  r3  r?   r?   r?   rF   rp    rU  rp  T)initeqfrozenc                   @  sD   e Zd ZU dZded< dZded< dd	d
ZdddZdddZdS )r   zVAn accessor for a specific position in a container that implements the StructProtocol.r9   positionNzAccessor | NoneinnerrG   rH   c                 C  s   d| j  d| j dS )7Return the string representation of the Accessor class.zAccessor(position=z,inner=rX   ru  rv  rR   r?   r?   rF   rS   ?  s   zAccessor.__str__c                 C  s   |   S )rw  )rS   rR   r?   r?   rF   rZ   C  s   zAccessor.__repr__	containerr   r   c                 C  s2   | j }|| }| }|jr|j}||j  }|js|S )zReturn the value at self.position in `container`.

        Args:
            container (StructProtocol): A container to access at position `self.position`.

        Returns:
            Any: The value at position `self.position` in the container.
        rx  )rC   ry  posvalrv  r?   r?   rF   r   G  s   	
zAccessor.getr   )ry  r   rG   r   )	r   r   r   r   r   rv  rS   rZ   r   r?   r?   r?   rF   r   8  s   
 

r   objSchemaVisitor[T]c                 C  re  )a  Apply a schema visitor to any point within a schema.

    The function traverses the schema in post-order fashion.

    Args:
        obj (Union[Schema, IcebergType]): An instance of a Schema or an IcebergType.
        visitor (SchemaVisitor[T]): An instance of an implementation of the generic SchemaVisitor base class.

    Raises:
        NotImplementedError: If attempting to visit an unrecognized object type.
    Cannot visit non-type: NotImplementedErrorr|  ra  r?   r?   rF   visitZ  s   r  c                 C  s   | | t|  |S )z-Visit a Schema with a concrete SchemaVisitor.)r   r  r   r  r?   r?   rF   rh  j     c                 C  sN   g }| j D ]}|| t|j|}|| |||| q|| |S )z1Visit a StructType with a concrete SchemaVisitor.)r6   r   r  r   r   r   rM   r2   )r|  ra  resultsrM   resultr?   r?   rF   rh  p  s   


c                 C  s0   | | j t| j|}|| j || |S )z/Visit a ListType with a concrete SchemaVisitor.)r   ri  r  rj  r   r   )r|  ra  r  r?   r?   rF   rh  ~  s   c                 C  sV   | | j t| j|}|| j || j t| j|}|| j |	| ||S )z.Visit a MapType with a concrete SchemaVisitor.)
r   rk  r  rl  r   r   rm  rn  r   r   )r|  ra  r   r   r?   r?   rF   rh    s   c                 C  
   | | S )z4Visit a PrimitiveType with a concrete SchemaVisitor.ro  r  r?   r?   rF   rh       
PreOrderSchemaVisitor[T]c                 C  re  )a\  Apply a schema visitor to any point within a schema.

    The function traverses the schema in pre-order fashion. This is a slimmed down version
    compared to the post-order traversal (missing before and after methods), mostly
    because we don't use the pre-order traversal much.

    Args:
        obj (Union[Schema, IcebergType]): An instance of a Schema or an IcebergType.
        visitor (PreOrderSchemaVisitor[T]): An instance of an implementation of the generic PreOrderSchemaVisitor base class.

    Raises:
        NotImplementedError: If attempting to visit an unrecognized object type.
    r~  r  r  r?   r?   rF   pre_order_visit  s   r  c                         fddS )z5Visit a Schema with a concrete PreOrderSchemaVisitor.c                     s   t   S rT   )r  r   r?   r  r?   rF   <lambda>  s    _.<locals>.<lambda>)r   r  r?   r  rF   rh    r  c                   s     |  fdd| jD S )z9Visit a StructType with a concrete PreOrderSchemaVisitor.c                   s   g | ]}t  fd d|qS )c                   s     | t fdd| S )Nc                   s   t | j S rT   )r  r   rM   ra  r?   rF   r        z0_.<locals>.<listcomp>.<lambda>.<locals>.<lambda>)rM   r   r  r  r?   rF   r    r|   z_.<locals>.<listcomp>.<lambda>)r   rK   r  r?   rF   rN     s    
z_.<locals>.<listcomp>)r2   r6   r  r?   r  rF   rh    s   
c                   r  )z7Visit a ListType with a concrete PreOrderSchemaVisitor.c                        t  jS rT   )r  rj  r?   r  r?   rF   r    r  r  )r   r  r?   r  rF   rh    r  c                   s"      fdd fddS )z6Visit a MapType with a concrete PreOrderSchemaVisitor.c                     r  rT   )r  rl  r?   r  r?   rF   r    r  r  c                     r  rT   )r  rn  r?   r  r?   rF   r    r  )r   r  r?   r  rF   rh    s   "c                 C  r  )z<Visit a PrimitiveType with a concrete PreOrderSchemaVisitor.ro  r  r?   r?   rF   rh    r  c                   @  sV   e Zd ZdZd'ddZd(d
dZd)ddZd*ddZd+ddZd,d d!Z	d-d$d%Z
d&S ).
_IndexByIdz@A schema visitor for generating a field ID to NestedField index.rG   r   c                 C  s
   i | _ d S rT   _indexrR   r?   r?   rF   rA     s   
z_IndexById.__init__r   r1   r   rq   c                 C  rp   rT   r  r   r?   r?   rF   r        z_IndexById.schemar2   r&   r   %builtins.list[dict[int, NestedField]]c                 C  rp   rT   r  r   r?   r?   rF   r2     r  z_IndexById.structrM   r#   r   c                 C  s   || j |j< | j S )zAdd the field ID to the index.)r  rn   r   r?   r?   rF   rM     s   z_IndexById.fieldr   r    r   c                 C  s   |j | j|j j< | jS )z%Add the list element ID to the index.)ri  r  rn   r   r?   r?   rF   r        z_IndexById.listr   r"   r   r   c                 C  s&   |j | j|j j< |j| j|jj< | jS )z=Add the key ID and value ID as individual items in the index.)rk  r  rn   rm  r   r?   r?   rF   r     s   z_IndexById.mapr  r$   c                 C  rp   rT   r  r  r?   r?   rF   r    r  z_IndexById.primitiveNrG   r   )r   r1   r   rq   rG   rq   )r2   r&   r   r  rG   rq   )rM   r#   r   rq   rG   rq   )r   r    r   rq   rG   rq   )r   r"   r   rq   r   rq   rG   rq   )r  r$   rG   rq   )r   r   r   r   rA   r   r2   rM   r   r   r  r?   r?   r?   rF   r    s    





r  rq   c                 C     t | t S )zGenerate an index of field IDs to NestedField instances.

    Args:
        schema_or_type (Union[Schema, IcebergType]): A schema or type to index.

    Returns:
        Dict[int, NestedField]: An index of field IDs to NestedField instances.
    )r  r  r_  r?   r?   rF   rs     r   rs   c                   @  sf   e Zd Zd*ddZd+ddZd+d	d
Zd,ddZd-ddZd.ddZd/ddZ	d0d#d$Z
d1d'd(Zd)S )2_IndexParentsrG   r   c                 C  s   i | _ g | _d S rT   )id_to_parentid_stackrR   r?   r?   rF   rA        
z_IndexParents.__init__rM   r#   c                 C  s   | j |j d S rT   )r  r   rn   r   r?   r?   rF   r        z_IndexParents.before_fieldc                 C  s   | j   d S rT   )r  r   r   r?   r?   rF   r     s   z_IndexParents.after_fieldr   r1   r   rv   c                 C  rp   rT   r  r   r?   r?   rF   r     r  z_IndexParents.schemar2   r&   r   builtins.list[dict[int, int]]c                 C  s:   |j D ]}| jr| jd nd }|d ur|| j|j< q| jS N)r6   r  r  rn   )rC   r2   r   rM   r   r?   r?   rF   r2     s   
z_IndexParents.structr   c                 C  rp   rT   r  r   r?   r?   rF   rM     r  z_IndexParents.fieldr   r    r   c                 C  s   | j d | j|j< | jS r  )r  r  
element_idr   r?   r?   rF   r     s   z_IndexParents.listr   r"   r   r   c                 C  s*   | j d | j|j< | j d | j|j< | jS r  )r  r  key_idvalue_idr   r?   r?   rF   r   !  s   z_IndexParents.mapr  r$   c                 C  rp   rT   r  r  r?   r?   rF   r  &  r  z_IndexParents.primitiveNr  r  )r   r1   r   rv   rG   rv   )r2   r&   r   r  rG   rv   )rM   r#   r   rv   rG   rv   )r   r    r   rv   rG   rv   )r   r"   r   rv   r   rv   rG   rv   )r  r$   rG   rv   )r   r   r   rA   r   r   r   r2   rM   r   r   r  r?   r?   r?   rF   r    s    





	

r  rv   c                 C  r  )zGenerate an index of field IDs to their parent field IDs.

    Args:
        schema_or_type (Union[Schema, IcebergType]): A schema or type to index.

    Returns:
        Dict[int, int]: An index of field IDs to their parent field IDs.
    )r  r  r  r?   r?   rF   rw   *  r   rw   c                   @  s   e Zd ZdZd@ddZdAdd	ZdAd
dZdBddZdBddZdCddZ	dCddZ
dDddZdEd d!ZdFd#d$ZdGd(d)ZdHd.d/ZdId4d5ZdJd8d9ZdKd:d;ZdLd=d>Zd?S )M_IndexByNamez?A schema visitor for generating a field name to field ID index.rG   r   c                 C  s"   i | _ i | _i | _g | _g | _d S rT   )r  _short_name_to_id_combined_index_field_names_short_field_namesrR   r?   r?   rF   rA   9  s
   
z_IndexByName.__init__r   r#   c                 C  s,   t |jts| j|j | j|j d S rT   rd   r   r&   r  r   rz   r  r   r?   r?   rF   r   @  s   z_IndexByName.before_map_valuec                 C  $   t |jts| j  | j  d S rT   rd   r   r&   r  r   r  r   r?   r?   rF   r   E     
z_IndexByName.after_map_valuer   c                 C  s,   t |jts| j|j | j|j dS )z@Short field names omit element when the element is a StructType.Nr  r   r?   r?   rF   r   J  s   z _IndexByName.before_list_elementc                 C  r  rT   r  r   r?   r?   rF   r   P  r  z_IndexByName.after_list_elementrM   c                 C  s    | j |j | j|j dS )zStore the field name.N)r  r   rz   r  r   r?   r?   rF   r   U  s   z_IndexByName.before_fieldc                 C  s   | j   | j  dS )z"Remove the last field name stored.N)r  r   r  r   r?   r?   rF   r   Z  s   
z_IndexByName.after_fieldr   r1   r   r<   c                 C  rp   rT   r  r   r?   r?   rF   r   _  r  z_IndexByName.schemar2   r&   r   builtins.list[dict[str, int]]c                 C  rp   rT   r  r   r?   r?   rF   r2   b  r  z_IndexByName.structr   c                 C  s   |  |j|j | jS )z Add the field name to the index.)
_add_fieldrz   rn   r  r   r?   r?   rF   rM   e  r  z_IndexByName.fieldr   r    r   c                 C  s   |  |jj|jj | jS )z'Add the list element name to the index.)r  ri  rz   rn   r  r   r?   r?   rF   r   j  s   z_IndexByName.listr   r"   r   r   c                 C  s.   |  |jj|jj |  |jj|jj | jS )zAAdd the key name and value name as individual items in the index.)r  rk  rz   rn   rm  r  r   r?   r?   rF   r   o  s   z_IndexByName.maprz   rH   rn   r9   c                 C  s   |}| j rdd| j |g}|| jv r%td| d| j|  d| || j|< | jr?dd| j|g}|| j|< dS dS )a  Add a field name to the index, mapping its full name to its field ID.

        Args:
            name (str): The field name.
            field_id (int): The field ID.

        Raises:
            ValueError: If the field name is already contained in the index.
        .z)Invalid schema, multiple fields for name z: z and N)r  rP   r  r   r  r  )rC   rz   rn   	full_name
short_namer?   r?   rF   r  u  s   

 
z_IndexByName._add_fieldr  r$   c                 C  rp   rT   r  r  r?   r?   rF   r    r  z_IndexByName.primitivec                 C  s   | j  }|| j |S )zReturn an index of combined full and short names.

        Note: Only short names that do not conflict with full names are included.
        )r  copyupdater  )rC   combined_indexr?   r?   rF   by_name  s   
z_IndexByName.by_namer   c                 C  s   dd | j  D }|S )z$Return an index of ID to full names.c                 S  s   i | ]\}}||qS r?   r?   )rL   r   r   r?   r?   rF   r{     r   z&_IndexByName.by_id.<locals>.<dictcomp>)r  r}   )rC   id_to_full_namer?   r?   rF   by_id  s   z_IndexByName.by_idNr  r  r  r  )r   r1   r   r<   rG   r<   )r2   r&   r   r  rG   r<   )rM   r#   r   r<   rG   r<   )r   r    r   r<   rG   r<   )r   r"   r   r<   r   r<   rG   r<   )rz   rH   rn   r9   rG   r   )r  r$   rG   r<   r   r   )r   r   r   r   rA   r   r   r   r   r   r   r   r2   rM   r   r   r  r  r  r  r?   r?   r?   rF   r  6  s$    














	r  r<   c                 C  s*   t | jdkrt }t| | | S tS )zGenerate an index of field names to field IDs.

    Args:
        schema_or_type (Union[Schema, IcebergType]): A schema or type to index.

    Returns:
        Dict[str, int]: An index of field names to field IDs.
    r   )r[   r6   r  r  r  r   r_  indexerr?   r?   rF   rB     s
   	
rB   r   c                 C  s   t  }t| | | S )zGenerate an index of field IDs full field names.

    Args:
        schema_or_type (Union[Schema, IcebergType]): A schema or type to index.

    Returns:
        Dict[str, int]: An index of field IDs to full names.
    )r  r  r  r  r?   r?   rF   r     s   	
r   c                   @  sL   e Zd ZdZd$ddZd%ddZd&ddZd'ddZd(ddZd)d!d"Z	d#S )*_BuildPositionAccessorsaP  A schema visitor for generating a field ID to accessor index.

    Example:
        >>> from pyiceberg.schema import Schema
        >>> from pyiceberg.types import *
        >>> schema = Schema(
        ...     NestedField(field_id=2, name="id", field_type=IntegerType(), required=False),
        ...     NestedField(field_id=1, name="data", field_type=StringType(), required=True),
        ...     NestedField(
        ...         field_id=3,
        ...         name="location",
        ...         field_type=StructType(
        ...             NestedField(field_id=5, name="latitude", field_type=FloatType(), required=False),
        ...             NestedField(field_id=6, name="longitude", field_type=FloatType(), required=False),
        ...         ),
        ...         required=True,
        ...     ),
        ...     schema_id=1,
        ...     identifier_field_ids=[1],
        ... )
        >>> result = build_position_accessors(schema)
        >>> expected = {
        ...     2: Accessor(position=0, inner=None),
        ...     1: Accessor(position=1, inner=None),
        ...     5: Accessor(position=2, inner=Accessor(position=0, inner=None)),
        ...     6: Accessor(position=2, inner=Accessor(position=1, inner=None))
        ...     3: Accessor(position=2, inner=None),
        ... }
        >>> result == expected
        True
    r   r1   r   dict[Position, Accessor]rG   c                 C     |S rT   r?   r   r?   r?   rF   r        z_BuildPositionAccessors.schemar2   r&   r   'builtins.list[dict[Position, Accessor]]c                 C  sX   i }t |jD ]"\}}|| r"||  D ]\}}t||d||< qt|||j< q|S )N)rv  )	enumerater6   r}   r   rn   )rC   r2   r   r  ru  rM   inner_field_idaccr?   r?   rF   r2     s   z_BuildPositionAccessors.structrM   r#   r   c                 C  r  rT   r?   r   r?   r?   rF   rM     r  z_BuildPositionAccessors.fieldr   r    r   c                 C     i S rT   r?   r   r?   r?   rF   r     r  z_BuildPositionAccessors.listr   r"   r   r   c                 C  r  rT   r?   r   r?   r?   rF   r     s   z_BuildPositionAccessors.mapr  r$   c                 C  r  rT   r?   r  r?   r?   rF   r    r  z!_BuildPositionAccessors.primitiveN)r   r1   r   r  rG   r  )r2   r&   r   r  rG   r  )rM   r#   r   r  rG   r  )r   r    r   r  rG   r  )r   r"   r   r  r   r  rG   r  )r  r$   rG   r  )
r   r   r   r   r   r2   rM   r   r   r  r?   r?   r?   rF   r    s    
 



r  r   c                 C  r  )zGenerate an index of field IDs to schema position accessors.

    Args:
        schema_or_type (Union[Schema, IcebergType]): A schema or type to index.

    Returns:
        Dict[int, Accessor]: An index of field IDs to accessors.
    )r  r  r  r?   r?   rF   r     r   r   next_idCallable[[], int] | Nonec                 C  s   t | t|dS )z'Traverses the schema, and sets new IDs.)next_id_func)r  _SetFreshIDs)r_  r  r?   r?   rF   assign_fresh_schema_ids
  r  r  c                   @  sl   e Zd ZU dZded< d1d2d	d
Zd3ddZd4ddZd5ddZd6d d!Z	d7d%d&Z
d8d+d,Zd9d/d0ZdS ):r  z>Traverses the schema and assigns monotonically increasing ids.rv   old_id_to_new_idNr  r  rG   r   c                   s4   i | _ td |d ur|| _d S  fdd| _d S )N   c                     s   t  S rT   )nextr?   counterr?   rF   r    s    z'_SetFreshIDs.__init__.<locals>.<lambda>)r  	itertoolscountr  )rC   r  r?   r  rF   rA     s   
$z_SetFreshIDs.__init__
current_idr9   c                 C  s   |   }|| j|< |S rT   )r  r  )rC   r  new_idr?   r?   rF   _get_and_increment  s   
z_SetFreshIDs._get_and_incrementr   r1   r   Callable[[], StructType]c                   s"   t | jd fdd|jD iS )Nr;   c                   s   g | ]} j | qS r?   )r  )rL   rn   rR   r?   rF   rN   !  r   z'_SetFreshIDs.schema.<locals>.<listcomp>)r1   r6   r;   r   r?   rR   rF   r     s
   z_SetFreshIDs.schemar2   r&   r   (builtins.list[Callable[[], IcebergType]]c              
     s^    fdd|j D }g }t||j |ddD ]\}}}|t||j| |j|jd qt| S )Nc                   s   g | ]}  |jqS r?   )r  rn   rK   rR   r?   rF   rN   %  rO   z'_SetFreshIDs.struct.<locals>.<listcomp>Trb   )rn   rz   r   r   doc)r6   rf   r   r#   rz   r   r  r&   )rC   r2   r   new_ids
new_fieldsrn   rM   r   r?   rR   rF   r2   $  s   	z_SetFreshIDs.structrM   r#   r   Callable[[], IcebergType]r   c                 C  s   | S rT   r?   r   r?   r?   rF   rM   3  r  z_SetFreshIDs.fieldr   r    r   c                 C  s   |  |j}t|| |jdS )N)r  r   element_required)r  r  r    r  )rC   r   r   r  r?   r?   rF   r   6  s   z_SetFreshIDs.listr   r"   r   r   c                 C  s0   |  |j}|  |j}t|| || |jdS )N)r  rl  r  rn  value_required)r  r  r  r"   r  )rC   r   r   r   r  r  r?   r?   rF   r   >  s   z_SetFreshIDs.mapr  r$   c                 C     |S rT   r?   r  r?   r?   rF   r  I  r  z_SetFreshIDs.primitiverT   )r  r  rG   r   )r  r9   rG   r9   )r   r1   r   r  rG   r1   )r2   r&   r   r  rG   r&   )rM   r#   r   r  rG   r   )r   r    r   r  rG   r    )r   r"   r   r  r   r  rG   r"   )r  r$   rG   r$   )r   r   r   r   r   rA   r  r   r2   rM   r   r   r  r?   r?   r?   rF   r    s   
 





r  rz   rH   c                 C  s   t | st| S | S )a  Make a field name compatible with Avro specification.

    This function sanitizes field names to comply with Avro naming rules:
    - Names must start with [A-Za-z_]
    - Subsequent characters must be [A-Za-z0-9_]

    Invalid characters are replaced with _xHHHH where HHHH is the hex code.
    Names starting with digits get a leading underscore.

    Args:
        name: The original field name

    Returns:
        A sanitized name that complies with Avro specification
    )_valid_avro_name_sanitize_name)rz   r?   r?   rF   make_compatible_nameN  s   r  r_   c                 C  sX   t | std| d }| s|dksdS | dd  D ]}| s)|dks) dS qdS )Nz Can not validate empty avro namer   rh  Fr  T)r[   r   isalphaisalnum)rz   first	characterr?   r?   rF   r  c  s   r  c                 C  s|   g }| d }|  s|dks|t| n|| | dd  D ]}| s3|dks3|t| q!|| q!d|S )Nr   rh  r   )r  r   _sanitize_charr  rP   )rz   sbr  r  r?   r?   rF   r  p  s   

r  r  c                 C  s,   |   rd|  S dtt| dd    S )Nrh  _x   )isdigithexordupper)r  r?   r?   rF   r    s   r  c                 C  s,   t |  t }t|pt j| j| jdS )a_  Sanitize column names to make them compatible with Avro.

    The column name should be starting with '_' or digit followed by a string only contains '_', digit or alphabet,
    otherwise it will be sanitized to conform the avro naming convention.

    Args:
        schema: The schema to be sanitized.

    Returns:
        The sanitized schema.
    r:   r;   )r  r   _SanitizeColumnsVisitorr1   r&   r6   r:   r;   )r   r  r?   r?   rF   sanitize_column_names  s   
r  c                   @  sH   e Zd Zd#ddZd$ddZd%ddZd&ddZd'ddZd(d d!Zd"S ))r  r   r1   r   IcebergType | NonerG   c                 C  r  rT   r?   r   r?   r?   rF   r     r  z_SanitizeColumnsVisitor.schemarM   r#   r   c                 C  s   t |jt|j||j|jdS )Nrn   rz   r   r  r   )r#   rn   r  rz   r  r   r   r?   r?   rF   rM     s   z_SanitizeColumnsVisitor.fieldr2   r&   r   !builtins.list[IcebergType | None]c                 C  s   t dd |D  S )Nc                 S  s   g | ]}|d ur|qS rT   r?   rK   r?   r?   rF   rN     rO   z2_SanitizeColumnsVisitor.struct.<locals>.<listcomp>)r&   r   r?   r?   rF   r2     r  z_SanitizeColumnsVisitor.structr   r    r   c                 C  s   t |j||jdS N)r  rj  r  )r    r  r  r   r?   r?   rF   r     r  z_SanitizeColumnsVisitor.listr   r"   r   r   c                 C  s   t |j|j|||jdS N)r  r  rl  rn  r  )r"   r  r  r  r   r?   r?   rF   r     s   z_SanitizeColumnsVisitor.mapr  r$   c                 C  r  rT   r?   r  r?   r?   rF   r    r  z!_SanitizeColumnsVisitor.primitiveNr   r1   r   r  rG   r  rM   r#   r   r  rG   r  r2   r&   r   r  rG   r  r   r    r   r  rG   r  r   r"   r   r  r   r  rG   r  r  r$   rG   r  )	r   r   r   r   rM   r2   r   r   r  r?   r?   r?   rF   r    s    


	

	r  selectedr   select_full_typesc                 C  s:   t |  t||}t|pt j| jt|| j	dS )a  Prunes a column by only selecting a set of field-ids.

    Args:
        schema: The schema to be pruned.
        selected: The field-ids to be included.
        select_full_types: Return the full struct when a subset is recorded

    Returns:
        The pruned schema.
    r  )
r  r   _PruneColumnsVisitorr1   r&   r6   r:   r   intersectionr;   )r   r  r  r  r?   r?   rF   r     s   
r   c                   @  s   e Zd ZU ded< ded< d1ddZd2ddZd3ddZd4ddZd5ddZd6d"d#Z	d7d&d'Z
ed8d)d*Zed9d,d-Zed:d.d/Zd0S );r  r   r  r_   r  c                 C  s   || _ || _d S rT   )r  r  )rC   r  r  r?   r?   rF   rA     r  z_PruneColumnsVisitor.__init__r   r1   r   r  rG   c                 C  r  rT   r?   r   r?   r?   rF   r     r  z_PruneColumnsVisitor.schemar2   r&   r   r  c           	   
   C  s   |j }g }d}t|D ])\}}|| }|j|kr|| q|d ur4d}|t|j|j||j|jd q|rIt	|t	|krE|du rE|S t
| S d S )NTFr  )r6   r  r   r   r#   rn   rz   r  r   r[   r&   )	rC   r2   r   r6   selected_fields	same_typeidxprojected_typerM   r?   r?   rF   r2     s0   

z_PruneColumnsVisitor.structrM   r#   r   c                 C  sj   |j | jv r-| jr|jS |jjr| |S |jjs*td|j  d|j d|j d|jS |d ur3|S d S )N-Cannot explicitly project List or Map types, :	 of type  was selected)	rn   r  r  r   r   _project_selected_structr   r   rz   r   r?   r?   rF   rM     s&   
z_PruneColumnsVisitor.fieldr   r    r   c                 C  sx   |j | jv r0| jr|S |jr|jjr| |}| ||S |jjs.td|j  d|j d|S |d ur:| ||S d S )Nr  r  r  )	r  r  r  rj  r   r  _project_listr   r   )rC   r   r   projected_structr?   r?   rF   r   
  s$   
z_PruneColumnsVisitor.listr   r"   r   r   c                 C  s   |j | jv r0| jr|S |jr|jjr| |}| ||S |jjs.td|j  d|j d|S |d ur:| ||S |j	| jv rB|S d S )Nz7Cannot explicitly project List or Map types, Map value r  r  )
r  r  r  rn  r   r  _project_mapr   r   r  )rC   r   r   r   r	  r?   r?   rF   r     s(   
z_PruneColumnsVisitor.mapr  r$   c                 C  s   d S rT   r?   r  r?   r?   rF   r  0  r  z_PruneColumnsVisitor.primitiveprojected_fieldc                 C  s(   | rt | tstd| d u rt S | S )NzExpected a struct)rd   r&   r   )r  r?   r?   rF   r  3  s
   z-_PruneColumnsVisitor._project_selected_structr   c                 C  s    | j |kr| S t| j|| jdS r  )rj  r    r  r  )r   r   r?   r?   rF   r  =  s
   

z"_PruneColumnsVisitor._project_listc                 C  s(   | j |kr| S t| j| j| j|| jdS r  )rn  r"   r  r  rl  r  )r   r   r?   r?   rF   r
  F  s   
z!_PruneColumnsVisitor._project_mapN)r  r   r  r_   r  r  r  r  r  r  )r  r  rG   r&   )r   r    r   r   rG   r    )r   r"   r   r   rG   r"   )r   r   r   r   rA   r   r2   rM   r   r   r  staticmethodr  r  r
  r?   r?   r?   rF   r    s    
 






	r  	file_typer   	read_typec                 C  s    | |kr| S t d|  d| )a  Promotes reading a file type to a read type.

    Args:
        file_type (IcebergType): The type of the Avro file.
        read_type (IcebergType): The requested read type.

    Raises:
        ResolveError: If attempting to resolve an unrecognized object type.
    Cannot promote  to r   r  r  r?   r?   rF   promoteT  s   r  r   c                 C     t |tr|S td| )NzCannot promote an int to )rd   r!   r   r  r?   r?   rF   rh  e     
r   c                 C  r  )NzCannot promote an float to )rd   r   r   r  r?   r?   rF   rh  n  r  r%   c                 C  r  )NzCannot promote an string to )rd   r   r   r  r?   r?   rF   rh  w     
r   c                 C  r  )NzCannot promote an binary to )rd   r%   r   r  r?   r?   rF   rh    r  r   c                 C  sH   t |tr| j|jkr| j| jkr|S td|  d| td| )NzCannot reduce precision from r  zCannot promote an decimal to )rd   r   	precisionscaler   r  r?   r?   rF   rh    s
   
r   c                 C  s.   t |trt| dkr|S td|  d| )N   r  r  )rd   r-   r[   r   r  r?   r?   rF   rh    s   r,   c                 C  s"   t |tr|S td|  d| )Nr  r  )rd   r$   r   r  r?   r?   rF   rh    s   
requested_schemaprovided_schemar   c                 C  s   t | t| dS )aZ  
    Check if the `provided_schema` is compatible with `requested_schema`.

    Both Schemas must have valid IDs and share the same ID for the same field names.

    Two schemas are considered compatible when:
    1. All `required` fields in `requested_schema` are present and are also `required` in the `provided_schema`
    2. Field Types are consistent for fields that are present in both schemas. I.e. the field type
       in the `provided_schema` can be promoted to the field type of the same field ID in `requested_schema`

    Raises:
        ValueError: If the schemas are not compatible.
    N)r  _SchemaCompatibilityVisitor)r  r  r?   r?   rF   _check_schema_compatible  s   r  c                   @  sf   e Zd ZU ded< d*ddZd+d	d
Zd,ddZd-ddZd.ddZd/ddZ	d0d#d$Z
d1d'd(Zd)S )2r  r1   r  c                 C  s`   ddl m} ddlm} || _|ddd| _| jd | jd | jd	 |dd
| _d S )Nr   )Console)TableTbold)show_headerheader_styler  zTable fieldzDataframe field)record)rich.consoler  
rich.tabler  r  
rich_table
add_columnconsole)rC   r  r  	RichTabler?   r?   rF   rA     s   z$_SchemaCompatibilityVisitor.__init__r`   r#   rG   r_   c              
     s  z	| j  jW n$ ty-    jr | jdt d Y dS | jdt d Y dS w  jrBjsB| jdt t dS  jjkrV| jdt t dS t	 fddt
tthD rs| jdt t dS ztj j | jdt t W dS  ty   tjtrt jtsd j d	}nd
 j d}| jdt t d|  Y dS | jdt t Y dS w )Nu   ❌MissingFu   ✅Tc                 3  s(    | ]}t  j|ot j|V  qd S rT   )rd   r   )rL   container_typer`   ra   r?   rF   rW     s
    
zC_SchemaCompatibilityVisitor._is_field_compatible.<locals>.<genexpr>zANull type (UnknownType) cannot be promoted to non-primitive type za. UnknownType can only be promoted to primitive types (string, int, boolean, etc.) in V3+ tables.z.Null type (UnknownType) cannot be promoted to zi. This may be due to table format version limitations (V1/V2 tables don't support UnknownType promotion).z - )r  r   rn   r   r   r%  add_rowrH   r   anyr&   r"   r    r  r   rd   r,   r$   )rC   r`   	error_msgr?   r+  rF   _is_field_compatible  sH   "z0_SchemaCompatibilityVisitor._is_field_compatibler   r   Callable[[], bool]c                 C  s0   |  }s| j | j td| j   |S )NzMismatch in fields:
)r'  printr%  r   export_text)rC   r   r   r  r?   r?   rF   r     s   
z"_SchemaCompatibilityVisitor.schemar2   r&   r   !builtins.list[Callable[[], bool]]c                 C  s   dd |D }t |S )Nc                 S  s   g | ]}| qS r?   r?   )rL   r  r?   r?   rF   rN     s    z6_SchemaCompatibilityVisitor.struct.<locals>.<listcomp>)re   )rC   r2   r   r  r?   r?   rF   r2     s   z"_SchemaCompatibilityVisitor.structrM   r   c                 C  s&   |  |}|j| jjvr|S |o| S rT   )r/  rn   r  rt   )rC   rM   r   is_compatibler?   r?   rF   rM     s   

z!_SchemaCompatibilityVisitor.fieldr   r    r   c                 C  s   |  |jo| S rT   )r/  ri  r   r?   r?   rF   r     r  z _SchemaCompatibilityVisitor.listr   r"   r   r   c                 C  s$   t | |j| |j| | gS rT   )re   r/  rk  rm  r   r?   r?   rF   r     s   

z_SchemaCompatibilityVisitor.mapr  r$   c                 C  r   )NTr?   r  r?   r?   rF   r    r  z%_SchemaCompatibilityVisitor.primitiveN)r  r1   )r`   r#   rG   r_   )r   r1   r   r0  rG   r_   )r2   r&   r   r3  rG   r_   )rM   r#   r   r0  rG   r_   )r   r    r   r0  rG   r_   )r   r"   r   r0  r   r0  rG   r_   )r  r$   rG   r_   )r   r   r   r   rA   r/  r   r2   rM   r   r   r  r?   r?   r?   rF   r    s   
 


9




r  )
r_  r`  r5  r0   ra  rb  rc  rd  rG   r/   )
r   r1   r5  r0   ra  rg  rc  rd  rG   r/   )
r2   r&   r5  r0   ra  rg  rc  rd  rG   r/   )
r   r    r5  r0   ra  rg  rc  rd  rG   r/   )
r   r"   r5  r0   ra  rg  rc  rd  rG   r/   )
r  r$   r5  r0   ra  rg  rh  rd  rG   r/   )r|  r`  ra  r}  rG   r/   )r|  r1   ra  r}  rG   r/   )r|  r&   ra  r}  rG   r/   )r|  r    ra  r}  rG   r/   )r|  r"   ra  r}  rG   r/   )r|  r$   ra  r}  rG   r/   )r|  r`  ra  r  rG   r/   )r|  r1   ra  r  rG   r/   )r|  r&   ra  r  rG   r/   )r|  r    ra  r  rG   r/   )r|  r"   ra  r  rG   r/   )r|  r$   ra  r  rG   r/   )r_  r`  rG   rq   )r_  r`  rG   rv   )r_  r`  rG   r<   )r_  r`  rG   r   )r_  r`  rG   r   rT   )r_  r`  r  r  rG   r1   )rz   rH   rG   rH   )rz   rH   rG   r_   )r  rH   rG   rH   )r   r1   rG   r1   r   )r   r1   r  r   r  r_   rG   r1   )r  r   r  r   rG   r   )r  r   r  r   rG   r   )r  r   r  r   rG   r   )r  r%   r  r   rG   r   )r  r   r  r   rG   r   )r  r   r  r   rG   r   )r  r   r  r   rG   r   )r  r,   r  r   rG   r   )r  r1   r  r1   rG   r   )h
__future__r   builtinsr  abcr   r   collections.abcr   dataclassesr   	functoolsr   r   r	   typingr
   r   r   r   r   pydanticr   r   r   pyiceberg.exceptionsr   pyiceberg.typedefr   r   r   pyiceberg.typesr   r   r   r   r   r   r   r   r   r    r!   r"   r#   r$   r%   r&   r'   r(   r)   r*   r+   r,   r-   pyarrowpar   r.   r/   r0   r   FIELD_ID_PROPICEBERG_FIELD_NAME_PROPr1   r   r  r  r!  rV  rf  registerrh  rp  r   r  r  dictr9   r  rs   r  rw   rH   r  rB   r   Positionr  r   r  r  r  r  r  r  r  r  r   r  r  r  r_   r  r?   r?   r?   rF   <module>   s   d  188mm!	
"
'
h

>
?



  

