o
    <i                    @  s	  d Z ddlmZ ddlZddlZddlZddlZddlZddlZddl	Z	ddl
Z
ddlZddlZddlZddlmZmZ ddlmZmZmZ ddlmZ ddlmZ ddlmZ dd	lmZmZ dd
lmZmZmZm Z m!Z! ddl"m#Z# ddl$Z%ddl&m'Z( ddl)m*Z+ ddl,Z$ddl-m.Z/ ddl$m0Z0 ddl1m2Z2 ddl3m4Z4m5Z5m6Z6 ddl7m8Z8 ddl9m:Z: ddl;m<Z<m=Z=m>Z>m?Z?m@Z@mAZAmBZB ddlCmDZD ddlEmFZFmGZGmHZHmIZI ddlEmJZK ddlLmMZMmNZNmOZOmPZPmQZQmRZRmSZSmTZTmUZUmVZVmWZWmXZXmYZYmZZZm[Z[m\Z\m]Z]m^Z^m_Z_m`Z`maZambZbmcZcmdZdmeZemfZfmgZgmhZhmiZimjZjmkZkmlZlmmZmmnZnmoZompZpmqZqmrZrmsZsmtZtmuZumvZvmwZw ddlxmyZymzZzm{Z{ ddl|m}Z}m~Z~mZmZmZ ddlmZmZmZmZmZmZmZmZmZmZmZmJZJmZ ddlmZmZ ddlmZ ddlmZ ddlmZmZ ddlmZ ddlmZmZ ddlmZmZmZmZ dd lmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZ dd!lmZ dd"lmZ dd#lmZ dd$lmZ dd%lmZmZmZ dd&lmZ dd'lmZmZ erdd(lmZmZ eeСZd)Zd*Zd+Zd,Zd-Zd.Zd/Zd0Zd1Zd2Zd3Zh d4Ze d5Zed/d:d;Zߐd0d>d?ZG d@dA dAeZG dBdC dCe$jjZG dDdE dEetevZG dFdG dGesZedHe{jfd1dRdSZG dTdU dUee%j Zd2dZd[ZG d\d] d]eFe(j ZG d^d_ d_eFd Zd3d4dddeZd3d4dfdgZed5dkdlZd6drdsZd7dzd{Zdd|ejfd8ddZd|ejfd9ddZd:ddZd:ddZed;ddZee%jd<ddZee%jd=ddZee%jee%jee%jd>ddZee%jd?ddZee%jd@ddZee%j dAddZee%jdBddZG dd dee eZdCddZG dd dee ZG dd deeeB  ZG dd deee%jB  ZG dd deee%jB  ZG dd deZdDddZ	ddejdfdEddÄZ
dFddȄZG ddʄ dʃZd|d|ed|fdGdd҄ZG ddԄ dee%je%jdB f ZG ddք dee%j ZdHddلZG ddۄ dee Ze ZG dd݄ d݃ZdސZdߐZG dd deZedHdG dd deɃZdIddZedHdG dd dZG dd deee  ZdJddZedHdG dd dZG dd deee  Z dKddZ!edHdG dd dZ"dLddZ#dMddZ$dNd
dZ%d|ejfdOddZ&dPddZ'dQddZ(dZ)dZ*dRddZ+		dSdTd"d#Z,edHdG d$d% d%Z-dUd*d+Z.dVd-d.Z/dS (W  au  FileIO implementation for reading and writing table files that uses pyarrow.fs.

This file contains a FileIO implementation that relies on the filesystem interface provided
by PyArrow. It relies on PyArrow's `from_uri` method that infers the correct filesystem
type to use. Theoretically, this allows the supported storage types to grow naturally
with the pyarrow library.
    )annotationsN)ABCabstractmethod)CallableIterableIterator)copy)	dataclass)Enum)	lru_cachesingledispatch)TYPE_CHECKINGAnyGenericTypeVarcast)urlparse)ChunkedArray)S3RetryStrategy)FileInfo
FileSystemFileType)to_bytes)ResolveError)
AlwaysTrueBooleanExpression
BoundIsNaNBoundIsNull	BoundTermNotOr)Literal)BoundBooleanExpressionVisitorbindextract_field_idstranslate_column_names)visit)+ADLS_ACCOUNT_KEYADLS_ACCOUNT_NAMEADLS_BLOB_STORAGE_AUTHORITYADLS_BLOB_STORAGE_SCHEMEADLS_CLIENT_IDADLS_CLIENT_SECRETADLS_DFS_STORAGE_AUTHORITYADLS_DFS_STORAGE_SCHEMEADLS_SAS_TOKENADLS_TENANT_IDAWS_ACCESS_KEY_ID
AWS_REGIONAWS_ROLE_ARNAWS_ROLE_SESSION_NAMEAWS_SECRET_ACCESS_KEYAWS_SESSION_TOKENGCS_DEFAULT_LOCATIONGCS_SERVICE_HOST	GCS_TOKENGCS_TOKEN_EXPIRES_AT_MS	HDFS_HOSTHDFS_KERB_TICKET	HDFS_PORT	HDFS_USERS3_ACCESS_KEY_IDS3_ANONYMOUSS3_CONNECT_TIMEOUTS3_ENDPOINTS3_FORCE_VIRTUAL_ADDRESSINGS3_PROXY_URI	S3_REGIONS3_REQUEST_TIMEOUTS3_RESOLVE_REGIONS3_RETRY_STRATEGY_IMPLS3_ROLE_ARNS3_ROLE_SESSION_NAMES3_SECRET_ACCESS_KEYS3_SESSION_TOKENFileIO	InputFileInputStream
OutputFileOutputStream)DataFileDataFileContent
FileFormat)PartitionFieldPartitionFieldValuePartitionKeyPartitionSpecpartition_record_value)PartnerAccessorPreOrderSchemaVisitorSchemaSchemaVisitorPerPrimitiveTypeSchemaWithPartnerVisitor_check_schema_compatiblebuild_position_accessorspre_order_visitpromoteprune_columnssanitize_column_namesr&   visit_with_partner$DOWNCAST_NS_TIMESTAMP_TO_US_ON_WRITETableProperties)load_location_provider)TableMetadata)NameMappingapply_name_mapping)
PuffinFile)IdentityTransformTruncateTransform)
EMPTY_DICT
PropertiesRecordTableVersion)
BinaryTypeBooleanTypeDateTypeDecimalType
DoubleType	FixedType	FloatTypeIcebergTypeIntegerTypeListTypeLongTypeMapTypeNestedFieldPrimitiveType
StringType
StructTypeTimestampNanoTypeTimestampTypeTimestamptzNanoTypeTimestamptzTypeTimeTypeUnknownTypeUUIDType	strtobool)ExecutorFactory)Config)millis_to_datetime)unscaled_to_decimal)get_first_property_valueproperty_as_boolproperty_as_int)	Singleton)"truncate_upper_bound_binary_string truncate_upper_bound_text_string)FileScanTask	WriteTaski   zbuffer-sizes   iceberg.schemas   PARQUET:field_ids
   iceberg.ids   iceberg.requireds   docelementkeyvaluedoc>   +00:00Etc/UTCZUTCTbucketstrreturn
str | Nonec              	   C  sB   ddl m} z|| dW S  ttfy    td|   Y d S w )Nr   )resolve_s3_regionr   z$Unable to resolve region for bucket )
pyarrow.fsr   OSError	TypeErrorloggerwarning)r   r    r   Q/home/ubuntu/veenaModal/venv/lib/python3.10/site-packages/pyiceberg/io/pyarrow.py_cached_resolve_s3_region   s   r   implS3RetryStrategy | Nonec              	   C  s   z.|  d}t|dk rtd|  d|d d |d }}t|}t||}| W S  ttfyD   t	j
d|  dd Y d S w )N.   zMretry-strategy-impl should be full path (module.CustomS3RetryStrategy), got: z(Could not initialize S3 retry strategy: 
stacklevel)splitlen
ValueErrorjoin	importlibimport_modulegetattrModuleNotFoundErrorAttributeErrorwarningswarn)r   
path_partsmodule_name
class_namemoduleclass_r   r   r   _import_retry_strategy   s   


r   c                      s"   e Zd ZdZd fddZ  ZS )	UnsupportedPyArrowTypeExceptionz:Cannot convert PyArrow type to corresponding Iceberg type.fieldpa.Fieldargsr   c                   s   || _ t j|  d S N)r   super__init__)selfr   r   	__class__r   r   r      s   z(UnsupportedPyArrowTypeException.__init__)r   r   r   r   )__name__
__module____qualname____doc__r   __classcell__r   r   r   r   r      s    r   c                      s   e Zd Zd
 fdd	Z  ZS )PyArrowLocalFileSystempathr   r   r   kwargsr   pyarrow.NativeFilec                   s0   | j tj|dd t j|g|R i |S )NT)	recursive)
create_dirosr   dirnamer   open_output_stream)r   r   r   r   r   r   r   r      s   z)PyArrowLocalFileSystem.open_output_stream)r   r   r   r   r   r   r   r   )r   r   r   r   r   r   r   r   r   r      s    r   c                      s   e Zd ZU dZded< ded< ded< efd# fddZd$ddZd%ddZd&ddZ	d'd(ddZ
d)d*dd Zd+d!d"Z  ZS ),PyArrowFileaA  A combined InputFile and OutputFile implementation using pyarrow filesystem.

    This class generates pyarrow.lib.NativeFile instances.

    Args:
        location (str): A URI or a path to a local file.

    Attributes:
        location(str): The URI or path to a local file for a PyArrowFile instance.

    Examples:
        >>> from pyiceberg.io.pyarrow import PyArrowFile
        >>> # input_file = PyArrowFile("s3://foo/bar.txt")
        >>> # Read the contents of the PyArrowFile instance
        >>> # Make sure that you have permissions to read/write
        >>> # file_content = input_file.open().read()

        >>> # output_file = PyArrowFile("s3://baz/qux.txt")
        >>> # Write bytes to a file
        >>> # Make sure that you have permissions to read/write
        >>> # output_file.create().write(b'foobytes')
    r   _filesystemr   _pathint_buffer_sizelocationr   fsbuffer_sizec                   s$   || _ || _|| _t j|d d S )N)r   )r   r   r   r   r   )r   r   r   r   r   r   r   r   r     s   zPyArrowFile.__init__r   r   c              
   C  sv   z	| j | j}W n! ty* } z|jdksdt|v r%td| j | d}~ww |jt	j
kr9td| j |S )zRetrieve a pyarrow.fs.FileInfo object for the location.

        Raises:
            PermissionError: If the file at self.location cannot be accessed due to a permission error such as
                an AWS error code 15.
           AWS Error [code 15]z%Cannot get file info, access denied: Nz&Cannot get file info, file not found: )r   get_file_infor   r   errnor   PermissionErrorr   typer   NotFoundFileNotFoundError)r   	file_infoer   r   r   
_file_info  s   zPyArrowFile._file_infoc                 C  s   |   }|jS )z.Return the total length of the file, in bytes.)r   size)r   r   r   r   r   __len__,  s   zPyArrowFile.__len__boolc                 C  s$   z|    W dS  ty   Y dS w )z"Check whether the location exists.TF)r   r   r   r   r   r   exists1  s   zPyArrowFile.existsTseekablerO   c              
   C  s   z|r| j | j}W |S | j j| j| jd}W |S  ttfy#     tyW } z)|jdks5dt	|v r>td| j
 ||jdksIdt	|v rRtd| j
 | d}~ww )	a!  Open the location using a PyArrow FileSystem inferred from the location.

        Args:
            seekable: If the stream should support seek, or if it is consumed sequential.

        Returns:
            pyarrow.lib.NativeFile: A NativeFile instance for the file located at `self.location`.

        Raises:
            FileNotFoundError: If the file at self.location does not exist.
            PermissionError: If the file at self.location cannot be accessed due to a permission error such as
                an AWS error code 15.
        r   r   Path does not existz"Cannot open file, does not exist: r   r   z!Cannot open file, access denied: N)r   open_input_filer   open_input_streamr   r   r   r   r   r   r   )r   r   
input_filer   r   r   r   open9  s    	zPyArrowFile.openF	overwriterQ   c              
   C  s   z|s|   du rtd| j | jj| j| jd}W |S  ty%     tyE } z|j	dks7dt
|v r@td| j | d}~ww )a  Create a writable pyarrow.lib.NativeFile for this PyArrowFile's location.

        Args:
            overwrite (bool): Whether to overwrite the file if it already exists.

        Returns:
            pyarrow.lib.NativeFile: A NativeFile instance for the file located at self.location.

        Raises:
            FileExistsError: If the file already exists at `self.location` and `overwrite` is False.

        Note:
            This retrieves a pyarrow NativeFile by opening an output stream. If overwrite is set to False,
            a check is first performed to verify that the file does not exist. This is not thread-safe and
            a possibility does exist that the file can be created by a concurrent process after the existence
            check yet before the output stream is created. In such a case, the default pyarrow behavior will
            truncate the contents of the existing file when opening the output stream.
        Tz$Cannot create file, already exists: r   r   r   z#Cannot create file, access denied: N)r   FileExistsErrorr   r   r   r   r   r   r   r   r   )r   r   output_filer   r   r   r   createV  s   zPyArrowFile.createc                 C  s   | S )ar  Return a new PyArrowFile for the location of an existing PyArrowFile instance.

        This method is included to abide by the OutputFile abstract base class. Since this implementation uses a single
        PyArrowFile class (as opposed to separate InputFile and OutputFile implementations), this method effectively returns
        a copy of the same instance.
        r   r   r   r   r   to_input_fileu  s   zPyArrowFile.to_input_file)r   r   r   r   r   r   r   r   )r   r   )r   r   )r   r   )T)r   r   r   rO   )F)r   r   r   rQ   )r   r   )r   r   r   r   __annotations__ONE_MEGABYTEr   r   r   r   r   r  r  r   r   r   r   r   r      s   
 


r   c                      s   e Zd ZU ded< efd/ fddZeefd0ddZd1d2ddZd3ddZ	d4ddZ
d3ddZd2ddZd3ddZd3ddZd5d!d"Zd5d#d$Zd6d'd(Zd7d*d+Zd8d-d.Z  ZS )9PyArrowFileIOz'Callable[[str, str | None], FileSystem]fs_by_scheme
propertiesrq   c                   s   t | j| _t j|d d S )N)r  )r   _initialize_fsr  r   r   )r   r  r   r   r   r     s   zPyArrowFileIO.__init__r   r   r   tuple[str, str, str]c                 C  sl   t | }|js|dd}|dd}||tj| fS |jdv r)|j|j|jfS |j|j|j |j fS )zReturn (scheme, netloc, path) for the given location.

        Uses DEFAULT_SCHEME and DEFAULT_NETLOC if scheme/netloc are missing.
        DEFAULT_SCHEMEfileDEFAULT_NETLOC )hdfsviewfs)r   schemegetr   r   abspathnetloc)r   r  uridefault_schemedefault_netlocr   r   r   parse_location  s   
zPyArrowFileIO.parse_locationNr  r  r   r   c                 C  st   |dv r|   S |dv r| |S |dv r| ||S |dv r#|  S |dv r+|  S |dv r3|  S td| )z+Initialize FileSystem for different scheme.>   oss>   s3s3as3n>   r  r  >   gsgcs>   abfswasbabfsswasbs>   r  z%Unrecognized filesystem type in URI: )_initialize_oss_fs_initialize_s3_fs_initialize_hdfs_fs_initialize_gcs_fs_initialize_azure_fs_initialize_local_fsr   )r   r  r  r   r   r   r    s   
zPyArrowFileIO._initialize_fsc           	   	   C  s  ddl m} | jtt| jttt| jtt	t| jt
tt| jttt| jtdd}| jt }r8||d< | jt }rFt||d< | jt }rTt||d< t| jtt }ra||d< t| jtt }rn||d	< | jt }r|t||d
< |di |S )Nr   S3FileSystemT)endpoint_override
access_key
secret_keysession_tokenregionforce_virtual_addressingproxy_optionsconnect_timeoutrequest_timeoutrole_arnsession_name	anonymousr   )r   r)  r  r  rB   r   r?   r1   rK   r5   rL   r6   rE   r2   r   rC   rD   rA   floatrF   rI   r3   rJ   r4   r@   r   )	r   r)  client_kwargs	proxy_urir1  r2  r3  r4  s3_anonymousr   r   r   r"    s*   
	z PyArrowFileIO._initialize_oss_fsc                 C  s  ddl m} t| jtt}|d u st| jtddu r8t|dp |}|d ur7||kr7t	
d| d| d|  n|}| jtt| jttt| jttt| jtt|d	}| jt }ra||d
< | jt }rot||d< | jt }r}t||d< t| jtt }	r|	|d< t| jtt }
r|
|d< | jtd urt| jtd|d< | jt }rt| }r||d< | jt }rt||d< |di |S )Nr   r(  FTr   z6PyArrow FileIO overriding S3 bucket region for bucket z: provided region z, actual region )r*  r+  r,  r-  r.  r0  r1  r2  r3  r4  r/  retry_strategyr5  r   ) r   r)  r   r  rE   r2   r   rG   r   r   r   r  rB   r?   r1   rK   r5   rL   r6   rD   rA   r6  rF   rI   r3   rJ   r4   rC   rH   r   r@   r   )r   r  r)  provided_regionbucket_regionr7  r8  r1  r2  r3  r4  retry_strategy_implretry_instancer9  r   r   r   r#    sN   

zPyArrowFileIO._initialize_s3_fsc                   s  ddl m} d}|tj||k rtd| dtj dddlm} i  | j	t
 }r3| d< | j	t }r?| d	< | j	t }rK| d
< | j	t }rW| d< | j	t }rc| d< | j	t }	ro|	 d< | j	t }
r{|
 d< | j	t }r| d< | j	t }r| d< | j	t }r| d< g d} fdd|D }|rt|t|krɇ fdd|D }td| d| |di  S )Nr   )versionz20.0.0zpyarrow version >= z9 required for AzureFileSystem support, but found version r   )AzureFileSystemaccount_nameaccount_keyblob_storage_authoritydfs_storage_authorityblob_storage_schemedfs_storage_scheme	sas_token	client_idclient_secret	tenant_id)rH  rI  rJ  c                   s   g | ]}| v r|qS r   r   .0r   r7  r   r   
<listcomp>1      z6PyArrowFileIO._initialize_azure_fs.<locals>.<listcomp>c                   s   g | ]}| vr|qS r   r   rK  rM  r   r   rN  3  rO  zclient_id, client_secret, and tenant_id must all be provided together to use ClientSecretCredential for Azure authentication. Provided: z, Missing: r   )	packagingr?  parsepyarrow__version__ImportErrorr   r@  r  r  r(   r'   r)   r-   r*   r.   r/   r+   r,   r0   r   r   )r   r?  'MIN_PYARROW_VERSION_SUPPORTING_AZURE_FSr@  rA  rB  rC  rD  rE  rF  rG  rH  rI  rJ  credential_keysprovided_keysmissing_keysr   rM  r   r&    sT   z"PyArrowFileIO._initialize_azure_fsc           	      C  s   ddl m} i }|r|| d| S | jt }r ||d< | jt }r.t||d< | jt }r:||d< | jt	 }rF||d< |di |S )	Nr   )HadoopFileSystemz://hostportuserkerb_ticketr   )
r   rY  from_urir  r  r;   r=   r   r>   r<   )	r   r  r  rY  hdfs_kwargsrZ  r[  r\  r]  r   r   r   r$  <  s   z!PyArrowFileIO._initialize_hdfs_fsc                 C  s   ddl m} i }| jt }r||d< | jt }r$tt||d< | jt }r0||d< | jt	 }rFt
|}|j|d< |j|d< |di |S )	Nr   )GcsFileSystemaccess_tokencredential_token_expirationdefault_bucket_locationr  r*  r   )r   r`  r  r  r9   r:   r   r   r7   r8   r   r  r  )r   r`  
gcs_kwargsra  
expirationbucket_locationendpoint	url_partsr   r   r   r%  N  s   

z PyArrowFileIO._initialize_gcs_fsc                 C  s   t  S r   )r   r   r   r   r   r'  _     z"PyArrowFileIO._initialize_local_fsr   c              	   C  :   |  || j\}}}t| ||||t| jttdS )zGet a PyArrowFile instance to read bytes from the file at the given location.

        Args:
            location (str): A URI or a path to a local file.

        Returns:
            PyArrowFile: A PyArrowFile instance for the given location.
        r   r   r   r   r  r  r   r  r   r  BUFFER_SIZEr  r   r   r  r  r   r   r   r   	new_inputb     	
zPyArrowFileIO.new_inputc              	   C  rj  )zGet a PyArrowFile instance to write bytes to the file at the given location.

        Args:
            location (str): A URI or a path to a local file.

        Returns:
            PyArrowFile: A PyArrowFile instance for the given location.
        rk  rl  rn  r   r   r   
new_outputs  rp  zPyArrowFileIO.new_outputstr | InputFile | OutputFileNonec              
   C  s   t |ttfr
|jn|}| || j\}}}| ||}z|| W dS  ty,     t	y3     t
ye } z'|jdksEdt|v rMtd| ||jdksXdt|v r`t	d| | d}~ww )a=  Delete the file at the given location.

        Args:
            location (Union[str, InputFile, OutputFile]): The URI to the file--if an InputFile instance or
                an OutputFile instance is provided, the location attribute for that instance is used as
                the location to delete.

        Raises:
            FileNotFoundError: When the file at the provided location does not exist.
            PermissionError: If the file at the provided location cannot be accessed due to a permission error such as
                an AWS error code 15.
        r   r   z$Cannot delete file, does not exist: r   r   z#Cannot delete file, access denied: N)
isinstancerN   rP   r   r  r  r  delete_filer   r   r   r   r   )r   r   str_locationr  r  r   r   r   r   r   r   delete  s"   zPyArrowFileIO.deletedict[str, Any]c                 C  s   t | j}d|d< |S )zCCreate a dictionary of the PyArrowFileIO fields used when pickling.Nr  )r   __dict__)r   fileio_copyr   r   r   __getstate__  s   
zPyArrowFileIO.__getstate__statec                 C  s   || _ t| j| _dS )z4Deserialize the state into a PyArrowFileIO instance.N)ry  r   r  r  )r   r|  r   r   r   __setstate__  s   zPyArrowFileIO.__setstate__)r  rq   )r   r   r  rq   r   r	  r   )r  r   r  r   r   r   )r   r   )r  r   r   r   )r   r   r   r   )r   rr  r   rs  r   rx  )r|  rx  r   rs  )r   r   r   r  rp   r   staticmethodr  r  r"  r#  r&  r$  r%  r'  ro  rq  rw  r{  r}  r   r   r   r   r   r    s"   
 

 
6
8





r  TschemaSchema | IcebergTypemetadatadict[bytes, bytes]include_field_idsr   file_formatrT   	pa.schemac                 C  s   t | t|||S r   )r&   _ConvertToArrowSchema)r  r  r  r  r   r   r   schema_to_pyarrow  s   r  c                   @  s  e Zd ZU ded< eddfdaddZdbddZdcddZdddd Zded$d%Z	dfd*d+Z
dgd.d/Zdhd2d3Zdid5d6Zdjd8d9Zdkd;d<Zdld>d?ZdmdAdBZdndDdEZdodGdHZdpdJdKZdqdMdNZdrdPdQZdsdSdTZdtdVdWZdudYdZZdvd\d]Zdwd_d`ZdS )xr  r  	_metadataTNr  r  r   r  FileFormat | Noner   rs  c                 C  s   || _ || _|| _d S r   )r  _include_field_ids_file_format)r   r  r  r  r   r   r   r        
z_ConvertToArrowSchema.__init___r\   struct_resultpa.StructTyper  c                 C  s   t jt|| jdS )N)r  )par  listr  )r   r  r  r   r   r   r       z_ConvertToArrowSchema.schemar   field_resultsbuiltins.list[pa.DataType]pa.DataTypec                 C  
   t |S r   r  struct)r   r  r  r   r   r   r       
z_ConvertToArrowSchema.structr   r   field_resultr   c                 C  sx   i }|j r
|j |t< | jr"| jtjkrt|j|t< nt|j|t	< | jtjkr1t|j
 |t< tj|j||j|dS N)namer   nullabler  )r   PYARROW_FIELD_DOC_KEYr  r  rT   ORCr   field_idORC_FIELD_ID_KEYPYARROW_PARQUET_FIELD_ID_KEYrequiredlowerORC_FIELD_REQUIRED_KEYr  r   r  optional)r   r   r  r  r   r   r   r     s   
z_ConvertToArrowSchema.field	list_typer}   element_resultc                 C  s   |  |j|}tj|dS )N)
value_type)r   element_fieldr  
large_list)r   r  r  r  r   r   r   r    s   z_ConvertToArrowSchema.listmap_typer   
key_resultvalue_resultc                 C  s*   |  |j|}|  |j|}tj||dS )N)key_type	item_type)r   	key_fieldvalue_fieldr  map_)r   r  r  r  r  r  r   r   r   map  s   z_ConvertToArrowSchema.map
fixed_typery   c                 C  s   t t|S r   )r  binaryr   r   r  r   r   r   visit_fixed     z!_ConvertToArrowSchema.visit_fixeddecimal_typerw   c                 C  s   t |j|jS r   )r  
decimal128	precisionscaler   r  r   r   r   visit_decimal  s   z#_ConvertToArrowSchema.visit_decimalru   c                 C     t  S r   )r  bool_r   r  r   r   r   visit_boolean     z#_ConvertToArrowSchema.visit_booleanr|   c                 C  r  r   )r  int32r  r   r   r   visit_integer  r  z#_ConvertToArrowSchema.visit_integerr~   c                 C  r  r   )r  int64r  r   r   r   
visit_long  r  z _ConvertToArrowSchema.visit_longrz   c                 C  r  r   )r  float32r  r   r   r   visit_float     z!_ConvertToArrowSchema.visit_floatrx   c                 C  r  r   )r  float64r  r   r   r   visit_double  r  z"_ConvertToArrowSchema.visit_doublerv   c                 C  r  r   )r  date32r  r   r   r   
visit_date  r  z _ConvertToArrowSchema.visit_dater   c                 C  
   t dS )Nus)r  time64r  r   r   r   
visit_time  r  z _ConvertToArrowSchema.visit_timer   c                 C     t jddS )Nr  unitr  	timestampr  r   r   r   visit_timestamp	     z%_ConvertToArrowSchema.visit_timestampr   c                 C  r  )Nnsr  r  r  r   r   r   visit_timestamp_ns  r  z(_ConvertToArrowSchema.visit_timestamp_nsr   c                 C     t jdddS )Nr  r   r  tzr  r  r   r   r   visit_timestamptz  r  z'_ConvertToArrowSchema.visit_timestamptzr   c                 C  r  )Nr  r   r  r  r  r   r   r   visit_timestamptz_ns  r  z*_ConvertToArrowSchema.visit_timestamptz_nsr   c                 C  r  r   )r  large_stringr  r   r   r   visit_string  r  z"_ConvertToArrowSchema.visit_stringr   c                 C  r  r   )r  uuidr  r   r   r   
visit_uuid  r  z _ConvertToArrowSchema.visit_uuidr   c                 C  r  )z\Type `UnknownType` can be promoted to any primitive type in V3+ tables per the Iceberg spec.)r  nullr  r   r   r   visit_unknown  r  z#_ConvertToArrowSchema.visit_unknownrt   c                 C  r  r   )r  large_binaryr  r   r   r   visit_binary  r  z"_ConvertToArrowSchema.visit_binary)r  r  r  r   r  r  r   rs  )r  r\   r  r  r   r  )r  r   r  r  r   r  )r   r   r  r  r   r   )r  r}   r  r  r   r  )r  r   r  r  r  r  r   r  )r  ry   r   r  )r  rw   r   r  )r  ru   r   r  )r  r|   r   r  )r  r~   r   r  )r  rz   r   r  )r  rx   r   r  )r  rv   r   r  )r  r   r   r  )r  r   r   r  )r  r   r   r  )r  r   r   r  )r  r   r   r  )r  r   r   r  )r  r   r   r  )r  r   r   r  )r  rt   r   r  )r   r   r   r  rp   r   r  r  r   r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r   r   r   r   r    s4   
 







	












r  r   iceberg_typer{   	pa.scalarc                 C  s*   t |tstd| tj| t|dS )NExpected primitive type, got: )r   r   )rt  r   r   r  scalarr  )r   r  r   r   r   _convert_scalar#  s   
r  c                   @  s   e Zd ZU dZded< d<d=ddZd>ddZd?ddZd?ddZd@ddZ	d@ddZ
d@ddZd@ddZdAdd ZdAd!d"ZdAd#d$ZdAd%d&ZdAd'd(ZdAd)d*ZdAd+d,ZdAd-d.ZdBd/d0ZdBd1d2ZdCd4d5ZdDd8d9ZdDd:d;ZdS )E_ConvertToArrowExpressionzConvert Iceberg bound expressions to PyArrow expressions.

    Args:
        schema: Optional Iceberg schema to resolve full field paths for nested fields.
                If not provided, only the field name will be used (not dotted path).
    Schema | None_schemaNr  c                 C  
   || _ d S r   )r  )r   r  r   r   r   r   3  r  z"_ConvertToArrowExpression.__init__termr   r   str | tuple[str, ...]c                 C  sL   | j dur | j | jj}|dur d|v rt|dS |S | jjS )a@  Get the field name or nested field path for a bound term.

        For nested struct fields, returns a tuple of field names (e.g., ("mazeMetadata", "run_id")).
        For top-level fields, returns just the field name as a string.

        PyArrow requires nested field references as tuples, not dotted strings.
        Nr   )r  find_column_namerefr   r  tupler   r  )r   r  	full_namer   r   r   _get_field_name6  s   
z)_ConvertToArrowExpression._get_field_nameliteralsset[Any]pc.Expressionc                 C  s0   t j|t| jjd}t| ||S Nr   	r  arrayr  r  r   
field_typepcr  isinr   r  r  pyarrow_literalsr   r   r   visit_inJ  s   z"_ConvertToArrowExpression.visit_inc                 C  s2   t j|t| jjd}t| || S r  r   r  r   r   r   visit_not_inN  s   z&_ConvertToArrowExpression.visit_not_inc                 C  s   t | |}t |S r   r  r   r  is_nanr   r  r  r   r   r   visit_is_nanR  s   
z&_ConvertToArrowExpression.visit_is_nanc                 C  s   t | |}t | S r   r	  r  r   r   r   visit_not_nanV  s   z'_ConvertToArrowExpression.visit_not_nanc                 C  s   t | |jddS )NF)nan_is_null)r  r   r  is_nullr   r  r   r   r   visit_is_nullZ     z'_ConvertToArrowExpression.visit_is_nullc                 C  s   t | | S r   )r  r   r  is_validr  r   r   r   visit_not_null]  r  z(_ConvertToArrowExpression.visit_not_nullliteralLiteral[Any]c                 C  s$   t | |t|j| jjkS r   r  r   r  r  r   r  r  r   r  r  r   r   r   visit_equal`     $z%_ConvertToArrowExpression.visit_equalc                 C  s$   t | |t|j| jjkS r   r  r  r   r   r   visit_not_equalc  r  z)_ConvertToArrowExpression.visit_not_equalc                 C  s$   t | |t|j| jjkS r   r  r  r   r   r   visit_greater_than_or_equalf  r  z5_ConvertToArrowExpression.visit_greater_than_or_equalc                 C  s$   t | |t|j| jjkS r   r  r  r   r   r   visit_greater_thani  r  z,_ConvertToArrowExpression.visit_greater_thanc                 C  s$   t | |t|j| jjk S r   r  r  r   r   r   visit_less_thanl  r  z)_ConvertToArrowExpression.visit_less_thanc                 C  s$   t | |t|j| jjkS r   r  r  r   r   r   visit_less_than_or_equalo  r  z2_ConvertToArrowExpression.visit_less_than_or_equalc                 C  s   t t | ||jS r   r  starts_withr   r  r   r  r   r   r   visit_starts_withr     z+_ConvertToArrowExpression.visit_starts_withc                 C  s   t t | ||j S r   r   r  r   r   r   visit_not_starts_withu  s   z/_ConvertToArrowExpression.visit_not_starts_withc                 C  r  NTr  r  r   r   r   r   
visit_truex  r  z$_ConvertToArrowExpression.visit_truec                 C  r  )NFr&  r   r   r   r   visit_false{  r  z%_ConvertToArrowExpression.visit_falsechild_resultc                 C  s   | S r   r   r   r)  r   r   r   	visit_not~  ri  z#_ConvertToArrowExpression.visit_notleft_resultright_resultc                 C  s   ||@ S r   r   r   r,  r-  r   r   r   	visit_and  r  z#_ConvertToArrowExpression.visit_andc                 C  s   ||B S r   r   r.  r   r   r   visit_or  r  z"_ConvertToArrowExpression.visit_orr   )r  r  )r  r   r   r  )r  r   r  r  r   r  )r  r   r   r  )r  r   r  r  r   r  )r   r  )r)  r  r   r  )r,  r  r-  r  r   r  )r   r   r   r   r  r   r  r  r  r  r  r  r  r  r  r  r  r  r  r"  r$  r'  r(  r+  r/  r0  r   r   r   r   r  )  s0   
 


















r  c                      s0  e Zd ZU ded< ded< ded< ded< dE fdd	ZdFddZdFddZdFddZdFddZdGddZ	dGddZ
dFddZdFddZdFddZdFd d!ZdHd$d%ZdHd&d'ZdHd(d)ZdHd*d+ZdHd,d-ZdHd.d/ZdHd0d1ZdHd2d3ZdEd4d5ZdEd6d7ZdId9d:ZdJd=d>ZdJd?d@ZdKdCdDZ  ZS )L!_NullNaNUnmentionedTermsCollectorzset[BoundTerm]is_null_or_not_bound_termsnull_unmentioned_bound_termsis_nan_or_not_bound_termsnan_unmentioned_bound_termsr   rs  c                   s.   t    t | _t | _t | _t | _d S r   )r   r   setr2  r3  r4  r5  r   r   r   r   r     s
   
z*_NullNaNUnmentionedTermsCollector.__init__r  r   c                 C  &   || j v r| j | | j| dS )zJHandle the predicate case where either is_null or is_not_null is included.N)r3  remover2  addr  r   r   r   _handle_explicit_is_null_or_not     
zA_NullNaNUnmentionedTermsCollector._handle_explicit_is_null_or_notc                 C     || j vr| j| dS dS )zKHandle the predicate case where neither is_null or is_not_null is included.N)r2  r3  r9  r  r   r   r   _handle_null_unmentioned     
z:_NullNaNUnmentionedTermsCollector._handle_null_unmentionedc                 C  r7  )zHHandle the predicate case where either is_nan or is_not_nan is included.N)r5  r8  r4  r9  r  r   r   r   _handle_explicit_is_nan_or_not  r;  z@_NullNaNUnmentionedTermsCollector._handle_explicit_is_nan_or_notc                 C  r<  )zIHandle the predicate case where neither is_nan or is_not_nan is included.N)r4  r5  r9  r  r   r   r   _handle_nan_unmentioned  r>  z9_NullNaNUnmentionedTermsCollector._handle_nan_unmentionedr  r  c                 C     |  | | | d S r   r=  r@  r   r  r  r   r   r   r       
z*_NullNaNUnmentionedTermsCollector.visit_inc                 C  rA  r   rB  rC  r   r   r   r    rD  z._NullNaNUnmentionedTermsCollector.visit_not_inc                 C  rA  r   r=  r?  r  r   r   r   r    rD  z._NullNaNUnmentionedTermsCollector.visit_is_nanc                 C  rA  r   rE  r  r   r   r   r    rD  z/_NullNaNUnmentionedTermsCollector.visit_not_nanc                 C  rA  r   r:  r@  r  r   r   r   r    rD  z/_NullNaNUnmentionedTermsCollector.visit_is_nullc                 C  rA  r   rF  r  r   r   r   r    rD  z0_NullNaNUnmentionedTermsCollector.visit_not_nullr  r  c                 C  rA  r   rB  r  r   r   r   r    rD  z-_NullNaNUnmentionedTermsCollector.visit_equalc                 C  rA  r   rB  r  r   r   r   r    rD  z1_NullNaNUnmentionedTermsCollector.visit_not_equalc                 C  rA  r   rB  r  r   r   r   r    rD  z=_NullNaNUnmentionedTermsCollector.visit_greater_than_or_equalc                 C  rA  r   rB  r  r   r   r   r    rD  z4_NullNaNUnmentionedTermsCollector.visit_greater_thanc                 C  rA  r   rB  r  r   r   r   r    rD  z1_NullNaNUnmentionedTermsCollector.visit_less_thanc                 C  rA  r   rB  r  r   r   r   r    rD  z:_NullNaNUnmentionedTermsCollector.visit_less_than_or_equalc                 C  rA  r   rB  r  r   r   r   r"    rD  z3_NullNaNUnmentionedTermsCollector.visit_starts_withc                 C  rA  r   rB  r  r   r   r   r$    rD  z7_NullNaNUnmentionedTermsCollector.visit_not_starts_withc                 C     d S r   r   r   r   r   r   r'       z,_NullNaNUnmentionedTermsCollector.visit_truec                 C  rG  r   r   r   r   r   r   r(    rH  z-_NullNaNUnmentionedTermsCollector.visit_falser)  c                 C  rG  r   r   r*  r   r   r   r+    rH  z+_NullNaNUnmentionedTermsCollector.visit_notr,  r-  c                 C  rG  r   r   r.  r   r   r   r/    rH  z+_NullNaNUnmentionedTermsCollector.visit_andc                 C  rG  r   r   r.  r   r   r   r0    rH  z*_NullNaNUnmentionedTermsCollector.visit_orexprr   c                 C  s   t ||  dS )zCollect bound references categorized by null predicates.

        Categorizes by having at least one is_null or is_not_null in the expr and the remaining.
        N)boolean_expression_visit)r   rI  r   r   r   collect  s   z)_NullNaNUnmentionedTermsCollector.collectr   rs  )r  r   r   rs  )r  r   r  r  r   rs  )r  r   r  r  r   rs  )r)  rs  r   rs  )r,  rs  r-  rs  r   rs  )rI  r   r   rs  )r   r   r   r  r   r:  r=  r?  r@  r  r  r  r  r  r  r  r  r  r  r  r  r"  r$  r'  r(  r+  r/  r0  rK  r   r   r   r   r   r1    s<   
 






















r1  rI  r   r  r  c                 C  s   t | t|S )ar  Convert an Iceberg boolean expression to a PyArrow expression.

    Args:
        expr: The Iceberg boolean expression to convert.
        schema: Optional Iceberg schema to resolve full field paths for nested fields.
                If provided, nested struct fields will use dotted paths (e.g., "parent.child").

    Returns:
        A PyArrow compute expression.
    )rJ  r  )rI  r  r   r   r   expression_to_pyarrow  s   rM  c                 C  sz   t  }||  t|jdd d}t|jdd d}t| }|D ]
}t|t|d}q |D ]
}t|t|d}q-t	||S )zComplementary filter conversion function of expression_to_pyarrow.

    Could not use expression_to_pyarrow(Not(expr)) to achieve this complementary effect because
    ~ in pyarrow.compute.Expression does not handle null.
    c                 S     |   jjS r   r  r   r  r  r   r   r   <lambda>      z6_expression_to_complementary_pyarrow.<locals>.<lambda>)r   c                 S  rN  r   rO  rP  r   r   r   rQ    rR  rP  )
r1  rK  sortedr3  r5  r   r    r   r   rM  )rI  r  	collectorr3  r5  preserve_exprr  r   r   r   $_expression_to_complementary_pyarrow  s   



rV  r   rx  ds.FileFormatc                 K  sT   | t jkrtjdi |S | t jkr#dd | D }tjdi |S td|  )Nc                 S  s   i | ]\}}|d vr||qS )
pre_bufferr   r   )rL  kvr   r   r   
<dictcomp>.  s    z$_get_file_format.<locals>.<dictcomp>zUnsupported file format: r   )rT   PARQUETdsParquetFileFormatr  itemsOrcFileFormatr   )r  r   
orc_kwargsr   r   r   _get_file_format(  s   

rc  iorM   	data_filerR   dict[str, pa.ChunkedArray]c                   sl  |j tjkrG| |j }t|j ddtd|}t	j
j|d  W d    n1 s.w   Y      fdd djd jD S |j tjkr| |j '}t|j |}t	j
j|d   fd	d d D W  d    S 1 sw   Y  d S |j tjkr| |j }| }W d    n1 sw   Y  t| S td
|j  )N)	file_pathT)dictionary_columnsrY  r   )fragmentc                   ,   i | ]}|   td |kdqS rg  posas_pyfilterr  r   column)rL  r  tabler   r   r\  <      z!_read_deletes.<locals>.<dictcomp>rg  r   c                   rj  rk  rm  )rL  r   rq  r   r   r\  E  rs  z"Delete file format not supported: )r  rT   r]  ro  rg  r   rc  r  make_fragmentr^  Scannerfrom_fragmentto_tableunify_dictionariesrp  chunks
dictionaryr  uniquePUFFINreadrm   	to_vectorr   )rd  re  fidelete_fragmentpayloadr   rq  r   _read_deletes4  s6   


$
r  positional_deleteslist[pa.ChunkedArray]start_indexr   	end_indexpa.Arrayc              	   C  sn   t | dkr| d }nttjdd | D  }tt||}t|t	tj
||d}t|t|S )N   r   c                 S     g | ]}|j qS r   )ry  )rL  arrr   r   r   rN  V      z/_combine_positional_deletes.<locals>.<listcomp>)	value_set)r   r  chunked_array	itertoolschainr  ranger  ro  invertis_insubtractr  )r  r  r  
all_chunks
full_rangeresultr   r   r   _combine_positional_deletesR  s   
r  F	pa.Schemaname_mappingNameMapping | Nonedowncast_ns_timestamp_to_usformat_versionrs   r\   c                 C  sJ   t | t }|rt | t||dS |d ur!t| ||d}t||S td)Nr  r  znParquet file does not have field-ids and the Iceberg table does not have 'schema.name-mapping.default' defined)visit_pyarrow_HasIds_ConvertToIceberg_pyarrow_to_schema_without_idsrl   r   )r  r  r  r  has_idsschema_without_idsr   r   r   pyarrow_to_schemad  s   
r  c                 C  s   t | t||dS )Nr  )r  _ConvertToIcebergWithoutIDs)r  r  r  r   r   r   r  z  s   
r  c                 C     t | t S r   )r  _ConvertToLargeTypesr  r   r   r   "_pyarrow_schema_ensure_large_types  r  r  c                 C  r  r   )r  _ConvertToSmallTypesr  r   r   r   "_pyarrow_schema_ensure_small_types  r  r  objpa.DataType | pa.SchemavisitorPyArrowSchemaVisitor[T]c                 C  s   t d|  )a  Apply a pyarrow schema visitor to any point within a schema.

    The function traverses the schema in post-order fashion.

    Args:
        obj (Union[pa.DataType, pa.Schema]): An instance of a Schema or an IcebergType.
        visitor (PyArrowSchemaVisitor[T]): An instance of an implementation of the generic PyarrowSchemaVisitor base class.

    Raises:
        NotImplementedError: If attempting to visit an unrecognized object type.
    zCannot visit non-type: )NotImplementedErrorr  r  r   r   r   r    s   r  c                 C  s   | | tt| |S r   )r  r  r  r  r  r   r   r   r    s   r  r  c                   s    fdd| D }  | |S )Nc                   s   g | ]}t | qS r   )r  rL  r   r  r   r   rN    s    z_.<locals>.<listcomp>)r  )r  r  resultsr   r  r   r    s   5pa.ListType | pa.LargeListType | pa.FixedSizeListTypec                 C  s0   | | j t| j|}|| j || |S r   )before_list_elementr  r  r  after_list_elementr  )r  r  r  r   r   r   r    s   
pa.MapTypec                 C  sV   | | j t| j|}|| j || j t| j|}|| j |	| ||S r   )
before_map_keyr  r  r  after_map_keybefore_map_value
item_fieldr  after_map_valuer  )r  r  r  r  r   r   r   r    s   pa.DictionaryTypec                 C  s*   t dt|  d| j d t| j|S )Nz)Iceberg does not have a dictionary type. z will be inferred as z	 on read.)r   r   r   r  r  r  r   r   r   r    s   r   c              
   C  sh   | j }||  zt||}W n ty( } zt| d| j d| |d }~ww ||  || |S )NzColumn 'z' has an unsupported type: )r   before_fieldr  r   r   r  after_fieldr   )r  r  r  r  r   r   r   r   r    s   

r  c                 C  s(   t j| rtdt|  || S )Nr  )r  types	is_nestedr   r   	primitiver  r   r   r   r    s   
c                   @  s   e Zd Zd7ddZd7ddZd8d
dZd8ddZd9ddZd9ddZd:ddZ	d:ddZ
ed;ddZed<d"d#Zed=d%d&Zed>d*d+Zed?d0d1Zed@d4d5Zd6S )APyArrowSchemaVisitorr   r   r   rs  c                 C     dS )zNOverride this method to perform an action immediately before visiting a field.Nr   r   r   r   r   r   r        z!PyArrowSchemaVisitor.before_fieldc                 C  r  )zMOverride this method to perform an action immediately after visiting a field.Nr   r  r   r   r   r    r  z PyArrowSchemaVisitor.after_fieldr   c                 C  r  )zcOverride this method to perform an action immediately before visiting an element within a ListType.Nr   r   r   r   r   r   r    r  z(PyArrowSchemaVisitor.before_list_elementc                 C  r  )zbOverride this method to perform an action immediately after visiting an element within a ListType.Nr   r  r   r   r   r    r  z'PyArrowSchemaVisitor.after_list_elementr   c                 C  r  )z]Override this method to perform an action immediately before visiting a key within a MapType.Nr   r   r   r   r   r   r    r  z#PyArrowSchemaVisitor.before_map_keyc                 C  r  )z\Override this method to perform an action immediately after visiting a key within a MapType.Nr   r  r   r   r   r    r  z"PyArrowSchemaVisitor.after_map_keyr   c                 C  r  )z_Override this method to perform an action immediately before visiting a value within a MapType.Nr   r   r   r   r   r   r    r  z%PyArrowSchemaVisitor.before_map_valuec                 C  r  )z^Override this method to perform an action immediately after visiting a value within a MapType.Nr   r  r   r   r   r    r  z$PyArrowSchemaVisitor.after_map_valuer  r  r  r   c                 C  r  )zVisit a schema.Nr   r   r  r  r   r   r   r    r  zPyArrowSchemaVisitor.schemar  r  r  builtins.list[T]c                 C  r  )zVisit a struct.Nr   r   r  r  r   r   r   r    r  zPyArrowSchemaVisitor.structr  c                 C  r  )zVisit a field.Nr   r   r   r  r   r   r   r      r  zPyArrowSchemaVisitor.fieldr  pa.ListTyper  c                 C  r  )zVisit a list.Nr   r   r  r  r   r   r   r    r  zPyArrowSchemaVisitor.listr  r  r  r  c                 C  r  )zVisit a map.Nr   r   r  r  r  r   r   r   r    r  zPyArrowSchemaVisitor.mapr  r  c                 C  r  )zVisit a primitive type.Nr   r   r  r   r   r   r    r  zPyArrowSchemaVisitor.primitiveNr   r   r   rs  r   r   r   rs  r   r   r   rs  r   r   r   rs  )r  r  r  r   r   r   )r  r  r  r  r   r   )r   r   r  r   r   r   )r  r  r  r   r   r   )r  r  r  r   r  r   r   r   )r  r  r   r   )r   r   r   r  r  r  r  r  r  r  r  r   r  r  r   r  r  r  r   r   r   r   r    s*    







r  r   
int | Nonec                 C  sB   | j r| j t }rt| S | j t }rt| S dS )zFReturn the Iceberg field ID from Parquet or ORC metadata if available.N)r  r  r  r   decoder  )r   field_id_bytesr   r   r   _get_field_id  s   r  c                   @  sH   e Zd Zd#ddZd$ddZd%ddZd&ddZd'ddZd(d d!Zd"S ))r  r  r  r  r   r   c                 C     |S r   r   r  r   r   r   r     rH  z_HasIds.schemar  r  r  builtins.list[bool]c                 C  s   t |S r   )allr  r   r   r   r  #  r  z_HasIds.structr   r   r  c                 C  s   t t|d u|gS r   )r  r  r  r   r   r   r   &  r  z_HasIds.fieldr  r  r  c                 C  s   |j }t|}|o|d uS r   )r  r  r   r  r  r  
element_idr   r   r   r  )  s   z_HasIds.listr  r  r  r  c                 C  s4   |j }t|}|j}t|}t|d u|d u||gS r   )r  r  r  r  r   r  r  r  r  key_idr  value_idr   r   r   r  .  s
   z_HasIds.mapr  r  c                 C  r  r%  r   r  r   r   r   r  5  rH  z_HasIds.primitiveN)r  r  r  r   r   r   )r  r  r  r  r   r   )r   r   r  r   r   r   )r  r  r  r   r   r   )r  r  r  r   r  r   r   r   )r  r  r   r   	r   r   r   r  r  r   r  r  r  r   r   r   r   r    s    




r  c                   @  s   e Zd ZU dZded< dejfdJddZdKddZdLddZ	dMddZ
dNd"d#ZdOd(d)ZdPd/d0ZdQd4d5ZdRd6d7ZdRd8d9ZdSd;d<ZdSd=d>ZdTd@dAZdSdBdCZdUdEdFZdSdGdHZdIS )Vr  zXConverts PyArrowSchema to Iceberg Schema. Applies the IDs from name_mapping if provided.builtins.list[str]_field_namesFr  r   r  rs   r   rs  c                 C  s   g | _ || _|| _d S r   )r  _downcast_ns_timestamp_to_us_format_version)r   r  r  r   r   r   r   >  r  z_ConvertToIceberg.__init__r   r   r   c                 C  s$   t | }d ur
|S td| d)NzCannot convert z' to Iceberg Field as field_id is empty.)r  r   )r   r   r  r   r   r   	_field_idE  s   z_ConvertToIceberg._field_idr  r  r  r   r\   c                 C  s
   t |j S r   )r\   fieldsr  r   r   r   r  K  r  z_ConvertToIceberg.schemar  r  r  builtins.list[NestedField]c                 C  s   t | S r   )r   r  r   r   r   r  N  r  z_ConvertToIceberg.structr  r{   r   c                 C  sH   |  |}|jr|jt }r| nd }|}t||j||j |dS )N)r  r   )r  r  r  r  r  r   r  r  )r   r   r  r  doc_str	field_docr  r   r   r   r   Q  s   
"z_ConvertToIceberg.fieldr  r  r  r}   c                 C  s8   |j }| jt | |}| j  t|||j dS )N)element_required)r  r  appendLIST_ELEMENT_NAMEr  popr}   r  r  r   r   r   r  W  s
   

z_ConvertToIceberg.listr  r  r  r  r   c                 C  sb   |j }| jt | |}| j  |j}| jt | |}| j  t|||||j	 dS )N)value_required)
r  r  r  MAP_KEY_NAMEr  r  r  MAP_VALUE_NAMEr   r  r  r   r   r   r  ^  s   



z_ConvertToIceberg.mapr  r  r   c                 C  sH  t j|r	t S t j|r'|j}|dkrt S |dkr t S td| t j	|r0t
 S t j|r9t S t|t jrLtt j|}t|j|jS t j|s^t j|s^t j|rat S t j|rjt S t|t jrx|jdkrxt S t j|rtt j|}|jdv rn*|jdkr| jrt d n| j!dkr|j"t#v rt$ S t% S td	td
|j |j"t#v rt& S |j"d u rt S nXt j'|st j(|st j)|rt* S t j+|rtt j,|}t-|j.S t j/|r| j!dk r| j0rd1| j0nd}t2d| j! d| dt3 S t|t j4rt5 S td| )N    @   zUnsupported integer type: r  )smsr  r  zKIceberg does not yet support 'ns' timestamp precision. Downcasting to 'us'.   zIceberg does not yet support 'ns' timestamp precision. Use 'downcast-ns-timestamp-to-us-on-write' configuration property to automatically downcast 'ns' to 'us' on write.z*Unsupported precision for timestamp type: r   z<root>zANull type (pa.null()) is not supported in Iceberg format version z	. Field: zQ. Requires format-version=3+ or use a concrete type (string, int, boolean, etc.).zUnsupported type: )6r  r  
is_booleanru   
is_integer	bit_widthr|   r~   r   
is_float32rz   
is_float64rx   rt  Decimal128Typer   rw   r  r  	is_stringis_large_stringis_string_viewr   	is_date32rv   
Time64Typer  r   is_timestampr   r  r   r   r  r  UTC_ALIASESr   r   r   	is_binaryis_large_binaryis_binary_viewrt   is_fixed_size_binaryFixedSizeBinaryTypery   
byte_widthr  r  r   r   r   UuidTyper   )r   r  width
field_pathr   r   r   r  i  sz   $





$
z_ConvertToIceberg.primitivec                 C  s   | j |j d S r   )r  r  r  r  r   r   r   r    s   z_ConvertToIceberg.before_fieldc                 C     | j   d S r   r  r  r  r   r   r   r    r  z_ConvertToIceberg.after_fieldr   c                 C     | j t d S r   )r  r  r  r  r   r   r   r       z%_ConvertToIceberg.before_list_elementc                 C  r  r   r  r  r   r   r   r    r  z$_ConvertToIceberg.after_list_elementr   c                 C  r  r   )r  r  r  r  r   r   r   r    r  z _ConvertToIceberg.before_map_keyc                 C  r  r   r  r  r   r   r   r    r  z_ConvertToIceberg.after_map_keyr   c                 C  r  r   )r  r  r  r  r   r   r   r    r  z"_ConvertToIceberg.before_map_valuec                 C  r  r   r  r  r   r   r   r    r  z!_ConvertToIceberg.after_map_valueN)r  r   r  rs   r   rs  r   r   r   r   )r  r  r  r   r   r\   )r  r  r  r  r   r   )r   r   r  r{   r   r   )r  r  r  r{   r   r}   )r  r  r  r{   r  r{   r   r   )r  r  r   r   r  r  r  r  )r   r   r   r   r  rh   DEFAULT_FORMAT_VERSIONr   r  r  r  r   r  r  r  r  r  r  r  r  r  r  r  r   r   r   r   r  9  s(   
 







I





r  c                   @  H   e Zd Zd"ddZd#ddZd$ddZd%ddZd&ddZd'dd Zd!S )(r  r  r  r  r  r   c                 C  r  r   r  r  r  r   r   r   r    r  z_ConvertToLargeTypes.schemar  r  builtins.list[pa.Field]c                 C  r  r   r  r  r   r   r   r    r  z_ConvertToLargeTypes.structr   r   r  r  c                 C  
   | |S r   	with_typer  r   r   r   r     r  z_ConvertToLargeTypes.fieldr  r  r  c                 C  r  r   )r  r  r  r   r   r   r    r  z_ConvertToLargeTypes.listr  r  r  r  c                 C     t ||S r   r  r  r  r   r   r   r    r  z_ConvertToLargeTypes.mapr  c                 C  ,   |t  kr
t  S |t  krt  S |S r   )r  stringr  r  r  r  r   r   r   r    
   z_ConvertToLargeTypes.primitiveNr  r  r  r  r   r  r  r  r  r  r   r  r   r   r  r  r   r   r  r  r  r  r   r  r  r  r  r  r  r  r   r  r  r  r   r  r  r   r   r   r   r        




r  c                   @  r  )(r  r  r  r  r  r   c                 C  r  r   r  r  r   r   r   r    r  z_ConvertToSmallTypes.schemar  r  r  c                 C  r  r   r  r  r   r   r   r    r  z_ConvertToSmallTypes.structr   r   r  r  c                 C  r  r   r  r  r   r   r   r     r  z_ConvertToSmallTypes.fieldr  r  r  c                 C  r  r   )r  list_r  r   r   r   r    r  z_ConvertToSmallTypes.listr  r  r  r  c                 C  r  r   r  r  r   r   r   r    r  z_ConvertToSmallTypes.mapr  c                 C  r  r   )r  r  r  r  r  r  r   r   r   r    r  z_ConvertToSmallTypes.primitiveNr   r!  r"  r#  r$  r%  r  r   r   r   r   r    r&  r  c                   @  s   e Zd ZdZd	ddZdS )
r  aa  
    Converts PyArrowSchema to Iceberg Schema with all -1 ids.

    The schema generated through this visitor should always be
    used in conjunction with `new_table_metadata` function to
    assign new field ids in order. This is currently used only
    when creating an Iceberg Schema from a PyArrow schema when
    creating a new Iceberg table.
    r   r   r   r   c                 C  r  )Nr   r   r  r   r   r   r    rH  z%_ConvertToIcebergWithoutIDs._field_idNr  )r   r   r   r   r  r   r   r   r   r    s    
r  r  projected_schematable_schemapartition_specPartitionSpec | Nonefile_project_field_idsset[int]dict[int, Any]c                 C  s   |j |}t|dks|du rtS ||}t|}i }|D ]}	||	D ]}
t|
jt	r=||
j
 | j }r=|||	< q&q|S )z-Apply Column Projection rules to File Schema.r   N)	field_ids
differencer   rp   partition_typer`   fields_by_source_idrt  	transformrn   r  r  	partition)r  r(  r)  r*  r,  project_schema_diffpartition_schema	accessorsprojected_missing_fieldsr  partition_fieldpartition_valuer   r   r   _get_column_projection_values
  s   
r;  taskr   bound_row_filterprojected_field_idslist[ChunkedArray] | Nonecase_sensitivebool | NoneIterator[pa.RecordBatch]c              
   c  s   t |jjdtd d}| |jj }||}|j}|d ur$|n|
dk}t	||||
d}t
|j|||	|j}d }|t urUt||||d}t|||d}t||}t||dd	}tjj|||se|nd d
d |jD d}d}| }|D ]P}|t| }|t| }|}|rt|||t| }||}|jdkrqx|d urtj|g}||}|jdkrqx|  d }t|||||ddV  qxW d    d S 1 sw   Y  d S )NT   rX  r   r  )r@  projected_field_valuesr@  F)select_full_typesc                 S  r  r   r  )rL  colr   r   r   rN  U  r  z+_task_to_record_batches.<locals>.<listcomp>)ri  r  ro  columnsr   )r  r8  allow_timestamp_tz_mismatch) rc  r  r  r  ro  rg  r   rt  physical_schemar  r;  r/  r   r%   r#   rM  rc   r^  ru  rv  rI  
to_batchesr   r  takenum_rowsr  Tablefrom_batchesro  combine_chunks_to_requested_schema)rd  r<  r=  r(  r)  r>  r  r@  r  r*  r  r  arrow_formatfinri  rK  file_schemar8  pyarrow_filtertranslated_row_filterbound_file_filterfile_project_schemafragment_scanner
next_indexbatchesbatchcurrent_indexcurrent_batchindicesrr  r   r   r   _task_to_record_batches#  sl   



	




"ra  tasksIterable[FileScanTask]dict[str, list[ChunkedArray]]c           	        s   i }t tjdd |D }t|dkrGt }|dd  fdd|D }|D ]}| D ]\}}||v r@|| 	| q0|g||< q0q*|S )Nc                 S  r  r   )delete_files)rL  r<  r   r   r   rN    r  z*_read_all_delete_files.<locals>.<listcomp>r   c                 S  s   t |  S r   )r  )r   r   r   r   rQ    s    z(_read_all_delete_files.<locals>.<lambda>c                   s   g | ]} |fqS r   r   )rL  ru  rd  r   r   rN    s    )
r6  r  r  from_iterabler   r   get_or_creater  r`  r  )	rd  rb  deletes_per_fileunique_deletesexecutordeletes_per_filesrw  r  r  r   rf  r   _read_all_delete_files}  s   rm  c                   @  s   e Zd ZU ded< ded< ded< ded< d	ed
< ded< ded< 	 		d*d+ddZed,ddZd-d!d"Zd.d$d%Zd/d(d)Z	dS )0	ArrowScanrj   _table_metadatarM   _ior\   _projected_schemar   _bound_row_filterr   _case_sensitiver  _limitrA  r  TNtable_metadatard  r(  
row_filterr@  limitr   rs  c                 C  sD   || _ || _|| _t| ||d| _|| _|| _t 	t
| _d S )NrE  )ro  rp  rq  r#   r  rr  rs  rt  r   get_boolrg   r  )r   ru  rd  r(  rv  r@  rw  r   r   r   r     s   	zArrowScan.__init__r-  c                   s"    fdd j jD t jS )z>Set of field IDs that should be projected from the data files.c                   s&   h | ]}t  j|ttfs|qS r   )rt  rq  	find_typer   r}   )rL  idr   r   r   	<setcomp>  s    z1ArrowScan._projected_field_ids.<locals>.<setcomp>)rq  r/  unionr$   rr  r   r   r   r   _projected_field_ids  s
   
zArrowScan._projected_field_idsrb  rc  pa.Tablec                 C  sf   t | jdd}| |}zt|}W n ty   |  Y S w tjdd t	|g|D dd}|S )a  Scan the Iceberg table and return a pa.Table.

        Returns a pa.Table with data from the Iceberg table by resolving the
        right columns that match the current table schema. Only data that
        matches the provided row_filter expression is returned.

        Args:
            tasks: FileScanTasks representing the data files and delete files to read from.

        Returns:
            A PyArrow table. Total number of rows will be capped if specified.

        Raises:
            ResolveError: When a required field cannot be found in the file
            ValueError: When a field type in the file cannot be projected to the schema type
        Fr  c                 s  s    | ]
}t j|gV  qd S r   )r  rO  rP  rL  r]  r   r   r   	<genexpr>  s    z%ArrowScan.to_table.<locals>.<genexpr>
permissive)promote_options)
r  rq  to_record_batchesnextStopIterationempty_tabler  concat_tablesr  r  )r   rb  arrow_schemar\  first_batchr  r   r   r   rw    s   
zArrowScan.to_tablerB  c           	      #  s    t j| d}t }d fdd}d}|||D ]1}|D ]'}t|}jd	urA|| jkrA|dj| V  d
} n|V  ||7 }q!|rN d	S qd	S )a  Scan the Iceberg table and return an Iterator[pa.RecordBatch].

        Returns an Iterator of pa.RecordBatch with data from the Iceberg table
        by resolving the right columns that match the current table schema.
        Only data that matches the provided row_filter expression is returned.

        Args:
            tasks: FileScanTasks representing the data files and delete files to read from.

        Returns:
            An Iterator of PyArrow RecordBatches.
            Total number of rows will be capped if specified.

        Raises:
            ResolveError: When a required field cannot be found in the file
            ValueError: When a field type in the file cannot be projected to the schema type
        r   r<  r   r   list[pa.RecordBatch]c                   s   t | g S r   )r  +_record_batches_from_scan_tasks_and_deletes)r<  ri  r   r   r   batches_for_task  s   z5ArrowScan.to_record_batches.<locals>.batches_for_taskFNT)r<  r   r   r  )rm  rp  r   rh  r  r   rt  slice)	r   rb  total_row_countrk  r  limit_reachedr\  r]  current_batch_sizer   r  r   r    s&   
zArrowScan.to_record_batchesri  rd  c                 c  s    d}|D ]e}| j d ur|| j kr d S t| j|| j| j| j | j||j	j
| j| j | j |j	j| jj| j}|D ])}| j d ur`|| j krN n|t| | j kr`|d| j | }|V  |t|7 }q@qd S )Nr   )rt  ra  rp  rr  rq  ro  r  r}  r  r  rg  rs  r  specsspec_idr  r  r   r  )r   rb  ri  r  r<  r\  r]  r   r   r   r    s:   

z5ArrowScan._record_batches_from_scan_tasks_and_deletes)TN)ru  rj   rd  rM   r(  r\   rv  r   r@  r   rw  r  r   rs  )r   r-  )rb  rc  r   r~  )rb  rc  r   rB  )rb  rc  ri  rd  r   rB  )
r   r   r   r  r   propertyr}  rw  r  r  r   r   r   r   rn    s"   
 

".rn  requested_schemarU  r]  pa.RecordBatchr8  rJ  c              
   C  s,   t | |t|||||dt|}tj|S )N)r8  rJ  )re   ArrowProjectionVisitorArrowAccessorr  RecordBatchfrom_struct_array)r  rU  r]  r  r  r8  rJ  struct_arrayr   r   r   rR  ,  s   
rR  c                   @  s   e Zd ZU ded< ded< ded< ded< ded< d	d	ed	fdAddZdBddZdCddZdDd"d#ZdEd)d*Z	dFd-d.Z
dGd3d4ZdHd:d;ZdId>d?Zd@S )Jr  r\   _file_schemar   r  r  r.  _projected_missing_fields_allow_timestamp_tz_mismatchFrU  r  r  r8  rJ  r   rs  c                 C  s"   || _ || _|| _|| _|| _d S r   )r  r  r  r  r  )r   rU  r  r  r8  rJ  r   r   r   r   L  s
   
zArrowProjectionVisitor.__init__r   r   valuesr  c           	      C  s  | j |j}|jjrt|j| jd }|jkr|jt kro|jj	d u p,| j
o,|jj	tv }tj|rd|j	sdtj|jrd|rd|jdkrT|jjdkrT| jrT|j|ddS |jdkrd|jjdv rd||S td|j d| |jt krtj|r|j	d	krtj|jr|jj	tv s|jj	d u r|jdkr|jjdkr| jr|j|ddS |jdkr|jjdv r||S td|j d| t|jttfrtj|jr|jj}|j}||k r||S |j|jkrtt|j|j| jd}||S |S )
Nr  r  r  F)safe>   r  r  r  z#Unsupported schema projection from z to r   )r  
find_fieldr  r  is_primitiver  r  r   r   r  r  r  r  r  r  r  r  r   r   r   rt  r|   r~   r  r  rb   )	r   r   r  
file_fieldtarget_typesource_tz_compatiblesource_widthtarget_widthtarget_schemar   r   r   _cast_if_needed\  sX   






z&ArrowProjectionVisitor._cast_if_needed
arrow_typer  r   c                 C  s>   i }|j r
|j |t< | jrt|j|t< tj|j||j	|dS r  )
r   r  r  r   r  r  r  r   r  r  )r   r   r  r  r   r   r   _construct_field  s   
z'ArrowProjectionVisitor._construct_fieldr  schema_partnerpa.Array | Noner  c                 C     |S r   r   )r   r  r  r  r   r   r   r    rH  zArrowProjectionVisitor.schemar  r   r  r  builtins.list[pa.Array | None]c              	   C  sL  |d u rd S g }g }t |j|ddD ]{\}}|d ur0| ||}|| || ||j q|js8|jd urt|j	| j
d}	| j|j }
rZ|ttj|
|	dt| n#|jd u rl|tjt||	d n|ttj|j|	dt| || ||	 qtd| tjj|t|t|tjr| dS d dS )NTstrictr  r  z7Field is required, and could not be found in the file: )arraysr  mask)zipr  r  r  r  r   r  initial_defaultr  r  r  r  r  r  r  repeatr  r   nullsr   StructArrayfrom_arraysr  rt  r  )r   r  r  r  field_arraysr  r   field_arrayr  r  projected_valuer   r   r   r    s2   
"
"zArrowProjectionVisitor.structr  r  c                 C  r  r   r   )r   r   r  r  r   r   r   r     rH  zArrowProjectionVisitor.fieldr  r}   
list_arrayvalue_arrayc                 C  s   t |tjtjtjfr?|d ur?t |tjrtjntj}t |tjr)tj|j	|}| 
|j|}|| |j|j}||S d S r   )rt  r  	ListArrayLargeListArrayFixedSizeListArrayr  r'  r  r  offsetsr  r  r  r   r   )r   r  r  r  list_initializerarrow_fieldr   r   r   r    s   
zArrowProjectionVisitor.listr  r   	map_arrayr  r  c                 C  s   t |tjrC|d urC|d urC| |j|}| |j|}t| |j|j| |j|j}t |tj	r>tj
|j|||S ||S d S r   )rt  r  MapArrayr  r  r  r  r  r   r  r  r  r   )r   r  r  r  r  r  r   r   r   r    s   
zArrowProjectionVisitor.mapr   r  c                 C  r  r   r   )r   r  r  r   r   r   r    rH  z ArrowProjectionVisitor.primitiveN)rU  r\   r  r   r  r   r8  r.  rJ  r   r   rs  )r   r   r  r  r   r  )r   r   r  r  r   r   )r  r\   r  r  r  r  r   r  )r  r   r  r  r  r  r   r  )r   r   r  r  r  r  r   r  )r  r}   r  r  r  r  r   r  )
r  r   r  r  r  r  r  r  r   r  )r  r   r  r  r   r  )r   r   r   r  rp   r   r  r  r  r  r   r  r  r  r   r   r   r   r  E  s&   
 

4




r  c                   @  sR   e Zd ZU ded< dddZddd	ZdddZdddZdddZdddZ	dS )r  r\   rU  c                 C  r  r   )rU  )r   rU  r   r   r   r     r  zArrowAccessor.__init__partnerr  r   c                 C  s   |S r   r   )r   r  r   r   r   r    rH  zArrowAccessor.schema_partnerpartner_structr  r   r  r   c                 C  s   |d urGz	| j |j}W n
 ty   Y d S w t|tjr#||S t|tjr0|	|
 S t|tjr;|	|S td| dt| d S )NzCannot find z! in expected partner_struct type )rU  r  r  r   rt  r  r  r   rO  rp  rQ  r  r   )r   r  r  r  r  r   r   r   field_partner  s   

zArrowAccessor.field_partnerpartner_listc                 C  s    t |tjtjtjfr|jS d S r   )rt  r  r  r  r  r  )r   r  r   r   r   list_element_partner      z"ArrowAccessor.list_element_partnerpartner_mapc                 C     t |tjr	|jS d S r   )rt  r  r  keysr   r  r   r   r   map_key_partner     zArrowAccessor.map_key_partnerc                 C  r  r   )rt  r  r  r`  r  r   r   r   map_value_partner
  r  zArrowAccessor.map_value_partnerN)rU  r\   )r  r  r   r  )r  r  r  r   r  r   r   r  )r  r  r   r  )r  r  r   r  )
r   r   r   r  r   r  r  r  r  r  r   r   r   r   r    s   
 




r  r   c                 C  s
   t | tS r   )r&   #_PRIMITIVE_TO_PHYSICAL_TYPE_VISITOR)r  r   r   r   _primitive_to_physical  r  r  c                   @  s   e Zd ZdbddZdcddZddddZdeddZdfddZdgd d!Zdhd$d%Z	did(d)Z
djd,d-Zdkd0d1Zdld4d5Zdmd8d9Zdnd<d=Zdod@dAZdpdDdEZdqdGdHZdrdKdLZdsdOdPZdtdSdTZdudWdXZdvd[d\Zdwd_d`ZdaS )xPrimitiveToPhysicalTyper  r\   r  r   r   c                 C     t d| NzExpected primitive-type, got: r   r  r   r   r   r    r  zPrimitiveToPhysicalType.schemar  r   r  r  c                 C  r  r  r  r  r   r   r   r    r  zPrimitiveToPhysicalType.structr   r   r  c                 C  r  r  r  r  r   r   r   r     r  zPrimitiveToPhysicalType.fieldr  r}   r  c                 C  r  r  r  r  r   r   r   r    r  zPrimitiveToPhysicalType.listr  r   r  r  c                 C  r  r  r  r  r   r   r   r    r  zPrimitiveToPhysicalType.mapr  ry   c                 C  r  NFIXED_LEN_BYTE_ARRAYr   r  r   r   r   r  "  rH  z#PrimitiveToPhysicalType.visit_fixedr  rw   c                 C  s    |j dkrdS |j dkrdS dS )N	   INT32   INT64r  )r  r  r   r   r   r  %  r  z%PrimitiveToPhysicalType.visit_decimalboolean_typeru   c                 C  r  )NBOOLEANr   )r   r  r   r   r   r  (  rH  z%PrimitiveToPhysicalType.visit_booleaninteger_typer|   c                 C  r  Nr  r   )r   r  r   r   r   r  +  rH  z%PrimitiveToPhysicalType.visit_integer	long_typer~   c                 C  r  Nr  r   )r   r  r   r   r   r  .  rH  z"PrimitiveToPhysicalType.visit_long
float_typerz   c                 C  r  )NFLOATr   )r   r  r   r   r   r  1  rH  z#PrimitiveToPhysicalType.visit_floatdouble_typerx   c                 C  r  )NDOUBLEr   )r   r  r   r   r   r  4  rH  z$PrimitiveToPhysicalType.visit_double	date_typerv   c                 C  r  r  r   )r   r  r   r   r   r  7  rH  z"PrimitiveToPhysicalType.visit_date	time_typer   c                 C  r  r  r   )r   r  r   r   r   r  :  rH  z"PrimitiveToPhysicalType.visit_timetimestamp_typer   c                 C  r  r  r   r   r  r   r   r   r  =  rH  z'PrimitiveToPhysicalType.visit_timestampr   c                 C  r  r  r   r  r   r   r   r  @  rH  z*PrimitiveToPhysicalType.visit_timestamp_nstimestamptz_typer   c                 C  r  r  r   )r   r  r   r   r   r  C  rH  z)PrimitiveToPhysicalType.visit_timestamptztimestamptz_ns_typer   c                 C  r  r  r   )r   r  r   r   r   r  F  rH  z,PrimitiveToPhysicalType.visit_timestamptz_nsstring_typer   c                 C  r  N
BYTE_ARRAYr   )r   r  r   r   r   r  I  rH  z$PrimitiveToPhysicalType.visit_string	uuid_typer   c                 C  r  r  r   )r   r  r   r   r   r  L  rH  z"PrimitiveToPhysicalType.visit_uuidbinary_typert   c                 C  r  r  r   )r   r  r   r   r   r  O  rH  z$PrimitiveToPhysicalType.visit_binaryunknown_typer   c                 C  r  )NUNKNOWNr   )r   r  r   r   r   r  R  rH  z%PrimitiveToPhysicalType.visit_unknownN)r  r\   r  r   r   r   )r  r   r  r  r   r   )r   r   r  r   r   r   )r  r}   r  r   r   r   )r  r   r  r   r  r   r   r   )r  ry   r   r   )r  rw   r   r   )r  ru   r   r   )r  r|   r   r   )r  r~   r   r   )r  rz   r   r   )r  rx   r   r   )r  rv   r   r   )r  r   r   r   )r  r   r   r   )r  r   r   r   )r  r   r   r   )r  r   r   r   )r  r   r   r   )r  r   r   r   )r  rt   r   r   )r  r   r   r   )r   r   r   r  r  r   r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r   r   r   r   r    s.    




















r  c                   @  sd   e Zd ZU ded< ded< ded< ddddZd ddZd!ddZd!ddZd"ddZd"ddZ	dS )#StatsAggregatorr   current_mincurrent_maxr  trunc_lengthNr  r   physical_type_stringr   r   rs  c                 C  sz   d | _ d | _|| _t|}||kr8|dkr|dks!|dkr"|dkr"n|dkr+|dv r+ntd| d| d	| || _d S )
Nr  r  r  r  r  )r  r  zUnexpected physical type z for z, expected )r  r  r  r  r   primitive_type)r   r  r  r  expected_physical_typer   r   r   r   ^  s   
zStatsAggregator.__init__r   bytesc                 C  s   t | j|S r   )r   r  r  r   r   r   	serializew  r  zStatsAggregator.serializeval
Any | Nonec                 C  2   | j d u r
|| _ d S |d urt|| j | _ d S d S r   )r  minr   r  r   r   r   
update_minz  
   

zStatsAggregator.update_minc                 C  r  r   )r  maxr  r   r   r   
update_max  r  zStatsAggregator.update_maxbytes | Nonec                 C  s@   | j d u rd S | | jd u r| j S t| jd| j| j S )N)r  )r  r   r  ro   r3  r  r   r   r   r   min_as_bytes  s   

zStatsAggregator.min_as_bytesc                 C  s   | j d u rd S | jt kr)t| j tstdt| j | j}|d ur'| |S d S | jt	 krKt| j t
s9tdt| j | j}|d urI| |S d S | jd urXt| j d| | j S )Nz'Expected the current_max to be a stringz$Expected the current_max to be bytesz cannot be truncated)r  r  r   rt  r   r   r   r  r   rt   r  r   )r   s_resultb_resultr   r   r   max_as_bytes  s   

zStatsAggregator.max_as_bytesr   )r  r   r  r   r  r  r   rs  )r   r   r   r  )r  r  r   rs  )r   r
  )
r   r   r   r  r   r   r  r	  r  r  r   r   r   r   r  Y  s   
 




r     z^truncate\((\d+)\)$c                   @  s   e Zd ZdZdZdZdZdS )MetricModeTypestruncatenonecountsfullN)r   r   r   TRUNCATENONECOUNTSFULLr   r   r   r   r    s
    r  )frozenc                   @  s"   e Zd ZU ded< dZded< dS )MetricsModer  r   Nr  length)r   r   r   r  r  r   r   r   r   r    s   
 r  modec                 C  s   |    }|dr2tt|}|r+t|d }|dk r!tdtt	j
t|d S td|  |dkr;tt	jS |dkrDtt	jS |dkrMtt	jS td|  )	Nr  r  z'Truncation length must be larger than 0zMalformed truncate: r  r  r  zUnsupported metrics mode: )stripr  
startswithrematchTRUNCATION_EXPRr   r   r  r  r  r  r  r  )r  sanitized_modemr  r   r   r   match_metrics_mode  s    



r$  c                   @  s.   e Zd ZU ded< ded< ded< ded< d	S )
StatisticsCollectorr   r  r   r  r  r  r   column_nameNr   r   r   r  r   r   r   r   r%    s
   
 r%  c                   @  sx   e Zd ZU dZded< ded< ded< ded	< d/ddZd0ddZd1ddZd2ddZd3d"d#Z	d4d(d)Z
d5d,d-Zd.S )6PyArrowStatisticsCollectorr   r   r  r\   r  dict[str, str]_propertiesr   _default_moder  r  c                 C  s0   ddl m} || _|| _| j|j|j| _d S )Nr   rh   )pyiceberg.tablerh   r  r*  r  DEFAULT_WRITE_METRICS_MODE"DEFAULT_WRITE_METRICS_MODE_DEFAULTr+  )r   r  r  rh   r   r   r   r     s   
z#PyArrowStatisticsCollector.__init__r  0Callable[[], builtins.list[StatisticsCollector]]r   "builtins.list[StatisticsCollector]c                 C     | S r   r   r  r   r   r   r    s   z!PyArrowStatisticsCollector.schemar  r   r  ?builtins.list[Callable[[], builtins.list[StatisticsCollector]]]c                 C     t tjdd |D  S )Nc                 S     g | ]}| qS r   r   rL  r  r   r   r   rN    r  z5PyArrowStatisticsCollector.struct.<locals>.<listcomp>r  r  r  r  r   r   r   r       z!PyArrowStatisticsCollector.structr   r   r  c                 C     |j | _| S r   )r  r  r  r   r   r   r        z PyArrowStatisticsCollector.fieldr  r}   r  c                 C  r9  r   )r  r  r  r   r   r   r    r:  zPyArrowStatisticsCollector.listr  r   r  r  c                 C  s$   |j | _| }|j| _| }|| S r   )r  r  r  r   r  r  r  rZ  r[  r   r   r   r    s
   zPyArrowStatisticsCollector.mapr  r   c                 C  s   ddl m} | j| j}|d u rg S t| j}| j|j	 d| }|r*t|}t
|ts?t
|ts?|jtjkr?ttj}|ddk}|rV|jtjtjfv rVttj}t| j|||dgS )Nr   r,  r   )r  r  r  r&  )r-  rh   r  r  r  r$  r+  r*  r  METRICS_MODE_COLUMN_CONF_PREFIXrt  r   rt   r   r  r  r  r  findr  r%  )r   r  rh   r&  metrics_modecol_moder  r   r   r   r  	  s$   


z$PyArrowStatisticsCollector.primitiveN)r  r\   r  r)  )r  r\   r  r0  r   r1  )r  r   r  r3  r   r1  )r   r   r  r0  r   r1  )r  r}   r  r0  r   r1  )r  r   r  r0  r  r0  r   r1  )r  r   r   r1  r   r   r   r  r  r   r  r  r   r  r  r  r   r   r   r   r(    s   
 

	



r(  table_propertiesr)  dict[int, StatisticsCollector]c                 C  s,   t | t| |}i }|D ]}|||j< q|S )aQ  
    Compute the statistics plan for all columns.

    The resulting list is assumed to have the same length and same order as the columns in the pyarrow table.
    This allows the list to map from the column index to the Iceberg column ID.
    For each element, the desired metrics collection that was provided by the user in the configuration
    is computed and then adjusted according to the data type of the column. For nested columns the minimum
    and maximum values are not computed. And truncation is only applied to text of binary strings.

    Args:
        table_properties (from pyiceberg.table.metadata.TableMetadata): The Iceberg table metadata properties.
            They are required to compute the mapping of column position to iceberg schema type id. It's also
            used to set the mode for column metrics collection
    )ra   r(  r  )r  rA  
stats_colsr  	stats_colr   r   r   compute_statistics_plan	  s
   rE  c                   @     e Zd ZU ded< ded< dS )ID2ParquetPathr   r  r   parquet_pathNr'  r   r   r   r   rG  7	     
 rG  c                   @  sh   e Zd ZU dZded< ded< d,dd	Zd-ddZd.ddZd/ddZd0dd Z	d1d%d&Z
d2d)d*Zd+S )3ID2ParquetPathVisitorr   r   r  r  r   r   rs  c                 C  s
   g | _ d S r   )r   r   r   r   r   r   A	  r  zID2ParquetPathVisitor.__init__r  r\   r  +Callable[[], builtins.list[ID2ParquetPath]]builtins.list[ID2ParquetPath]c                 C  r2  r   r   r  r   r   r   r  D	  ri  zID2ParquetPathVisitor.schemar  r   r  :builtins.list[Callable[[], builtins.list[ID2ParquetPath]]]c                 C  r4  )Nc                 S  r5  r   r   r6  r   r   r   rN  J	  r  z0ID2ParquetPathVisitor.struct.<locals>.<listcomp>r7  r  r   r   r   r  G	  r8  zID2ParquetPathVisitor.structr   r   r  c                 C  s*   |j | _| j|j | }| j  |S r   )r  r  r   r  r  r  )r   r   r  r  r   r   r   r   L	  s
   
zID2ParquetPathVisitor.fieldr  r}   r  c                 C  s(   |j | _| jd | }| j  |S )Nzlist.element)r  r  r   r  r  )r   r  r  r  r   r   r   r  U	  s
   
zID2ParquetPathVisitor.listr  r   r  r  c                 C  sP   |j | _| jd | }| j  |j| _| jd | }| j  || S )Nzkey_value.keyzkey_value.value)r  r  r   r  r  r  r;  r   r   r   r  ^	  s   

zID2ParquetPathVisitor.mapr  r   c                 C  s   t | jd| jdgS )Nr   )r  rH  )rG  r  r   r   r  r   r   r   r  n	  r  zID2ParquetPathVisitor.primitiveNrL  )r  r\   r  rK  r   rL  )r  r   r  rM  r   rL  )r   r   r  rK  r   rL  )r  r}   r  rK  r   rL  )r  r   r  rK  r  rK  r   rL  )r  r   r   rL  r@  r   r   r   r   rJ  =	  s   
 




	
	rJ  dict[str, int]c                 C  s&   i }t | t D ]}|j||j< q|S )ax  
    Compute the mapping of parquet column path to Iceberg ID.

    For each column, the parquet file metadata has a path_in_schema attribute that follows
    a specific naming scheme for nested columns. This function computes a mapping of
    the full paths to the corresponding Iceberg IDs.

    Args:
        schema (pyiceberg.schema.Schema): The current table schema.
    )ra   rJ  r  rH  )r  r  pairr   r   r   parquet_path_to_id_mappingr	  s   rP  c                   @  sd   e Zd ZU ded< ded< ded< ded< ded< ded	< d
ed< dddZdddZdddZdS ) DataFileStatisticsr   record_countzdict[int, int]column_sizesvalue_countsnull_value_countsnan_value_countszdict[int, StatsAggregator]column_aggregatesz	list[int]split_offsetsr9  rU   r  r\   r   r   c                 C  s   |j | jvrd S ||j }|j}|js td|j d|j ||j}|t|| j|j  j	|d}|t|| j|j  j
|d}||krTtd|j d|d||S )NzUCannot infer partition value from parquet metadata for a non-linear Partition Field: z with transform )r9  r   r  ztCannot infer partition value from parquet metadata as there are more than one partition values for Partition Field: z. lower_value=z, upper_value=)	source_idrW  r  r3  preserves_orderr   r  r  rY   r  r  )r   r9  r  source_fieldiceberg_transformtransform_funclower_valueupper_valuer   r   r   _partition_value	  sL   z#DataFileStatistics._partition_valuer*  rX   rr   c                   s   t  fdd|jD  S )Nc                   s   g | ]} | qS r   )r`  r  r  r   r   r   rN  	  rO  z0DataFileStatistics.partition.<locals>.<listcomp>)rr   r  )r   r*  r  r   ra  r   r4  	  r#  zDataFileStatistics.partitionrx  c              	   C  sn   i }i }| j  D ]\}}| }|d ur|||< | }|d ur%|||< q	| j| j| j| j| j||| j	dS )N)rR  rS  rT  rU  rV  lower_boundsupper_boundsrX  )
rW  r`  r  r  rR  rS  rT  rU  rV  rX  )r   rb  rc  rZ  agg_min_maxr   r   r   to_serialized_dict	  s&   z%DataFileStatistics.to_serialized_dictN)r9  rU   r  r\   r   r   )r*  rX   r  r\   r   rr   r~  )r   r   r   r  r`  r4  rg  r   r   r   r   rQ  	  s   
 

%rQ  parquet_metadatapq.FileMetaDatastats_columnsparquet_column_mappingc                 C  s  i }i }g }i }i }i }t  }	t| jD ]}
| |
}|dj}|dj}|djr8||k r8|| n|| t| j	D ]}||}||j
 }|| }||d ||  |j7  < |jttjkrjqB||d|j ||< |jr%z|j}|jr||d|j ||< |jttjkrW qB||vrzt|j|j|jj||< W n ty } zt| d|j d|d}~ww t|jtr|jdkr|jj }|j!dur|| "t#|j!|nd |j$dur|| %t#|j$|nd n|| "|j& || %|j' W qB t(j)j*y$ } z|	+| t,-| W Y d}~qBd}~ww |	+| t,-d| qBq|.  |	D ]}|/|d |/|d q8t0| j1||||||dS )aF  
    Compute and return DataFileStatistics that includes the following.

    - record_count
    - column_sizes
    - value_counts
    - null_value_counts
    - nan_value_counts
    - column_aggregates
    - split_offsets

    Args:
        parquet_metadata (pyarrow.parquet.FileMetaData): A pyarrow metadata object.
        stats_columns (Dict[int, StatisticsCollector]): The statistics gathering plan. It is required to
            set the mode for column metrics collection
        parquet_column_mapping (Dict[str, int]): The mapping of the parquet file name to the field ID
    r   z for column ''Nr  z:PyArrow statistics missing for column %d when writing file)rR  rS  rT  rU  rV  rW  rX  )2r6  r  num_row_groups	row_grouprp  data_page_offsetdictionary_page_offsethas_dictionary_pager  num_columnspath_in_schema
setdefaulttotal_compressed_sizer  r  r  r  r  
num_valuesis_stats_set
statisticshas_null_count
null_countr  r  r  physical_typer  r   r&  rt  rw   r  min_rawr  r   max_rawr	  r  r  rR  libArrowNotImplementedErrorr9  r   r   sortr  rQ  rN  )rh  rj  rk  rS  rT  rX  rU  rV  col_aggsinvalidate_colrrn  data_offsetdictionary_offsetrl  rp  r  rD  rx  r   r  r   r   r   *data_file_statistics_from_parquet_metadata	  s   







7r  ru  rj   Iterator[WriteTask]Iterator[DataFile]c                   sr   ddl m m} tjtj|j|jdtj	jdd fd	d
}t
 }|||}t|S )Nr   rf   r  property_namedefault)table_locationrA  r<  r   r   rR   c                   sN    }t| }|kr|n|t pd  fddjD }tj|}j	dj
d}|}|jdd,}tj|f|j dd}|j|d	 W d    n1 saw   Y  W d    n1 spw   Y  t|jjtjtd
}	tjdtj|tjj
rj
jnt t|d jd d d	|	  }
|
S )NFc              	     s    g | ]}t j| d dqS )T)r  rU  r]  r  r  )rR  r  r  r  rU  r<  r   r   rN  Z
  s    z5write_file.<locals>.write_parquet.<locals>.<listcomp>parquet)data_file_namepartition_keyT)r   )r  store_decimal_as_integer)row_group_sizerh  rj  rk  	contentrg  r  r4  file_size_in_bytessort_order_idr  equality_idskey_metadatar   )!r  rd   r   rx  record_batchesr  rO  rP  new_data_locationgenerate_data_file_filenamer  rq  r  pqParquetWriterwriter  writerr  rE  r  rP  rR   	from_argsrS   DATArT   r]  r4  rr   r   default_spec_idrg  )r<  r)  sanitized_schemar\  arrow_tablerg  fofosr  rx  re  rg   rd  location_providerparquet_writer_kwargsr  ru  r  r   write_parquetP
  s\   


z!write_file.<locals>.write_parquet)r<  r   r   rR   )r-  rg   rh   _get_parquet_writer_kwargsr  r   PARQUET_ROW_GROUP_LIMITPARQUET_ROW_GROUP_LIMIT_DEFAULTri   r   r   rh  r  iter)rd  ru  rb  rh   r  rk  
data_filesr   r  r   
write_fileE
  s   
7r  tblr~  target_file_sizeIterator[list[pa.RecordBatch]]c                 C  sT   ddl m} | j| j }tdt|| }| j|d}|||t|dd dd}|S )	Nr   )PackingIteratorr  )max_chunksizec                 S  s   | j S r   )nbytes)xr   r   r   rQ  
  s    z&bin_pack_arrow_table.<locals>.<lambda>F)r`  target_weightlookbackweight_funclargest_bin_first)pyiceberg.utils.bin_packingr  r  rN  r  r   rL  r   )r  r  r  avg_row_size_bytestarget_rows_per_filer\  bin_packed_record_batchesr   r   r   bin_pack_arrow_table
  s   r  provided_schemars  c              
   C  s   | j }z
t||||d}W n/ ty< } z#t|||d}t|j t| j  }tddt| d|d}~ww t	| | dS )z
    Check if the `requested_schema` is compatible with `provided_schema`.

    Two schemas are considered compatible when they are equal in terms of the Iceberg Schema type.

    Raises:
        ValueError: If the schemas are not compatible.
    r  r  r  r  z%PyArrow table contains more columns: z, z4. Update the schema first (hint, use union_by_name).N)
r  r  r   r  r6  _name_to_idr  r   rS  r_   )r  r  r  r  r  r   additional_namesr   r   r    _check_pyarrow_schema_compatible
  s*   
	r  
file_pathsIterator[str]c                 c  s$    |D ]}t | ||d}|V  qd S )N)rd  ru  rg  )parquet_file_to_data_file)rd  ru  r  rg  re  r   r   r   parquet_files_to_data_files
  s
   r  rg  c           
      C  s   |  |}| }t|}W d    n1 sw   Y  |j }| }t|||jd t|t	||j
t|d}tjdtj|tj|| | t|d |jd d d	| }	|	S )N)r  r  r  r   )ro  r   r  read_metadatar  to_arrow_schemar  r  r  rE  r  rP  rR   r  rS   r  rT   r]  r4  specr   r  rg  )
rd  ru  rg  r   input_streamrh  r  r  rx  re  r   r   r   r  
  s4   




r  uncompressedr  rq   c              	   C  s   ddl m} |j|j|j dfD ]}t| | }r&tjd| ddd q| 	|j
|j}t| |j|jd}|tkr>t}||t| |j|jdt| |j|jdt| |j|jdd	S )
Nr   r,  z.*zParquet writer option(s) z not implementedr   r   r  )compressioncompression_leveldata_page_sizedictionary_pagesize_limitwrite_batch_size)r-  rh   PARQUET_ROW_GROUP_SIZE_BYTESPARQUET_BLOOM_FILTER_MAX_BYTES*PARQUET_BLOOM_FILTER_COLUMN_ENABLED_PREFIXfnmatchro  r   r   r  PARQUET_COMPRESSIONPARQUET_COMPRESSION_DEFAULTr   PARQUET_COMPRESSION_LEVEL!PARQUET_COMPRESSION_LEVEL_DEFAULTICEBERG_UNCOMPRESSED_CODECPYARROW_UNCOMPRESSED_CODECPARQUET_PAGE_SIZE_BYTESPARQUET_PAGE_SIZE_BYTES_DEFAULTPARQUET_DICT_SIZE_BYTESPARQUET_DICT_SIZE_BYTES_DEFAULTPARQUET_PAGE_ROW_LIMITPARQUET_PAGE_ROW_LIMIT_DEFAULT)rA  rh   key_patternunsupported_keyscompression_codecr  r   r   r   r  
  sD   
r  df
write_uuiduuid.UUID | Nonecounteritertools.count[int] | NoneIterable[DataFile]c           
      #  s    ddl m}m}m  ptdpt t| j	|j
|jd|  j}t |p.d}t|j||| jd|   rXt||  fddt|D dE d	H  d	S t|  |  |d
}	t||  fdd|	D dE d	H  d	S )zConvert a PyArrow table into a DataFile.

    Returns:
        An iterable that supplies datafiles that represent the table.
    r   )rg   rh   r   r  Fr  c                 3  s$    | ]} t |d V  qdS ))r  task_idr  r  N)r  )rL  r\  )r   r  task_schemar  r   r   r  4  s
    
z+_dataframe_to_data_files.<locals>.<genexpr>)rd  ru  rb  N)r  r  r  c              	   3  s:    | ]}t |jD ]} t||jd V  q
qdS ))r  r  r  r  r  N)r  arrow_table_partitionr  r  )rL  r4  r\  r   r  r  r  r  r   r   r  >  s    
	)r-  rg   rh   r   r  countr  uuid4r   r  WRITE_TARGET_FILE_SIZE_BYTES$WRITE_TARGET_FILE_SIZE_BYTES_DEFAULTr  r  r   rx  r  r  r  is_unpartitionedr  r  _determine_partitions)
ru  r  rd  r  r  rg   rh   r  r  
partitionsr   r  r   _dataframe_to_data_files  sB   
	r  c                   @  rF  )_TablePartitionrW   r  r~  r  Nr'  r   r   r   r   r  L  rI  r  r  rX   r  Iterable[_TablePartition]c                 #  s   dd | j D }t| j |ddD ].\}}||j}||j}|du r-td|j t||}|||j	|j
|}q|||g }	|	 D ]; t fddt| j |ddD | |d}
|ttj fd	dt| j |ddD }||}t|
| d
V  qOdS )a  Based on the iceberg table partition spec, filter the arrow table into partitions with their keys.

    Example:
    Input:
    An arrow table with partition key of ['n_legs', 'year'] and with data of
    {'year': [2020, 2022, 2022, 2021, 2022, 2022, 2022, 2019, 2021],
     'n_legs': [2, 2, 2, 4, 4, 4, 4, 5, 100],
     'animal': ["Flamingo", "Parrot", "Parrot", "Dog", "Horse", "Horse", "Horse","Brittle stars", "Centipede"]}.
    The algorithm:
    - We determine the set of unique partition keys
    - Then we produce a set of partitions by filtering on each of the combinations
    - We combine the chunks to create a copy to avoid GIL congestion on the original table
    c                 S  s   g | ]}d |j  qS )_partition_rG  r  r   r   r   rN  b  rO  z)_determine_partitions.<locals>.<listcomp>Tr  Nz)Could not find column name for field ID: c                   s    g | ]\}}t | | d qS ))r   r   )rV   )rL  r   r  unique_partitionr   r   rN  p  s    )field_valuesr*  r  c                   s<   g | ]\}} | d urt | | knt | qS r   )r  r   r  )rL  r   partition_field_namer  r   r   rN  z  s    )r  r  )r  r  r  rY  r  r   _get_field_from_arrow_tableappend_columnr3  pyarrow_transformr  selectgroup_by	aggregate	to_pylistrW   ro  	functoolsreduceoperatorand_drop_columnsr  rQ  )r  r  r  partition_fieldsr4  r  r[  full_field_namer  unique_partition_fieldsr  filtered_tabler   r  r   r  R  s@   




r  r  c                 C  s<   || j v r	| | S |d}| |d  }t||dd S )a  Get a field from an Arrow table, supporting both literal field names and nested field paths.

    This function handles two cases:
    1. Literal field names that may contain dots (e.g., "some.id")
    2. Nested field paths using dot notation (e.g., "bar.baz" for nested access)

    Args:
        arrow_table: The Arrow table containing the field
        field_path: Field name or dot-separated path

    Returns:
        The field as a PyArrow Array

    Raises:
        KeyError: If the field path cannot be resolved
    r   r   r  N)column_namesr   r  struct_field)r  r  r   r  r   r   r   r    s
   

r  )r   r   r   r   )r   r   r   r   )
r  r  r  r  r  r   r  rT   r   r  )r   r   r  r{   r   r  r   )rI  r   r  r  r   r  )r  rT   r   rx  r   rW  )rd  rM   re  rR   r   rf  )r  r  r  r   r  r   r   r  )
r  r  r  r  r  r   r  rs   r   r\   )r  r  r  r   r  rs   r   r\   )r  r  r   r  )r  r  r  r  r   r   )r  r  r  r  r   r   )r  r  r  r  r   r   )r  r  r  r  r   r   )r  r  r  r  r   r   )r  r  r  r  r   r   )r  r   r  r  r   r   )r  r  r  r  r   r   )r   r   r   r  )r  rR   r(  r\   r)  r\   r*  r+  r,  r-  r   r.  )rd  rM   r<  r   r=  r   r(  r\   r)  r\   r>  r-  r  r?  r@  r   r  r  r*  r+  r  rs   r  rA  r   rB  )rd  rM   rb  rc  r   rd  )r  r\   rU  r\   r]  r  r  r   r  r   r8  r.  rJ  r   r   r  )r  r   r   r   )r  r   r   r  )r  r\   rA  r)  r   rB  )r  r\   r   rN  )rh  ri  rj  rB  rk  rN  r   rQ  )rd  rM   ru  rj   rb  r  r   r  )r  r~  r  r   r   r  )
r  r\   r  r  r  r   r  rs   r   rs  )rd  rM   ru  rj   r  r  r   r  )rd  rM   ru  rj   rg  r   r   rR   )rA  rq   r   rx  )NN)ru  rj   r  r~  rd  rM   r  r  r  r  r   r  )r  rX   r  r\   r  r~  r   r  )r  r~  r  r   r   r  (0  r   
__future__r   builtinsr  r  r   r  loggingr
  r   r  r  r   abcr   r   collections.abcr   r   r   r   dataclassesr	   enumr
   r   r   typingr   r   r   r   r   urllib.parser   rR  r  pyarrow.computecomputer  pyarrow.datasetdatasetr^  pyarrow.libpyarrow.parquetr  r  r   pyarrow._s3fsr   r   r   r   r   pyiceberg.conversionsr   pyiceberg.exceptionsr   pyiceberg.expressionsr   r   r   r   r   r   r    pyiceberg.expressions.literalsr!   pyiceberg.expressions.visitorsr"   r#   r$   r%   r&   rJ  pyiceberg.ior'   r(   r)   r*   r+   r,   r-   r.   r/   r0   r1   r2   r3   r4   r5   r6   r7   r8   r9   r:   r;   r<   r=   r>   r?   r@   rA   rB   rC   rD   rE   rF   rG   rH   rI   rJ   rK   rL   rM   rN   rO   rP   rQ   pyiceberg.manifestrR   rS   rT   pyiceberg.partitioningrU   rV   rW   rX   rY   pyiceberg.schemarZ   r[   r\   r]   r^   r_   r`   ra   rb   rc   rd   re   r-  rg   rh   pyiceberg.table.locationsri   pyiceberg.table.metadatarj   pyiceberg.table.name_mappingrk   rl   pyiceberg.table.puffinrm   pyiceberg.transformsrn   ro   pyiceberg.typedefrp   rq   rr   rs   pyiceberg.typesrt   ru   rv   rw   rx   ry   rz   r{   r|   r}   r~   r   r   r   r   r   r   r   r   r   r   r   r   r   pyiceberg.utils.concurrentr   pyiceberg.utils.configr   pyiceberg.utils.datetimer   pyiceberg.utils.decimalr   pyiceberg.utils.propertiesr   r   r   pyiceberg.utils.singletonr   pyiceberg.utils.truncater   r   r   r   	getLoggerr   r   r  rm  ICEBERG_SCHEMAr  r  r  r  r  r  r  DOCr  r   r   r   	Exceptionr   r   LocalFileSystemr   r   r  r]  r  DataTyper  r  
Expressionr  r1  rM  rV  rc  r  r  r  r  r  r  r  r  registerr  FixedSizeListTypeLargeListTypeDictionaryTypeFieldr  r  r   r  r  r  r  r  r;  ra  rm  rn  rR  Arrayr  r  r  r   r  r  r  DEFAULT_TRUNCATION_LENGTHr!  r  r  r$  r%  r  r(  rE  rG  rJ  rP  rQ  r  r  r  r  r  r  r  r  r  r  r  r  r  r   r   r   r   <module>   sp  $-<h

   3	l_y






	
2 "Z !& $&
DKL5HwH#-:<