o
    ˳iݛ                     @  s  U d dl mZ d dlZd dlZd dlZd dlZd dlmZmZ d dl	m
Z
 d dlmZ d dlmZ d dlmZmZmZ d dlmZmZmZmZmZ d d	lmZmZ d d
lmZ d dlmZm Z  d dl!m"Z" d dl#m$Z$ d dl%m&Z&m'Z'm(Z(m)Z)m*Z*m+Z+ d dl,m-Z- d dl.m/Z/m0Z0m1Z1 d dl2m3Z3m4Z4 d dl5m6Z6m7Z7m8Z8 d dl9m:Z:m;Z;m<Z<m=Z=m>Z> d dl?m@Z@mAZA d dlBmCZC erd dlDZEeFeGZHe@ ZIdZJdZKdZLdZMdZNdZOdZPdZQdZRdZSd ZTd!ZUd"ZVd#ZWd$ZXd%ZYeZd&ej[Z\G d'd( d(eZ]d`d/d0Z^d`d1d2Z_d`d3d4Z`d`d5d6Zad`d7d8Zbd`d9d:Zcd`d;d<Zde]jee^e]jfe_e]jge`e]jheae]jiebe]jjece]jkediZld=emd>< dadBdCZndbdcdFdGZodddIdJZpdedQdRZqdfdUdVZrdgdYdZZseG d[d\ d\ZtG d]d. d.eZuG d^d_ d_eueZvdS )h    )annotationsN)ABCabstractmethod)Callable)	dataclass)Enum)TYPE_CHECKINGAnycast)NamespaceAlreadyExistsErrorNoSuchNamespaceErrorNoSuchTableErrorNotInstalledErrorTableAlreadyExistsError)FileIOload_file_io)ManifestFile)UNPARTITIONED_PARTITION_SPECPartitionSpec)Schema)ToOutputFile)$DOWNCAST_NS_TIMESTAMP_TO_US_ON_WRITECommitTableResponseCreateTableTransactionStagedTableTableTableProperties)load_location_provider)TableMetadataTableMetadataV1new_table_metadata)UNSORTED_SORT_ORDER	SortOrder)TableRequirementTableUpdateupdate_table_metadata)
EMPTY_DICT
Identifier
PropertiesRecursiveDictTableVersion)Configmerge_config)property_as_booltokentypezpy-catalog-impliceberg
table_type	warehousemetadata_locationprevious_metadata_locationmanifestzmanifest listzprevious metadatametadataurilocationEXTERNAL_TABLEbotocore_sessionz
    (\d+)              # version number
    -                  # separator
    ([\w-]{36})        # UUID (36 characters, including hyphens)
    (?:\.\w+)?         # optional codec name
    \.metadata\.json   # file extension
    c                   @  s(   e Zd ZdZdZdZdZdZdZdZ	dS )	CatalogTyperesthivegluedynamodbsqlz	in-memorybigqueryN)
__name__
__module____qualname__RESTHIVEGLUEDYNAMODBSQL	IN_MEMORYBIGQUERY rL   rL   N/home/ubuntu/.local/lib/python3.10/site-packages/pyiceberg/catalog/__init__.pyr;   l   s    r;   namestrconfr(   returnCatalogc                 C  s   ddl m} || fi |S )Nr   )RestCatalog)pyiceberg.catalog.restrS   )rN   rP   rS   rL   rL   rM   	load_restv   s   rU   c              
   C  B   zddl m} || fi |W S  ty  } ztd|d }~ww )Nr   )HiveCatalogz@Apache Hive support not installed: pip install 'pyiceberg[hive]')pyiceberg.catalog.hiverW   ImportErrorr   )rN   rP   rW   excrL   rL   rM   	load_hive|      
r[   c              
   C  rV   )Nr   )GlueCatalogz=AWS glue support not installed: pip install 'pyiceberg[glue]')pyiceberg.catalog.gluer]   rY   r   )rN   rP   r]   rZ   rL   rL   rM   	load_glue   r\   r_   c              
   C  rV   )Nr   )DynamoDbCatalogzEAWS DynamoDB support not installed: pip install 'pyiceberg[dynamodb]')pyiceberg.catalog.dynamodbr`   rY   r   )rN   rP   r`   rZ   rL   rL   rM   load_dynamodb   r\   rb   c              
   C  rV   )Nr   )
SqlCatalogznSQLAlchemy support not installed: pip install 'pyiceberg[sql-postgres]' or pip install 'pyiceberg[sql-sqlite]')pyiceberg.catalog.sqlrc   rY   r   )rN   rP   rc   rZ   rL   rL   rM   load_sql   s   re   c              
   C  rV   )Nr   )InMemoryCatalogzESQLAlchemy support not installed: pip install 'pyiceberg[sql-sqlite]')pyiceberg.catalog.memoryrf   rY   r   )rN   rP   rf   rZ   rL   rL   rM   load_in_memory   r\   rh   c              
   C  rV   )Nr   )BigQueryMetastoreCatalogzABigQuery support not installed: pip install 'pyiceberg[bigquery]')$pyiceberg.catalog.bigquery_metastoreri   rY   r   )rN   rP   ri   rZ   rL   rL   rM   load_bigquery   r\   rk   z7dict[CatalogType, Callable[[str, Properties], Catalog]]AVAILABLE_CATALOGScatalog_propertiesr)   CatalogType | Nonec                 C  s|   | t }r4t|tr+|drtjS |drtjS |dr$tjS t	d| t	dt
| t	d|   d)a=  Try to infer the type based on the dict.

    Args:
        name: Name of the catalog.
        catalog_properties: Catalog properties.

    Returns:
        The inferred type based on the provided properties.

    Raises:
        ValueError: Raises a ValueError in case properties are missing, or the wrong type.
    httpthrift)sqlite
postgresqlz/Could not infer the catalog type from the uri: z%Expects the URI to be a string, got: z_URI missing, please provide using --uri, the config or environment variable PYICEBERG_CATALOG____URI)getURI
isinstancerO   
startswithr;   rE   rF   rI   
ValueErrorr/   upper)rN   rm   r7   rL   rL   rM   infer_catalog_type   s   



rz   
str | None
propertiesc                 K  s   | du rt  } t | }t|pi tt|}|t}|t }rF|r/t	d| d| t
| || }r?td| |S t	d| d}|rVt|trVt| }n|s]t| |}|rmt| | ttttf |S t	d| )a  Load the catalog based on the properties.

    Will look up the properties from the config, based on the name.

    Args:
        name: The name of the catalog.
        properties: The properties that are used next to the configuration.

    Returns:
        An initialized Catalog.

    Raises:
        ValueError: Raises a ValueError in case properties are missing or malformed,
            or if it could not determine the catalog based on the properties.
    NzRMust not set both catalog type and py-catalog-impl configurations, but found type z and py-catalog-impl zLoaded Catalog: %sCould not initialize Catalog: z<Could not initialize catalog with the following properties: )_ENV_CONFIGget_default_catalog_nameget_catalog_configr,   r
   r)   rt   TYPEPY_CATALOG_IMPLrx   _import_catalogloggerinforv   rO   r;   lowerrz   rl   dict)rN   r|   envrP   provided_catalog_typecatalog_implcatalogcatalog_typerL   rL   rM   load_catalog   s4   


r   	list[str]c                   C  s   t  S N)r~   get_known_catalogsrL   rL   rL   rM   list_catalogs  s   r   ior   files_to_deleteset[str]	file_typeNonec              	   C  sP   |D ]#}z|  | W q ty%   tjd| d| ttjd Y qw dS )zDelete files.

    Log warnings if failing to delete any file.

    Args:
        io: The FileIO used to delete the object.
        files_to_delete: A set of file paths to be deleted.
        file_type: The type of the file.
    zFailed to delete z file exc_infoN)deleteOSErrorr   warningisEnabledForloggingDEBUG)r   r   r   filerL   rL   rM   delete_files  s   
&r   manifests_to_deletelist[ManifestFile]c              
   C  s   i }|D ]9}|j | ddD ]/}|jj}||ds<z| | W n ty7   tjd| tt	j
d Y nw d||< qqdS )zDelete data files linked to given manifests.

    Log warnings if failing to delete any file.

    Args:
        io: The FileIO used to delete the object.
        manifests_to_delete: A list of manifest contains paths of data files to be deleted.
    F)discard_deletedzFailed to delete data file r   TN)fetch_manifest_entry	data_file	file_pathrt   r   r   r   r   r   r   r   )r   r   deleted_filesmanifest_fileentrypathrL   rL   rM   delete_data_files$  s   	 r   r   Catalog | Nonec                 C  s   z3| d}t|dk rtd| d|d d |d }}t|}t||}|| fi |W S  tyK   tj	d| t
tjd Y d S w )N.   zApy-catalog-impl should be full path (module.CustomCatalog), got: r}   r   )splitlenrx   join	importlibimport_modulegetattrModuleNotFoundErrorr   r   r   r   r   )rN   r   r|   
path_partsmodule_name
class_namemoduleclass_rL   rL   rM   r   9  s   


r   c                   @  s&   e Zd ZU ded< ded< ded< dS )PropertiesUpdateSummaryr   removedupdatedmissingN)rB   rC   rD   __annotations__rL   rL   rL   rM   r   G  s   
 r   c                   @  s<  e Zd ZU dZded< ded< dddZedeee	fdddZ
edeee	fdddZdeee	fdddZedddZeddd Zedd!d"Zedd$d%Zedd'd(Zedd*d+Zedd,d-Zedd0d1Zedd8d9Zee	fdd:d;Ze	fdd<d=Zedd>d?ZeddAdBZedddDdEZeddFdGZeddHdIZede	fddMdNZeddOdPZeddRdSZ eddTdUZ!eddVdWZ"ee#fddZd[Z$dd\d]Z%ee#fdd^d_Z&ee#fddbdcZ'e	dfddedfZ(ee)j*fddjdkZ+eddpdqZ,ddrdsZ-ddtduZ.dd|d}Z/dd~dZ0dS )rR   aT  Base Catalog for table operations like - create, drop, load, list and others.

    The catalog table APIs accept a table identifier, which is fully classified table name. The identifier can be a string or
    tuple of strings. If the identifier is a string, it is split into a tuple on '.'. If it is a tuple, it is used as-is.

    The catalog namespace APIs follow a similar convention wherein they also accept a namespace identifier that can be a string
    or tuple of strings.

    Attributes:
        name (str): Name of the catalog.
        properties (Properties): Catalog properties.
    rO   rN   r(   r|   c                 K  s   || _ || _d S r   )rN   r|   selfrN   r|   rL   rL   rM   __init___  s   
zCatalog.__init__N
identifierstr | IdentifierschemaSchema | pa.Schemar8   r{   partition_specr   
sort_orderr"   rQ   r   c                 C     dS )ab  Create a table.

        Args:
            identifier (str | Identifier): Table identifier.
            schema (Schema): Table's schema.
            location (str | None): Location for the table. Optional Argument.
            partition_spec (PartitionSpec): PartitionSpec for the table.
            sort_order (SortOrder): SortOrder for the table.
            properties (Properties): Table properties that can be a string based dictionary.

        Returns:
            Table: the created table instance.

        Raises:
            TableAlreadyExistsError: If a table with the name already exists.
        NrL   r   r   r   r8   r   r   r|   rL   rL   rM   create_tablec      zCatalog.create_tabler   c                 C  r   )a*  Create a CreateTableTransaction.

        Args:
            identifier (str | Identifier): Table identifier.
            schema (Schema): Table's schema.
            location (str | None): Location for the table. Optional Argument.
            partition_spec (PartitionSpec): PartitionSpec for the table.
            sort_order (SortOrder): SortOrder for the table.
            properties (Properties): Table properties that can be a string based dictionary.

        Returns:
            CreateTableTransaction: createTableTransaction instance.
        NrL   r   rL   rL   rM   create_table_transaction~  r   z Catalog.create_table_transactionc                 C  s4   z|  ||||||W S  ty   | | Y S w )ab  Create a table if it does not exist.

        Args:
            identifier (str | Identifier): Table identifier.
            schema (Schema): Table's schema.
            location (str | None): Location for the table. Optional Argument.
            partition_spec (PartitionSpec): PartitionSpec for the table.
            sort_order (SortOrder): SortOrder for the table.
            properties (Properties): Table properties that can be a string based dictionary.

        Returns:
            Table: the created table instance if the table does not exist, else the existing
            table instance.
        )r   r   
load_tabler   rL   rL   rM   create_table_if_not_exists  s
   z"Catalog.create_table_if_not_existsc                 C  r   )a  Load the table's metadata and returns the table instance.

        You can also use this method to check for table existence using 'try catalog.table() except NoSuchTableError'.
        Note: This method doesn't scan data stored in the table.

        Args:
            identifier (str | Identifier): Table identifier.

        Returns:
            Table: the table instance with its metadata.

        Raises:
            NoSuchTableError: If a table with the name does not exist.
        NrL   r   r   rL   rL   rM   r     r   zCatalog.load_tableboolc                 C  r   )zCheck if a table exists.

        Args:
            identifier (str | Identifier): Table identifier.

        Returns:
            bool: True if the table exists, False otherwise.
        NrL   r   rL   rL   rM   table_exists  r   zCatalog.table_existsc                 C  r   )zCheck if a view exists.

        Args:
            identifier (str | Identifier): View identifier.

        Returns:
            bool: True if the view exists, False otherwise.
        NrL   r   rL   rL   rM   view_exists  r   zCatalog.view_exists	namespacec                 C  r   )Check if a namespace exists.

        Args:
            namespace (str | Identifier): Namespace identifier.

        Returns:
            bool: True if the namespace exists, False otherwise.
        NrL   r   r   rL   rL   rM   namespace_exists  r   zCatalog.namespace_existsr3   c                 C  r   )ai  Register a new table using existing metadata.

        Args:
            identifier (Union[str, Identifier]): Table identifier for the table
            metadata_location (str): The location to the metadata

        Returns:
            Table: The newly registered table

        Raises:
            TableAlreadyExistsError: If the table already exists
        NrL   )r   r   r3   rL   rL   rM   register_table  r   zCatalog.register_tabler   c                 C  r   )zDrop a table.

        Args:
            identifier (str | Identifier): Table identifier.

        Raises:
            NoSuchTableError: If a table with the name does not exist.
        NrL   r   rL   rL   rM   
drop_table  r   zCatalog.drop_tablec                 C  r   )ap  Drop a table and purge all data and metadata files.

        Note: This method only logs warning rather than raise exception when encountering file deletion failure.

        Args:
            identifier (str | Identifier): Table identifier.

        Raises:
            NoSuchTableError: If a table with the name does not exist, or the identifier is invalid.
        NrL   r   rL   rL   rM   purge_table  r   zCatalog.purge_tablefrom_identifierto_identifierc                 C  r   )aw  Rename a fully classified table name.

        Args:
            from_identifier (str | Identifier): Existing table identifier.
            to_identifier (str | Identifier): New table identifier.

        Returns:
            Table: the updated table instance with its metadata.

        Raises:
            NoSuchTableError: If a table with the name does not exist.
        NrL   )r   r   r   rL   rL   rM   rename_table  r   zCatalog.rename_tabletablerequirementstuple[TableRequirement, ...]updatestuple[TableUpdate, ...]r   c                 C  r   )ai  Commit updates to a table.

        Args:
            table (Table): The table to be updated.
            requirements: (Tuple[TableRequirement, ...]): Table requirements.
            updates: (Tuple[TableUpdate, ...]): Table updates.

        Returns:
            CommitTableResponse: The updated metadata.

        Raises:
            NoSuchTableError: If a table with the given identifier does not exist.
            CommitFailedException: Requirement not met, or a conflict with a concurrent commit.
            CommitStateUnknownException: Failed due to an internal exception on the side of the catalog.
        NrL   )r   r   r   r   rL   rL   rM   commit_table  r   zCatalog.commit_tablec                 C  r   )aG  Create a namespace in the catalog.

        Args:
            namespace (str | Identifier): Namespace identifier.
            properties (Properties): A string dictionary of properties for the given namespace.

        Raises:
            NamespaceAlreadyExistsError: If a namespace with the given name already exists.
        NrL   r   r   r|   rL   rL   rM   create_namespace.  r   zCatalog.create_namespacec                 C  s(   z	|  || W dS  ty   Y dS w )zCreate a namespace if it does not exist.

        Args:
            namespace (str | Identifier): Namespace identifier.
            properties (Properties): A string dictionary of properties for the given namespace.
        N)r   r   r   rL   rL   rM   create_namespace_if_not_exists:  s
   z&Catalog.create_namespace_if_not_existsc                 C  r   )a  Drop a namespace.

        Args:
            namespace (str | Identifier): Namespace identifier.

        Raises:
            NoSuchNamespaceError: If a namespace with the given name does not exist.
            NamespaceNotEmptyError: If the namespace is not empty.
        NrL   r   rL   rL   rM   drop_namespaceF  r   zCatalog.drop_namespacelist[Identifier]c                 C  r   )aH  List tables under the given namespace in the catalog.

        Args:
            namespace (str | Identifier): Namespace identifier to search.

        Returns:
            List[Identifier]: list of table identifiers.

        Raises:
            NoSuchNamespaceError: If a namespace with the given name does not exist.
        NrL   r   rL   rL   rM   list_tablesR  r   zCatalog.list_tablesrL   c                 C  r   )a|  List namespaces from the given namespace. If not given, list top-level namespaces from the catalog.

        Args:
            namespace (str | Identifier): Namespace identifier to search.

        Returns:
            List[Identifier]: a List of namespace identifiers.

        Raises:
            NoSuchNamespaceError: If a namespace with the given name does not exist.
        NrL   r   rL   rL   rM   list_namespaces`  r   zCatalog.list_namespacesc                 C  r   )aG  List views under the given namespace in the catalog.

        Args:
            namespace (str | Identifier): Namespace identifier to search.

        Returns:
            List[Identifier]: list of table identifiers.

        Raises:
            NoSuchNamespaceError: If a namespace with the given name does not exist.
        NrL   r   rL   rL   rM   
list_viewsn  r   zCatalog.list_viewsc                 C  r   )a+  Get properties for a namespace.

        Args:
            namespace (str | Identifier): Namespace identifier.

        Returns:
            Properties: Properties for the given namespace.

        Raises:
            NoSuchNamespaceError: If a namespace with the given name does not exist.
        NrL   r   rL   rL   rM   load_namespace_properties|  r   z!Catalog.load_namespace_propertiesremovalsset[str] | Noner   c                 C  r   )a   Remove provided property keys and updates properties for a namespace.

        Args:
            namespace (str | Identifier): Namespace identifier.
            removals (Set[str]): Set of property keys that need to be removed. Optional Argument.
            updates (Properties): Properties to be updated for the given namespace.

        Raises:
            NoSuchNamespaceError: If a namespace with the given name does not exist.
            ValueError: If removals and updates have overlapping keys.
        NrL   )r   r   r   r   rL   rL   rM   update_namespace_properties  r   z#Catalog.update_namespace_propertiesc                 C  r   )zDrop a view.

        Args:
            identifier (str | Identifier): View identifier.

        Raises:
            NoSuchViewError: If a view with the given name does not exist.
        NrL   r   rL   rL   rM   	drop_view  r   zCatalog.drop_viewr'   c                 C  s   t | tr| S tt| dS )aA  Parse an identifier to a tuple.

        If the identifier is a string, it is split into a tuple on '.'. If it is a tuple, it is used as-is.

        Args:
            identifier (str | Identifier): an identifier, either a string or tuple of strings.

        Returns:
            Identifier: a tuple of strings.
        r   )rv   tuplerO   r   r   rL   rL   rM   identifier_to_tuple  s   zCatalog.identifier_to_tuplec                 C  s   t | d S )zExtract table name from a table identifier.

        Args:
            identifier (str | Identifier): a table identifier.

        Returns:
            str: Table name.
        r   rR   r   r   rL   rL   rM   table_name_from  s   
zCatalog.table_name_fromc                 C  s   t | dd S )zExtract table namespace from a table identifier.

        Args:
            identifier (Union[str, Identifier]): a table identifier.

        Returns:
            Identifier: Namespace identifier.
        Nr   r   r   rL   rL   rM   namespace_from  s   
zCatalog.namespace_fromerr-type[ValueError] | type[NoSuchNamespaceError]c                 C  sL   t | }t|dk r|dtdd |D r|dddd |D S )aI  Transform a namespace identifier into a string.

        Args:
            identifier (Union[str, Identifier]): a namespace identifier.
            err (Union[Type[ValueError], Type[NoSuchNamespaceError]]): the error type to raise when identifier is empty.

        Returns:
            Identifier: Namespace identifier.
           zEmpty namespace identifierc                 s  s    | ]	}|  d kV  qdS ) Nstrip.0segmentrL   rL   rM   	<genexpr>  s    z.Catalog.namespace_to_string.<locals>.<genexpr>zPNamespace identifier contains an empty segment or a segment with only whitespacer   c                 s  s    | ]}|  V  qd S r   r   r   rL   rL   rM   r    s    )rR   r   r   anyr   r   r   tuple_identifierrL   rL   rM   namespace_to_string  s   
zCatalog.namespace_to_stringc                 C  r   )z8Check if the catalog supports server-side scan planning.FrL   r   rL   rL   rM   supports_server_side_planning  s   z%Catalog.supports_server_side_planningc                 C  s,   t | }t|dkr|d|  |d S )Nr   z=Invalid database, hierarchical namespaces are not supported: r   rR   r   r   r  rL   rL   rM   identifier_to_database  s   
zCatalog.identifier_to_databaseFtype[ValueError] | type[NoSuchTableError] | type[NoSuchNamespaceError]tuple[str, str]c                 C  s4   t | }t|dkr|d|  |d |d fS )Nr   z9Invalid path, hierarchical namespaces are not supported: r   r   r  r  rL   rL   rM    identifier_to_database_and_table  s   
z(Catalog.identifier_to_database_and_tabler   c                 C  s   t i | j||S r   )r   r|   )r   r|   r8   rL   rL   rM   _load_file_io  s   zCatalog._load_file_ioformat_versionr*   r   c                 C  s   t | tr| S z(dd l}ddlm}m} t tpd}t | |jr.|| |||d} | W S W n	 t	y8   Y nw t
dt| d)Nr   )_ConvertToIcebergWithoutIDsvisit_pyarrowF)downcast_ns_timestamp_to_usr  ztype(schema)=z:, but it must be pyiceberg.schema.Schema or pyarrow.Schema)rv   r   pyarrowpyiceberg.io.pyarrowr  r  r+   get_boolr   r   rx   r/   )r   r  par  r  r  rL   rL   rM   _convert_schema_if_needed  s&   
z!Catalog._convert_schema_if_neededr   baser   r6   c                 C  sT   t |jtjtj}|r(dd |jD }dd |jD }|| t| |t dS dS )z0Delete oldest metadata if config is set to true.c                 S     h | ]}|j qS rL   metadata_filer   logrL   rL   rM   	<setcomp>      z/Catalog._delete_old_metadata.<locals>.<setcomp>c                 S  r  rL   r  r  rL   rL   rM   r    r  N)	r-   r|   r   $METADATA_DELETE_AFTER_COMMIT_ENABLED,METADATA_DELETE_AFTER_COMMIT_ENABLED_DEFAULTmetadata_logdifference_updater   METADATA)r   r  r6   delete_after_commitremoved_previous_metadata_filescurrent_metadata_filesrL   rL   rM   _delete_old_metadata  s   
zCatalog._delete_old_metadatac                 C  r   )a4  Close the catalog and release any resources.

        This method should be called when the catalog is no longer needed to ensure
        proper cleanup of resources like database connections, file handles, etc.

        Default implementation does nothing. Override in subclasses that need cleanup.
        NrL   r  rL   rL   rM   close#  r   zCatalog.closec                 C  s   | S )z`Enter the context manager.

        Returns:
            Catalog: The catalog instance.
        rL   r  rL   rL   rM   	__enter__,  s   zCatalog.__enter__exc_typetype | Noneexc_valBaseException | Noneexc_tb
Any | Nonec                 C  s   |    dS )a  Exit the context manager and close the catalog.

        Args:
            exc_type: Exception type if an exception occurred.
            exc_val: Exception value if an exception occurred.
            exc_tb: Exception traceback if an exception occurred.
        N)r(  )r   r*  r,  r.  rL   rL   rM   __exit__4  s   zCatalog.__exit__c                 C  s   | j  d| j dS )z6Return the string representation of the Catalog class.z ())rN   	__class__r  rL   rL   rM   __repr__>  s   zCatalog.__repr__rN   rO   r|   rO   )r   r   r   r   r8   r{   r   r   r   r"   r|   r(   rQ   r   r   r   r   r   r8   r{   r   r   r   r"   r|   r(   rQ   r   )r   r   rQ   r   r   r   rQ   r   r   r   rQ   r   )r   r   r3   rO   rQ   r   r   r   rQ   r   )r   r   r   r   rQ   r   )r   r   r   r   r   r   rQ   r   )r   r   r|   r(   rQ   r   )r   r   rQ   r   )r   r   rQ   r   )rL   )r   r   rQ   r(   )r   r   r   r   r   r(   rQ   r   )r   r   rQ   r'   )r   r   rQ   rO   )r   r   r   r   rQ   rO   )rQ   r   )r   r   r   r
  rQ   r  )r|   r(   r8   r{   rQ   r   )r   r   r  r*   rQ   r   )r   r   r  r   r6   r   rQ   r   )rQ   r   )rQ   rR   )r*  r+  r,  r-  r.  r/  rQ   r   )rQ   rO   )1rB   rC   rD   __doc__r   r   r   r   r!   r&   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   staticmethodr   r   r   rx   r  r  r	  r  r  r   DEFAULT_FORMAT_VERSIONr  r'  r(  r)  r0  r3  rL   rL   rL   rM   rR   N  s   
 






	


	

c                      s   e Zd ZdI fddZdeeefdJddZdKddZdLddZ	dMddZ
deeefdNd d!ZdOd*d+ZdPd0d1ZdQd4d5ZdRd6d7ZdRd8d9ZedSd?d@ZedTdCdDZedUdEdFZedVdGdHZ  ZS )WMetastoreCatalogrN   rO   r|   c                   s   t  j|fi | d S r   )superr   r   r2  rL   rM   r   D  s   zMetastoreCatalog.__init__Nr   r   r   r   r8   r{   r   r   r   r"   r(   rQ   r   c              	   C  s   t | ||||||S r   )r   _create_staged_tabler   rL   rL   rM   r   G  s   	z)MetastoreCatalog.create_table_transactionr   c                 C  &   z|  | W dS  ty   Y dS w )NTF)r   r   r   rL   rL   rM   r   T  s   
zMetastoreCatalog.table_existsr   c                 C  r@  )r   TF)r   r   r   rL   rL   rM   r   [  s   	
z!MetastoreCatalog.namespace_existsr   c           
      C  s   |  |}| | t| j|j}|j}t }g }|jD ]}|||7 }|	|j
 qdd |D }dd |jD }	t|| t||t t||t t||	t t||jht d S )Nc                 S  r  rL   )manifest_path)r   r5   rL   rL   rM   r  u  r  z/MetastoreCatalog.purge_table.<locals>.<setcomp>c                 S  r  rL   r  r  rL   rL   rM   r  v  r  )r   r   r   r|   r3   r6   set	snapshots	manifestsaddmanifest_listr!  r   r   MANIFESTMANIFEST_LISTPREVIOUS_METADATAr#  )
r   r   r   r   r6   manifest_lists_to_deleter   snapshotmanifest_paths_to_deleteprev_metadata_filesrL   rL   rM   r   j  s    



zMetastoreCatalog.purge_tabler   c                 C  s   |  |t|tjtj}| |\}}| |||}t||}	|		 }
t
|||||d}| j||
d}t||f||
|| dS )aM  Create a table and return the table instance without committing the changes.

        Args:
            identifier (str | Identifier): Table identifier.
            schema (Schema): Table's schema.
            location (str | None): Location for the table. Optional Argument.
            partition_spec (PartitionSpec): PartitionSpec for the table.
            sort_order (SortOrder): SortOrder for the table.
            properties (Properties): Table properties that can be a string based dictionary.

        Returns:
            StagedTable: the created staged table instance.
        )r8   r   r   r   r|   r|   r8   r   r6   r3   r   r   )r  intrt   r   FORMAT_VERSIONr;  r  _resolve_table_locationr    new_table_metadata_file_locationr    r  r   )r   r   r   r8   r   r   r|   database_name
table_nameproviderr3   r6   r   rL   rL   rM   r?  ~  s&   

z%MetastoreCatalog._create_staged_tablecurrent_tableTable | Nonetable_identifierr'   r   r   r   r   c           
      C  s   |D ]}| |r|jnd  qt|r|jn|  ||d u |r!|jnd d}|r/| |jd nd}t|j|j}|	|}	t
|||	| j|j|	d| dS )N)base_metadatar   enforce_validationr3   r   r   rN  rO  )validater6   r%   _empty_table_metadatar3   _parse_metadata_versionr   r8   r|   rS  r   r  )
r   rW  rY  r   r   requirementupdated_metadatanew_metadata_versionrV  new_metadata_locationrL   rL   rM   _update_and_stage_table  s$   
z(MetastoreCatalog._update_and_stage_tablecurrent_propertiesr   r   *tuple[PropertiesUpdateSummary, Properties]c                 C  s   | j ||d t|}t }t }|r&|D ]}||v r%|| || q|r:| D ]\}}|||< || q,|p>t |}	tt|pGg t|pLg t|	d}
|
|fS )N)r   r   )r   r   r   )	_check_for_overlapr   rB  poprE  items
differencer   list)r   rd  r   r   updated_propertiesr   r   keyvalueexpected_to_changeproperties_update_summaryrL   rL   rM   %_get_updated_props_and_update_summary  s&   

z6MetastoreCatalog._get_updated_props_and_update_summaryrT  rU  c                 C  s   |s|  ||S |dS )N/)_get_default_warehouse_locationrstrip)r   r8   rT  rU  rL   rL   rM   rR    s   
z(MetastoreCatalog._resolve_table_locationc                 C  sf   |  |}|t }r|d}| d| S | jt }r/|d}| d| d| S td)zeReturn the default warehouse location using the convention of `warehousePath/databaseName/tableName`.rq  GNo default path is set, please specify a location when creating a tabler   rt   LOCATIONrs  r|   WAREHOUSE_LOCATIONrx   r   rT  rU  database_propertiesdatabase_locationwarehouse_pathrL   rL   rM   rr       


z0MetastoreCatalog._get_default_warehouse_locationc                 C  sf   |  |}|t }r|d}| d| S | jt }r/|d}| d| d| S td)zqReturn the default warehouse location following the Hive convention of `warehousePath/databaseName.db/tableName`.rq  z.db/rt  ru  rx  rL   rL   rM   "_get_hive_style_warehouse_location  r|  z3MetastoreCatalog._get_hive_style_warehouse_locationr6   r   r   r   metadata_pathc                 C  s   t | || d S r   )r   table_metadata
new_output)r6   r   r~  rL   rL   rM   _write_metadata   s   z MetastoreCatalog._write_metadatar3   rP  c                 C  sX   |  dd }t| }r*z
t|d W n
 ty"   Y dS w t|dS dS )a  Parse the version from the metadata location.

        The version is the first part of the file name, before the first dash.
        For example, the version of the metadata file
        `s3://bucket/db/tb/metadata/00001-6c97e413-d51b-4538-ac70-12fe2a85cb83.metadata.json`
        is 1.
        If the path does not comply with the pattern, the version is defaulted to be -1, ensuring
        that the next metadata file is treated as having version 0.

        Args:
            metadata_location (str): The location of the metadata file.

        Returns:
            int: The version of the metadata file. -1 if the file name does not have valid version string
        rq  r   r   r   )r   TABLE_METADATA_FILE_NAME_REGEX	fullmatchuuidUUIDgrouprx   rP  )r3   	file_namefile_name_matchrL   rL   rM   r^    s   z(MetastoreCatalog._parse_metadata_versionc                 C  s:   |r| rt | t | @ }|rtd| d S d S d S )Nz%Updates and deletes have an overlap: )rB  keysrx   )r   r   overlaprL   rL   rM   rf    s   z#MetastoreCatalog._check_for_overlapc                   C  s   t jdt dS )ao  Return an empty TableMetadata instance.

        It is used to build a TableMetadata from a sequence of initial TableUpdates.
        It is a V1 TableMetadata because there will be a UpgradeFormatVersionUpdate in
        initial changes to bump the metadata to the target version.

        Returns:
            TableMetadata: An empty TableMetadata instance.
        r   )last_column_idr   )r   model_constructr   rL   rL   rL   rM   r]  &  s   z&MetastoreCatalog._empty_table_metadatar4  r5  r6  r7  r8  )r   r   r   r   r8   r{   r   r   r   r"   r|   r(   rQ   r   )
rW  rX  rY  r'   r   r   r   r   rQ   r   )rd  r(   r   r   r   r(   rQ   re  )r8   r{   rT  rO   rU  rO   rQ   rO   )rT  rO   rU  rO   rQ   rO   )r6   r   r   r   r~  rO   rQ   r   )r3   rO   rQ   rP  )r   r   r   r(   rQ   r   )rQ   r   )rB   rC   rD   r   r   r!   r&   r   r   r   r   r?  rc  rp  rR  rr  r}  r:  r  r^  rf  r]  __classcell__rL   rL   r>  rM   r<  C  s8    



,



r<  )rN   rO   rP   r(   rQ   rR   )rN   rO   rm   r)   rQ   rn   r   )rN   r{   r|   r{   rQ   rR   )rQ   r   )r   r   r   r   r   rO   rQ   r   )r   r   r   r   rQ   r   )rN   rO   r   rO   r|   r(   rQ   r   )w
__future__r   r   r   rer  abcr   r   collections.abcr   dataclassesr   enumr   typingr   r	   r
   pyiceberg.exceptionsr   r   r   r   r   pyiceberg.ior   r   pyiceberg.manifestr   pyiceberg.partitioningr   r   pyiceberg.schemar   pyiceberg.serializersr   pyiceberg.tabler   r   r   r   r   r   pyiceberg.table.locationsr   pyiceberg.table.metadatar   r   r    pyiceberg.table.sortingr!   r"   pyiceberg.table.updater#   r$   r%   pyiceberg.typedefr&   r'   r(   r)   r*   pyiceberg.utils.configr+   r,   pyiceberg.utils.propertiesr-   r  r  	getLoggerrB   r   r~   TOKENr   r   ICEBERG
TABLE_TYPErw  METADATA_LOCATIONPREVIOUS_METADATA_LOCATIONrG  rH  rI  r#  ru   rv  r9   BOTOCORE_SESSIONcompileXr  r;   rU   r[   r_   rb   re   rh   rk   rE   rF   rG   rH   rI   rJ   rK   rl   r   rz   r   r   r   r   r   r   rR   r<  rL   rL   rL   rM   <module>   s    




	
	
	

	


2


   x