o
    uyi3                     @  sb  U d Z ddlmZ ddlZddlZddlZddlmZmZ ddl	m
Z
 ddlmZ ddlmZmZmZmZmZmZmZ ddlmZ dd	lmZmZ eeZd
ZdZdZdZ dZ!dZ"dZ#dZ$dZ%dZ&dZ'dZ(dZ)dZ*dZ+dZ,dZ-dZ.dZ/dZ0dZ1dZ2d Z3d!Z4d"Z5d#Z6d$Z7d%Z8d&Z9d'Z:d(Z;d)Z<d*Z=d+Z>d,Z?d-Z@d.ZAd/ZBd0ZCd1ZDd2ZEd3ZFd4ZGd5ZHd6ZId7ZJd8ZKd9ZLd:ZMd;ZNd<ZOd=ZPd>ZQd?ZRd@ZSdAZTeG dBdC dCeZUeG dDdE dEeZVG dFdG dGeZWG dHdI dIeZXG dJdK dKeZYdLZZdMZ[dNZ\dOZ]e\e]ge\e]ge\e]ge\ge\ge\e]ge\ge\ge]e\ge]e\ge]e\ge]e\ge]gdPZ^dQe_dR< dbdYdZZ`d[Zadcd]d^Zbedfddd`daZcdS )ea  Base FileIO classes for implementing reading and writing table files.

The FileIO abstraction includes a subset of full filesystem implementations. Specifically,
Iceberg needs to read or write a file at a given location (as a seekable stream), as well
as check if a file exists. An implementation of the FileIO abstract base class is responsible
for returning an InputFile instance, an OutputFile instance, and deleting a file given
its location.
    )annotationsN)ABCabstractmethod)SEEK_SET)TracebackType)DictListOptionalProtocolTypeUnionruntime_checkable)urlparse)
EMPTY_DICT
Propertieszclient.regionzclient.access-key-idzclient.secret-access-keyzclient.session-tokenzclient.role-arnzclient.role-session-namezs3.anonymouszs3.endpointzs3.access-key-idzs3.secret-access-keyzs3.session-tokenz	s3.regionzs3.resolve-regionzs3.proxy-urizs3.connect-timeoutzs3.request-timeoutz	s3.signerzs3.signer.urizs3.signer.endpointzv1/aws/s3/signzs3.role-arnzs3.role-session-namezs3.force-virtual-addressingzs3.retry-strategy-implz	hdfs.hostz	hdfs.portz	hdfs.userzhdfs.kerberos_ticketzadls.connection-stringzadls.credentialzadls.account-namezadls.account-keyzadls.sas-tokenzadls.tenant-idzadls.client-idzadls.client-secretzadls.account-hostzadls.blob-storage-authorityzadls.dfs-storage-authorityzadls.blob-storage-schemezadls.dfs-storage-schemez
adls.tokenzgcs.oauth2.tokenzgcs.oauth2.token-expires-atzgcs.project-idz
gcs.accesszgcs.consistencyzgcs.cache-timeoutzgcs.requester-payszgcs.session-kwargszgcs.service.hostzgcs.default-bucket-locationzgcs.version-awarezhf.endpointzhf.tokenzpyarrow.use-large-types-on-readc                   @  sf   e Zd ZdZeddddZeefdddZed ddZed!ddZ	d"ddZ
ed#ddZdS )$InputStreama  A protocol for the file-like object returned by InputFile.open(...).

    This outlines the minimally required methods for a seekable input stream returned from an InputFile
    implementation's `open(...)` method. These methods are a subset of IOBase/RawIOBase.
    r   sizeintreturnbytesc                 C     d S N )selfr   r   r   X/home/ubuntu/maya3_transcribe/venv/lib/python3.10/site-packages/pyiceberg/io/__init__.pyreads      zInputStream.readoffsetwhencec                 C  r   r   r   )r   r   r   r   r   r   seekv   r   zInputStream.seekc                 C  r   r   r   r   r   r   r   telly   r   zInputStream.tellNonec                 C  r   r   r   r    r   r   r   close|   r   zInputStream.closec                 C     dS )zCProvide setup when opening an InputStream using a 'with' statement.Nr   r    r   r   r   	__enter__       zInputStream.__enter__exctypeOptional[Type[BaseException]]excinstOptional[BaseException]exctbOptional[TracebackType]c                 C  r$   z=Perform cleanup when exiting the scope of a 'with' statement.Nr   r   r'   r)   r+   r   r   r   __exit__   r&   zInputStream.__exit__N)r   )r   r   r   r   )r   r   r   r   r   r   r   r   r   r"   )r   r   r'   r(   r)   r*   r+   r,   r   r"   )__name__
__module____qualname____doc__r   r   r   r   r!   r#   r%   r/   r   r   r   r   r   k   s    
r   c                   @  sH   e Zd ZdZedddZedd	d
ZedddZedddZdS )OutputStreama  A protocol for the file-like object returned by OutputFile.create(...).

    This outlines the minimally required methods for a writable output stream returned from an OutputFile
    implementation's `create(...)` method. These methods are a subset of IOBase/RawIOBase.
    br   r   r   c                 C  r   r   r   )r   r8   r   r   r   write   r   zOutputStream.writer"   c                 C  r   r   r   r    r   r   r   r#      r   zOutputStream.closec                 C  r$   )zDProvide setup when opening an OutputStream using a 'with' statement.Nr   r    r   r   r   r%      r&   zOutputStream.__enter__r'   r(   r)   r*   r+   r,   c                 C  r$   r-   r   r.   r   r   r   r/      r&   zOutputStream.__exit__N)r8   r   r   r   r1   )r   r7   r2   )	r3   r4   r5   r6   r   r9   r#   r%   r/   r   r   r   r   r7      s    r7   c                   @  sT   e Zd ZdZdddZeddd	Zedd
dZedddZ	eddddZ
dS )	InputFilea  A base class for InputFile implementations.

    Args:
        location (str): A URI or a path to a local file.

    Attributes:
        location (str): The URI or path to a local file for an InputFile instance.
        exists (bool): Whether the file exists or not.
    locationstrc                 C  
   || _ d S r   	_locationr   r;   r   r   r   __init__      
zInputFile.__init__r   r   c                 C  r$   z.Return the total length of the file, in bytes.Nr   r    r   r   r   __len__   r&   zInputFile.__len__c                 C     | j S )z/The fully-qualified location of the input file.r>   r    r   r   r   r;         zInputFile.locationboolc                 C  r$   zCheck whether the location exists.

        Raises:
            PermissionError: If the file at self.location cannot be accessed due to a permission error.
        Nr   r    r   r   r   exists   r&   zInputFile.existsTseekabler   c                 C  r$   )a  Return an object that matches the InputStream protocol.

        Args:
            seekable: If the stream should support seek, or if it is consumed sequential.

        Returns:
            InputStream: An object that matches the InputStream protocol.

        Raises:
            PermissionError: If the file at self.location cannot be accessed due to a permission error.
            FileNotFoundError: If the file at self.location does not exist.
        Nr   )r   rJ   r   r   r   open   r&   zInputFile.openNr;   r<   r0   r   r<   r   rG   )T)rJ   rG   r   r   )r3   r4   r5   r6   rA   r   rD   propertyr;   rI   rK   r   r   r   r   r:      s    

r:   c                   @  sb   e Zd ZdZdddZeddd	Zedd
dZedddZ	edddZ
eddddZdS )
OutputFilea  A base class for OutputFile implementations.

    Args:
        location (str): A URI or a path to a local file.

    Attributes:
        location (str): The URI or path to a local file for an OutputFile instance.
        exists (bool): Whether the file exists or not.
    r;   r<   c                 C  r=   r   r>   r@   r   r   r   rA      rB   zOutputFile.__init__r   r   c                 C  r$   rC   r   r    r   r   r   rD      r&   zOutputFile.__len__c                 C  rE   )z0The fully-qualified location of the output file.r>   r    r   r   r   r;      rF   zOutputFile.locationrG   c                 C  r$   rH   r   r    r   r   r   rI      r&   zOutputFile.existsr:   c                 C  r$   )z9Return an InputFile for the location of this output file.Nr   r    r   r   r   to_input_file   r&   zOutputFile.to_input_fileF	overwriter7   c                 C  r$   )a   Return an object that matches the OutputStream protocol.

        Args:
            overwrite (bool): If the file already exists at `self.location`
                and `overwrite` is False a FileExistsError should be raised.

        Returns:
            OutputStream: An object that matches the OutputStream protocol.

        Raises:
            PermissionError: If the file at self.location cannot be accessed due to a permission error.
            FileExistsError: If the file at self.location already exists and `overwrite=False`.
        Nr   )r   rR   r   r   r   create   r&   zOutputFile.createNrL   r0   rM   rN   )r   r:   )F)rR   rG   r   r7   )r3   r4   r5   r6   rA   r   rD   rO   r;   rI   rQ   rS   r   r   r   r   rP      s    

rP   c                   @  sR   e Zd ZU dZded< efdddZedd
dZedddZ	edddZ
dS )FileIOz(A base class for FileIO implementations.r   
propertiesc                 C  r=   r   )rU   )r   rU   r   r   r   rA   
  rB   zFileIO.__init__r;   r<   r   r:   c                 C  r$   )zGet an InputFile instance to read bytes from the file at the given location.

        Args:
            location (str): A URI or a path to a local file.
        Nr   r@   r   r   r   	new_input  r&   zFileIO.new_inputrP   c                 C  r$   )zGet an OutputFile instance to write bytes to the file at the given location.

        Args:
            location (str): A URI or a path to a local file.
        Nr   r@   r   r   r   
new_output  r&   zFileIO.new_output!Union[str, InputFile, OutputFile]r"   c                 C  r$   )a  Delete the file at the given path.

        Args:
            location (Union[str, InputFile, OutputFile]): A URI or a path to a local file--if an InputFile instance or
                an OutputFile instance is provided, the location attribute for that instance is used as the URI to delete.

        Raises:
            PermissionError: If the file at location cannot be accessed due to a permission error.
            FileNotFoundError: When the file at the provided location does not exist.
        Nr   r@   r   r   r   delete  r&   zFileIO.deleteN)rU   r   )r;   r<   r   r:   )r;   r<   r   rP   )r;   rX   r   r"   )r3   r4   r5   r6   __annotations__r   rA   r   rV   rW   rY   r   r   r   r   rT     s   
 rT   r;   	warehousez"pyiceberg.io.pyarrow.PyArrowFileIOz pyiceberg.io.fsspec.FsspecFileIO)s3s3as3nossgsfilehdfsviewfsabfsabfsswasbwasbshfzDict[str, List[str]]SCHEMA_TO_FILE_IOio_implr<   rU   r   r   Optional[FileIO]c              
   C  s   z/|  d}t|dk rtd|  d|d d |d }}t|}t||}||W S  tyL } ztj	d|  |d W Y d }~d S d }~ww )N.   z;py-io-impl should be full path (module.CustomFileIO), got: Could not initialize FileIO: )exc_info)
splitlen
ValueErrorjoin	importlibimport_modulegetattrModuleNotFoundErrorloggerwarning)rj   rU   
path_partsmodule_name
class_namemoduleclass_excr   r   r   _import_file_ioD  s   



r   z
py-io-implpathc                 C  sX   t | }|jr*t|j }r!|D ]}t|| }r|  S qd S td|j  d S )Nz-No preferred file implementation for scheme: )r   schemeri   getr   warningswarn)r   rU   
parsed_urlfile_iosfile_io_pathfile_ior   r   r   _infer_file_io_from_schemeU  s   r   Optional[str]c              
   C  s   |  t }rt||  }rtd| |S td| |r(t||  }r(|S |  t }r8t||  }r8|S ztd ddlm	} || W S  t
yY } zt
d|d }~ww )NzLoaded FileIO: %sro   zDefaulting to PyArrow FileIOr   )PyArrowFileIOzCould not load a FileIO, please consider installing one: pip3 install "pyiceberg[pyarrow]", for more options refer to the docs.)r   
PY_IO_IMPLr   ry   infors   r   	WAREHOUSEpyiceberg.io.pyarrowr   rx   )rU   r;   rj   r   warehouse_locationr   er   r   r   load_file_ioa  s.   

r   )rj   r<   rU   r   r   rk   )r   r<   rU   r   r   rk   )rU   r   r;   r   r   rT   )dr6   
__future__r   ru   loggingr   abcr   r   ior   typesr   typingr   r   r	   r
   r   r   r   urllib.parser   pyiceberg.typedefr   r   	getLoggerr3   ry   
AWS_REGIONAWS_ACCESS_KEY_IDAWS_SECRET_ACCESS_KEYAWS_SESSION_TOKENAWS_ROLE_ARNAWS_ROLE_SESSION_NAMES3_ANONYMOUSS3_ENDPOINTS3_ACCESS_KEY_IDS3_SECRET_ACCESS_KEYS3_SESSION_TOKEN	S3_REGIONS3_RESOLVE_REGIONS3_PROXY_URIS3_CONNECT_TIMEOUTS3_REQUEST_TIMEOUT	S3_SIGNERS3_SIGNER_URIS3_SIGNER_ENDPOINTS3_SIGNER_ENDPOINT_DEFAULTS3_ROLE_ARNS3_ROLE_SESSION_NAMES3_FORCE_VIRTUAL_ADDRESSINGS3_RETRY_STRATEGY_IMPL	HDFS_HOST	HDFS_PORT	HDFS_USERHDFS_KERB_TICKETADLS_CONNECTION_STRINGADLS_CREDENTIALADLS_ACCOUNT_NAMEADLS_ACCOUNT_KEYADLS_SAS_TOKENADLS_TENANT_IDADLS_CLIENT_IDADLS_CLIENT_SECRETADLS_ACCOUNT_HOSTADLS_BLOB_STORAGE_AUTHORITYADLS_DFS_STORAGE_AUTHORITYADLS_BLOB_STORAGE_SCHEMEADLS_DFS_STORAGE_SCHEME
ADLS_TOKEN	GCS_TOKENGCS_TOKEN_EXPIRES_AT_MSGCS_PROJECT_ID
GCS_ACCESSGCS_CONSISTENCYGCS_CACHE_TIMEOUTGCS_REQUESTER_PAYSGCS_SESSION_KWARGSGCS_SERVICE_HOSTGCS_DEFAULT_LOCATIONGCS_VERSION_AWAREHF_ENDPOINTHF_TOKENPYARROW_USE_LARGE_TYPES_ON_READr   r7   r:   rP   rT   LOCATIONr   ARROW_FILE_IOFSSPEC_FILE_IOri   rZ   r   r   r   r   r   r   r   r   <module>   s   	$	
/4&

