o
    i3                     @  sV  U d Z ddlmZ ddlZddlZddlZddlmZmZ ddl	m
Z
 ddlmZ ddlmZmZ ddlmZ dd	lmZmZ eeZd
ZdZdZdZdZdZdZdZdZ dZ!dZ"dZ#dZ$dZ%dZ&dZ'dZ(dZ)dZ*dZ+dZ,dZ-d Z.d!Z/d"Z0d#Z1d$Z2d%Z3d&Z4d'Z5d(Z6d)Z7d*Z8d+Z9d,Z:d-Z;d.Z<d/Z=d0Z>d1Z?d2Z@d3ZAd4ZBd5ZCd6ZDd7ZEd8ZFd9ZGd:ZHd;ZId<ZJd=ZKd>ZLd?ZMd@ZNdAZOdBZPdCZQeG dDdE dEeZReG dFdG dGeZSG dHdI dIeZTG dJdK dKeZUG dLdM dMeZVdNZWdOZXdPZYdQZZeYeZgeYeZgeYeZgeYgeYgeYeZgeYgeYgeZeYgeZeYgeZeYgeZeYgeZgdRZ[dSe\dT< ddd[d\Z]d]Z^ded_d`Z_edfdfdbdcZ`dS )ga  Base FileIO classes for implementing reading and writing table files.

The FileIO abstraction includes a subset of full filesystem implementations. Specifically,
Iceberg needs to read or write a file at a given location (as a seekable stream), as well
as check if a file exists. An implementation of the FileIO abstract base class is responsible
for returning an InputFile instance, an OutputFile instance, and deleting a file given
its location.
    )annotationsN)ABCabstractmethod)SEEK_SET)TracebackType)Protocolruntime_checkable)urlparse)
EMPTY_DICT
Propertieszclient.profile-namezclient.regionzclient.access-key-idzclient.secret-access-keyzclient.session-tokenzclient.role-arnzclient.role-session-namezs3.profile-namezs3.anonymouszs3.endpointzs3.access-key-idzs3.secret-access-keyzs3.session-tokenz	s3.regionzs3.resolve-regionzs3.proxy-urizs3.connect-timeoutzs3.request-timeoutz	s3.signerzs3.signer.urizs3.signer.endpointzv1/aws/s3/signzs3.role-arnzs3.role-session-namezs3.force-virtual-addressingzs3.retry-strategy-implz	hdfs.hostz	hdfs.portz	hdfs.userzhdfs.kerberos_ticketzadls.connection-stringzadls.credentialzadls.account-namezadls.account-keyzadls.sas-tokenzadls.tenant-idzadls.client-idzadls.client-secretzadls.account-hostzadls.blob-storage-authorityzadls.dfs-storage-authorityzadls.blob-storage-schemezadls.dfs-storage-schemez
adls.tokenz	adls.anonzgcs.oauth2.tokenzgcs.oauth2.token-expires-atzgcs.project-idz
gcs.accesszgcs.consistencyzgcs.cache-timeoutzgcs.requester-payszgcs.session-kwargszgcs.service.hostzgcs.default-bucket-locationzgcs.version-awarezhf.endpointzhf.tokenc                   @  sf   e Zd ZdZeddddZeefdddZed ddZed!ddZ	d"ddZ
ed#ddZdS )$InputStreama  A protocol for the file-like object returned by InputFile.open(...).

    This outlines the minimally required methods for a seekable input stream returned from an InputFile
    implementation's `open(...)` method. These methods are a subset of IOBase/RawIOBase.
    r   sizeintreturnbytesc                 C     d S N )selfr   r   r   S/home/ubuntu/transcripts/venv/lib/python3.10/site-packages/pyiceberg/io/__init__.pyreadp      zInputStream.readoffsetwhencec                 C  r   r   r   )r   r   r   r   r   r   seeks   r   zInputStream.seekc                 C  r   r   r   r   r   r   r   tellv   r   zInputStream.tellNonec                 C  r   r   r   r   r   r   r   closey   r   zInputStream.closec                 C     dS )zCProvide setup when opening an InputStream using a 'with' statement.Nr   r   r   r   r   	__enter__|       zInputStream.__enter__exctypetype[BaseException] | NoneexcinstBaseException | NoneexctbTracebackType | Nonec                 C  r   z=Perform cleanup when exiting the scope of a 'with' statement.Nr   r   r"   r$   r&   r   r   r   __exit__   r!   zInputStream.__exit__N)r   )r   r   r   r   )r   r   r   r   r   r   r   r   r   r   )r   r   r"   r#   r$   r%   r&   r'   r   r   )__name__
__module____qualname____doc__r   r   r   r   r   r   r    r*   r   r   r   r   r   h   s    
r   c                   @  sH   e Zd ZdZedddZedd	d
ZedddZedddZdS )OutputStreama  A protocol for the file-like object returned by OutputFile.create(...).

    This outlines the minimally required methods for a writable output stream returned from an OutputFile
    implementation's `create(...)` method. These methods are a subset of IOBase/RawIOBase.
    br   r   r   c                 C  r   r   r   )r   r3   r   r   r   write   r   zOutputStream.writer   c                 C  r   r   r   r   r   r   r   r      r   zOutputStream.closec                 C  r   )zDProvide setup when opening an OutputStream using a 'with' statement.Nr   r   r   r   r   r       r!   zOutputStream.__enter__r"   r#   r$   r%   r&   r'   c                 C  r   r(   r   r)   r   r   r   r*      r!   zOutputStream.__exit__N)r3   r   r   r   r,   )r   r2   r-   )	r.   r/   r0   r1   r   r4   r   r    r*   r   r   r   r   r2      s    r2   c                   @  sT   e Zd ZdZdddZeddd	Zedd
dZedddZ	eddddZ
dS )	InputFilea  A base class for InputFile implementations.

    Args:
        location (str): A URI or a path to a local file.

    Attributes:
        location (str): The URI or path to a local file for an InputFile instance.
        exists (bool): Whether the file exists or not.
    locationstrc                 C  
   || _ d S r   	_locationr   r6   r   r   r   __init__      
zInputFile.__init__r   r   c                 C  r   z.Return the total length of the file, in bytes.Nr   r   r   r   r   __len__   r!   zInputFile.__len__c                 C     | j S )z/The fully-qualified location of the input file.r9   r   r   r   r   r6         zInputFile.locationboolc                 C  r   zCheck whether the location exists.

        Raises:
            PermissionError: If the file at self.location cannot be accessed due to a permission error.
        Nr   r   r   r   r   exists   r!   zInputFile.existsTseekabler   c                 C  r   )a  Return an object that matches the InputStream protocol.

        Args:
            seekable: If the stream should support seek, or if it is consumed sequential.

        Returns:
            InputStream: An object that matches the InputStream protocol.

        Raises:
            PermissionError: If the file at self.location cannot be accessed due to a permission error.
            FileNotFoundError: If the file at self.location does not exist.
        Nr   )r   rE   r   r   r   open   r!   zInputFile.openNr6   r7   r+   r   r7   r   rB   )T)rE   rB   r   r   )r.   r/   r0   r1   r<   r   r?   propertyr6   rD   rF   r   r   r   r   r5      s    

r5   c                   @  sb   e Zd ZdZdddZeddd	Zedd
dZedddZ	edddZ
eddddZdS )
OutputFilea  A base class for OutputFile implementations.

    Args:
        location (str): A URI or a path to a local file.

    Attributes:
        location (str): The URI or path to a local file for an OutputFile instance.
        exists (bool): Whether the file exists or not.
    r6   r7   c                 C  r8   r   r9   r;   r   r   r   r<      r=   zOutputFile.__init__r   r   c                 C  r   r>   r   r   r   r   r   r?      r!   zOutputFile.__len__c                 C  r@   )z0The fully-qualified location of the output file.r9   r   r   r   r   r6      rA   zOutputFile.locationrB   c                 C  r   rC   r   r   r   r   r   rD      r!   zOutputFile.existsr5   c                 C  r   )z9Return an InputFile for the location of this output file.Nr   r   r   r   r   to_input_file   r!   zOutputFile.to_input_fileF	overwriter2   c                 C  r   )a   Return an object that matches the OutputStream protocol.

        Args:
            overwrite (bool): If the file already exists at `self.location`
                and `overwrite` is False a FileExistsError should be raised.

        Returns:
            OutputStream: An object that matches the OutputStream protocol.

        Raises:
            PermissionError: If the file at self.location cannot be accessed due to a permission error.
            FileExistsError: If the file at self.location already exists and `overwrite=False`.
        Nr   )r   rM   r   r   r   create   r!   zOutputFile.createNrG   r+   rH   rI   )r   r5   )F)rM   rB   r   r2   )r.   r/   r0   r1   r<   r   r?   rJ   r6   rD   rL   rN   r   r   r   r   rK      s    

rK   c                   @  sR   e Zd ZU dZded< efdddZedd
dZedddZ	edddZ
dS )FileIOz(A base class for FileIO implementations.r   
propertiesc                 C  r8   r   )rP   )r   rP   r   r   r   r<     r=   zFileIO.__init__r6   r7   r   r5   c                 C  r   )zGet an InputFile instance to read bytes from the file at the given location.

        Args:
            location (str): A URI or a path to a local file.
        Nr   r;   r   r   r   	new_input  r!   zFileIO.new_inputrK   c                 C  r   )zGet an OutputFile instance to write bytes to the file at the given location.

        Args:
            location (str): A URI or a path to a local file.
        Nr   r;   r   r   r   
new_output  r!   zFileIO.new_outputstr | InputFile | OutputFiler   c                 C  r   )a  Delete the file at the given path.

        Args:
            location (Union[str, InputFile, OutputFile]): A URI or a path to a local file--if an InputFile instance or
                an OutputFile instance is provided, the location attribute for that instance is used as the URI to delete.

        Raises:
            PermissionError: If the file at location cannot be accessed due to a permission error.
            FileNotFoundError: When the file at the provided location does not exist.
        Nr   r;   r   r   r   delete  r!   zFileIO.deleteN)rP   r   )r6   r7   r   r5   )r6   r7   r   rK   )r6   rS   r   r   )r.   r/   r0   r1   __annotations__r
   r<   r   rQ   rR   rT   r   r   r   r   rO      s   
 rO   r6   	warehousez"pyiceberg.io.pyarrow.PyArrowFileIOz pyiceberg.io.fsspec.FsspecFileIO)s3s3as3nossgsfilehdfsviewfsabfsabfsswasbwasbshfzdict[str, list[str]]SCHEMA_TO_FILE_IOio_implr7   rP   r   r   FileIO | Nonec                 C  s   z/|  d}t|dk rtd|  d|d d |d }}t|}t||}||W S  tyG   tj	d|  t
tjd Y d S w )N.   z;py-io-impl should be full path (module.CustomFileIO), got: Could not initialize FileIO: )exc_info)splitlen
ValueErrorjoin	importlibimport_modulegetattrModuleNotFoundErrorloggerwarningisEnabledForloggingDEBUG)re   rP   
path_partsmodule_name
class_namemoduleclass_r   r   r   _import_file_io=  s   



r~   z
py-io-implpathc                 C  s\   t | }|jr,t|j }r!|D ]}t|| }r|  S qd S tjd|j dd d S )Nz-No preferred file implementation for scheme: rh   )
stacklevel)r	   schemerd   getr~   warningswarn)r   rP   
parsed_urlfile_iosfile_io_pathfile_ior   r   r   _infer_file_io_from_schemeN  s   r   
str | Nonec              
   C  s   |  t }rt||  }rtd| |S td| |r(t||  }r(|S |  t }r8t||  }r8|S ztd ddlm	} || W S  t
yY } zt
d|d }~ww )NzLoaded FileIO: %srj   zDefaulting to PyArrow FileIOr   )PyArrowFileIOzCould not load a FileIO, please consider installing one: pip3 install "pyiceberg[pyarrow]", for more options refer to the docs.)r   
PY_IO_IMPLr~   rt   inforn   r   	WAREHOUSEpyiceberg.io.pyarrowr   rs   )rP   r6   re   r   warehouse_locationr   er   r   r   load_file_ioZ  s.   

r   )re   r7   rP   r   r   rf   )r   r7   rP   r   r   rf   )rP   r   r6   r   r   rO   )ar1   
__future__r   rp   rw   r   abcr   r   ior   typesr   typingr   r   urllib.parser	   pyiceberg.typedefr
   r   	getLoggerr.   rt   AWS_PROFILE_NAME
AWS_REGIONAWS_ACCESS_KEY_IDAWS_SECRET_ACCESS_KEYAWS_SESSION_TOKENAWS_ROLE_ARNAWS_ROLE_SESSION_NAMES3_PROFILE_NAMES3_ANONYMOUSS3_ENDPOINTS3_ACCESS_KEY_IDS3_SECRET_ACCESS_KEYS3_SESSION_TOKEN	S3_REGIONS3_RESOLVE_REGIONS3_PROXY_URIS3_CONNECT_TIMEOUTS3_REQUEST_TIMEOUT	S3_SIGNERS3_SIGNER_URIS3_SIGNER_ENDPOINTS3_SIGNER_ENDPOINT_DEFAULTS3_ROLE_ARNS3_ROLE_SESSION_NAMES3_FORCE_VIRTUAL_ADDRESSINGS3_RETRY_STRATEGY_IMPL	HDFS_HOST	HDFS_PORT	HDFS_USERHDFS_KERB_TICKETADLS_CONNECTION_STRINGADLS_CREDENTIALADLS_ACCOUNT_NAMEADLS_ACCOUNT_KEYADLS_SAS_TOKENADLS_TENANT_IDADLS_CLIENT_IDADLS_CLIENT_SECRETADLS_ACCOUNT_HOSTADLS_BLOB_STORAGE_AUTHORITYADLS_DFS_STORAGE_AUTHORITYADLS_BLOB_STORAGE_SCHEMEADLS_DFS_STORAGE_SCHEME
ADLS_TOKEN	ADLS_ANON	GCS_TOKENGCS_TOKEN_EXPIRES_AT_MSGCS_PROJECT_ID
GCS_ACCESSGCS_CONSISTENCYGCS_CACHE_TIMEOUTGCS_REQUESTER_PAYSGCS_SESSION_KWARGSGCS_SERVICE_HOSTGCS_DEFAULT_LOCATIONGCS_VERSION_AWAREHF_ENDPOINTHF_TOKENr   r2   r5   rK   rO   LOCATIONr   ARROW_FILE_IOFSSPEC_FILE_IOrd   rU   r~   r   r   r   r   r   r   r   <module>   s   	
/4&

