o
    ̳i                      @   s  U d dl Z d dlZd dlmZ d dlmZ d dlZd dlm	Z	m
Z
 dededejfdd	Zi d
ejdejdejdejdejdejdededededededejdejdejdejdejZeeef ed< ejejejejejdZeeef ed< dddiiZeeeeeeB f f ed < d!Ze	jdd"d#eded$e	d%ededB dejee eB ejf B fd&d'Z!d(ededB d$e	de"ee	f fd)d*Z#d+edededB de$fd,d-Z%d(ed%edefd.d/Z&dS )0    N)Callable)Any)PolarsFrameTypedataset_download	sql_querypathreturnc                 C   s>   t |}t| |}|W  d    S 1 sw   Y  d S N)sqlite3connectplread_database)r   r   conndf r   M/home/ubuntu/.local/lib/python3.10/site-packages/kagglehub/polars_datasets.pywrapped_read_database   s   $r   .csv.tsvz.json.jsonl.parquet.featherz.sqlitez.sqlite3z.dbz.db3z.s3dbz.dl3z.xlsz.xlsxz.xlsbz.xlsmz.ods%SUPPORTED_READ_FUNCTIONS_BY_EXTENSION)r   r   r   r   r   %SUPPORTED_SCAN_FUNCTIONS_BY_EXTENSION	separator	STATIC_KWARGS_BY_EXTENSIONz/Loading from a SQLite file requires a SQL query)polars_frame_typepolars_kwargshandler   r   c             
   C   s   |du ri n|}t j|d }t|||\}}t| |}z|t|||i t||}	W n tyC }
 z
d|
 }t||
d}
~
ww |t	j
u rn|t	ju rnt|	trj|	 D ]\}}t|tjrg| |	|< qW|	S |	 }	|	S )a  Creates polars LazyFrame(s) or DataFrame(s) from a file in the dataset

    Args:
        handle: (string) The dataset handle
        path: (string) Path to a file within the dataset
        polars_frame_type:
            (PolarsFrameType) Optional control for which Frame to return: LazyFrame or DataFrame. The default is
            PolarsFrameType.LAZY_FRAME.

            PolarsFrameType.LAZY_FRAME: We attempt to use a scan_* method if it's available for the provided file
            extension. Otherwise, we use a read_* method to produce a DataFrame and return the result after calling
            .lazy() on it. This satisfies the requested polars_frame_type as a LazyFrame, but does require loading the
            file in memory.

            PolarsFrameType.DATA_FRAME: We use whatever read_* method corresponds to the provided file extension and
            return the resulting DatFrame.
        polars_kwargs:
            (dict) Optional set of kwargs to pass to the polars `read_*` method while constructing the DataFrame(s)
        sql_query:
            (string) Argument to be used for SQLite files. Required when reading a SQLite file. See polars documentation
            for details: https://docs.pola.rs/api/python/stable/reference/api/polars.read_database.html

    Returns:
        - dict[int | str, LazyFrame] or dict[int | str, DataFrame] for Excel-like files with multiple sheets
        - A polars LazyFrame or DataFrame for all others

    Raises:
        ValueError: If the file extension is not supported or the file fails to read
    N   zError reading file: )osr   splitext_validate_io_functionr   _build_args_build_kwargs	Exception
ValueErrorr   
DATA_FRAME
LAZY_FRAME
isinstancedictitemsr   	DataFramelazy)r   r   r   r   r   file_extensionio_functionio_frame_typefilepathresulteread_error_messagekeyvaluer   r   r   load_polars_datasetI   s0   %





r8   r/   c                 C   sr   | t vrd|  ddt   }t|d t |  }|tu r$|s$tt|tju s-| tvr2|tjfS t|  tj	fS )NzUnsupported file extension: 'z"'. Supported file extensions are: z, )
r   joinkeysr'   r   MISSING_SQL_QUERY_ERROR_MESSAGEr   r(   r   r)   )r/   r   r   extension_error_messageread_functionr   r   r   r#      s   

r#   r=   c                 C   s   | t kr|gS ||gS r	   )r   )r=   r   r   r   r   r   r$      s   r$   c                 C   s    | t vri nt |  }i ||S r	   )r   )r/   r   static_kwargsr   r   r   r%      s   r%   )'r!   r
   collections.abcr   typingr   polarsr   kagglehub.datasetsr   r   strr-   r   read_csv	read_jsonread_ndjsonread_parquetread_ipc
read_excelr   r+   __annotations__scan_csvscan_ndjsonscan_parquetscan_ipcr   r   boolr;   r)   intr8   tupler#   listr$   r%   r   r   r   r   <module>   s   
 	
!(

C

