o
    ̳i!                     @   s  d dl Z d dlZd dlmZ d dlmZ d dlmZ d dlm	Z	m
Z
 d dlmZ d dlmZmZ d dlmZ d d	lmZ e eZg d
Ze	jde	jde	jdiZe	jh de	jddhe	jh diZ	d.ddddededB dedB dedB def
ddZ		d/dededede e eB dB ddf
dd Z!dddddd!d"e	dededededB d#ed$e
dB d%edefd&d'Z"dddd(d"e	dededededB d#edefd)d*Z#d"e	d+eddfd,d-Z$dS )0    N)Any)ApiBlobType)registry)KaggleDatasetAdapterPolarsFrameType)create_dataset_or_version)normalize_patternsupload_files_and_directories)parse_dataset_handle)EXTRA_CONSOLE_BLOCK)z.git/z*/.git/z.cache/z.huggingface/zhf-datasetszpandas-datasetszpolars-datasets>   	hf_kwargs	sql_querypandas_kwargsr   r   >   r   polars_kwargspolars_frame_typeFforce_download
output_dirhandlepathr   r   returnc                C   s@   t | }tjd|  di td tj||||d\}}|S )a  Download dataset files
    Args:
        handle: (string) the dataset handle
        path: (string) Optional path to a file within a dataset
        force_download: (bool) Optional flag to force download a dataset, even if it's cached or already in output_dir.
        output_dir: (string) Optional output directory for direct download, bypassing the default cache.
    Returns:
        A string requesting the path to the requested dataset files.
    zDownloading Dataset:  ...)extrar   )r
   loggerinfoto_urlr   r   dataset_resolver)r   r   r   r   hresolved_path_ r    F/home/ubuntu/.local/lib/python3.10/site-packages/kagglehub/datasets.pydataset_download"   s   
r"    local_dataset_dirversion_notesignore_patternsc                 C   sZ   t | }td|  d | rd}t|t|tjt	t
|dd}t||| dS )a  Upload dataset files.
    Args:
        handle: (string) the dataset handle.
        local_dataset_dir: (string) path to a file in a local directory.
        version_notes: (string) Optional to write dataset versions.
        ignore_patterns (str or list[str], optional):
            Additional ignore patterns to DEFAULT_IGNORE_PATTERNS.
            Files matching any of the patterns are not uploaded.
            Patterns are standard wildcards that can be matched by
            https://docs.python.org/3/library/fnmatch.html.
            Use a pattern ending with "/" to ignore the whole dir,
            e.g., ".git/" is equivalent to ".git/*".
    zUploading Dataset r   z1The dataset handle should not include the version)default
additional)	item_typer&   N)r
   r   r   r   is_versioned
ValueErrorr	   r   DATASETr   DEFAULT_IGNORE_PATTERNSr   )r   r$   r%   r&   r   is_versioned_exceptiontokensr    r    r!   dataset_upload=   s   
r0   r   r   r   r   r   adapterr   r   r   c                C   s   t | |||||d |dur|ntj}zG| tju r)ddl}|jj|||||dW S | tju r=ddl	}|j
j||||dW S | tju rRddl}|jj|||||dW S |  d}	t|	 tys   t|  }
d|
 d	|
 d
}t|dw )a  Load a Kaggle Dataset into a python object based on the selected adapter

    Args:
        adapter: (KaggleDatasetAdapter) The adapter used to load the dataset
        handle: (string) The dataset handle
        path: (string) Path to a file within the dataset
        pandas_kwargs:
            (dict) Optional set of kwargs to pass to the pandas `read_*` method while constructing the DataFrame(s)
        sql_query:
            (string) Argument to be used for SQLite files. Required when reading a SQLite file. See pandas documentation
            for details: https://pandas.pydata.org/docs/reference/api/pandas.read_sql_query.html
        hf_kwargs:
            (dict) Optional set of kwargs to pass to Dataset.from_pandas() while constructing the Dataset
        polars_frame_type: (PolarsFrameType) Optional control for which Frame to return: LazyFrame or DataFrame. The
            default is PolarsFrameType.LAZY_FRAME.
        polars_kwargs:
            (dict) Optional set of kwargs to pass to the polars `read_*` method while constructing the DataFrame(s)
    Returns:
        A python object based on the selected adapter:
            - 'pandas': A DataFrame (or dict[int | str, DataFrame] for Excel-like files with multiple sheets)
            - 'hugging_face': A Huggingface Dataset (via pandas)
            - 'polars':
                A LazyFrame or DataFrame (or dict[int | str, LazyFrame] / dict[int | str, DataFrame] for Excel-like
                files with multiple sheets)
    r1   Nr   r   r   r   )r   r   )r   r   r   z is not yet implementedz*The 'dataset_load' function requires the 'z3' extras. Install them with 'pip install kagglehub[z]')validate_dataset_load_argsr   
LAZY_FRAMEr   HUGGING_FACEkagglehub.hf_datasetshf_datasetsload_hf_datasetPANDASkagglehub.pandas_datasetspandas_datasetsload_pandas_datasetPOLARSkagglehub.polars_datasetspolars_datasetsload_polars_datasetNotImplementedErrorImportError.DATASET_LOAD_ADAPTER_OPTIONAL_DEPENDENCIES_MAP)r2   r   r   r   r   r   r   r   	kagglehubnot_implemented_error_messageadapter_optional_dependencyimport_warning_messager    r    r!   dataset_load_   sN   %





rI   r3   c                C   s$   t jdtdd t| |||||dS )NzaUse dataset_load() instead of load_dataset(). load_dataset() will be removed in a future version.   )
stacklevelr3   )warningswarnDeprecationWarningrI   )r2   r   r   r   r   r   r    r    r!   load_dataset   s   
rO   kwargsc                 K   sl   t |  }g }| D ]\}}||vr|d ur|| q
t|dkr$d S d|}t| d|  d d S )Nr   z, z are invalid for z and will be ignored)_DATASET_LOAD_VALID_KWARGS_MAPitemsappendlenjoinr   warning)r2   rP   valid_kwargsinvalid_kwargs_listkeyvalueinvalid_kwargsr    r    r!   r4      s   

r4   )N)r#   N)%loggingrL   typingr   &kagglesdk.blobs.types.blob_api_servicer   rE   r   kagglehub.datasets_enumsr   r   kagglehub.datasets_helpersr   kagglehub.gcs_uploadr   r	   kagglehub.handler
   kagglehub.loggerr   	getLogger__name__r   r-   r6   r:   r>   rD   rQ   strboolr"   listr0   rI   rO   r4   r    r    r    r!   <module>   s    



	

(	

X	
