o
    .i$                     @   s  d dl Z d dlmZ d dlmZmZ d dlmZmZm	Z	m
Z
mZmZmZ d dlmZ d dlZd dlmZ d dlmZmZ d dlmZmZ d d	lmZ 			dd
edee deeeef  dee def
ddZ		dd
ededee deeeef  def
ddZdddZ dS )    N)chain)OptionalUnion)
CommitInfoCommitOperationAddCommitOperationDeleteDatasetCardDatasetCardDataHfApiHfFileSystem)HfHubHTTPError)DatasetInfosDict)get_dataset_config_namesget_dataset_default_config_name)load_datasetload_dataset_builder)MetadataConfigsrepo_idrevisiontokentrust_remote_codereturnc              	   C   sd  t |   t| |||d}t d| t| |||d}t d| |r-|}|| n|d}t d| t| |||d}|j| |ddd	||d
ud}td |j	|j
}	}
|D ]$}t d| t| |||d}|j| |d| d|	|d td q]t| |	|d |sttjj|d}z|j| dd|d	d W n	 ty   Y nw t d|
  |S )a  Convert Hub [script-based dataset](dataset_script) to Parquet [data-only dataset](repository_structure), so that
    the dataset viewer will be supported.

    This function:
    - makes a copy of the script on the "main" branch into a dedicated branch called "script" (if it does not already exist)
    - creates a pull request to the Hub dataset to convert it to Parquet files (and deletes the script from the main branch)

    If in the future you need to recreate the Parquet files from the "script" branch, pass the `revision="script"` argument.

    Note that you should pass the `trust_remote_code=True` argument only if you trust the remote code to be executed locally on your machine.

    Args:
        repo_id (`str`): ID of the source Hub dataset repository, in the following format: `<user>/<dataset_name>` or
            `<org>/<dataset_name>`.
        revision (`str`, *optional*): Branch of the source Hub dataset repository. Defaults to the `"main"` branch.
        token (`bool` or `str`, *optional*): Authentication token for the Hugging Face Hub.
        trust_remote_code (`bool`, defaults to `False`): Whether you trust the remote code of the Hub script-based
            dataset to be executed locally on your machine. This option should only be set to `True` for repositories
            where you have read the code and which you trust.

            <Changed version="2.20.0">

            `trust_remote_code` defaults to `False` if not specified.

            </Changed>

    Returns:
        `huggingface_hub.CommitInfo`
    )r   r   r   z
configs = zdefault_config = r   z	config = )r   r   zConvert dataset to ParquetzConvert dataset to Parquet.TN)config_namecommit_messagecommit_description	create_prr   set_default   zAdd 'z' config data files)r   r   r   r   )r   r   endpointr   scriptdataset)branch	repo_typer   exist_okz;You can find your PR to convert the dataset to Parquet at: )printr   r   removepopr   push_to_hubtimesleeppr_revisionpr_url_delete_filesr
   datasetsconfigHF_ENDPOINTcreate_branchr   )r   r   r   r   configsdefault_configr/   r!   commit_infor+   r,   api r6   @/home/ubuntu/.local/lib/python3.10/site-packages/datasets/hub.pyconvert_to_parquet   sZ   
#

	
r8   r   c              
   C   s  g }t tjj|d}t| |||dd}t|jj  D ]}||}|j	| kr0|
t|jd qt| }	|	jddrL||	jd v rL|	jd | t|	j}
|
r}|
|d}t }|
| tjj|v rt|tjj |	jtjj< n	|	jtjjd}t|	j}|r||d}t }|| d|v r|d |	jd< n|	jdd}|
ttjjt|	 d ttjj|d}|j| |d	| d
d	| d|d|dd}td|j   |S )a=  Delete a dataset configuration from a [data-only dataset](repository_structure) on the Hub.

    Args:
        repo_id (`str`): ID of the Hub dataset repository, in the following format: `<user>/<dataset_name>` or
            `<org>/<dataset_name>`.
        config_name (`str`): Name of the dataset configuration.
        revision (`str`, *optional*): Branch to delete the configuration from. Defaults to the `"main"` branch.
        token (`bool` or `str`, *optional*): Authentication token for the Hugging Face Hub.

    Returns:
        `huggingface_hub.CommitInfo`
    r   F)r   r   r   )path_in_repoconfig_namesNdataset_info)r9   path_or_fileobjzDelete 'z' configz	' config.r!   T)
operationsr   r   r   r#   r   r   z6You can find your PR to delete the dataset config at: )!r   r.   r/   r0   r   r   
data_filesvaluesresolve_pathr   appendr   r9   r   loaddatagetr&   r   from_dataset_card_datar'   r	   to_dataset_card_dataMETADATA_CONFIGS_FIELDr   r   REPOCARD_FILENAMEstrencoder
   create_commitr%   r,   )r   r   r   r   r=   fsbuilder	data_filedata_file_resolved_pathdataset_cardmetadata_configs_dataset_card_datadataset_infosr5   r4   r6   r6   r7   delete_from_hubi   sZ   







rU   c           
      C   s  |  dd }ttjj|d}|j| dd}|rg }g }g }|D ]3}	|	dv r'q |	| dkr9|j|	| d|dd	 q |	d
krC||	 q |	drN||	 q ||	 q |r`|jd
| d|dd	 |rq|D ]}	|j|	| d|dd	 qd|r|D ]}	|j|	| d|dd	 qud S d S d S )N/r   r!   )r#   >   	README.md.gitattributesz.pyzDelete loading script)r#   r   r   zdataset_infos.jsonz Delete legacy dataset_infos.jsonz$Delete loading script auxiliary filezDelete data file)	splitr
   r.   r/   r0   list_repo_filesdelete_filerA   endswith)

dataset_idr   r   dataset_namehf_api
repo_fileslegacy_json_filepython_filesr>   filenamer6   r6   r7   r-      sn   
&r-   )NNN)NN)!r)   	itertoolsr   typingr   r   huggingface_hubr   r   r   r   r	   r
   r   huggingface_hub.utilsr   datasets.configr.   datasets.infor   datasets.inspectr   r   datasets.loadr   r   datasets.utils.metadatar   rI   boolr8   rU   r-   r6   r6   r6   r7   <module>   sL    $	
U
H