o
    ॵi                     @   s   d dl Z d dlmZ d dlmZ d dlmZ d dlmZ d dl	m
Z
mZmZ d dlmZ d dlmZmZ d d	lmZmZmZmZ G d
d deZdS )    N)Union)DatasetBuilder)HubApi)DatasetContextConfig)CsvDatasetBuilderIterableDatasetBuilderTaskSpecificDatasetBuilder)DataDownloadConfig)DataDownloadManagerDataStreamingDownloadManager)META_FILES_FORMATDatasetPathNameDownloadModeMetaDataFieldsc                   @   s<   e Zd ZdZdefddZdeedf fddZd	d
 Z	dS )DataFilesManagerz"The modelscope data-files manager.dataset_context_configc                 C   s$  |j | _ |j| _|j| _|j| _|j| _|jj| _|jj| _|jj| _|j	| _	|j
| _
|j| _|jp4t }|j |_ |j|_|j|_|j|_tj|j| j| j | jtj|_|j	tjk}t||_t||_d|_t }|| j | j| j| _| j|_| jdd|_ ||_|| _!tj"|jdd d S )NFnum_proc   T)exist_ok)#dataset_name	namespaceversionsubset_namesplitdata_meta_configmeta_data_filesmeta_args_mapzip_data_filesdownload_modeuse_streamingconfig_kwargsinput_config_kwargsdownload_configr	   ospathjoincache_root_dirr   DATA_FILES_NAME	cache_dirr   FORCE_REDOWNLOADboolforce_downloadforce_extractuse_etagr   get_dataset_access_config
oss_configgetr   r   makedirs)selfr   r"   is_force_downloadapi r5   g/home/ubuntu/.local/lib/python3.10/site-packages/modelscope/msdatasets/data_files/data_files_manager.py__init__   sB   




zDataFilesManager.__init__returnNc                 C   s   | j r
tj| jdS | jsdS tt| j }tt| j }|du r'i }|r/|	t
jrB|| j || jj_t| jd}|S |rVtj|d tv rVt| jd}|S tdtj|d  d)z Build download manager. )r   NzDataset meta file extensions "z" is not implemented yet)r   r   get_builder_instancer   r   nextitervaluesr   r0   r   ARGS_BIG_DATAupdater!   r   r   r#   r$   splitextr   r   NotImplementedError)r2   meta_data_filemeta_args_map_filebuilderr5   r5   r6   get_data_files_builderB   sB   


z'DataFilesManager.get_data_files_builderc                 C   sZ   | j jrt| j jd}||S | j jj| j j_t| j jd}|j|| j	j
dd | S )z( Fetch the data-files from dataset-hub. )r"   F)
dl_managerr   try_from_hf_gcs)r   r   r   r"   as_streaming_datasetr   r   r
   download_and_preparer   value
as_dataset)r2   rD   rF   r5   r5   r6   fetch_data_filesb   s    
z!DataFilesManager.fetch_data_files)
__name__
__module____qualname____doc__r   r7   r   r   rE   rL   r5   r5   r5   r6   r      s
    + r   )r#   typingr   datasetsr   modelscope.hub.apir   4modelscope.msdatasets.context.dataset_context_configr   .modelscope.msdatasets.download.dataset_builderr   r   r   .modelscope.msdatasets.download.download_configr	   /modelscope.msdatasets.download.download_managerr
   r   modelscope.utils.constantr   r   r   r   objectr   r5   r5   r5   r6   <module>   s   