o
    8wi                      @   s   d dl Z d dlZd dlZd dlZd dlZd dlmZ d dlmZm	Z	 d dl
Zd dlZd dlZd dlZd dlmZmZ ejjeZdd Zdedee d	ee d
edeej deeeef fddZG dd dejZdS )    N)Path)OptionalUnion)camelcase_to_snakecasefilenames_for_dataset_splitc                 C   s   t |  jS N)r   statst_mtime)cached_directory_path r   b/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/datasets/packaged_modules/cache/cache.py_get_modification_time   s   r   dataset_nameconfig_name	cache_dirconfig_kwargscustom_featuresreturnc              
      s  |s srt |pdj d}nd }tjt|pt jj}| 	d}t
|d |d< d|}tj||} fddttj||pIdddD }	|	sd	d ttj|dddD }	td
d |	D }
td|  |ryd| dnd |
rd|
  d tt|	tdd }|jdd  \}} fddttj|d||D }|st|dkrtd|  dd| d|  d|d  d	|jd }d| d| dtt| d }t| |||fS )!Ndefaultr   r   /___c                    sN   g | ]#}t j|r% s#s#tt|d jddd t|jd kr|qS zdataset_info.jsonzutf-8)encodingr   ospathisdirjsonloadsr   	read_textparts.0r
   r   r   r   
<listcomp>*   s    
	z'_find_hash_in_cache.<locals>.<listcomp>*c                 S   s   g | ]
}t j|r|qS r   )r   r   r   r$   r   r   r   r&   8   s    
c                 S   s   h | ]	}t |jd  qS )r   )r   r#   r$   r   r   r   	<setcomp>>   s    z&_find_hash_in_cache.<locals>.<setcomp>zCouldn't find cache for z for config '' z!
Available configs in the cache: )keyc                    sX   g | ](}t j|r* s#s#tt|d jddd t|jd krt|jd qS r   r   )r%   _cached_directory_pathr   r   r   r&   H   s    
   zThere are multiple 'z' configurations in the cache: z, zR
Please specify which configuration to reload from the cache, e.g.
	load_dataset('z', 'r   z')r   z/Found the latest cached dataset configuration 'z' at z (last modified on z).)datasetsBuilderConfigcreate_config_idr   r   
expanduserstrconfigHF_DATASETS_CACHEsplitr   joinglobsorted
ValueErrorr   r   r#   lentimectimeloggerwarning)r   r   r   r   r   	config_idnamespace_and_dataset_namecached_relative_path#cached_datasets_directory_path_rootcached_directory_pathsavailable_configsr
   versionhashother_configswarning_msgr   r   r   _find_hash_in_cache   sh   




rJ   c                       s   e Zd Z														ddee dee dee dee dee dee d	eej d
eej deee	ef  dee deeee
eejjf  dee dee dee f fddZdejfddZddee fddZdd Zdd Z  ZS )CacheN0.0.0r   r   r   rF   rG   	base_pathinfofeaturestokenrepo_id
data_filesdata_dirstorage_optionswriter_batch_sizec                    s   |
d u r|d u rt d|d ur||d< |d ur||d< |dkr3|dkr3t|
p(|||||d\}}}n|dks;|dkr?tdt j||||||||	|
||d d S )NzArepo_id or dataset_name is required for the Cache dataset builderrR   rS   auto)r   r   r   r   r   z0Pass both hash='auto' and version='auto' instead)r   r   r   rF   rG   rM   rN   rP   rQ   rT   rU   )r:   rJ   NotImplementedErrorsuper__init__)selfr   r   r   rF   rG   rM   rN   rO   rP   rQ   rR   rS   rT   rU   r   	__class__r   r   rY   c   s:   
zCache.__init__r   c                 C   s   t  S r   )r/   DatasetInforZ   r   r   r   _info   s   zCache._info
output_dirc                 O   sR   t j| jstd| j d| j |d ur%|| jkr't| j| d S d S d S )NzCache directory for z doesn't exist at )r   r   existsr   r:   r   shutilcopytree)rZ   r`   argskwargsr   r   r   download_and_prepare   s
   zCache.download_and_preparec                    sL   t  jjtjrt jj }ntd j d j	  fdd|D S )NzMissing splits info for z in cache directory c                    s6   g | ]}t j|jd t j j|jd|jdidqS )filesarrow)r   r6   filetype_suffixshard_lengths)name
gen_kwargs)r/   SplitGeneratorrk   r   r   r   rj   )r%   
split_infor^   r   r   r&      s    z+Cache._split_generators.<locals>.<listcomp>)

isinstancerN   splitsr/   	SplitDictlistvaluesr:   r   r   )rZ   
dl_managersplit_infosr   r^   r   _split_generators   s   
zCache._split_generatorsc           	      c   s    t |D ]W\}}t|dF}z t tj|D ]\}}tj|g}| d| |fV  qW n tyL } zt	d| dt
| d|   d }~ww W d    n1 sWw   Y  qd S )Nrb_zFailed to read file 'z' with error z: )	enumerateopenpaipcopen_streamTablefrom_batchesr:   r>   errortype)	rZ   rg   file_idxfilef	batch_idxrecord_batchpa_tableer   r   r   _generate_tables   s"    zCache._generate_tables)NNNrL   NNNNNNNNNNr   )__name__
__module____qualname__r   r3   r/   r]   Featuresr   boolrr   dictrR   DataFilesDictintrY   r_   rf   rv   r   __classcell__r   r   r[   r   rK   b   s`    	
0rK   )r8   r    r   rb   r<   pathlibr   typingr   r   pyarrowr{   r/   datasets.configdatasets.data_filesdatasets.namingr   r   utilslogging
get_loggerr   r>   r   r3   r   r   tuplerJ   ArrowBasedBuilderrK   r   r   r   r   <module>   s8    
J