o
    5tiU                     @   s  d Z ddlZddlZddlZddlZddlZddlZddlZddlZddl	Z	ddl
Z
ddlZddlmZmZ ddlmZ ddlmZ ddlmZ ddlmZmZmZmZmZ ddlmZ ddlZdd	lmZ dd
l m!Z! ddl"m#Z# ddl$m%Z%m&Z& ddl$m'Z' e'(e)Z*dZ+ede,eZ-dideeee,f  de,fddZ.de,de/fddZ0de,de/fddZ1de,de/fddZ2de-de-fddZ3djd"e,d#e,de,fd$d%Z4	dkd"e,d#e,deej5e6f fd&d'Z7dide,d(e,d)ee, de,fd*d+Z8d,e,d-e,de,fd.d/Z9d0e,de,fd1d2Z:did3d4Z;	dide,fd5d6Z<did7eee,e=f  de,fd8d9Z>did:e,d;eee,e/f  de=fd<d=Z?G d>d? d?e@ZAdid@ee, fdAdBZBddejCjDdddCdfdDeeE dEee= dFeejCjD dGeeeF  dHeFdIeGdJeGfdKdLZH		C		MdldNe,d:e,dHeFdIeGdJeGdOeGdej5fdPdQZIdmdRdSZJdmdTdUZK	dndWdXZL	dodej5fdYdZZMdid:e,d;eee,e/f  dee, fd[d\ZN		 		]	 		 	!			dpde,fd^d_ZOd`da ZPdbdc ZQddde ZRdfejSfdgdhZTdS )qz
Utilities for working with the local dataset cache.
This file is adapted from the AllenNLP library at https://github.com/allenai/allennlp
Copyright by the AllenNLP authors.
    N)closingcontextmanager)partial)sha256)Path)ListOptionalTypeTypeVarUnion)urlparse)DownloadConfig)ExtractManager)FileLock   )__version__config   )logging.incompleteThf_modules_cachereturnc                 C   s   | dur| nt j} t| } | tjvrEtj|  tj| dd tjtj	| dsEt
tj	| dd	 W d   | S 1 s@w   Y  | S )z
    Add hf_modules_cache to the python path.
    By default hf_modules_cache='~/.cache/huggingface/modules'.
    It can also be set with the environment variable HF_MODULES_CACHE.
    This is used to add modules such as `datasets_modules`
    NTexist_okz__init__.pyw)r   HF_MODULES_CACHEstrsyspathappendosmakedirsexistsjoinopen)r    r&   M/home/ubuntu/.local/lib/python3.10/site-packages/evaluate/utils/file_utils.pyinit_hf_modules)   s   

r(   url_or_filenamec                 C   s   t | }|jdv S )N)httphttpss3gshdfsftp)r   scheme)r)   parsedr&   r&   r'   is_remote_url<   s   
r2   c                 C   s$   t | jdkptjt | jd S )N z:/)r   r0   r!   r   ismountr)   r&   r&   r'   is_local_pathA   s   $r6   c                 C   s   t | jdkotj|  S )Nr3   )r   r0   r!   r   isabsr5   r&   r&   r'   is_relative_pathH   s   r8   r   c                 C   s6   t jt jt jt| }t| trt|S |S )z'Convert relative path to absolute path.)r!   r   abspath
expanduser
expandvarsr   
isinstancer   )r   abs_path_strr&   r&   r'   relative_to_absolute_pathL   s    r>   FT
identifierfilenamec                 C   s6   |r|rt jnt j}n|rt jnt j}d|| |fS N/)r   "CLOUDFRONT_DATASETS_DISTRIB_PREFIXS3_DATASETS_BUCKET_PREFIX!CLOUDFRONT_METRICS_DISTRIB_PREFIXS3_METRICS_BUCKET_PREFIXr$   )r?   r@   use_cdndatasetendpointr&   r&   r'   hf_bucket_urlR   s   rJ   c                 C   s   t t| |||d|dS )N)r?   r@   rG   rH   )max_retries)	http_headrJ   )r?   r@   rG   rH   rK   r&   r&   r'   
head_hf_s3Z   s   rM   namerevisionc                 C   s   |pt j}t jj| ||dS )Nr   rN   rO   )r   HUB_DEFAULT_VERSIONHUB_EVALUATE_URLformatrP   r&   r&   r'   
hf_hub_urlc   s   
rT   	base_name	pathnamesc                 G   s8   t | rtj| gdd |D R  S t| g|R   S )Nc                 s   s(    | ]}t |tjd d V  qdS rB   N)r   replacer!   seplstrip).0pathnamer&   r&   r'   	<genexpr>j   s   & z#url_or_path_join.<locals>.<genexpr>)r2   	posixpathr$   r   as_posix)rU   rV   r&   r&   r'   url_or_path_joinh   s   r`   url_or_pathc                 C   s&   t | r| d | d S tj| S rA   )r2   rindexr!   r   dirname)ra   r&   r&   r'   url_or_path_parento   s   rd   c                 C   sV   |  d}t|}| }|r | d}t|}|d|  7 }| dr)|d7 }|S )a  
    Convert `url` into a hashed filename in a repeatable way.
    If `etag` is specified, append its hash to the url's, delimited
    by a period.
    If the url ends with .h5 (Keras HDF5 weights) adds '.h5' to the name
    so that TF 2.0 can identify it as a HDF5 file
    (see https://github.com/tensorflow/tensorflow/blob/00fad90125b18b80fe054de1055770cfb8fe4ba3/tensorflow/python/keras/engine/network.py#L1380)
    utf-8.z.py)encoder   	hexdigestendswith)urletag	url_bytesurl_hashr@   
etag_bytes	etag_hashr&   r&   r'   hash_url_to_filenamev   s   
	

rp   c                 K   s   |du rt d	i |}|jptj}t|trt|}t| tr#t| } t| r@t| ||j	|j
|j|j|j|j|j|j|jd}ntj| rI| }nt| rUtd|  dtd|  d|du rc|S |jrrt|jdj||jd}|S )
a  
    Given something that might be a URL (or might be a local path),
    determine which. If it's a URL, download the file and cache it, and
    return the path to the cached file. If it's already a local path,
    make sure the file exists and then return the path.

    Return:
        Local path (string)

    Raises:
        FileNotFoundError: in case of non-recoverable file
            (non-existent or no cache on disk)
        ConnectionError: in case of unreachable url
            and no cache on disk
        ValueError: if it couldn't parse the url or filename correctly
        requests.exceptions.ConnectionError: in case of internet connection issue
    N)
	cache_dirforce_downloadproxiesresume_download
user_agentlocal_files_onlyuse_etagrK   tokendownload_desczLocal file z doesn't existzunable to parse z as a URL or as a local path)rq   )force_extractr&   )r   rq   r   DOWNLOADED_EVALUATE_PATHr<   r   r   r2   get_from_cacherr   rs   rt   ru   rv   rw   rK   rx   ry   r!   r   r#   r6   FileNotFoundError
ValueErrorextract_compressed_filer   extractrz   )r)   download_configdownload_kwargsrq   output_pathr&   r&   r'   cached_path   sB   

r   ru   c                 C   s   dt  dtj }|dtj 7 }tjr|dtj 7 }tjr'|dtj 7 }tjr2|dtj	 7 }t
| trJ|dddd	 |  D  7 }|S t
| trU|d|  7 }|S )
Nz	datasets/z	; python/z
; pyarrow/z; torch/z; tensorflow/z; jax/z; c                 s   s"    | ]\}}| d | V  qdS rW   r&   )r[   kvr&   r&   r'   r]      s     z*get_datasets_user_agent.<locals>.<genexpr>)r   r   
PY_VERSIONPYARROW_VERSIONTORCH_AVAILABLETORCH_VERSIONTF_AVAILABLE
TF_VERSIONJAX_AVAILABLEJAX_VERSIONr<   dictr$   itemsr   )ru   uar&   r&   r'   get_datasets_user_agent   s   
"
r   rj   rx   c                 C   s.   i }|  tjrddlm} ||dtd}|S )zHandle the HF authenticationr   )build_hf_headersevaluate)rx   library_namelibrary_version)
startswithr   HF_ENDPOINThuggingface_hub.utilsr   r   )rj   rx   headersr   r&   r&   r'   "get_authentication_headers_for_url   s
   r   c                   @   s   e Zd ZdS )OfflineModeIsEnabledN)__name__
__module____qualname__r&   r&   r&   r'   r      s    r   msgc                 C   s(   t jrt| du rddt|  dS )zaRaise an OfflineModeIsEnabled error (subclass of ConnectionError) if HF_EVALUATE_OFFLINE is True.NzOffline mode is enabled.zOffline mode is enabled. )r   HF_EVALUATE_OFFLINEr   r   )r   r&   r&   r'   !_raise_if_offline_mode_is_enabled   s   

r         ?	func_argsfunc_kwargs
exceptionsstatus_codesrK   base_wait_timemax_wait_timec                 C   s   |pd}|pi }d}	 z| |i |W S  |yU }	 z6||ks'|r)|	j j|vr)|	t||d|  }
t|  d|
 d||  d t|
 |d7 }W Y d }	~	nd }	~	ww q)	Nr&   r   Tr   z timed out, retrying in zs... []r   )responsestatus_codeminloggerinfotimesleep)funcr   r   r   r   rK   r   r   retryerr
sleep_timer&   r&   r'   _retry   s    
 
r         $@methodtimeoutc                 K   s   t d|  d\}}|sd|d7 }ztjd|  ||d|}	d}W n> tjjtjjfya }
 z,||kr7|
t|  d| d||  d t	||d	|d   }t
| W Y d
}
~
nd
}
~
ww |r|	S )a  Wrapper around requests to retry in case it fails with a ConnectTimeout, with exponential backoff.

    Note that if the environment variable HF_EVALUATE_OFFLINE is set to 1, then a OfflineModeIsEnabled error is raised.

    Args:
        method (str): HTTP method, such as 'GET' or 'HEAD'.
        url (str): The URL of the resource to fetch.
        max_retries (int): Maximum number of retries, defaults to 0 (no retries).
        base_wait_time (float): Duration (in seconds) to wait before retrying the first time. Wait time between
            retries then grows exponentially, capped by max_wait_time.
        max_wait_time (float): Maximum amount of time between two retries, in seconds.
        **params: Params to pass to :obj:`requests.request`.
    Tried to reach )r   Fr   )r   rj   r   Tz request to z timed out, retrying... [r   r   Nr&   )r   requestsrequestupperr   ConnectTimeoutConnectionErrorr   r   r   r   r   )r   rj   rK   r   r   r   paramstriessuccessr   r   r   r&   r&   r'   _request_with_retry  s"    r   c                 C   sl   t d|   z$ttjj| |d}|d W d    W dS 1 s$w   Y  W dS  ty5   Y dS w )Nr   r   r   FT)r   r   urllibr   urlopenread	Exception)rj   r   rr&   r&   r'   ftp_head9  s   r   c              
   C   s   t d|   z1td|  d|j  ttjj| |d}t	|| W d    W d S 1 s1w   Y  W d S  tj
jyK } zt|d d }~ww )Nr   zGetting through FTP z into r   )r   r   r   rN   r   r   r   r   shutilcopyfileobjerrorURLErrorr   )rj   	temp_filer   r   er&   r&   r'   ftp_getC  s   &
r         Y@c	              
   C   s   t |pi }t|dd|d< |dkrd|dd|d< td| d	|||||d
}	|	jdkr1d S |	jd}
|
d urA|t|
 nd }tj	dd	|||pLdt
  d}|	jddD ]}|t| || qZW d    d S 1 stw   Y  d S )N
user-agentru   r   zbytes=d-RangeGETT)r   rj   streamrs   r   cookiesrK   r   i  zContent-LengthBDownloading)unit
unit_scaletotalinitialdescdisablei   )
chunk_size)copydeepcopyr   getr   r   r   intr   tqdmis_progress_bar_enablediter_contentupdatelenwrite)rj   r   rs   resume_sizer   r   r   rK   r   r   content_lengthr   progresschunkr&   r&   r'   http_getM  s@   

"r   c              
   C   s>   t |pi }t|dd|d< td| ||||||d}|S )Nr   r   HEAD)r   rj   rs   r   r   allow_redirectsr   rK   )r   r   r   r   r   )rj   rs   r   r   r   r   rK   r   r&   r&   r'   rL   o  s   
rL   c                 C   s@   t | |d}t| |dd}|  |jr|jd}|S d }|S )Nrx      )r   rK   ETag)r   rL   raise_for_statusokr   r   )rj   rx   r   r   rk   r&   r&   r'   request_etag  s   r   d   c                    s~  |du rt j}t|trt|}tj|dd d}d}d}d}d}t| dd}tj	||}tj
|r:|s:|s:|S t| |
d}|durH||d< |s| drTt| }zt| d|||	|d	}|jd
kr|rl|jdnd}|j D ]\}}|drd| v r| d| 7 } |j}qsd}d| v rd| vr| d7 } nN|jdkrd| v s|jdkrd| v s|jdkrtd| std|js|jdkrd| v rd}td|   n|jdkrt j| v r|
du rtd|  dW n ttjjfy } z|}W Y d}~nd}~ww |s_tj
|r|s|S |rtd| d|dur.|jdkr.td|  t d |   |durGtd!|  d"t!| d#|durXtd!|  d$|j d#td!|  t| |}tj	||}tj
|rw|sw|S |d% }t"| |r|d&  t# fd'd(}|}tj
 rt$ j%}nd)}n
t&t'j(|dd*}d)}| +}t|  d+|j)  | drt*| | nt+| ||||||	|d, W d   n	1 sw   Y  td-|  d.|  t,-|j)| td/|  | |d0}|d1 }t.|d2d3d4}t/0|| W d   n1 sw   Y  W d   |S W d   |S 1 s8w   Y  |S )5a  
    Given a URL, look for the corresponding file in the local cache.
    If it's not there, download it. Then return the path to the cached file.

    Return:
        Local path (string)

    Raises:
        FileNotFoundError: in case of non-recoverable file
            (non-existent or no cache on disk)
        ConnectionError: in case of unreachable url
            and no cache on disk
    NTr   F)rk   r   r   zftp://)r   rs   r   rK   r      r   download_warningzdrive.google.comz	&confirm=zconfirm=z
&confirm=ti  zfirebasestorage.googleapis.comi  i  z7^https?://github.com/.*?/.*?/releases/download/.*?/.*?$z#^https://.*?s3.*?amazonaws.com/.*?$zndownloader.figstatic.comz"Couldn't get ETag version for url i  zUnauthorized for URL zY. Please use the parameter ``token=True`` after logging in with ``huggingface-cli login``z6Cannot find the requested files in the cached path at zi and outgoing traffic has been disabled. To enable file online look-ups, set 'local_files_only' to False.i  zCouldn't find file at r   zCouldn't reach z ()z (error z.lockr   c                  3   s8    t  d} | V  W d    d S 1 sw   Y  d S )Nza+b)r%   )fincomplete_pathr&   r'   _resumable_file_manager  s   "z/get_from_cache.<locals>._resumable_file_managerr   )dirdeletezB not found in cache or force_download set to True, downloading to )rs   r   r   r   rK   r   zstoring z in cache at zcreating metadata file for )rj   rk   z.jsonr   re   )encoding)1r   HF_EVALUATE_CACHEr<   r   r   r!   r"   rp   r   r$   r#   r   r   r   rL   r   r   r   r   r   rematchrj   r   r   r   r   OSErrorr   r   Timeoutr}   r   reprr   r   statst_sizer   tempfileNamedTemporaryFilerN   r   r   r   mover%   jsondump)rj   rq   rr   rs   etag_timeoutrt   ru   rv   rw   rK   rx   ry   	connectedr   r   rk   
head_errorr@   
cache_pathr   r   r   r   	lock_pathr  temp_file_managerr   r   meta	meta_path	meta_filer&   r   r'   r|     s   











 
0
00r|   c                         fdd}|S )Nc                    s(   d  d | jd ur| jnd | _| S Nr3   z

)r$   __doc__fndocstrr&   r'   docstring_decorator@     $z1add_start_docstrings.<locals>.docstring_decoratorr&   r"  r#  r&   r!  r'   add_start_docstrings?     r&  c                     r  )Nc                    s(   | j d ur| j ndd d  | _ | S r  )r  r$   r  r!  r&   r'   r#  H  r$  z/add_end_docstrings.<locals>.docstring_decoratorr&   r%  r&   r!  r'   add_end_docstringsG  r'  r(  c                 C   s   t dd | D S )Nc                 s   s    | ]}|  jV  qd S N)r  r  )r[   r   r&   r&   r'   r]   P  s    z(estimate_dataset_size.<locals>.<genexpr>)sum)pathsr&   r&   r'   estimate_dataset_sizeO  s   r,  r   c                 C   s>   t  }	 | d}|s	 t|S ||7 }|dr	 t|S q)NTr      
)	bytearrayr   ri   bytes)r   resbr&   r&   r'   readlineS  s   

r2  r)  )FT)FTr   )r   r   r   r   )r   )Nr   NNr   r   N)NNNTr   r   )NFNr   FNFTr   NN)Ur  r   ior  r!   r^   r  r   r   r  r   r   
contextlibr   r   	functoolsr   hashlibr   pathlibr   typingr   r   r	   r
   r   urllib.parser   r   datasetsr   datasets.utils.extractr   datasets.utils.filelockr   r3   r   r   r   
get_loggerr   r   INCOMPLETE_SUFFIXr   r   r(   boolr2   r6   r8   r>   rJ   Responser   rM   rT   r`   rd   rp   r   r   r   r   r   r   r   r   RequestExceptiontupler   floatr   r   r   r   r   rL   r   r|   r&  r(  r,  	RawIOBaser2  r&   r&   r&   r'   <module>   s    
 	
 	

 C$





'


#
(

 7