o
    üN i<  ã                   @   s¾  d dl Z d dlZd dlZd dlZd dlZd dlmZ d dlmZm	Z	m
Z
mZ d dlmZ d dlmZmZ ddlmZ ddlmZmZ dd	lmZ dd
lmZ ddlmZmZmZmZ ddlm Z m!Z!m"Z"m#Z#m$Z$ erpd dl%m&Z& e  'd¡Z(dedede)fdd„Z*dede)fdd„Z+						d%dede
e de)de)de)de
d de
e	e,e,f  defdd„Z-						d&d e,de
e de)de
d d!e)d"e
e de
e	e,e,f  deee
e, f fd#d$„Z.dS )'é    N)ÚPath)ÚTYPE_CHECKINGÚDictÚOptionalÚTuple)Úurlparse)ÚZipFileÚ
is_zipfileé   )Ú	CacheFile)Ú	PathOrStrÚget_cache_dir)ÚFileLock)ÚMeta)ÚSchemeClientÚget_scheme_clientÚget_supported_schemesÚhf_get_from_cache)Ú_lock_file_pathÚ_meta_file_pathÚcheck_tarfileÚfind_latest_cachedÚresource_to_filename)ÚProgressÚcached_pathÚpathÚurl_or_filenameÚreturnc                 C   sV   z
dd l }| | ¡W S  ty*   t|ƒ ¡  d¡r'dd l}| d|› d¡ Y dS w )Nr   z.rarz;The file has a 'rar' extension but rarfile is unavailable: z$
See https://rarfile.readthedocs.io/F)ÚrarfileÚ
is_rarfileÚImportErrorÚstrÚlowerÚendswithÚwarningsÚwarn)r   r   r   r$   © r&   úL/home/ubuntu/.local/lib/python3.10/site-packages/cached_path/_cached_path.pyÚ_is_rarfile#   s   
ÿ÷r(   Ú	file_pathc                 C   s   t | ƒpt | ¡pt| |ƒS )N)r	   ÚtarfileÚ
is_tarfiler(   )r)   r   r&   r&   r'   Ú_is_archive4   s
   ÿýr,   FÚ	cache_dirÚextract_archiveÚforce_extractÚquietÚprogressr   Úheadersc                 C   s   t | tƒs	t| ƒ} d}d}|  d¡}	|rQ|	dkrQ| d|	… }
| |	d d… }t|
|d||||d}| ¡ s<t| › dƒ‚|| }| ¡ sOtd|› d	|
› dƒ‚|S t| ƒ}|j	t
ƒ v rvt| ||||d
\}}|rut|| ƒru|j|jd  }nd|j	dkr„t|  ddd¡ƒS | }t| ƒ ¡ } t|r‘|ntƒ ƒ ¡ }|jddd |  ¡ rÅ| }|  ¡ } |rÄ| ¡ rÄt|| ƒrÄt| ttj |¡ƒƒd }|| }n|j	dkrÒtd| › dƒ‚td|› dƒ‚|durÎtj |¡rît |¡rî|sî|S tt|ƒƒÏ tj |¡rt |¡r|rt d| |¡ n	|W  d  ƒ S t  d| |¡ t!j"|dd t#j$tj %|¡d d}z‚t& '|¡rWt& (|¡}t)|ƒ | *|¡ W d  ƒ n	1 sQw   Y  n@t+|| ƒr}ddl,}| -|¡}| *|¡ W d  ƒ n	1 sww   Y  nt.|ƒ}| *|¡ W d  ƒ n	1 s’w   Y  t ||¡ t/j0| ||dd}| 1¡  W t!j"|dd nt!j"|dd w W d  ƒ |S 1 sÇw   Y  |S |S )a  
    Given something that might be a URL or local path, determine which.
    If it's a remote resource, download the file and cache it, and
    then return the path to the cached file. If it's already a local path,
    make sure the file exists and return the path.

    For URLs, the following schemes are all supported out-of-the-box:

    * ``http`` and ``https``,
    * ``s3`` for objects on `AWS S3`_,
    * ``gs`` for objects on `Google Cloud Storage (GCS)`_, and
    * ``hf`` for objects or repositories on `HuggingFace Hub`_.

    If you have `Beaker-py`_ installed you can also use URLs of the form:
    ``beaker://{user_name}/{dataset_name}/{file_path}``.

    You can also extend ``cached_path()`` to handle more schemes with :func:`add_scheme_client()`.

    .. _AWS S3: https://aws.amazon.com/s3/
    .. _Google Cloud Storage (GCS): https://cloud.google.com/storage
    .. _HuggingFace Hub: https://huggingface.co/
    .. _Beaker-py: https://github.com/allenai/beaker-py

    Examples
    --------

    To download a file over ``https``::

        cached_path("https://github.com/allenai/cached_path/blob/main/README.md")

    To download an object on GCS::

        cached_path("gs://allennlp-public-models/lerc-2020-11-18.tar.gz")

    To download the PyTorch weights for the model `epwalsh/bert-xsmall-dummy`_
    on HuggingFace, you could do::

        cached_path("hf://epwalsh/bert-xsmall-dummy/pytorch_model.bin")

    For paths or URLs that point to a tarfile or zipfile, you can append the path
    to a specific file within the archive to the ``url_or_filename``, preceeded by a "!".
    The archive will be automatically extracted (provided you set ``extract_archive`` to ``True``),
    returning the local path to the specific file. For example::

        cached_path("model.tar.gz!weights.th", extract_archive=True)

    .. _epwalsh/bert-xsmall-dummy: https://huggingface.co/epwalsh/bert-xsmall-dummy

    Parameters
    ----------

    url_or_filename :
        A URL or path to parse and possibly download.

    cache_dir :
        The directory to cache downloads. If not specified, the global default cache directory
        will be used (``~/.cache/cached_path``). This can be set to something else with
        :func:`set_cache_dir()`.

    extract_archive :
        If ``True``, then zip or tar.gz archives will be automatically extracted.
        In which case the directory is returned.

    force_extract :
        If ``True`` and the file is an archive file, it will be extracted regardless
        of whether or not the extracted directory already exists.

        .. caution::
            Use this flag with caution! This can lead to race conditions if used
            from multiple processes on the same file.

    quiet :
        If ``True``, progress displays won't be printed.

    progress :
        A custom progress display to use. If not set and ``quiet=False``, a default display
        from :func:`~cached_path.get_download_progress()` will be used.

    headers :
        Custom headers to add to HTTP requests.
        Example: ``{"Authorization": "Bearer YOUR_TOKEN"}`` for private resources.
        Only used for HTTP/HTTPS resources.

    Returns
    -------
    :class:`pathlib.Path`
        The local path to the (potentially cached) resource.

    Raises
    ------
    ``FileNotFoundError``

        If the resource cannot be found locally or remotely.

    ``ValueError``
        When the URL is invalid.

    ``Other errors``
        Other error types are possible as well depending on the client used to fetch
        the resource.

    Nú!r   r
   T)r-   r.   r/   r0   r1   r2   z9 uses the ! syntax, but does not specify an archive file.ú'z' not found within ')r0   r1   r2   z
-extractedÚfilezfile://Ú ©ÚparentsÚexist_okzfile z
 not foundzunable to parse z as a URL or as a local pathz_Extraction directory for %s (%s) already exists, overwriting it since 'force_extract' is 'True'zExtracting %s to %s)Úignore_errors)Údir)ÚetagÚextraction_dir)2Ú
isinstancer!   Úfindr   Úis_dirÚ
ValueErrorÚexistsÚFileNotFoundErrorr   Úschemer   Úget_from_cacher,   ÚparentÚnameÚreplacer   Ú
expanduserr   ÚmkdirÚresolveÚis_filer   Úosr   ÚgetmtimeÚisdirÚlistdirr   r   ÚloggerÚwarningÚinfoÚshutilÚrmtreeÚtempfileÚmkdtempÚsplitr*   r+   Úopenr   Ú
extractallr(   r   ÚRarFiler   r   ÚnewÚto_file)r   r-   r.   r/   r0   r1   r2   Úextraction_pathr<   Úexclamation_indexÚarchive_pathÚ	file_nameÚcached_archive_pathr)   ÚparsedÚorig_url_or_filenameÚextraction_nameÚtmp_extraction_dirÚtar_filer   Úrar_fileÚzip_fileÚmetar&   r&   r'   r   <   sÄ   
o
ù	ÿ

ÿ€
ÿÿ€

üõþ€ÿ€
ÿü
"
Õ-Ó-ÚurlÚno_downloadsÚ_clientc              
   C   sl  |   d¡rt| |ƒdfS t|r|ntƒ ƒ ¡ }|jddd |p%t| |d}z| ¡ }W n1 |jy]   t	 
d| ¡ t| |ƒ}	|	rVt	 d| |	¡ t t|	ƒ¡}
|	|
jf Y S t	 d| ¡ ‚ w t| |ƒ}|| }t	 d	|¡ tt|ƒdd
´ tj |¡rƒt	 d| ¡ nœ|r‰t|ƒ‚| ¡ }t|ƒj}t	 d| |¡ ddlm}m} |du }|p«||d}|r²| ¡  z9t | ƒdkr»| nd| dd… › }|j!d|› d|d}||||ƒ}| "|¡ |j#||j$|j$d W |rë| %¡  n|ró| %¡  w w W d  ƒ n1 sþw   Y  t	 d|¡ tj&| ||d}
|
 '¡  W d  ƒ ||fS W d  ƒ ||fS 1 s-w   Y  ||fS )z§
    Given a URL, look for the corresponding dataset in the local cache.
    If it's not there, download it. Then return the path to the cached file and the ETag.
    zhf://NTr7   )r2   zrConnection error occurred while trying to fetch ETag for %s. Will attempt to use latest cached version of resourcezQETag request failed with recoverable error, using latest cached version of %s: %szVETag request failed with recoverable error, but no cached version of %s could be foundzwaiting to acquire lock on %s)Úread_only_okzcache of %s is up-to-datez(%s not found in cache, downloading to %sr
   )ÚBufferedWriterWithProgressÚget_download_progress)r0   é   u   â€¦iâÿÿÿzDownloading [cyan i]z[/])Útotal)rr   Ú	completedzcreating metadata file for %s)r<   )(Ú
startswithr   r   r   rI   rJ   r   Úget_etagÚrecoverable_errorsrQ   rR   r   rS   r   Ú	from_pathr   r<   Úerrorr   Údebugr   r   rM   r   rB   rC   Úget_sizer   r1   ro   rp   ÚstartÚlenÚadd_taskÚget_resourceÚupdateÚtotal_writtenÚstopr\   r]   )rk   r-   r0   r1   rl   rm   r2   Úclientr<   Úlatest_cachedrj   ÚfilenameÚ
cache_pathÚsizeÚ
cache_filero   rp   Ústart_and_cleanupÚdisplay_urlÚtask_idÚwriter_with_progressr&   r&   r'   rE   1  s–   
ý
ýþè

"
ý€ÿ
ÿ€êý

Ú(Ú
þ(Ø(rE   )NFFFNN)NFNFNN)/ÚloggingrM   rT   r*   rV   Úpathlibr   Útypingr   r   r   r   Úurllib.parser   Úzipfiler   r	   r‡   r   Úcommonr   r   Ú	file_lockr   rj   r   Úschemesr   r   r   r   Úutilr   r   r   r   r   Úrich.progressr   Ú	getLoggerrQ   Úboolr(   r,   r!   r   rE   r&   r&   r&   r'   Ú<module>   s†    

ùÿþýüûúù
ø xùÿþýüûúùø