o
    ߥi0_                     @   s@  d dl Z d dlZd dlZd dlZd dlZd dlZd dlmZ d dlm	Z	 d dl
mZ d dlmZ d dlmZmZmZ d dlZd dlmZ d dlmZ d d	lmZmZ d d
lmZmZmZmZmZmZm Z  d dl!m"Z"m#Z#m$Z$m%Z%m&Z& d dl'm(Z(m)Z) d dl*m+Z+ ddl,m-Z-m.Z.m/Z/ ddl0m1Z1 ddl2m3Z3m4Z4m5Z5 e+ Z6e#dddddfde7de7dee7 dee7 deee7df dee8 dee dee7 dee7 fddZ9e"dddddfde7de7dee7 dee7edf dee7 deeee7f  dee8 dee de7fdd Z:de#dddddd!d"e7de7d#e7dee7 dee7 deee7df dee8 dee dee7 dee7 fd$d%Z;dde%fde7de7de7d#e7fd&d'Z<de7de7de7fd(d)Z=d*d+ Z>		d8d,e7de7d-e7ded.eee7e7f  d/e?fd0d1Z@	d9d,e7de7d-e7d/e?ded.eee7e7f  fd2d3ZA	d9d,e7de7d-e7ded.eee7e7f  f
d4d5ZBd6d7 ZCdS ):    N)ThreadPoolExecutor)partial)	CookieJar)Path)DictOptionalUnion)Retry)tqdm)HubApiModelScopeConfig)API_FILE_DOWNLOAD_CHUNK_SIZEAPI_FILE_DOWNLOAD_RETRY_TIMESAPI_FILE_DOWNLOAD_TIMEOUT	FILE_HASHMODELSCOPE_DOWNLOAD_PARALLELS)MODELSCOPE_PARALLEL_DOWNLOAD_THRESHOLD_MBTEMPORARY_FOLDER_NAME)DEFAULT_DATASET_REVISIONDEFAULT_MODEL_REVISIONREPO_TYPE_DATASETREPO_TYPE_MODELREPO_TYPE_SUPPORT)get_dataset_cache_rootget_model_cache_root)
get_logger   )FileDownloadErrorInvalidParameterNotExistError)ModelFileSystemCache)file_integrity_validationget_endpointmodel_id_to_group_owner_nameFmodel_id	file_pathrevision	cache_dir
user_agentlocal_files_onlycookies	local_dirreturnc                 C   s   t | |t||||||d	S )aF  Download from a given URL and cache it if it's not already present in the local cache.

    Given a URL, this function looks for the corresponding file in the local
    cache. If it's not there, download it. Then return the path to the cached
    file.

    Args:
        model_id (str): The model to whom the file to be downloaded belongs.
        file_path(str): Path of the file to be downloaded, relative to the root of model repo.
        revision(str, optional): revision of the model file to be downloaded.
            Can be any of a branch, tag or commit hash.
        cache_dir (str, Path, optional): Path to the folder where cached files are stored.
        user_agent (dict, str, optional): The user-agent info in the form of a dictionary or a string.
        local_files_only (bool, optional):  If `True`, avoid downloading the file and return the path to the
            local cached file if it exists. if `False`, download the file anyway even it exists.
        cookies (CookieJar, optional): The cookie of download request.
        local_dir (str, optional): Specific local directory path to which the file will be downloaded.

    Returns:
        string: string of local file or if networking is off, last version of
        file cached on disk.

    Raises:
        NotExistError: The file is not exist.
        ValueError: The request parameter error.

    Note:
        Raises the following errors:

            - [`EnvironmentError`](https://docs.python.org/3/library/exceptions.html#EnvironmentError)
            if `use_auth_token=True` and the token cannot be found.
            - [`OSError`](https://docs.python.org/3/library/exceptions.html#OSError)
            if ETag cannot be determined.
            - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
            if some parameter value is invalid
    	repo_typer&   r'   r(   r)   r*   r+   )_repo_file_downloadr   )r$   r%   r&   r'   r(   r)   r*   r+    r0   P/home/ubuntu/.local/lib/python3.10/site-packages/modelscope/hub/file_download.pymodel_file_download'      .r2   
dataset_idc                 C   s   t | |t||||||d	S )a  Download raw files of a dataset.
    Downloads all files at the specified revision. This
    is useful when you want all files from a dataset, because you don't know which
    ones you will need a priori. All files are nested inside a folder in order
    to keep their actual filename relative to that folder.

    An alternative would be to just clone a dataset but this would require that the
    user always has git and git-lfs installed, and properly configured.

    Args:
        dataset_id (str): A user or an organization name and a dataset name separated by a `/`.
        file_path (str): The relative path of the file to download.
        revision (str, optional): An optional Git revision id which can be a branch name, a tag, or a
            commit hash. NOTE: currently only branch and tag name is supported
        cache_dir (str, Path, optional): Path to the folder where cached files are stored, dataset file will
            be save as cache_dir/dataset_id/THE_DATASET_FILES.
        local_dir (str, optional): Specific local directory path to which the file will be downloaded.
        user_agent (str, dict, optional): The user-agent info in the form of a dictionary or a string.
        local_files_only (bool, optional): If `True`, avoid downloading the file and return the path to the
            local cached file if it exists.
        cookies (CookieJar, optional): The cookie of the request, default None.
    Raises:
        ValueError: the value details.

    Returns:
        str: Local folder path (string) of repo snapshot

    Note:
        Raises the following errors:
        - [`EnvironmentError`](https://docs.python.org/3/library/exceptions.html#EnvironmentError)
        if `use_auth_token=True` and the token cannot be found.
        - [`OSError`](https://docs.python.org/3/library/exceptions.html#OSError) if
        ETag cannot be determined.
        - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
        if some parameter value is invalid
    r-   )r/   r   )r4   r%   r&   r'   r+   r(   r)   r*   r0   r0   r1   dataset_file_downloada   r3   r5   r-   repo_idr.   c             	   C   s  |st }|tvrtd|tf t| |||d\}	}
|r0|
|}|d ur,td |S tdt }dt	j
|di}|d u rCt	 }g }d }|t kr|j| ||d}|j| |d|d u r^d	n|d
}|D ]*}|d dkrmqd|d |kr|
|rtd|d  d |
|  S |} nqdn|tkrt| \}}|st}d}d}	 |j|||dd||d}d|v r|d dkstd| |d |d f  d S |d d }d	}|D ],}|d dkrq|d |kr|
|rtd|d  d |
|  S |}d} nqt||k s
|rn|d7 }q|d u rtd|| f |t kr)t| ||}n|tkr9|j|d |||d}t|||	|
||S )Nz'Invalid repo type: %s, only support: %s)r+   r'   r.   z>File exists in local cache, but we're not sure it's up to datezCannot find the requested files in the cached path and outgoing traffic has been disabled. To enable look-ups and downloads online, set 'local_files_only' to False.z
user-agent)r(   )r&   r*   TF)r$   r&   	recursiveuse_cookiesTypetreer   zFile Namez$ already in cache, skip downloading!r   d   /)dataset_name	namespacer&   	root_pathr7   page_number	page_sizeCode   z=Get dataset: %s file list failed, request_id: %s, message: %s	RequestIdMessageDataFilesz"The file path: %s not exist in: %s)	file_namer>   r?   r&   )r   r   r   $create_temporary_directory_and_cacheget_file_by_pathloggerwarning
ValueErrorr   r   get_user_agentget_cookiesget_valid_revisionget_model_filesexistsdebugget_file_by_infor   r#   r   list_repo_treeprintlenr   get_file_download_urlget_dataset_file_urldownload_file)r6   r%   r.   r&   r'   r(   r)   r*   r+   temporary_cache_dircachecached_file_path_apiheaders
repo_filesfile_to_download_meta	repo_filegroup_or_ownernamerA   rB   files_list_treeis_existurl_to_downloadr0   r0   r1   r/      s   



	


$

r/   c           	      C   s   |t krt }n|tkrt }t| \}}|d ur%tj|t}t	|}n$|d u r+|}t
|tr4t|}tj|t||}|dd}t	|||}tj|dd ||fS )N.___Texist_ok)r   r   r   r   r#   ospathjoinr   r    
isinstancer   strreplacemakedirs)	r$   r+   r'   r.   default_cache_rootrd   re   r\   r]   r0   r0   r1   rJ     s&   


rJ   c                 C   s0   t j|}t j|}d}|jt | ||dS )a  Format file download url according to `model_id`, `revision` and `file_path`.
    e.g., Given `model_id=john/bert`, `revision=master`, `file_path=README.md`,
    the resulted download url is: https://modelscope.cn/api/v1/models/john/bert/repo?Revision=master&FilePath=README.md

    Args:
        model_id (str): The model_id.
        file_path (str): File path
        revision (str): File revision.

    Returns:
        str: The file url.
    zQ{endpoint}/api/v1/models/{model_id}/repo?Revision={revision}&FilePath={file_path})endpointr$   r&   r%   )urllibparse
quote_plusformatr"   )r$   r%   r&   download_url_templater0   r0   r1   rY   5  s   rY   c              
   C   s  | \}}}}}}}}|d u ri nt |}	tt j|	d< ttddgd}
|d||f  }	 zwd}tj	
|rYt|d}|dtj}|| W d    n1 sTw   Y  || }||krdW d S d	||f |	d
< t|d+}tj|d|	|td}|jtdD ]}|r|| |t| qW d    W d S 1 sw   Y  W d S  ty } z|
jd||d}
td||f  |
  W Y d }~nd }~ww q/)NX-Request-IDr   GETtotalbackoff_factorallowed_methods_%s_%sTr   rbbytes=%s-%sRangeab+streamr`   r*   timeout
chunk_sizeerrorz-Downloading: %s failed, reason: %s will retry)copydeepcopyrq   uuiduuid4hexr	   r   rm   rn   rS   openseekioSEEK_ENDupdaterequestsgetr   iter_contentr   writerX   	Exception	incrementrL   rM   sleep)paramsmodel_file_pathprogressstartendurlrI   r*   r`   get_headersretrypart_file_namepartial_lengthfdownload_startrchunker0   r0   r1   download_part_with_retryM  sh   


r   r   rI   r`   	file_sizec                 C   s  t ddd|ddd}d}g }tj||}	tjtj|	dd tt|| D ]}
|
| }|
d	 | d	 }||	|||| |||f q(|d	 |k r[||	||d	 |d	 | |||f t	d
krat	nd
}t
|dd}t|t| W d    n1 s|w   Y  |  ttj||d=}|D ]1}|d d|d |d f  }t|d}||  W d    n1 sw   Y  t| qW d    d S 1 sw   Y  d S )NBT   r   Downloadingunit
unit_scaleunit_divisorr~   initialdesci   
rk   r      download)max_workersthread_name_prefixwbr         r   )r
   rm   rn   ro   rs   dirnamerangeintappendr   r   listmapr   closer   r   readremove)r   r+   rI   r*   r`   r   r   	PART_SIZEtasksr%   idxr   r   	parallelsexecutoroutput_filetaskr   	part_filer0   r0   r1   parallel_downloadw  sN   
"r   c              
   C   s  |du ri nt |}tt j|d< tj||}tj	tj
|dd td| | ttddgd}	 ztd	dd
|ddd}	d}
tj|rjt|d}|dtj}
|	|
 W d   n1 sew   Y  |
|krpW nid|
|d f |d< t|d-}tj| d||td}|  |jtdD ]}|r|	t| || qW d   n1 sw   Y  |	  W n ty } z|j d| |d}|!  W Y d}~nd}~ww q6td| | dS )a  Download remote file, will retry 5 times before giving up on errors.

    Args:
        url(str):
            actual download url of the file
        local_dir(str):
            local directory where the downloaded file stores
        file_name(str):
            name of the file stored in `local_dir`
        file_size(int):
            The file size.
        cookies(CookieJar):
            cookies used to authentication the user, which is used for downloading private repos
        headers(Dict[str, str], optional):
            http headers to carry necessary info when requesting the remote file

    Raises:
        FileDownloadError: File download failed.

    Nr{   Trk   downloading %s to %sr   r|   r}   r   r   r   r   r   r   r   r   r   r   r   r   storing %s in cache at %s)"r   r   rq   r   r   r   rm   rn   ro   rs   r   rL   rT   r	   r   r
   rS   r   r   r   r   r   r   r   r   raise_for_statusr   r   rX   r   r   r   r   r   )r   r+   rI   r   r*   r`   r   temp_file_pathr   r   r   r   r   r   r   r0   r0   r1   http_get_model_file  sx   


(r   c                 C   s  d}t tjd|dd}|du ri nt|}tt j|d< | }t	
d| |j ttdd	gd
}		 zO| }
d|
 |d< tj| d||td}|  |jd}|dur[t|nd}tddd||
dd}|jtdD ]}|r}|t| || qm|  W n ty } z|	jd	| |d}	|	  W Y d}~nd}~ww q4W d   n1 sw   Y  t	
d| | tj !|j}||krt"|j d|||f }t	#| t$|t%|jtj &|| dS )aj  Download remote file, will retry 5 times before giving up on errors.

    Args:
        url(str):
            actual download url of the file
        local_dir(str):
            local directory where the downloaded file stores
        file_name(str):
            name of the file stored in `local_dir`
        cookies(CookieJar):
            cookies used to authentication the user, which is used for downloading private repos
        headers(Dict[str, str], optional):
            http headers to carry necessary info when requesting the remote file

    Raises:
        FileDownloadError: File download failed.

    r   F)modedirdeleteNr{   r   r   r|   r}   Tz	bytes=%d-r   r   zContent-Lengthr   r   r   r   r   r   r   z}File %s download incomplete, content_length: %s but the                     file downloaded length: %s, please download again)'r   tempfileNamedTemporaryFiler   r   rq   r   r   r   rL   rT   re   r	   r   tellr   r   r   r   r`   r   r
   r   r   r   rX   r   r   r   r   r   rm   rn   getsizer   r   r   rr   ro   )r   r+   rI   r*   r`   r~   temp_file_managerr   	temp_filer   downloaded_sizer   content_lengthr   r   r   downloaded_lengthmsgr0   r0   r1   http_get_file  s   


(
r   c                 C   s   t d d |d k r%tdkr%t| ||d ||d u rd n| |d d nt| ||d |d ||d tj||d }t|v rGt	||t  |
||S )Ni  Sizer   r   )r`   r*   r   )r   r`   r*   )r   r   r   get_dictr   rm   rn   ro   r   r!   put_file)r   	file_metar\   r]   r`   r*   r   r0   r0   r1   r[   G  s0   	r[   )NN)N)Dr   r   rm   r   rv   r   concurrent.futuresr   	functoolsr   http.cookiejarr   pathlibr   typingr   r   r   r   requests.adaptersr	   r
   modelscope.hub.apir   r   modelscope.hub.constantsr   r   r   r   r   r   r   modelscope.utils.constantr   r   r   r   r   modelscope.utils.file_utilsr   r   modelscope.utils.loggerr   errorsr   r   r   utils.cachingr    utils.utilsr!   r"   r#   rL   rq   boolr2   r5   r/   rJ   rY   r   r   r   r   r   r[   r0   r0   r0   r1   <module>   s8  $	
=	
>	

 
/
3
V
R