o
    ߥiE                      @   s  d dl Z d dlZd dlZd dlZd dlmZ d dlmZ d dlm	Z	m
Z
mZmZ d dlmZmZ d dlmZ d dlmZ d dlmZ d d	lmZmZmZmZmZ d d
lmZ ddlmZm Z m!Z! e Z"edddddddddf
de#dee# dee#edf deee	e#f  dee$ dee deee#e
e# f  deee#e
e# f  dee# deee
e# e#f  deee
e# e#f  de#fddZ%edddddddddf
de#dee# dee#edf dee# deee	e#f  dee$ dee deee#e
e# f  deee#e
e# f  deee
e# e#f  deee
e# e#f  de#fddZ&deddddddddddd e#d!ee# dee# dee#edf deee	e#f  dee$ dee deee#e
e# f  deee#e
e# f  dee# deee
e# e#f  deee
e# e#f  fd"d#Z'd$e#fd%d&Z(d'ee#e
e# f fd(d)Z)d'e
e# fd*d+Z*dedddddfd,e
e# d-ed.e#d e#d/ed0e#d1e#d!ee# dee# dee deee#e
e# f  deee#e
e# f  deee
e# e#f  deee
e# e#f  fd2d3Z+dS )4    N)	CookieJarPath)DictListOptionalUnion)HubApiModelScopeConfig)InvalidParameter)ModelFileSystemCache)model_id_to_group_owner_name)DEFAULT_DATASET_REVISIONDEFAULT_MODEL_REVISIONREPO_TYPE_DATASETREPO_TYPE_MODELREPO_TYPE_SUPPORT)
get_logger   )$create_temporary_directory_and_cachedownload_fileget_file_download_urlFmodel_idrevision	cache_dir
user_agentlocal_files_onlycookiesignore_file_patternallow_file_pattern	local_dirallow_patternsignore_patternsreturnc                 C   s    t | t|||||||||
|	dS )a
  Download all files of a repo.
    Downloads a whole snapshot of a repo's files at the specified revision. This
    is useful when you want all files from a repo, because you don't know which
    ones you will need a priori. All files are nested inside a folder in order
    to keep their actual filename relative to that folder.

    An alternative would be to just clone a repo but this would require that the
    user always has git and git-lfs installed, and properly configured.

    Args:
        model_id (str): A user or an organization name and a repo name separated by a `/`.
        revision (str, optional): An optional Git revision id which can be a branch name, a tag, or a
            commit hash. NOTE: currently only branch and tag name is supported
        cache_dir (str, Path, optional): Path to the folder where cached files are stored, model will
            be save as cache_dir/model_id/THE_MODEL_FILES.
        user_agent (str, dict, optional): The user-agent info in the form of a dictionary or a string.
        local_files_only (bool, optional): If `True`, avoid downloading the file and return the path to the
            local cached file if it exists.
        cookies (CookieJar, optional): The cookie of the request, default None.
        ignore_file_pattern (`str` or `List`, *optional*, default to `None`):
            Any file pattern to be ignored in downloading, like exact file names or file extensions.
        allow_file_pattern (`str` or `List`, *optional*, default to `None`):
            Any file pattern to be downloading, like exact file names or file extensions.
        local_dir (str, optional): Specific local directory path to which the file will be downloaded.
        allow_patterns (`str` or `List`, *optional*, default to `None`):
            If provided, only files matching at least one pattern are downloaded, priority over allow_file_pattern.
            For hugging-face compatibility.
        ignore_patterns (`str` or `List`, *optional*, default to `None`):
            If provided, files matching any of the patterns are not downloaded, priority over ignore_file_pattern.
            For hugging-face compatibility.
    Raises:
        ValueError: the value details.

    Returns:
        str: Local folder path (string) of repo snapshot

    Note:
        Raises the following errors:
        - [`EnvironmentError`](https://docs.python.org/3/library/exceptions.html#EnvironmentError)
        if `use_auth_token=True` and the token cannot be found.
        - [`OSError`](https://docs.python.org/3/library/exceptions.html#OSError) if
        ETag cannot be determined.
        - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
        if some parameter value is invalid
    	repo_typer   r   r   r   r   r   r   r    r"   r!   )_snapshot_downloadr   )r   r   r   r   r   r   r   r   r    r!   r"    r'   T/home/ubuntu/.local/lib/python3.10/site-packages/modelscope/hub/snapshot_download.pysnapshot_download   s   :r)   
dataset_idc                 C   s    t | t|||||||||
|	dS )a
  Download raw files of a dataset.
    Downloads all files at the specified revision. This
    is useful when you want all files from a dataset, because you don't know which
    ones you will need a priori. All files are nested inside a folder in order
    to keep their actual filename relative to that folder.

    An alternative would be to just clone a dataset but this would require that the
    user always has git and git-lfs installed, and properly configured.

    Args:
        dataset_id (str): A user or an organization name and a dataset name separated by a `/`.
        revision (str, optional): An optional Git revision id which can be a branch name, a tag, or a
            commit hash. NOTE: currently only branch and tag name is supported
        cache_dir (str, Path, optional): Path to the folder where cached files are stored, dataset will
            be save as cache_dir/dataset_id/THE_DATASET_FILES.
        local_dir (str, optional): Specific local directory path to which the file will be downloaded.
        user_agent (str, dict, optional): The user-agent info in the form of a dictionary or a string.
        local_files_only (bool, optional): If `True`, avoid downloading the file and return the path to the
            local cached file if it exists.
        cookies (CookieJar, optional): The cookie of the request, default None.
        ignore_file_pattern (`str` or `List`, *optional*, default to `None`):
            Any file pattern to be ignored in downloading, like exact file names or file extensions.
            Use regression is deprecated.
        allow_file_pattern (`str` or `List`, *optional*, default to `None`):
            Any file pattern to be downloading, like exact file names or file extensions.
        allow_patterns (`str` or `List`, *optional*, default to `None`):
            If provided, only files matching at least one pattern are downloaded, priority over allow_file_pattern.
            For hugging-face compatibility.
        ignore_patterns (`str` or `List`, *optional*, default to `None`):
            If provided, files matching any of the patterns are not downloaded, priority over ignore_file_pattern.
            For hugging-face compatibility.
    Raises:
        ValueError: the value details.

    Returns:
        str: Local folder path (string) of repo snapshot

    Note:
        Raises the following errors:
        - [`EnvironmentError`](https://docs.python.org/3/library/exceptions.html#EnvironmentError)
        if `use_auth_token=True` and the token cannot be found.
        - [`OSError`](https://docs.python.org/3/library/exceptions.html#OSError) if
        ETag cannot be determined.
        - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
        if some parameter value is invalid
    r$   )r&   r   )r*   r   r   r    r   r   r   r   r   r!   r"   r'   r'   r(   dataset_snapshot_downloadc   s   ;r+   )r%   r   r   r   r   r   r   r   r    r!   r"   repo_idr%   c                C   s,  |st }|tvrtd|tf t| |	||d\}}|r2t|jdkr'tdtd|  |	 S dt
j|di}dtjvrGtt |d	< t }|d u rRt
 }g }|t kr|j| ||d
}|d }dtjv rk|ni |ddi}|jd ur}|j|d< |j| |d|d u rdn||d}t|||| |d d ||||||||
d ng|tkrt| \}}|st}||| |}d}d}	 |j|||dd||d}d|v r|d dkstd| |d |d f  d S |d d }t|||| |||||||||||
d t||k rn|d7 }q|j|d tj|	 S )Nz'Invalid repo type: %s, only support: %s)r    r   r%   r   zCannot find the requested files in the cached path and outgoing traffic has been disabled. To enable look-ups and downloads online, set 'local_files_only' to False.z6We can not confirm the cached file is for revision: %sz
user-agent)r   CI_TESTzsnapshot-identifier)r   r   RevisionSnapshotTruecached_model_revisionTF)r   r   	recursiveuse_cookiesheaders)r%   r   r   r   r   r"   r!   r   d   /)dataset_name	namespacer   	root_pathr2   page_number	page_sizeCode   z=Get dataset: %s file list failed, request_id: %s, message: %s	RequestIdMessageDataFiles)revision_info) r   r   r   r   lencached_files
ValueErrorloggerwarningget_root_locationr
   get_user_agentosenvironstruuiduuid4r	   get_cookiesget_valid_revision_detailr1   get_model_files_download_file_listsr   r   r   dataset_download_statisticslist_repo_treeprintsave_model_versionpathjoin)r,   r%   r   r   r   r   r   r   r   r    r!   r"   temporary_cache_dircacher4   _api
repo_filesrevision_detailsnapshot_headergroup_or_ownernamer:   r;   files_list_treer'   r'   r(   r&      s   




%r&   patternc                 C   s&   zt |  W dS  ty   Y dS w )NTF)recompileBaseException)rb   r'   r'   r(   _is_valid_regex3  s   
rf   patternsc                 C   s*   t | tr| g} | d urdd | D } | S )Nc                 S   s"   g | ]}| d s|n|d qS )r6   *)endswith).0itemr'   r'   r(   
<listcomp>?  s    z'_normalize_patterns.<locals>.<listcomp>)
isinstancerL   )rg   r'   r'   r(   _normalize_patterns;  s   
rn   c                 C   s0   | d urg }| D ]}t |r|| q|S d S )N)rf   append)rg   regex_patternsrk   r'   r'   r(   _get_valid_regex_patternE  s   
rq   r\   rZ   rY   apir`   r_   c                    s  t |}t |}t |}t |}t|}| D ]  d dkrqzU|r/t fdd|D r/W q|r>t fdd|D r>W q|rMt fdd|D rMW q|d ur`|r`t fdd|D s`W q|d urs|rst fd	d|D ssW qW n ty } ztd
|  W Y d }~nd }~ww | rtj	 d }t
d| d q|tkrt| d |	d}n|tkr|j d |||	d}t| ||||
 qd S )NTypetreec                       g | ]
}t   d  |qS r   fnmatchrj   rb   	repo_filer'   r(   rl   m      z(_download_file_lists.<locals>.<listcomp>c                    ru   r   rv   rx   ry   r'   r(   rl   s  r{   c                    s    g | ]}t | d  duqS )NameN)rc   searchrx   ry   r'   r(   rl   y  s    c                 3        | ]}t   d  |V  qdS r   Nrv   rx   ry   r'   r(   	<genexpr>  
    
z'_download_file_lists.<locals>.<genexpr>c                 3   r~   r   rv   rx   ry   r'   r(   r     r   z The file pattern is invalid : %sr|   zFile z$ already in cache, skip downloading!r   )r   	file_pathr   )	file_namer7   r8   r   )rn   rq   any	ExceptionrF   rG   existsrJ   rW   basenamedebugr   r   r   get_dataset_file_urlr   )r\   rZ   rY   r,   rr   r`   r_   r4   r%   r   r   r   r   r!   r"   ignore_regex_patterner   urlr'   ry   r(   rR   P  sv   

rR   ),rw   rJ   rc   rM   http.cookiejarr   pathlibr   typingr   r   r   r   modelscope.hub.apir	   r
   modelscope.hub.errorsr   modelscope.hub.utils.cachingr   modelscope.hub.utils.utilsr   modelscope.utils.constantr   r   r   r   r   modelscope.utils.loggerr   file_downloadr   r   r   rF   rL   boolr)   r+   r&   rf   rn   rq   rR   r'   r'   r'   r(   <module>   sB  	

K	

M	

 
	
