o
    c۷i^                     @   sX  U d dl Z d dlZd dlmZ d dlmZ d dlZd dlmZ d dl	m
Z
 eed< zd dlZd dlZdZW n ey@   dZY nw e eZdd
dZdee ded	eee  fddZdededB d	ee fddZdee d	eeeef  fddZdee d	efddZ					d dedeeB dB deeB dB dedB deded	efddZdS )!    N)Path)Iterator)TokenizerVersion)MultiModalVersion_hub_installedTFreturnc                   C   s   t stdd S )NzxPlease install the `huggingface_hub` package to use this method.
Run `pip install mistral-common[hf-hub]` to install it.)r   ImportError r	   r	   \/home/ubuntu/vllm_env/lib/python3.10/site-packages/mistral_common/tokens/tokenizers/utils.py_assert_hub_installed   s
   r   lst
chunk_sizec                 c   s.    t dt| |D ]}| |||  V  q	dS )a  Chunk a list into smaller lists of a given size.

    Args:
        lst: The list to chunk.
        chunk_size: The size of each chunk.

    Returns:
        An iterator over the chunks.

    Examples:
        >>> all_chunks = list(chunks([1, 2, 3, 4, 5], 2))
    r   N)rangelen)r   r   ir	   r	   r
   chunks   s   r   repo_idrevisionc                 C   s   t   ttjjtjjdg| d }|du r?|d tjj }|	 r?|
d}| }W d   n1 s:w   Y  |rP|d | }| rPt|S g S )zlist the files of a local Hugging Face repo.

    Args:
        repo_id: The Hugging Face repo ID.
        revision: The revision of the model to use. If `None`, the latest revision will be used.
    models/Nrefsr	snapshots)r   r   huggingface_hub	constantsHF_HUB_CACHEREPO_ID_SEPARATORjoinsplitDEFAULT_REVISIONis_fileopenreadis_diroslistdir)r   r   
repo_cacherevision_filefilerevision_dirr	   r	   r
   list_local_hf_repo_files0   s   

r*   filesc                    s   g }t tj}t tjdg   fdd|D dg }| D ]&}t|}|j}d|j}|dkr8|||f q||v rC|||f q|S )zFilter the valid tokenizer files from a list of files.

    Args:
        files: The list of files to filter.

    Returns:
        The list of tuples of file names and paths to the valid tokenizer files.
     c                    s$   g | ]} D ]	}d | | qqS )z.model.r	   ).0vmmm_versionsr	   r
   
<listcomp>X   s   $ z1_filter_valid_tokenizer_files.<locals>.<listcomp>z.modeltekken.json)	listr   __members__r   r   namer   suffixesappend)r+   valid_tokenizer_filesinstruct_versionssentencepiece_suffixesr(   pathlib_file	file_namesuffixr	   r0   r
   _filter_valid_tokenizer_filesK   s   	
r?   c                 C   s   t | }t|dkrtdt|dkr:|D ]\}}d|kr"|  S qt|dd dd d }td	| d
 |S |d d }|S )zGet one valid tokenizer file from a list of files.

    Args:
        files: The list of files to filter.

    Returns:
        The path to the tokenizer file.
    r   zNo tokenizer file found.   r3   c                 S   s   | d S )Nr   r	   )xr	   r	   r
   <lambda>x   s    z.get_one_valid_tokenizer_file.<locals>.<lambda>)keyz,Multiple valid tokenizer files found. Using .)r?   r   
ValueErrorsortedloggerwarning)r+   $valid_tokenizer_file_names_and_filesr=   tokenizer_filer	   r	   r
   get_one_valid_tokenizer_filef   s   	rL   	cache_dirtokenforce_downloadlocal_files_onlyc              
   C   s*  t   |r|rtd|sYzt }|j| ||d}d}W nR tjtjtjfyX } z)|r/|t	| |d}d}t
d t|dkrNtd|  d	| d
|W Y d}~nd}~ww t	| |d}t|dkrptd|  d	| dzt|d}	W n ty   td|  dw tj| ||	||||d}
|
S )a  Download the tokenizer file of a Mistral model from the Hugging Face Hub.

    See [here](https://huggingface.co/mistralai/models) for a list of our OSS models.

    Note:
        You need to install the `huggingface_hub` package to use this method.

        Please run `pip install mistral-common[hf-hub]` to install it.

    Args:
        repo_id: The Hugging Face repo ID.
        cache_dir: The directory where the tokenizer will be cached.
        token: The Hugging Face token to use to download the tokenizer.
        revision: The revision of the model to use. If `None`, the latest revision will be used.
        force_download: Whether to force the download of the tokenizer. If `True`, the tokenizer will be downloaded
            even if it is already cached.
        local_files_only: Whether to only use local files. If `True`, the tokenizer will be downloaded only if it is
            already cached.

    Returns:
        The downloaded tokenizer local path for the given model ID.
    zSYou cannot force the download of the tokenizer if you only want to use local files.)r   rN   F)r   r   TzBCould not connect to the Hugging Face Hub. Using local files only.r   zXCould not connect to the Hugging Face Hub and no local files were found for the repo ID z and revision z6. Please check your internet connection and try again.Nz%No local files found for the repo ID zz. Please check the repo ID and the revision or try to download the tokenizer without setting `local_files_only` to `True`.)r+   z*No valid tokenizer file found in the repo rE   )r   rM   filenamerN   r   rP   rO   )r   rF   r   HfApilist_repo_filesrequestsConnectionError	HTTPErrorTimeoutr*   rH   infor   FileNotFoundErrorrL   hf_hub_download)r   rM   rN   r   rO   rP   hf_api
repo_fileserK   tokenizer_pathr	   r	   r
   download_tokenizer_from_hf_hub   sZ   
	r_   )r   N)NNNFF)loggingr$   pathlibr   typingr   rT   %mistral_common.tokens.tokenizers.baser   &mistral_common.tokens.tokenizers.imager   bool__annotations__r   huggingface_hub.constantsr   r   	getLogger__name__rH   r   r4   strintr   r*   tupler?   rL   r_   r	   r	   r	   r
   <module>   sR   
 

""

