o
    V۷i                     @   sl   d dl m Z  d dlmZmZ d dlZddlmZ 			ddejdee dee d	eee	  d
ef
ddZ
dS )    )copy)ListOptionalN   )LLTokenizerhf_tokenizern_vocab	eos_tokenslicesreturnc                 C   sT   t | tjr&t| j}|  |  | }|du r| j}t	||||dS t
d)aN  
    Create a new tokenizer from a fast Hugging Face tokenizer.
    This is an expensive operation (~1s), so the result should be cached.
    It currently only supports fast tokenizers, which are then handled
    by the Rust tokenizers library.

    Args:
        hf_tokenizer: transformers.PreTrainedTokenizerFast - the tokenizer to wrap
        n_vocab: int - override the size of the vocabulary
        eos_token: int - override the EOS token
        slices: List[str] - configuration for slicer optimization; pass [] to disable,
            or None to use the default configuration
    N)r   r	   r
   z"Only fast tokenizers are supported)
isinstancetransformersPreTrainedTokenizerFastr   backend_tokenizer
no_paddingno_truncationto_streos_token_idr   
ValueError)r   r   r	   r
   r   s r   C/home/ubuntu/vllm_env/lib/python3.10/site-packages/llguidance/hf.pyfrom_tokenizer	   s   r   )NNN)r   typingr   r   r   _libr   r   intstrr   r   r   r   r   <module>   s$    
