o
    ]Û·i  ã                	   @   sò   zd dl Z d dlZd dlmZ d dlmZ W n ey!   edƒ‚w d dlmZm	Z	m
Z
mZ d dlmZ d dlmZmZmZ d dlZG dd	„ d	ƒZddeejef dededB defdd„Z	
ddeejeef dededefdd„ZddgZdS )é    N)ÚMistralTokenizer)ÚPreTrainedTokenizerBasez@vllm is not installed. Please install it with "pip install vllm")ÚCharacterLevelParserÚTokenEnforcerÚFormatEnforcerAnalyzerÚTokenEnforcerTokenizerData)Ú#build_token_enforcer_tokenizer_data)ÚListÚOptionalÚUnionc                   @   s8   e Zd Zdefdd„Zdee dejdejfdd„Z	d	S )
ÚVLLMLogitsProcessorÚtoken_enforcerc                 C   s"   || _ |r	t|ƒnd | _d | _d S )N)r   r   ÚanalyzerÚmask)Úselfr   Úanalyze© r   úX/home/ubuntu/vllm_env/lib/python3.10/site-packages/lmformatenforcer/integrations/vllm.pyÚ__init__   s   
zVLLMLogitsProcessor.__init__Ú	input_idsÚscoresÚreturnc                 C   sp   |}| j r| j  || ¡ ¡ | j |¡j}| jd ur#| j tj	 ¡ n	t
 |tj	 ¡| _d| j|< || j }|S )Nr   )r   Úreport_raw_logitsÚtolistr   Úget_allowed_tokensÚallowed_tokensr   Úfill_ÚmathÚinfÚtorchÚ	full_like)r   r   r   Útoken_sequencer   r   r   r   Ú__call__   s   


zVLLMLogitsProcessor.__call__N)
Ú__name__Ú
__module__Ú__qualname__r   r   r	   Úintr   ÚTensorr"   r   r   r   r   r      s    "r   FÚ	tokenizerÚuse_bitmaskÚ
vocab_sizer   c                 C   sd   |d u rt | dƒr| j ¡  ¡ }t | dƒr|  ¡ } t| tƒr$t| ||ƒS t | dƒr,| j} t| ||ƒS )NÚ
llm_engineÚget_tokenizerr(   )	Úhasattrr+   Úget_model_configÚget_vocab_sizer,   Ú
isinstancer   r   r(   )r(   r)   r*   r   r   r   Ú(build_vllm_token_enforcer_tokenizer_data#   s   



r1   ÚllmÚcharacter_level_parserr   c                 C   s&   t | tƒs	t| ƒ} t| |ƒ}t||ƒS )zåBuild the logits processor function that llama.cpp will use to filter the tokens generated by the model. The result
    can be passed in the logits_processor list that is sent to the call or generate() method of llama.cpp models.)r0   r   r1   r   r   )r2   r3   r   r   r   r   r   Úbuild_vllm_logits_processor1   s   


r4   )FN)F)r   ÚvllmÚ!vllm.transformers_utils.tokenizerr   Útransformersr   ÚImportErrorÚlmformatenforcerr   r   r   r   Ú*lmformatenforcer.integrations.transformersr   Útypingr	   r
   r   r   r   ÚLLMÚboolr&   r1   r4   Ú__all__r   r   r   r   Ú<module>   s.    ÿ*þÿþ
þ