o
    i                  	   @   s   zd dl Z d dlZd dlmZ d dlmZ W n ey!   edw d dlmZm	Z	m
Z
mZ d dlmZ d dlmZmZmZ d dlZG dd	 d	Zddeejef dededB defddZ	
ddeejeef dededefddZddgZdS )    N)MistralTokenizer)PreTrainedTokenizerBasez@vllm is not installed. Please install it with "pip install vllm")CharacterLevelParserTokenEnforcerFormatEnforcerAnalyzerTokenEnforcerTokenizerData)#build_token_enforcer_tokenizer_data)ListOptionalUnionc                   @   s8   e Zd ZdefddZdee dejdejfddZ	d	S )
VLLMLogitsProcessortoken_enforcerc                 C   s"   || _ |r	t|nd | _d | _d S )N)r   r   analyzermask)selfr   analyze r   _/home/ubuntu/veenaModal/venv/lib/python3.10/site-packages/lmformatenforcer/integrations/vllm.py__init__   s   
zVLLMLogitsProcessor.__init__	input_idsscoresreturnc                 C   sp   |}| j r| j ||  | j|j}| jd ur#| jtj	  n	t
|tj	 | _d| j|< || j }|S )Nr   )r   report_raw_logitstolistr   get_allowed_tokensallowed_tokensr   fill_mathinftorch	full_like)r   r   r   token_sequencer   r   r   r   __call__   s   


zVLLMLogitsProcessor.__call__N)
__name__
__module____qualname__r   r   r	   intr   Tensorr"   r   r   r   r   r      s    "r   F	tokenizeruse_bitmask
vocab_sizer   c                 C   sd   |d u rt | dr| j  }t | dr|  } t| tr$t| ||S t | dr,| j} t| ||S )N
llm_engineget_tokenizerr(   )	hasattrr+   get_model_configget_vocab_sizer,   
isinstancer   r   r(   )r(   r)   r*   r   r   r   (build_vllm_token_enforcer_tokenizer_data#   s   



r1   llmcharacter_level_parserr   c                 C   s&   t | ts	t| } t| |}t||S )zBuild the logits processor function that llama.cpp will use to filter the tokens generated by the model. The result
    can be passed in the logits_processor list that is sent to the call or generate() method of llama.cpp models.)r0   r   r1   r   r   )r2   r3   r   r   r   r   r   build_vllm_logits_processor1   s   


r4   )FN)F)r   vllm!vllm.transformers_utils.tokenizerr   transformersr   ImportErrorlmformatenforcerr   r   r   r   *lmformatenforcer.integrations.transformersr   typingr	   r
   r   r   r   LLMboolr&   r1   r4   __all__r   r   r   r   <module>   s.    *
