o
    i7                  	   @   s   z
d dl mZmZ W n ey   edw d dlmZmZmZmZ d dl	Z
d dlmZ d dlmZmZmZ dedeeeeef  fdd	Zdedefd
dZG dd dZddeeef dededefddZddgZdS )    )LlamaLogitsProcessorzXllama-cpp-python is not installed. Please install it with "pip install llama-cpp-python")CharacterLevelParserTokenEnforcerFormatEnforcerAnalyzerTokenEnforcerTokenizerDataN)TupleListUnionllmreturnc              	   C   s   |  dd }g }|  |  g}t|  D ]=}||v rqz)| ||gddd  }| |gd}t|t|k}||||f W q   ||ddf Y q|S )N   0utf-8   u   �F)	tokenize	token_bos	token_eosrangen_vocab
detokenizedecodelenappend)r   token_0regular_tokensspecial_tokens	token_idxdecoded_after_0decoded_regularis_word_start_token r!   c/home/ubuntu/veenaModal/venv/lib/python3.10/site-packages/lmformatenforcer/integrations/llamacpp.py_build_regular_tokens_list
   s   r#   c                    s@   t }dtt dtf fdd d}t|  | S )Nsentr   c                    s.   z	 | dW S     | d d  Y S )Nr   r   )r   r   )r$   
decoder_fnr   r!   r"   r&   #   s   z7build_token_enforcer_tokenizer_data.<locals>.decoder_fnF)r#   r	   intstrr   r   r   )r   r   use_bitmaskr!   r%   r"   #build_token_enforcer_tokenizer_data    s   r*   c                   @   sH   e Zd ZdefddZdejej dejej	 dejej	 fddZ
d	S )
LlamaCppLogitsProcessortoken_enforcerc                 C   s"   || _ |r	t|nd | _d | _d S )N)r,   r   analyzermask)selfr,   analyzer!   r!   r"   __init__.   s   
z LlamaCppLogitsProcessor.__init__	input_idsscoresr   c                 C   sr   |  }| jr| j||   | j|j}| jd u r%t|j	t
| _n| jd d| j|< td|| j< |S )NTFz-inf)tolistr-   report_raw_logitsr,   get_allowed_tokensallowed_tokensr.   nponesshapeboolfillfloat)r/   r2   r3   token_sequencer7   r!   r!   r"   __call__3   s   

z LlamaCppLogitsProcessor.__call__N)__name__
__module____qualname__r   r1   nptNDArrayr8   intcsingler?   r!   r!   r!   r"   r+   -   s    2r+   Fcharacter_level_parserr0   c                 C   s&   t | tr	t| } t| |}t||S )zBuild the logits processor function that llama.cpp will use to filter the tokens generated by the model. The result
    can be passed in the logits_processor list that is sent to the call or generate() method of llama.cpp models.)
isinstancer   r*   r   r+   )r   rG   r0   r,   r!   r!   r"   build_llamacpp_logits_processorA   s   


rI   )F)	llama_cppr   r   ImportErrorlmformatenforcerr   r   r   r   numpyr8   numpy.typingtypingrC   r   r	   r
   r'   r(   r;   r#   r*   r+   rI   __all__r!   r!   r!   r"   <module>   s     $
