try:
    from llama_cpp import Llama, LogitsProcessor
except ImportError:
    raise ImportError('llama-cpp-python is not installed. Please install it with "pip install llama-cpp-python"')

from lmformatenforcer import CharacterLevelParser, TokenEnforcer, FormatEnforcerAnalyzer, TokenEnforcerTokenizerData
import numpy as np
import numpy.typing as npt
from typing import Tuple, List, Union


def _build_regular_tokens_list(llm: Llama) -> List[Tuple[int, str, bool]]:
    token_0 = llm.tokenize(b"0")[-1]
    regular_tokens = []
    special_tokens = [llm.token_bos(), llm.token_eos()]
    for token_idx in range(llm.n_vocab()):
        if token_idx in special_tokens:
            continue
        try:
            # A token whose decoding grows when another token precedes it is a word-start token.
            decoded_after_0 = llm.detokenize([token_0, token_idx]).decode('utf-8')[1:]
            decoded_regular = llm.detokenize([token_idx]).decode('utf-8')
            is_word_start_token = len(decoded_after_0) > len(decoded_regular)
            regular_tokens.append((token_idx, decoded_after_0, is_word_start_token))
        except:
            # Some tokens are not valid UTF-8 on their own.
            regular_tokens.append((token_idx, '\ufffd', False))
    return regular_tokens


def build_token_enforcer_tokenizer_data(llm: Llama) -> TokenEnforcerTokenizerData:
    regular_tokens = _build_regular_tokens_list(llm)

    def decoder_fn(sent: List[int]) -> str:
        try:
            return llm.detokenize(sent).decode('utf-8')
        except:
            # If the sequence ends mid multi-byte character, drop the last token and retry.
            return decoder_fn(sent[:-1])

    use_bitmask = False
    return TokenEnforcerTokenizerData(regular_tokens, decoder_fn, llm.token_eos(), use_bitmask, llm.n_vocab())


class LlamaCppLogitsProcessor:
    def __init__(self, token_enforcer: TokenEnforcer, analyze):
        self.token_enforcer = token_enforcer
        self.analyzer = FormatEnforcerAnalyzer(token_enforcer) if analyze else None
        self.mask = None

    def __call__(self, input_ids: npt.NDArray[np.intc], scores: npt.NDArray[np.single]) -> npt.NDArray[np.single]:
        token_sequence = input_ids.tolist()
        if self.analyzer:
            self.analyzer.report_raw_logits(token_sequence, scores.tolist())
        allowed_tokens = self.token_enforcer.get_allowed_tokens(token_sequence).allowed_tokens
        if self.mask is None:
            self.mask = np.ones(scores.shape, bool)
        else:
            self.mask.fill(True)
        # Clear the mask for allowed tokens and push every other logit to -inf.
        self.mask[allowed_tokens] = False
        scores[self.mask] = float('-inf')
        return scores


def build_llamacpp_logits_processor(llm: Union[Llama, TokenEnforcerTokenizerData],
                                    character_level_parser: CharacterLevelParser,
                                    analyze: bool = False) -> LlamaCppLogitsProcessor:
    """Build the logits processor function that llama.cpp will use to filter the tokens generated by the model. The result
    can be passed in the logits_processor list that is sent to the call or generate() method of llama.cpp models."""
    if isinstance(llm, Llama):
        llm = build_token_enforcer_tokenizer_data(llm)
    token_enforcer = TokenEnforcer(llm, character_level_parser)
    return LlamaCppLogitsProcessor(token_enforcer, analyze)


__all__ = ['build_llamacpp_logits_processor', 'build_token_enforcer_tokenizer_data']
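# Usage sketch: how the processor built by build_llamacpp_logits_processor can be passed
# to llama-cpp-python's logits_processor argument, as the docstring above describes.
# The model path, schema, and prompt below are illustrative placeholders only.
#
#     from llama_cpp import Llama, LogitsProcessorList
#     from lmformatenforcer import JsonSchemaParser
#     from lmformatenforcer.integrations.llamacpp import build_llamacpp_logits_processor
#
#     llm = Llama(model_path="model.gguf")  # placeholder path to a local GGUF model
#     schema = {"type": "object", "properties": {"name": {"type": "string"}}, "required": ["name"]}
#     logits_processor = build_llamacpp_logits_processor(llm, JsonSchemaParser(schema))
#     result = llm("Output a JSON object with a name field:",
#                  logits_processor=LogitsProcessorList([logits_processor]),
#                  max_tokens=100)
#     print(result["choices"][0]["text"])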