o
    ۾i                     @   sh   d dl mZmZ ddlmZ d dlZd dlZ			ddejdee dee deee	  d	ef
d
dZ
dS )    )ListOptional   )LLTokenizerNvocabn_vocab	eos_tokenslicesreturnc              	   C   s,  t | }|du rt | }d}t|d }g }t|D ]N}t | |||dd}	|	dk r:td| d| d|	 |	|ks@J t|d|	 }
t 	| |}|t j
@ sb|t j@ rf|
d	rf|
d
rfd|
 }
||
 q|durt||k r|d t||k svtt j jjtjj}tj|| |||dS )a  
    Create a new tokenizer from a llama.cpp vocab object.
    This is an expensive operation (~1s), so the result should be cached.

    Args:
        vocab: llama_cpp.llama_vocab_p - the vocab object to use
        n_vocab: int - override the size of the vocabulary
        eos_token: int - override the EOS token
        slices: List[str] - configuration for slicer optimization; pass [] to disable,
            or None to use the default configuration
    Ni @  r   r   TzError writing token z to buffer of size z	. Error:    <   >       )tokens	vocab_ptrtokenize_fptrr   r	   )	llama_cppllama_vocab_n_tokensllama_vocab_eosctypescreate_string_bufferrangellama_token_to_piece
ValueErrorbytesllama_token_get_attrLLAMA_TOKEN_ATTR_CONTROLLLAMA_TOKEN_ATTR_USER_DEFINED
startswithendswithappendlencast_libllama_tokenizec_void_pvaluer   from_llamacpp)r   r   r   r	   ntok
buffer_lenbufferr   tokenntokattrfptr r0   G/home/ubuntu/.local/lib/python3.10/site-packages/llguidance/llamacpp.pylltokenizer_from_vocab   sR   



r2   )NNN)typingr   r   r#   r   r   r   llama_vocab_pintstrr2   r0   r0   r0   r1   <module>   s$    
