o
    پiF9                     @   s   d dl Z d dlZd dlZd dlmZmZmZmZmZm	Z	m
Z
mZmZ d dlmZ d dlmZmZ d dlmZ er@d dlmZmZ G dd deZG d	d
 d
eddZG dd dZ	ddedee defddZdS )    N)	TYPE_CHECKINGDictIteratorListOptionalSetTuple	TypedDictUnion)Unpack)GenerationParametersSamplingParameters)	Tokenizer)LlamaLogitsProcessorListc                   @   s   e Zd ZdddZdee dee fddZ		dd
eeee f de	de	de
ee ee f fddZdedefddZdd Zdd Zdd Zdd ZdS )LlamaCppTokenizermodelr   c                 C   s   |  | _| | jg| _| j| _t | _t | _	| | _d | _
z|jj | _	|jj| _
W n tyP   t| D ]}| |g}|| j	|< q>Y nw dd t| j	 dd dD | _	d | _d S )Nc                 S   s   i | ]\}}||qS  r   ).0toktok_idr   r   L/home/ubuntu/.local/lib/python3.10/site-packages/outlines/models/llamacpp.py
<dictcomp>0   s    z.LlamaCppTokenizer.__init__.<locals>.<dictcomp>c                 S   s   | d S )N   r   )xr   r   r   <lambda>2   s    z,LlamaCppTokenizer.__init__.<locals>.<lambda>)key)	token_eoseos_token_id	tokenizerdecode	eos_tokenpad_token_idsetspecial_tokensdict
vocabulary_hf_tokenizer
tokenizer_hf_tokenizer	get_vocabAttributeErrorrangen_vocabsorteditems_hash)selfr   ttoken_piecer   r   r   __init__   s(   


zLlamaCppTokenizer.__init__	token_idsreturnc                 C   s   | j |}|jdddgS )Nutf-8ignoreerrors)r   
detokenizer    )r1   r5   decoded_bytesr   r   r   r    7   s   zLlamaCppTokenizer.decodeTpromptadd_bosspecialc                    sH   t |tr	td jj|jddd||d} fdd|D }||fS )Nz=llama-cpp-python tokenizer doesn't support batch tokenizationr7   r8   r9   )r>   r?   c                    s   g | ]}| j krd ndqS )r   r   )r"   )r   token_idr1   r   r   
<listcomp>F   s    z,LlamaCppTokenizer.encode.<locals>.<listcomp>)
isinstancelistNotImplementedErrorr   tokenizeencode)r1   r=   r>   r?   r5   attention_maskr   rA   r   rG   ;   s   

zLlamaCppTokenizer.encodetokenc                 C   sF   | j d ur!ddlm} | j |g}||s|dkrd| }|S |S )Nr   )SPIECE_UNDERLINEz<0x20> )r'   transformers.file_utilsrJ   convert_tokens_to_string
startswith)r1   rI   rJ   	token_strr   r   r   convert_token_to_stringK   s   
z)LlamaCppTokenizer.convert_token_to_stringc                 C   s   t |tsdS |  | kS )NF)rC   r   __getstate__)r1   otherr   r   r   __eq__V   s   
zLlamaCppTokenizer.__eq__c                 C   s    | j d u rtt| | _ | j S N)r0   hashpickledumpsrA   r   r   r   __hash__[   s   
zLlamaCppTokenizer.__hash__c                 C   s   | j | j| j| jt| jfS )z3Create a stable representation for outlines.caching)r&   r   r!   r"   r.   r$   rA   r   r   r   rQ   `   s   zLlamaCppTokenizer.__getstate__c                 C   s   t d)Nz(Cannot load a pickled llamacpp tokenizer)rE   )r1   stater   r   r   __setstate__j   s   zLlamaCppTokenizer.__setstate__Nr   r   )TT)__name__
__module____qualname__r4   r   intstrr    r
   boolr   rG   rP   rS   rX   rQ   rZ   r   r   r   r   r      s$    


r   c                   @   s   e Zd ZU ee ed< eed< eed< eed< eed< eed< eed< ded	< eeee	e f  ed
< eed< eed< eed< eed< eed< eed< eed< eed< e
ed< dS )LlamaCppParamssuffixtemperaturetop_pmin_p	typical_pseed
max_tokensr   logits_processorstopfrequence_penaltypresence_penaltyrepeat_penaltytop_ktfs_zmirostat_modemirostat_taumirostat_etastreamN)r\   r]   r^   r   r`   __annotations__floatr_   r
   r   ra   r   r   r   r   rb   n   s&   
 rb   F)totalc                   @   s   e Zd ZdZdddZedd Zded	ed
e	e
 fddZdeeee f ded	ed
e	e
 def
ddZdeeee f ded	ed
e	e
 dee f
ddZdefddZdS )LlamaCppaE  Represents a model provided by the `llama-cpp-python` library.

    We wrap models from model providing libraries in order to give all of
    them the same interface in Outlines and allow users to easily switch
    between providers. This class wraps the `llama_cpp.Llama` class from the
    `llama-cpp-python` library.

    r   r   c                 C   s
   || _ d S rT   )r   )r1   r   r   r   r   r4      s   
zLlamaCpp.__init__c                 C   s
   t | jS rT   )r   r   rA   r   r   r   r      s   
zLlamaCpp.tokenizergeneration_parameterssampling_parametersllama_cpp_paramsc                 K   sj  ddl m} t|\}}}d|vr||d< d|vr||d< d|vr2|du r+d|d< n|d |d< n|d d |d< t|\}	}
}}}|	d	krLtd
|
dkrTtdd|vre|dura||d< nd|d< d|vrmd|d< d|vr~|durz||d< nd|d< d|vr|dur||d< nd|d< d|vrd|d< d|d< |durd|v r|d | |S ||g|d< |S )zfPrepare the generation parameters.

        `llama-cpp-python` uses different default values

        r   )r   rk   rh   ri   Nr   beam_searchz<The `llama_cpp_python` library does not support Beam Search.zFThe `llama_cpp_python` library does not allow to take several samples.re         ?rf   g        ro   rd   rn   Frt   rj   )	llama_cppr   dataclassesastuplerE   append)r1   ry   rz   structure_logits_processorr{   r   ri   stop_atrh   samplernum_samplesre   ro   rd   r   r   r   prepare_generation_parameters   s^   



z&LlamaCpp.prepare_generation_parameterspromptsr6   c                 K   sX   t |ts	td| j|||fi |}| j|fi |}|d d d }| j  |S )a  Generate text using `llama-cpp-python`.

        Arguments
        ---------
        prompts
            A prompt or list of prompts.
        generation_parameters
            An instance of `GenerationParameters` that contains the prompt,
            the maximum number of tokens, stop sequences and seed. All the
            arguments to `SequenceGeneratorAdapter`'s `__cal__` method.
        logits_processor
            The logits processor to use when generating text.
        sampling_parameters
            An instance of `SamplingParameters`, a dataclass that contains
            the name of the sampler to use and related parameters as available
            in Outlines.
        llama_cpp_params
            Keyword arguments that can be passed to
            `llama_cpp_python.Llama.__call__`.  The values in `llama_cpp_params`
            supersede the values of the parameters in `generation_parameters` and
            `sampling_parameters`.  See the `llama_cpp_python` documentation for
            a list of possible values: https://llama-cpp-python.readthedocs.io/en/latest/api-reference/#llama_cpp.Llama.__call__

        Returns
        -------
        The generated text.

        @The `llama-cpp-python` library does not support batch inference.choicesr   text)rC   r`   rE   r   r   reset)r1   r   ry   r   rz   r{   
completionresultr   r   r   generate   s   
$
zLlamaCpp.generatec                    s`   t |ts	tdj|||fi |}d|d< j|fi | dtt f fdd}| S )a  Stream text using `llama-cpp-python`.

        Arguments
        ---------
        prompts
            A prompt or list of prompts.
        generation_parameters
            An instance of `GenerationParameters` that contains the prompt,
            the maximum number of tokens, stop sequences and seed. All the
            arguments to `SequenceGeneratorAdapter`'s `__cal__` method.
        logits_processor
            The logits processor to use when generating text.
        sampling_parameters
            An instance of `SamplingParameters`, a dataclass that contains
            the name of the sampler to use and related parameters as available
            in Outlines.
        llama_cpp_params
            Keyword arguments that can be passed to
            `llama_cpp_python.Llama.__call__`.  The values in `llama_cpp_params`
            supersede the values of the parameters in `generation_parameters` and
            `sampling_parameters`.  See the `llama_cpp_python` documentation for
            a list of possible values: https://llama-cpp-python.readthedocs.io/en/latest/api-reference/#llama_cpp.Llama.__call__

        Returns
        -------
        A generator that return strings.

        r   Trt   r6   c                  3   sD    	 zt  } | d d d V  W n ty    j  Y d S w q)NTr   r   r   )nextStopIterationr   r   )r   	generatorr1   r   r   token_generatorZ  s   
z(LlamaCpp.stream.<locals>.token_generator)rC   r`   rE   r   r   r   )r1   r   ry   r   rz   r{   r   r   r   r   rt   '  s   
%	zLlamaCpp.streamadapter_pathc                 C   s"   | j j|drtd| d S )Nr~   z%Failed to apply LoRA from lora path: )r   _modelapply_lora_from_fileRuntimeError)r1   r   r   r   r   	load_lorae  s   zLlamaCpp.load_loraNr[   )r\   r]   r^   __doc__r4   propertyr   r   r   r   rb   r   r
   r`   r   r   r   rt   r   r   r   r   r   rx      sF    
	

]
6
>rx   repo_idfilenamer6   c                 K   sZ   ddl m} d|vrd|d< d|vrd|d< d|vrtd |j| |fi |}t|S )a  Load a model from the `llama-cpp-python` library.

    We use the `Llama.from_pretrained` classmethod that downloads models
    directly from the HuggingFace hub, instead of asking users to specify
    a path to the downloaded model. One can still load a local model
    by initializing `llama_cpp.Llama` directly.

    Arguments
    ---------
    repo_id
        The name of the model repository.
    filename:
        A filename of glob pattern to match the model file in the repo.
    llama_cpp_model_params
        Llama-specific model parameters. See the `llama-cpp-python` documentation
        for the full list: https://llama-cpp-python.readthedocs.io/en/latest/api-reference/#llama_cpp.Llama.__init__

    r   )r   n_ctxverboseFr   aX  The pre-tokenizer in `llama.cpp` handles unicode improperly (https://github.com/ggerganov/llama.cpp/pull/5613)
Outlines may raise a `RuntimeError` when building the regex index.
To circumvent this error when using `models.llamacpp()` you may pass the argument`tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer.from_pretrained(<hf_repo_id>)`
)r   r   warningswarnfrom_pretrainedrx   )r   r   llamacpp_model_paramsr   r   r   r   r   llamacppm  s   r   rT   )r   rV   r   typingr   r   r   r   r   r   r   r	   r
   typing_extensionsr   outlines.generate.apir   r   outlines.models.tokenizerr   r   r   r   r   rb   rx   r`   r   r   r   r   r   <module>   s*    ,U l