o
    پiY                     @   s   d dl Z d dlmZmZmZmZ d dlmZmZ er,d dl	m
Z
 d dlmZ d dlmZ G dd dZd	efd
dZdddZdS )    N)TYPE_CHECKINGListOptionalUnion)GenerationParametersSamplingParameters)PreTrainedTokenizerBaseLLMSamplingParamsc                   @   sr   e Zd ZdZdddZdd Zdd	d
deeee f de	de
ded def
ddZdd Zdee fddZdS )VLLMa  Represents a vLLM model.

    We wrap models from model providing libraries in order to give all of
    them the same interface in Outlines and allow users to easily switch
    between providers. This class wraps the `vllm.LLM` class from the
    `vllm` library.

    modelr
   c                 C   s   || _ d | _|  | _d S )N)r   lora_request_get_tokenizer	tokenizer)selfr    r   H/home/ubuntu/.local/lib/python3.10/site-packages/outlines/models/vllm.py__init__   s   zVLLM.__init__c                 C   sZ   t | jdr| j }nt | jdr$t | jjdr| jjj}n	| jj}ntdt|dS )Nget_tokenizerr   zZThe provided LLM instance neither has a `tokenizer` attribute or a `get_tokenizer` method.)r   )hasattrr   r   r   
ValueErroradapt_tokenizer)r   r   r   r   r   r      s   

zVLLM._get_tokenizerNT)sampling_paramsuse_tqdmpromptsgeneration_parameterssampling_parametersr   r   r   c                C   s  ddl m} |du r| }t|\}}	}
|dur||_|	dur+t|	tr(|	g}	|	|_|
dur2|
|_|dur9|gng |_	t|\}}}}}|j
dkrQ||_
||_|dur]|jdkr]||_|durp|jdkrp||_|dkrpd|_|dur||jdkr|||_|dkrd|_| jj||| j|d	}d
d |D }t|}t|d }|dkr|dkr|d d S |dkr|d S |dkrdd |D S |S )a  Generate text using vLLM.

        Arguments
        ---------
        prompts
            A prompt or list of prompts.
        generation_parameters
            An instance of `GenerationParameters` that contains the prompt,
            the maximum number of tokens, stop sequences and seed. All the
            arguments to `SequenceGeneratorAdapter`'s `__cal__` method.
        logits_processor
            The logits processor to use when generating text.
        sampling_parameters
            An instance of `SamplingParameters`, a dataclass that contains
            the name of the sampler to use and related parameters as available
            in Outlines.
        sampling_params
            An instance of `vllm.sampling_params.SamplingParams`. The values
            passed via this dataclass supersede the values of the parameters
            in `generation_parameters` and `sampling_parameters`. See the
            vLLM documentation for more details: https://docs.vllm.ai/en/latest/dev/sampling_params.html.
        use_tqdm
            A boolean in order to display progress bar while inferencing

        Returns
        -------
        The generated text, of shape `(n_batch, n_samples)`. If there are only
        one batch and several samples, the list is of shape `(n_samples)`. If
        this is a batch with several sequences but only one sample the list is
        of shape `(n_batch)`. If there is only one sequence and one sample, a
        string is returned.

        r   r   N   g      ?beam_searchT)r   r   r   c                 S   s   g | ]
}d d |j D qS )c                 S   s   g | ]}|j qS r   )text).0sampler   r   r   
<listcomp>   s    z,VLLM.generate.<locals>.<listcomp>.<listcomp>)outputsr#   batchr   r   r   r%      s    z!VLLM.generate.<locals>.<listcomp>c                 S   s   g | ]}|d  qS )r   r   r'   r   r   r   r%      s    )vllm.sampling_paramsr   dataclassesastuple
max_tokens
isinstancestrstopseedlogits_processorsnbest_oftop_ptop_krepetition_penaltytemperatureuse_beam_searchr   generater   len)r   r   r   logits_processorr   r   r   r   r,   stop_atr0   samplernum_samplesr4   r5   r7   results
batch_sizesample_sizer   r   r   r9   +   s\   +

zVLLM.generatec                 O   s   t d)zReturn a text generator.

        Streaming is not yet available for `vllm.LLM`.

        TODO: Implement the streaming functionality ourselves.

        z4Streaming is not available for the vLLM integration.)NotImplementedError)r   argskwargsr   r   r   stream   s   zVLLM.streamadapter_pathc                 C   s0   ddl m} |d u rd | _d S ||d|| _d S )Nr   )LoRARequestr   )vllm.lora.requestrG   r   )r   rF   rG   r   r   r   	load_lora   s   
zVLLM.load_lora)r   r
   )__name__
__module____qualname____doc__r   r   r   r.   r   r   r   r   boolr9   rE   rI   r   r   r   r   r      s&    
	
kr   
model_namec                 K   s$   ddl m} || fi |}t|S )a6  Load a vLLM model.

    Arguments
    ---------
    model_name
        The name of the model to load from the HuggingFace hub.
    vllm_model_params
        vLLM-specific model parameters. See the vLLM code for the full list:
        https://github.com/vllm-project/vllm/blob/main/vllm/entrypoints/llm.py

    r   r	   )vllmr
   r   )rO   vllm_model_paramsr
   r   r   r   r   rP      s   rP   r   r   returnc                    sL   ddl m   _tj_dttt	f dtf fdd}|_
S )a  Adapt a tokenizer to use to compile the FSM.

    The API of Outlines tokenizers is slightly different to that of `transformers`. In
    addition we need to handle the missing spaces to Llama's tokenizer to be able to
    compile FSMs for this model.

    Parameters
    ----------
    tokenizer
        The tokenizer of the model.

    Returns
    -------
    PreTrainedTokenizerBase
        The adapted tokenizer.
    r   )SPIECE_UNDERLINEtokenrR   c                    s6    | g}t| tu r|  s| dkrd| S |S )Nz<0x20> )convert_tokens_to_stringtyper.   
startswith)rT   stringrS   r   r   r   convert_token_to_string   s   z0adapt_tokenizer.<locals>.convert_token_to_string)transformersrS   	get_vocab
vocabularysetall_special_tokensspecial_tokensr   r.   bytesr[   )r   r[   r   rZ   r   r      s   
 r   )r   r   rR   r   )r*   typingr   r   r   r   outlines.generate.apir   r   r\   r   rP   r
   r)   r   r   r.   r   r   r   r   r   <module>   s      