o
    -iK;                     @   s   d dl Z d dlZd dlmZmZ d dlmZ d dlmZ d dl	Z	d dl
Z
d dlmZ d dlmZ d dlmZ d dlmZmZmZ d d	lmZmZ d d
lmZ d dlmZ eddG dd dZG dd deZG dd deZdS )    N)ABCabstractmethod)	dataclass)	Annotated)Field)ModelConfig)VLLMValidationError)EmbedsPrompt
TextPromptTokensPrompt)get_prompt_componentsparse_raw_prompts)TokenizerLike)AsyncMicrobatchTokenizerT)frozenc                   @   s~   e Zd ZU dZdZedB ed< 	 dZedB ed< 	 dZe	ed< 	 dZ
edB ed< 	 dZe	dB ed	< 	 d
ededB fddZdS )RenderConfigz2Configuration to control how prompts are prepared.N
max_lengthtruncate_prompt_tokensTadd_special_tokens
cache_saltFneeds_detokenizationmodel_configreturnc                 C   sX   | j }|du s|dkr|S |dk r|j}| j}|dur*||kr*td|d|d|S )z:Validate and normalize `truncate_prompt_tokens` parameter.Nr   ztruncate_prompt_tokens=z# cannot be greater than max_length=z*. Please select a smaller truncation size.)r   max_model_lenr   
ValueError)selfr   r   r    r   V/home/ubuntu/veenaModal/venv/lib/python3.10/site-packages/vllm/entrypoints/renderer.pyverify_truncate_prompt_tokens,   s   z*RenderConfig.verify_truncate_prompt_tokens)__name__
__module____qualname____doc__r   int__annotations__r   r   boolr   strr   r   r   r   r   r   r   r      s   
 r   c                       s  e Zd ZdZ	ddededB f fddZedee	e B e	e
 B e	e	e
  B ded	e	e fd
dZeddddee	e B e	e
 B e	e	e
  B dB dee	e B dB ded	e	eeB  fddZ		ddee	e B dee
eddf dB dedB d	e	e fddZ  ZS )BaseRenderera  
    Base class for unified input processing and rendering.

    The Renderer serves as a unified input processor that consolidates
    tokenization, chat template formatting, and multimodal input handling
    into a single component.
    It converts high-level API requests (OpenAI-style JSON) into token IDs and
    multimodal features ready for engine consumption.

    Key responsibilities:
    - Convert text prompts to token sequences with proper special tokens
    - Apply chat templates and format conversations
    - Handle multimodal inputs (images, audio, etc.) when applicable
    - Manage prompt truncation and length validation
    - Provide clean separation between API layer and engine core
    Nr   	tokenizerc                    s   t    || _|| _d S N)super__init__r   r(   )r   r   r(   	__class__r   r   r+   Q   s   

zBaseRenderer.__init__prompt_or_promptsconfigr   c                      t )aL  
        Convert text or token inputs into engine-ready TokensPrompt objects.

        This method accepts text or token inputs and produces a
        list of [`TokensPrompt`][vllm.inputs.data.TokensPrompt] objects
        for the engine.

        Args:
            prompt_or_prompts: One of:
                - `str`: Single text prompt.
                - `list[str]`: Batch of text prompts.
                - `list[int]`: Single pre-tokenized sequence.
                - `list[list[int]]`: Batch of pre-tokenized sequences.
            config: Render configuration controlling how prompts are prepared
                (e.g., tokenization and length handling).

        Returns:
            list[TokensPrompt]: Engine-ready token prompts.

        Raises:
            ValueError: If input formats are invalid or length limits exceeded.
        NotImplementedError)r   r.   r/   r   r   r   render_promptZ   s   zBaseRenderer.render_promptr.   prompt_embedsr5   c                   r0   )a  
        Convert text/token and/or base64-encoded embeddings inputs into
        engine-ready prompt objects using a unified RenderConfig.

        At least one of `prompt_or_prompts` or `prompt_embeds` must be
        provided and non-empty. If both are omitted or empty (e.g., empty
        string and empty list), a `ValueError` is raised.

        Args:
            prompt_or_prompts: Text or token inputs to include.
            prompt_embeds: Base64-encoded bytes (or list thereof) containing a
                torch-saved tensor to be used as prompt embeddings.
            config: Render configuration controlling how prompts are prepared
                (e.g., tokenization and length handling).

        Returns:
            list[Union[TokensPrompt, EmbedsPrompt]]:
                Engine-ready prompt objects.

        Raises:
            ValueError: If both `prompt_or_prompts` and `prompt_embeds`
                are omitted or empty (decoder prompt cannot be empty), or if
                length limits are exceeded.
        r1   )r   r.   r5   r/   r   r   r   render_prompt_and_embedsy   s    z%BaseRenderer.render_prompt_and_embedsr   r   )ger   c                    sR   | j js
tddddtdtffdd t|tr$ fdd	|D S  |gS )
z@Load and validate base64-encoded embeddings into prompt objects.z?You must set `--enable-prompt-embeds` to input `prompt_embeds`.r5   )	parameterembedr   c                    s   t j 2 t jttj| dddt dd}t	|t j
r*|jt jt jt jfv s,J | }W d    n1 s:w   Y  | dkrR|d}| dksRJ d ur]| d  }t|d} d urj |d< |S )	NT)validatecpu)weights_onlymap_location   r   )r5   r   )torchsparsecheck_sparse_tensor_invariantsloadioBytesIOpybase64	b64decodedevice
isinstanceTensordtypefloat32bfloat16float16to_densedimsqueezer	   )r9   tensorembeds_prompt)r   r   r   r   _load_and_validate_embed   s,   



zABaseRenderer.load_prompt_embeds.<locals>._load_and_validate_embedc                    s   g | ]} |qS r   r   ).0r9   )rS   r   r   
<listcomp>   s    z3BaseRenderer.load_prompt_embeds.<locals>.<listcomp>)r   enable_prompt_embedsr   bytesr	   rH   list)r   r5   r   r   r   )rS   r   r   r   load_prompt_embeds   s   

zBaseRenderer.load_prompt_embedsr)   NN)r   r    r!   r"   r   r   r+   r   r&   rX   r#   r   r   r3   rW   r	   r6   r   r   rY   __classcell__r   r   r,   r   r'   ?   sN    	"
$
r'   c                       s  e Zd Z		d&dededB deeef dB f fddZdee	e B e	e
 B e	e	e
  B ded	e	e fd
dZddddee	e B e	e
 B e	e	e
  B dB dee	e B dB ded	e	eeB  fddZde	e
 de
dB d	e	e
 fddZdeeB dede
dB d	efddZdede
dB de
dB dededB d	efddZ	d'de	e
 de
dB de
dB dedB dedB d	efdd Zd	efd!d"Z			d(de	e
 de
dB dedB d#edB d	ef
d$d%Z  ZS ))CompletionRendererNr   r(   async_tokenizer_poolc                    s   t  || || _d | _d S r)   )r*   r+   r]   async_tokenizer)r   r   r(   r]   r,   r   r   r+      s   
zCompletionRenderer.__init__r.   r/   r   c                   sD     jdkrg S  fddt|D }tj| I dH S )zImplementation of prompt rendering for completion-style requests.

        Uses async tokenizer pooling for improved performance. See base class
        for detailed parameter documentation.
        r   c                 3   s     | ]}j | d V  qdS ))r/   r   N)_create_prompt)rT   prompt_inputr/   r   r   r   r   	<genexpr>   s    
z3CompletionRenderer.render_prompt.<locals>.<genexpr>N)r   r   r   asynciogather)r   r.   r/   tasksr   ra   r   r3      s   	z CompletionRenderer.render_promptr4   r5   c                   sr   | | j}|dkrg S g }|dur|| |||j |du s&|dkr(|S | j||dI dH }|| |S )z
        Render text/token prompts and/or precomputed embedding prompts. At
        least one of `prompt_or_prompts` or `prompt_embeds` must be provided.
        r   N )r.   r/   )r   r   extendrY   r   r3   )r   r.   r5   r/   r   renderedtoken_promptsr   r   r   r6      s&   
z+CompletionRenderer.render_prompt_and_embeds	token_idsr   c                 C   s*   |du r|S |t |kr|S || d S )z#Apply truncation to token sequence.N)len)r   rj   r   r   r   r   _maybe_apply_truncation  s
   z*CompletionRenderer._maybe_apply_truncationr`   c                    s`   t |\}}}|d ur| ||j||j|jI d H S |d ur.| ||j||j|jI d H S tr)   )r   _create_prompt_from_token_idsr   r   r   _create_prompt_from_textr   r2   )r   r`   r/   r   promptprompt_token_ids_r   r   r   r_     s&   

	z!CompletionRenderer._create_prompttextr   r   r   c                    sr   |   }| jjdur| jjddr| }|du r%|||dI dH }n|||d|dI dH }| |j|||S )z#Tokenize text input asynchronously.Ndo_lower_caseF)r   T)r   
truncationr   )_get_async_tokenizerr   encoder_configgetlower_create_tokens_prompt	input_ids)r   rr   r   r   r   r   r^   encodedr   r   r   rn   9  s"   	
z+CompletionRenderer._create_prompt_from_textFr   c                    s@   |  ||}d}|r|  }||I dH }| j||||dS )z:Optionally detokenize token IDs and build a tokens prompt.N)rj   r   r   ro   )rl   ru   decodery   )r   rj   r   r   r   r   ro   r^   r   r   r   rm   Z  s   	z0CompletionRenderer._create_prompt_from_token_idsc                 C   sl   | j }|dur	|S | j}|du rtd| jdu rt|}n| j|}|du r1t|}|| j|< || _ |S )z0Get or create async tokenizer using shared pool.Nz0No tokenizer available for text input processing)r^   r(   r   r]   r   rw   )r   r^   r(   r   r   r   ru   q  s   


z'CompletionRenderer._get_async_tokenizerro   c                 C   sf   |durt ||krtd| dt | ddt |dt|d}|dur)||d< |dur1||d	< |S )
zCreate validated TokensPrompt.Nz'This model's maximum context length is z# tokens. However, your request has z> input tokens. Please reduce the length of the input messages.input_tokens)r8   value)rp   r   ro   )rk   r   r   )r   rj   r   r   ro   tokens_promptr   r   r   ry     s   
z(CompletionRenderer._create_tokens_promptrZ   )F)NNN)r   r    r!   r   r   dictr   r+   r&   rX   r#   r   r   r3   rW   r	   r6   rl   r
   r_   r%   rn   rm   ru   ry   r[   r   r   r,   r   r\      s    

"

"


'
r\   )rc   rC   abcr   r   dataclassesr   typingr   rE   r?   pydanticr   vllm.configr   vllm.exceptionsr   vllm.inputs.datar	   r
   r   vllm.inputs.parser   r   vllm.tokenizersr   vllm.utils.async_utilsr   r   r'   r\   r   r   r   r   <module>   s&   ( 	