o
    -i`                     @   s  d dl mZ d dlmZmZ d dlmZ d dlmZm	Z	 d dl
mZ d dlmZmZ d dlmZ d dlmZmZmZmZ d d	lmZ d d
lmZ d dlmZ d dlmZ d dlmZ ddl m!Z!m"Z"m#Z#m$Z$m%Z%m&Z&m'Z'm(Z(m)Z)m*Z*m+Z+m,Z,m-Z-m.Z. ddl/m0Z0m1Z1 ee2Z3G dd dZ4dS )    )Mapping)Anycast)assert_never)ModelConfigObservabilityConfig)init_logger)MULTIMODAL_REGISTRYMultiModalRegistry)BaseMultiModalProcessorCache)MultiModalDataDictMultiModalEncDecInputsMultiModalInputsMultiModalUUIDDict)BaseMultiModalProcessor)renderer_from_config)TokenizerLike)json_iter_leaves)MultiModalCacheStats   )DecoderOnlyInputsEmbedsInputsEmbedsPromptEncoderDecoderInputsExplicitEncoderDecoderPromptProcessorInputs
PromptTypeSingletonInputsSingletonPrompt
TextPromptTokenInputsTokensPromptembeds_inputstoken_inputs)"is_explicit_encoder_decoder_promptparse_singleton_promptc                       s  e Zd ZdedfdededB dededB ddf
 fddZe	de
dB fd	d
Zde
fddZdedB fddZdedB fddZdedB fddZdee fddZdee dB dee fddZ	dIdeeef dB deeef fddZ	dIdedeeef dB dee fddZdefdd Z	dIdd!deee B d"ed#eeef dB deeef dB d$edB defd%d&Z d'e!de"fd(d)Z#	dId*ee deeef dB dee fd+d,Z$	dIdd!d'e%deeef dB d$edB de&eB fd-d.Z'	dIdd!d'e(deeef dB d$edB de&eB fd/d0Z)	dIdd!de*deeef dB d$edB de+fd1d2Z,d3e+d4e+dB de-fd5d6Z.	dId*e+e/B d7e+dB de0e+e+f fd8d9Z1	dIdd!de2deeef dB d$edB de-fd:d;Z3d<e4de4fd=d>Z5	dIdd!de*deeef dB d$edB de4fd?d@Z6	dIdd!de2deeef dB d$edB de7fdAdBZ8	dIdd!de2deeef dB d$edB de7fdCdDZ9de:dB fdEdFZ;dJdGdHZ<  Z=S )KInputPreprocessorNmodel_configobservability_configmm_registrymm_processor_cachereturnc                    sF   t    || _|| _t|| _|| _|| _|rt | _	d S d | _	d S N)
super__init__r'   r(   r   rendererr)   r*   r   mm_cache_stats)selfr'   r(   r)   r*   	__class__ S/home/ubuntu/veenaModal/venv/lib/python3.10/site-packages/vllm/inputs/preprocess.pyr.   /   s   

zInputPreprocessor.__init__c                 C   s   | j jS r,   )r/   	tokenizerr1   r4   r4   r5   r6   @   s   zInputPreprocessor.tokenizerc                 C   s
   | j  S r,   )r/   get_tokenizerr7   r4   r4   r5   r8   D   s   
zInputPreprocessor.get_tokenizerc                 C       | j d u rtd d S | j jS )Nz@Using None for BOS token id because tokenizer is not initialized)r6   loggerwarning_oncebos_token_idr7   r4   r4   r5   get_bos_token_idG      
z"InputPreprocessor.get_bos_token_idc                 C   r9   )Nz@Using None for EOS token id because tokenizer is not initialized)r6   r:   r;   eos_token_idr7   r4   r4   r5   get_eos_token_idP   r>   z"InputPreprocessor.get_eos_token_idc                 C   sh   | j jstd dS | j du s| j jdu rtd dS t| j jdd}|du r2td |  }|S )z
        Obtain the decoder start token id employed by an encoder/decoder
        model. Returns None for non-encoder/decoder models or if the
        model config is unavailable.
        zSUsing None for decoder start token id because this is not an encoder/decoder model.NzLUsing None for decoder start token id because model config is not available.decoder_start_token_idzaFalling back on <BOS> for decoder start token id because decoder start token id is not available.)r'   is_encoder_decoderr:   r;   	hf_configgetattrr=   )r1   dec_start_token_idr4   r4   r5   get_decoder_start_token_idY   s&   
z,InputPreprocessor.get_decoder_start_token_idc                 C   s   |   }|dus
J |gS )aU  
        Specifically for encoder/decoder models:
        generate a default decoder prompt for when
        the user specifies only the encoder prompt.

        Encoder/decoder models utilize the decoder
        prompt in different ways; as new models are
        added, it is intended that this function
        will be extended to produce differing
        default decoder prompts, depending on the
        model variety.

        Absent a special case, the default behavior
        of this method is to mirror the behavior of
        the HuggingFace (HF) GenerationMixin for a None
        decoder prompt, which is to employ a logit processor
        setting to force the first decoded token to be <BOS>.
        Here, this behavior is approximated by having the
        "default" decoder prompt be <BOS>.

        However, it is possible that in the future
        other models may have different or more
        complex logic for the default decoder prompt.
        This motivates having a special helper method
        for default decoder prompts.

        Returns:

        * prompt_token_ids
        N)r=   )r1   r<   r4   r4   r5   #_get_default_enc_dec_decoder_prompt{   s    z5InputPreprocessor._get_default_enc_dec_decoder_promptdecoder_input_idsc                 C   sJ   |   }|dus
J |du r|  }t|dks|d |kr#|g| }|S )a  
        Prepares `decoder_input_ids` for generation with encoder-decoder models.

        Based on:
        https://github.com/huggingface/transformers/blob/4037a2b5b1278736e566aec12e169100275545ea/src/transformers/generation/utils.py
        specifically,
        `GenerationMixin._prepare_decoder_input_ids_for_generation()`.

        Arguments:

        * decoder_input_ids: input token ids to preprocess

        Returns:

        * Processed token list
        Nr   )rF   rG   len)r1   rH   rA   r4   r4   r5   )_prepare_decoder_input_ids_for_generation   s   
z;InputPreprocessor._prepare_decoder_input_ids_for_generation	overridesc                 C   s0   t ttf  }| jjrd|d< |r|| |S )NFadd_special_tokens)dictstrr   r'   rB   update)r1   rK   kwargsr4   r4   r5   _get_tokenization_kw   s   
z&InputPreprocessor._get_tokenization_kwprompttokenization_kwargsc                 C   sD   |   }| |}| jj}|r|ddr| }|j|fi |S )zn
        Apply the model's tokenizer to a text prompt, returning the
        corresponding token IDs.
        do_lower_caseF)r8   rQ   r'   encoder_configgetlowerencode)r1   rR   rS   r6   rU   r4   r4   r5   _tokenize_prompt   s   	
z"InputPreprocessor._tokenize_promptc                 C   s.   t | ds| jj| j| j| j| jd| _| jS )N_mm_processor)r6   cache)hasattrr)   create_processorr'   r(   r6   r*   rZ   r7   r4   r4   r5   _get_mm_processor   s   
z#InputPreprocessor._get_mm_processormm_uuidsmm_datamm_processor_kwargsr`   c          
      C   s^   |   }|du r
i }|j|||||d}|d }tdd t|D }	|	s-td| d|S )z
        Apply the model's multi-modal processor to a multi-modal prompt,
        returning the corresponding token IDs and metadata.
        N)hf_processor_mm_kwargsrS   r`   	mm_hashesc                 s   s    | ]}t |tV  qd S r,   )
isinstancerN   ).0leafr4   r4   r5   	<genexpr>  s    

z8InputPreprocessor._process_multimodal.<locals>.<genexpr>z*mm_hashes must contain only strings, got: z_. This is likely due to an incorrect custom implementation of MultiModalProcessor.apply method.)r^   applyallr   
ValueError)
r1   rR   ra   rb   rS   r`   mm_processormm_inputrd   contains_only_stringsr4   r4   r5   _process_multimodal   s&   
z%InputPreprocessor._process_multimodalparsed_contentc                 C   sZ   | j jstd|d }|jdkr|jdd}|jdkr td| }t||dd	S )
Nz?You must set `--enable-prompt-embeds` to input `prompt_embeds`.prompt_embeds   r   )dim   z6prompt_embeds must be of shape (seq_len, hidden_size).
cache_salt)rq   ru   )r'   enable_prompt_embedsrk   ndimsqueezecpur"   rV   )r1   rp   rq   r4   r4   r5   _process_embeds  s   


z!InputPreprocessor._process_embedsinputsc                 C   sH   |rd|vs| j d u r|S |d }| j jdkr|| d  S |d | S )N
truncation
max_lengthleft)r6   truncation_side)r1   r{   rS   r}   r4   r4   r5   _truncate_inputs9  s   
z"InputPreprocessor._truncate_inputsc                C   s`   |  |d |}|d }r| j|||dpi ||d}nt|}|d }r.||d< |S )Nprompt_token_idsmulti_modal_datarb   rS   r`   ru   )r   rV   ro   r#   )r1   rp   rS   r`   r   r   r{   ru   r4   r4   r5   _process_tokensJ  s   z!InputPreprocessor._process_tokensc          	      C   sf   |d }| d }r| j||| dpi ||d}n| j||d}t|}| d }r1||d< |S )NrR   r   rb   r   rS   ru   )rV   ro   rY   r#   )	r1   rp   rS   r`   prompt_textr   r{   r   ru   r4   r4   r5   _process_textf  s"   zInputPreprocessor._process_textc                C   s   t |}|d dkr| |d S |d dkr | j|d |dS |d dkr0| j|d ||dS |d dkrC| jt|d d	||dS t| d
S )z
        Extract the singleton inputs from a prompt.

        Arguments:

        * prompt: single encoder or decoder input prompt

        Returns:

        * [`SingletonInputs`][vllm.inputs.data.SingletonInputs] instance
        typeembedscontenttokensr_   textr   rN   )rR   N)r%   rz   r   r   r   r   )r1   rR   rS   r`   parsedr4   r4   r5   _prompt_to_llm_inputs  s*   z'InputPreprocessor._prompt_to_llm_inputsencoder_inputsdecoder_inputsc                 C   s   |d dks|r|d dkrt dtttB |}tttB d B |}|d u r>| jjjdkr4|d  }n| d }t	|}nd|v rFt d| |d }||d< t
||dS )	Nr   r   =Embedding inputs are not supported for encoder-decoder modelswhisperr   r   zJMulti-modal decoder inputs of encoder-decoder models are not supported yet)encoderdecoder)rk   r   r    r   r'   rC   
model_typecopyrJ   r#   r   )r1   r   r   dec_token_idsr4   r4   r5   _build_enc_dec_llm_inputs  s4   

z+InputPreprocessor._build_enc_dec_llm_inputsdecoder_inputs_to_overridec                 C   s   |d dks|r|d dkrt dtttB tB |}tttB dB |}|d dkrad|vr2tdtt|}t|d }|p@|}td|d |d	 |d
 |d d}|d }r]||d< ||fS |d dkrttg d}|po|}||fS t| ||fS )zx
        For encoder/decoder models only:
        Separate Encoder/Decoder inputs from a MultiModalEncDecInputs
        r   r   r   N
multimodalencoder_prompt_token_idszXYou should register an encoder-decoder multi-modal processor for encoder-decoder models.r   	mm_kwargsrd   mm_placeholders)r   r   r   rd   r   ru   token)r   )	rk   r   r    r   r   RuntimeErrorr#   rV   r   )r1   r{   r   r   decoder_prompt_inputsr   ru   r4   r4   r5   _split_enc_dec_mm_inputs  sN   




z*InputPreprocessor._split_enc_dec_mm_inputsc          	      C   s   t |r2tt|}| j|d ||d}|d  }du rd}n| j||d}| jjr1| ||\}}n| jtt|||d}| jjrI| |\}}n|}d}| ||S )ab  
        For encoder/decoder models only:
        Process an input prompt into an
        [`EncoderDecoderInputs`][vllm.inputs.data.EncoderDecoderInputs]
        instance.

        There are two types of input prompts:
        singleton prompts which carry only the
        encoder prompt, and explicit encoder/decoder
        prompts which carry both the encoder and the
        decoder prompts as member variables.

        This function handles the following scenarios:
        * Singleton encoder prompt: extract encoder prompt
          token ids & infer default decoder prompt token ids
        * Explicit encoder/decoder prompt: extract encoder
          and decoder prompt token ids

        Note that for Explicit encoder/decoder prompts,
        each sub-prompt (encoder or decoder prompt) can
        have any possible singleton type; thus this
        method relies on helper functions to obtain
        token ids for the sub-prompts.

        Arguments:

        * prompt: an input prompt

        Returns:

        * [`EncoderDecoderInputs`][vllm.inputs.data.EncoderDecoderInputs]
          instance
        encoder_promptr   decoder_promptNr   )	r$   r   r   r   r'   is_multimodal_modelr   r   r   )	r1   rR   rS   r`   prompt_r   decoder_inputr   r{   r4   r4   r5   _process_encoder_decoder_prompt  s6   *
z1InputPreprocessor._process_encoder_decoder_promptprompt_inputsc                 C   s   d|v rt ttB |}|S )Nr   )r   r    r   )r1   r   r4   r4   r5   _build_decoder_only_llm_inputsf  s
   z0InputPreprocessor._build_decoder_only_llm_inputsc                C   s   | j |||d}| |S )a1  
        For decoder-only models:
        Process an input prompt into a
        [`DecoderOnlyInputs`][vllm.inputs.data.DecoderOnlyInputs] instance.

        Arguments:

        * prompt: input prompt

        Returns:

        * [`DecoderOnlyInputs`][vllm.inputs.data.DecoderOnlyInputs] instance
        r   )r   r   )r1   rR   rS   r`   prompt_compsr4   r4   r5   _process_decoder_only_promptq  s   
z.InputPreprocessor._process_decoder_only_promptc                C   s>   | j jr| j|||dS t|rtd| jtt|||dS )Nr_   z9Cannot pass encoder-decoder prompt to decoder-only modelsr   )r'   rB   r   r$   rk   r   r   r   )r1   rR   rS   r`   r4   r4   r5   _preprocess  s   zInputPreprocessor._preprocessc                C   sf   | j |||d}| jr1| jdur1| jjdd}| j jd7  _| j j|j7  _| j j|j7  _|S )zPreprocess the input prompt.r_   NT)deltar   )r   r*   r0   
make_statsrequestsqueriestotalhits)r1   rR   rS   r`   resr   r4   r4   r5   
preprocess  s   zInputPreprocessor.preprocessc                 C   s   | j }|d u r	d S t | _ |S r,   )r0   r   )r1   r0   r4   r4   r5   stat_mm_cache  s
   zInputPreprocessor.stat_mm_cachec                 C   s.   | j d ur
| j   | jd urd| j_d S d S )NT)r*   clear_cacher0   resetr7   r4   r4   r5   clear_mm_cache  s
   


z InputPreprocessor.clear_mm_cacher,   )r+   N)>__name__
__module____qualname__r	   r   r   r
   r   r.   propertyr   r6   r8   intr=   r@   rF   listrG   rJ   rM   rN   r   rQ   rY   r   r^   r   r   objectr   r   ro   r   r   rz   r   r!   r    r   r   r   r   r   r   r   r   r   tupler   r   r   r   r   r   r   r   r   r   r   r   __classcell__r4   r4   r2   r5   r&   .   sb   		"$

'




(



!
*
0

@
N

 
 
	r&   N)5collections.abcr   typingr   r   typing_extensionsr   vllm.configr   r   vllm.loggerr   vllm.multimodalr	   r
   vllm.multimodal.cacher   vllm.multimodal.inputsr   r   r   r   vllm.multimodal.processingr   vllm.renderersr   vllm.tokenizersr   vllm.utils.jsontreer   vllm.v1.metrics.statsr   datar   r   r   r   r   r   r   r   r   r   r    r!   r"   r#   parser$   r%   r   r:   r&   r4   r4   r4   r5   <module>   s"   @