o
    
۾i&K                     @   s>  d dl mZ d dlmZmZ d dlmZ d dlmZm	Z	 d dl
mZ d dlmZmZ d dlmZ d dlmZmZmZ d d	lmZ d d
lmZ d dlmZmZmZmZmZmZm Z  d dl!m"Z"m#Z# d dl$m%Z% d dl&m'Z' d dl(m)Z) ddl*m+Z+m,Z,m-Z-m.Z.m/Z/m0Z0m1Z1m2Z2m3Z3m4Z4m5Z5m6Z6m7Z7m8Z8 ee9Z:G dd dZ;dS )    )Mapping)Anyoverload)assert_never)ModelConfigObservabilityConfig)init_logger)MULTIMODAL_REGISTRYMultiModalRegistry)BaseMultiModalProcessorCache)MultiModalDataDictMultiModalInputsMultiModalUUIDDict)BaseMultiModalProcessor)renderer_from_config)DecoderDictPromptDecoderOnlyDictPrompt
DictPromptEncoderDecoderDictPromptEncoderDictPromptSingletonDictPrompt	TokPrompt)parse_dec_only_promptparse_enc_dec_prompt)TokenizerLike)json_iter_leaves)MultiModalCacheStats   )DecoderInputsDecoderOnlyInputsEmbedsInputsEmbedsPromptEncoderDecoderInputsEncoderInputsProcessorInputs
PromptTypeSingletonInputs
TextPromptTokenInputsTokensPromptembeds_inputstoken_inputsc                       s.  e Zd ZdedfdededB dededB ddf
 fddZe	de
dB fd	d
Zde
fddZdedB fddZdedB fddZdefddZdee dee fddZ	dHdeeef dB deeef fddZ	dHdedeeef dB dee fddZdefddZ	dHdddeee B d ed!eeef dB deeef dB d"edB defd#d$Zd%e de!fd&d'Z"	dHd(ee deeef dB dee fd)d*Z#	dHddd%e$deeef dB d"edB de%eB fd+d,Z&	dHddd%e'deeef dB d"edB de%eB fd-d.Z(e)	dHddde*deeef dB d"edB de+fd/d0Z,e)	dHddde-deeef dB d"edB de.fd1d0Z,e)	dHddde/deeef dB d"edB de0fd2d0Z,	dHddde1deeef dB d"edB de2fd3d0Z,d(e2de+fd4d5Z3d(e2de.fd6d7Z4	dHd8e2d9e2dB de5fd:d;Z6	dHddde7deeef dB d"edB de5fd<d=Z8	dHddde/deeef dB d"edB de0fd>d?Z9	dHddde:e;B e<B deeef dB d"edB de=fd@dAZ>	dHddde:e;B e<B deeef dB d"edB de=fdBdCZ?de@dB fdDdEZAdIdFdGZB  ZCS )JInputPreprocessorNmodel_configobservability_configmm_registrymm_processor_cachereturnc                    sF   t    || _|| _t|| _|| _|| _|rt | _	d S d | _	d S N)
super__init__r-   r.   r   rendererr/   r0   r   mm_cache_stats)selfr-   r.   r/   r0   	__class__ J/home/ubuntu/.local/lib/python3.10/site-packages/vllm/inputs/preprocess.pyr4   7   s   

zInputPreprocessor.__init__c                 C   s   | j jS r2   )r5   	tokenizerr7   r:   r:   r;   r<   H   s   zInputPreprocessor.tokenizerc                 C   s
   | j  S r2   )r5   get_tokenizerr=   r:   r:   r;   r>   L   s   
zInputPreprocessor.get_tokenizerc                 C       | j d u rtd d S | j jS )Nz@Using None for BOS token id because tokenizer is not initialized)r<   loggerwarning_oncebos_token_idr=   r:   r:   r;   get_bos_token_idO      
z"InputPreprocessor.get_bos_token_idc                 C   r?   )Nz@Using None for EOS token id because tokenizer is not initialized)r<   r@   rA   eos_token_idr=   r:   r:   r;   get_eos_token_idX   rD   z"InputPreprocessor.get_eos_token_idc                 C   s>   t | jjdd}|du rtd |  }|du rtd|S )z
        Obtain the decoder start token id employed by an encoder/decoder
        model. Raises an error if it is not available.
        decoder_start_token_idNzaFalling back on <BOS> for decoder start token id because decoder start token id is not available.z+Cannot find decoder start token id or <BOS>)getattrr-   	hf_configr@   rA   rC   RuntimeError)r7   dec_start_token_idr:   r:   r;   get_decoder_start_token_ida   s   
z,InputPreprocessor.get_decoder_start_token_iddecoder_input_idsc                 C   s.   |   }t|dks|d |kr|g| }|S )a  
        Prepares `decoder_input_ids` for generation with encoder-decoder models.

        Based on:
        https://github.com/huggingface/transformers/blob/4037a2b5b1278736e566aec12e169100275545ea/src/transformers/generation/utils.py
        specifically,
        `GenerationMixin._prepare_decoder_input_ids_for_generation()`.

        Arguments:

        * decoder_input_ids: input token ids to preprocess

        Returns:

        * Processed token list
        r   )rL   len)r7   rM   rG   r:   r:   r;   _prepare_decoder_input_idsw   s
   
z,InputPreprocessor._prepare_decoder_input_ids	overridesc                 C   s0   t ttf  }| jjrd|d< |r|| |S )NFadd_special_tokens)dictstrr   r-   is_encoder_decoderupdate)r7   rP   kwargsr:   r:   r;   _get_tokenization_kw   s   
z&InputPreprocessor._get_tokenization_kwprompttokenization_kwargsc                 C   sD   |   }| |}| jj}|r|ddr| }|j|fi |S )zn
        Apply the model's tokenizer to a text prompt, returning the
        corresponding token IDs.
        do_lower_caseF)r>   rW   r-   encoder_configgetlowerencode)r7   rX   rY   r<   r[   r:   r:   r;   _tokenize_prompt   s   	
z"InputPreprocessor._tokenize_promptc                 C   s.   t | ds| jj| j| j| j| jd| _| jS )N_mm_processor)r<   cache)hasattrr/   create_processorr-   r.   r<   r0   r`   r=   r:   r:   r;   _get_mm_processor   s   
z#InputPreprocessor._get_mm_processormm_uuidsmm_datamm_processor_kwargsrf   c                C   sj   |   }|du r
i }|j|}|j|||||d}|d }	tdd t|	D }
|
s3td|	 d|S )z
        Apply the model's multi-modal processor to a multi-modal prompt,
        returning the corresponding token IDs and metadata.
        N)hf_processor_mm_kwargsrY   rf   	mm_hashesc                 s   s    | ]}t |tV  qd S r2   )
isinstancerS   ).0leafr:   r:   r;   	<genexpr>   s    

z8InputPreprocessor._process_multimodal.<locals>.<genexpr>z*mm_hashes must contain only strings, got: z_. This is likely due to an incorrect custom implementation of MultiModalProcessor.apply method.)rd   infoparse_mm_dataapplyallr   
ValueError)r7   rX   rg   rh   rY   rf   mm_processormm_itemsmm_inputrj   contains_only_stringsr:   r:   r;   _process_multimodal   s(   
z%InputPreprocessor._process_multimodalparsed_contentc                 C   sZ   | j jstd|d }|jdkr|jdd}|jdkr td| }t||dd	S )
Nz?You must set `--enable-prompt-embeds` to input `prompt_embeds`.prompt_embeds   r   )dim   z6prompt_embeds must be of shape (seq_len, hidden_size).
cache_salt)rz   r~   )r-   enable_prompt_embedsrs   ndimsqueezecpur*   r\   )r7   ry   rz   r:   r:   r;   _process_embeds   s   


z!InputPreprocessor._process_embedsinputsc                 C   sH   |rd|vs| j d u r|S |d }| j jdkr|| d  S |d | S )N
truncation
max_lengthleft)r<   truncation_side)r7   r   rY   r   r:   r:   r;   _truncate_inputs  s   
z"InputPreprocessor._truncate_inputsc                C   s`   |  |d |}|d }r| j|||dpi ||d}nt|}|d }r.||d< |S )Nprompt_token_idsmulti_modal_datarh   rY   rf   r~   )r   r\   rx   r+   )r7   ry   rY   rf   r   r   r   r~   r:   r:   r;   _process_tokens  s   z!InputPreprocessor._process_tokensc          	      C   sf   |d }| d }r| j||| dpi ||d}n| j||d}t|}| d }r1||d< |S )NrX   r   rh   r   rY   r~   )r\   rx   r_   r+   )	r7   ry   rY   rf   prompt_textr   r   r   r~   r:   r:   r;   _process_text5  s"   zInputPreprocessor._process_textc                C      d S r2   r:   r7   rX   rY   rf   r:   r:   r;   _prompt_to_llm_inputsS     z'InputPreprocessor._prompt_to_llm_inputsc                C   r   r2   r:   r   r:   r:   r;   r   \  r   c                C   r   r2   r:   r   r:   r:   r;   r   e  r   c                C   sL   d|v r	|  |S d|v r| j||dS d|v r | j|||dS t| dS )z
        Extract the singleton inputs from a prompt.

        Arguments:

        * prompt: single encoder or decoder input prompt

        Returns:

        * [`SingletonInputs`][vllm.inputs.data.SingletonInputs] instance
        rz   r   re   rX   r   N)r   r   r   r   r   r:   r:   r;   r   n  s   
c                 C   s4   |d dkr
t d|d dkrd|vrtd|S )Ntypeembeds=Embedding inputs are not supported for encoder-decoder models
multimodalencoder_prompt_token_idszXYou should register an encoder-decoder multi-modal processor for encoder-decoder models.)rs   rJ   r7   r   r:   r:   r;   _validate_enc_inputs  s   z&InputPreprocessor._validate_enc_inputsc                 C   s   |d dkr
t d|S )Nr   r   r   )rs   r   r:   r:   r;   _validate_dec_inputs  s
   z&InputPreprocessor._validate_dec_inputsencoder_inputsdecoder_inputsc                 C   s   |  |}|d u r|}n| |}|d dkr/t|d }td|d |d |d |d d}n|d d	kr=tg d
}|}nt| | |d |d< |d }rU||d< t||dS )Nr   r   r   r   	mm_kwargsrj   mm_placeholders)r   r   r   rj   r   token)r   r~   )encoderdecoder)r   r   r+   r   r   rO   r\   r"   )r7   r   r   
enc_inputs
dec_inputsenc_inputs_newdec_inputs_newr~   r:   r:   r;   _build_enc_dec_inputs  s.   


z'InputPreprocessor._build_enc_dec_inputsc                C   sD   |d }|d }| j | j|||d|du rddS | j||ddS )a[  
        For encoder/decoder models only:
        Process an input prompt into an
        [`EncoderDecoderInputs`][vllm.inputs.data.EncoderDecoderInputs]
        instance.

        Arguments:

        * prompt: an input prompt

        Returns:

        * [`EncoderDecoderInputs`][vllm.inputs.data.EncoderDecoderInputs]
          instance
        encoder_promptdecoder_promptr   Nr   )r   r   )r   r   )r7   rX   rY   rf   r   r   r:   r:   r;   _process_encoder_decoder_prompt  s    	z1InputPreprocessor._process_encoder_decoder_promptc                C   s   | j |||dS )a1  
        For decoder-only models:
        Process an input prompt into a
        [`DecoderOnlyInputs`][vllm.inputs.data.DecoderOnlyInputs] instance.

        Arguments:

        * prompt: input prompt

        Returns:

        * [`DecoderOnlyInputs`][vllm.inputs.data.DecoderOnlyInputs] instance
        r   )r   r   r:   r:   r;   _process_decoder_only_prompt  s
   z.InputPreprocessor._process_decoder_only_promptc                C   s0   | j jr| jt|||dS | jt|||dS )Nre   r   )r-   rT   r   r   r   r   r   r:   r:   r;   _preprocess  s   zInputPreprocessor._preprocessc                C   sf   | j |||d}| jr1| jdur1| jjdd}| j jd7  _| j j|j7  _| j j|j7  _|S )zPreprocess the input prompt.re   NT)deltar   )r   r0   r6   
make_statsrequestsqueriestotalhits)r7   rX   rY   rf   resr   r:   r:   r;   
preprocess'  s   zInputPreprocessor.preprocessc                 C   s   | j }|d u r	d S t | _ |S r2   )r6   r   )r7   r6   r:   r:   r;   stat_mm_cache9  s
   zInputPreprocessor.stat_mm_cachec                 C   s.   | j d ur
| j   | jd urd| j_d S d S )NT)r0   clear_cacher6   resetr=   r:   r:   r;   clear_mm_cacheB  s
   


z InputPreprocessor.clear_mm_cacher2   )r1   N)D__name__
__module____qualname__r	   r   r   r
   r   r4   propertyr   r<   r>   intrC   rF   rL   listrO   rR   rS   r   rW   r_   r   rd   r   r   objectr   r   rx   r!   r    r   r   r)   r(   r   r'   r   r   r   r#   r   r   r   r   r   r   r&   r   r   r"   r   r   r   r   r%   r   r   r$   r   r   r   r   r   __classcell__r:   r:   r8   r;   r,   6   s   		




)




$
)
,




	r,   N)<collections.abcr   typingr   r   typing_extensionsr   vllm.configr   r   vllm.loggerr   vllm.multimodalr	   r
   vllm.multimodal.cacher   vllm.multimodal.inputsr   r   r   vllm.multimodal.processingr   vllm.renderersr   vllm.renderers.inputsr   r   r   r   r   r   r    vllm.renderers.inputs.preprocessr   r   vllm.tokenizersr   vllm.utils.jsontreer   vllm.v1.metrics.statsr   datar   r   r    r!   r"   r#   r$   r%   r&   r'   r(   r)   r*   r+   r   r@   r,   r:   r:   r:   r;   <module>   s$   $	@