o
    -i<                     @   s  d dl mZ d dlmZ d dlmZ d dlmZm	Z	m
Z
mZmZmZ d dlmZ d dlmZ d dlmZ d dlmZmZ d	d
lmZmZmZmZmZmZmZ d	dlm Z  d	dl!m"Z"m#Z#m$Z$m%Z% errd dl&m'Z'mZm(Z( d dl)m*Z* ee+Z,ede-d dZ.ede$dZ/ede$ddZ0G dd dee0 Z1G dd dee/ Z2G dd dee/ Z3eddG dd de	e/ Z4G dd  d Z5d!S )"    )Mapping)	dataclass)Lock)TYPE_CHECKINGGenericLiteralProtocolTypeVarcast)BaseDummyOptions)ObservabilityConfig)init_logger)TokenizerLikecached_tokenizer_from_config   )BaseMultiModalProcessorCacheBaseMultiModalReceiverCacheMultiModalProcessorOnlyCacheMultiModalProcessorSenderCacheMultiModalReceiverCacheShmObjectStoreReceiverCacheShmObjectStoreSenderCache)MultiModalInputs)BaseDummyInputsBuilderBaseMultiModalProcessorBaseProcessingInfoInputProcessingContext)ModelConfigr   
VllmConfig)SupportsMultiModalNr   )bound_I_I_coT)r!   	covariantc                   @   s"   e Zd ZdZdedefddZdS )ProcessingInfoFactory
    Constructs a
    [`BaseMultiModalProcessor`][vllm.multimodal.processing.BaseMultiModalProcessor]
    instance from the context.
    ctxreturnc                 C      d S N )selfr'   r+   r+   U/home/ubuntu/veenaModal/venv/lib/python3.10/site-packages/vllm/multimodal/registry.py__call__0   s   zProcessingInfoFactory.__call__N)__name__
__module____qualname____doc__r   r#   r.   r+   r+   r+   r-   r%   )   s    r%   c                   @   s&   e Zd ZdZdedee fddZdS )DummyInputsBuilderFactoryz
    Constructs a
    [`BaseDummyInputsBuilder`][vllm.multimodal.processing.BaseDummyInputsBuilder]
    instance from the context.
    infor(   c                 C   r)   r*   r+   )r,   r4   r+   r+   r-   r.   =   s    z"DummyInputsBuilderFactory.__call__N)r/   r0   r1   r2   r"   r   r.   r+   r+   r+   r-   r3   6   s    r3   c                
   @   s<   e Zd ZdZdddedee dedB dee fdd	ZdS )
MultiModalProcessorFactoryr&   Ncacher4   dummy_inputsr7   r(   c                C   r)   r*   r+   )r,   r4   r8   r7   r+   r+   r-   r.   G   s   z#MultiModalProcessorFactory.__call__)	r/   r0   r1   r2   r"   r   r   r   r.   r+   r+   r+   r-   r5   @   s    r5   )frozenc                   @   sN   e Zd ZU ee ed< ee ed< ee ed< dddede	dB fdd	Z
dS )
_ProcessorFactoriesr4   	processorr8   Nr6   r'   r7   c                C   s$   |  |}| |}| j|||dS )Nr6   r4   r8   r;   )r,   r'   r7   r4   dummy_inputs_builderr+   r+   r-   build_processorV   s   

z#_ProcessorFactories.build_processor)r/   r0   r1   r%   r"   __annotations__r5   r3   r   r   r>   r+   r+   r+   r-   r:   P   s   
 r:   c                   @   s0  e Zd ZdZdddeeef dB fddZdddefdd	Z	dddd
ddde
dB deeef dB dedB deeef f
ddZdddddde
dB dedB deeef fddZdee dee dee fddZd9ddZ		d:dddddedB defddZ	d;dddddddedB defd d!Z	d;ddd"dddddedB de
dB dee f
d#d$Z	d;dddd%ddd&eeef dB de
dB dedB dedB defd'd(Zdddefd)d*Zd+d,ded- fd.d/Z d+d,de
dB fd0d1Z!d+d,de"dB fd2d3Z#d+d,de$dB fd4d5Z%d+d,d6e&de$dB fd7d8Z'dS )<MultiModalRegistryzL
    A registry that dispatches data processing according to the model.
    model_configr   r(   Nc                    s6    j sdS  fdd j jD }t|dkr|S dS )z
        Extract multimodal dummy options from model config.

        Returns None if no configurable options are found, otherwise returns
        a mapping of modality names to their dummy options.
        Nc                    s&   i | ]} j | d ur|qS r*   )multimodal_configget_dummy_options).0mrA   optr+   r-   
<dictcomp>s   s
    z:MultiModalRegistry._extract_mm_options.<locals>.<dictcomp>r   )rB   limit_per_promptlen)r,   rA   
mm_optionsr+   rF   r-   _extract_mm_optionsf   s   
z&MultiModalRegistry._extract_mm_optionsc                    sP   |j sdS | j|dd}| }|  t fdd|D r&td dS dS )z
        Checks if the model supports multimodal inputs.
        Returns True if the model is multimodal with any non-zero supported
        modalities, otherwise returns False, effectively running in
        text-only mode.
        FN	tokenizerc                 3   s    | ]
}  |d kV  qdS )r   N)get_limit_per_prompt)rD   modality	mm_configr+   r-   	<genexpr>   s
    
z@MultiModalRegistry.supports_multimodal_inputs.<locals>.<genexpr>zcAll limits of multimodal modalities supported by the model are set to 0, running in text-only mode.T)is_multimodal_model_create_processing_infoget_supported_mm_limitsget_multimodal_configalllogger	info_once)r,   rA   r4   supported_modalitiesr+   rQ   r-   supports_multimodal_inputs{   s   z-MultiModalRegistry.supports_multimodal_inputs)r7   profiler_limitsobservability_configr7   r]   r^   c                   s   |j si S | j|||d}|du r|j}dd | D  |jj|j d}|dur5 fdd| D S | j| |d}dd |d	  D S )
z
        Get the maximum number of tokens per data item from each modality based
        on underlying model configuration.
        r6   Nc                 S   s   i | ]\}}|d kr|dqS )r   r   r+   )rD   rP   limitr+   r+   r-   rH      s    zJMultiModalRegistry.get_max_tokens_per_item_by_modality.<locals>.<dictcomp>)seq_len	mm_countsc                    s&   i | ]\}}  |d d kr||qS )r   )get)rD   rP   
max_tokensra   r+   r-   rH      s
    )ra   r;   c                 S   s$   i | ]\}}|t d d |D qS )c                 s   s    | ]}|j V  qd S r*   )get_num_embeds)rD   itemr+   r+   r-   rS      s    zTMultiModalRegistry.get_max_tokens_per_item_by_modality.<locals>.<dictcomp>.<genexpr>)sum)rD   rP   placeholdersr+   r+   r-   rH      s    mm_placeholders)rT   create_processorallowed_mm_limitsitemsr4   get_mm_max_tokens_per_itemmax_model_lenget_dummy_mm_inputs)r,   rA   r7   r]   r^   r;   max_tokens_per_item	mm_inputsr+   rd   r-   #get_max_tokens_per_item_by_modality   s4   

z6MultiModalRegistry.get_max_tokens_per_item_by_modality)r7   r^   c                C   s    |j si S | j|||d}|jS )z
        Get the maximum number of multi-modal input instances for each modality
        that are allowed per prompt for a model class.
        r6   )rT   rj   rk   )r,   rA   r7   r^   r;   r+   r+   r-   get_mm_limits_per_prompt   s   z+MultiModalRegistry.get_mm_limits_per_promptr;   r4   r8   c                   s    dt dt f fdd}|S )a/  
        Register a multi-modal processor to a model class. The processor
        is constructed lazily, hence a factory method should be passed.

        When the model receives multi-modal data, the provided function is
        invoked to transform the data into a dictionary of model inputs.
        	model_clsr(   c                    s,   d| j v rtd|  t d| _| S )N_processor_factoryzfModel class %s already has a multi-modal processor registered to %s. It is overwritten by the new one.r<   )__dict__rY   warningr:   ru   )rt   r8   r4   r;   r,   r+   r-   wrapper   s   
z6MultiModalRegistry.register_processor.<locals>.wrapper)r    )r,   r;   r4   r8   ry   r+   rx   r-   register_processor   s   z%MultiModalRegistry.register_processorr   c                 C   s0   ddl m} ||\}}t|dsJ td|S )Nr   )get_model_architectureru   r   ) vllm.model_executor.model_loaderr{   hasattrr
   )r,   rA   r{   rt   _r+   r+   r-   _get_model_cls   s   
z!MultiModalRegistry._get_model_clszObservabilityConfig | NonerN   c                 C   s   |d u rt |}t|||dS )N)r^   )r   r   )r,   rA   r^   rN   r+   r+   r-   _create_processing_ctx  s
   z)MultiModalRegistry._create_processing_ctxrM   c                C   s(   |  |}|j}| |||}||S r*   )r   ru   r   r4   )r,   rA   r^   rN   rt   	factoriesr'   r+   r+   r-   rU     s   

z*MultiModalRegistry._create_processing_info)rN   r7   c                C   sB   |j st|j d| |}|j}| |||}|j||dS )zT
        Create a multi-modal processor for a specific model and tokenizer.
        z is not a multimodal modelr6   )rT   
ValueErrormodelr   ru   r   r>   )r,   rA   r^   rN   r7   rt   r   r'   r+   r+   r-   rj     s   
z#MultiModalRegistry.create_processor)r7   r^   r;   ra   c                C   s   |j }|du r| j|||d}|du r|j}|jj||| |d}|j|j|j|j	|j
d}|d }	t|	}
|
|k rE|	dg||
   |S )z
        Create dummy data for profiling the memory usage of a model.

        The model is identified by `model_config`.
        Nr6   )r`   ra   rK   )promptmm_datahf_processor_mm_kwargstokenization_kwargsprompt_token_idsr   )rn   rj   rk   r8   get_dummy_processor_inputsrL   applyr   r   r   r   rJ   extend)r,   rA   ra   r7   r^   r;   r`   processor_inputsrq   r   	total_lenr+   r+   r-   ro   2  s.   z&MultiModalRegistry.get_dummy_mm_inputsc                 C   sD   |j sdS | |}|sdS t|dksJ dtt|}|| S )zY
        Get the maximum length of the encoder input for encoder-decoder models.
        r   r   zdEncoder-decoder models are expected to implement the multimodal interface with at most one modality.)is_encoder_decoderrr   rJ   nextiter)r,   rA   rc   first_modalityr+   r+   r-   get_encdec_max_encoder_len\  s   
z-MultiModalRegistry.get_encdec_max_encoder_lenvllm_configr   )Nprocessor_onlylrushmc                 C   s`   |j }| |s
d S | }|jdkrd S |j}|jdko$|jdkp$|j}|s)dS | }|jS )Nr   r   r   )	rA   r\   rW   mm_processor_cache_gbparallel_config_api_process_countdata_parallel_sizedata_parallel_external_lbmm_processor_cache_type)r,   r   rA   rR   r   is_ipc_supportedr+   r+   r-   _get_cache_typep  s   



z"MultiModalRegistry._get_cache_typec                 C   sX   |  |}|du rdS |dkrt|jS |dkrt|jS |dkr%t|S td|)z4Return a `BaseMultiModalProcessorCache`, if enabled.Nr   r   r   Unknown cache type: )r   r   rA   r   r   r   r,   r   
cache_typer+   r+   r-   processor_cache_from_config  s   


z.MultiModalRegistry.processor_cache_from_configc                 C   s    |  |}|du rdS t|jS )z4Return a `MultiModalProcessorOnlyCache`, if enabled.N)r   r   rA   r   r+   r+   r-    processor_only_cache_from_config  s   

z3MultiModalRegistry.processor_only_cache_from_configc                 C   s6   |  |}|dv rdS |dkrt|jS td|)z>Return a `BaseMultiModalReceiverCache` for the engine process.)Nr   r   Nr   r   )r   r   rA   r   r   r+   r+   r-   !engine_receiver_cache_from_config  s   

z4MultiModalRegistry.engine_receiver_cache_from_configshared_worker_lockc                 C   s6   |  |}|dv rdS |dkrt||S td|)z>Return a `BaseMultiModalReceiverCache` for the worker process.)Nr   r   Nr   r   )r   r   r   )r,   r   r   r   r+   r+   r-   !worker_receiver_cache_from_config  s   

z4MultiModalRegistry.worker_receiver_cache_from_config)rA   r   r(   r   )NNr*   )(r/   r0   r1   r2   r   strr   rL   boolr\   r   intr   rr   rs   r5   r"   r%   r3   rz   r   r   r   r   r   rU   r   rj   r   ro   r   r   r   r   r   r   r   r   LockTyper   r+   r+   r+   r-   r@   a   s    
 

4



"



*



r@   N)6collections.abcr   dataclassesr   multiprocessing.synchronizer   r   typingr   r   r   r   r	   r
   vllm.config.multimodalr   vllm.config.observabilityr   vllm.loggerr   vllm.tokenizersr   r   r7   r   r   r   r   r   r   r   inputsr   
processingr   r   r   r   vllm.configr   r   %vllm.model_executor.models.interfacesr   r/   rY   typer    r"   r#   r%   r3   r5   r:   r@   r+   r+   r+   r-   <module>   s0    $	
