o
    
۾i                     @   s   d dl mZ d dlmZmZ d dlmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZ eeZded	e
d
edeeef deeef f
ddZG dd dZdS )    )Mapping)ModelConfig
VllmConfig)init_logger)BaseMultiModalProcessor)MultiModalRegistry)set_default_torch_num_threads)compute_mm_encoder_budgetmodel_configmm_registry	processor	mm_countsreturnc                 C   sD   |j j| j|d}|dur|S |j| ||d}dd |d  D S )zx
    Get the maximum number of tokens per data item from each modality based
    on underlying model configuration.
    )seq_lenr   N)r   r   c                 S   s$   i | ]\}}|t d d |D qS )c                 s   s    | ]}|  V  qd S N)get_num_embeds).0item r   R/home/ubuntu/.local/lib/python3.10/site-packages/vllm/multimodal/encoder_budget.py	<genexpr>'   s    z6get_mm_max_toks_per_item.<locals>.<dictcomp>.<genexpr>)sum)r   modalityplaceholdersr   r   r   
<dictcomp>&   s    z,get_mm_max_toks_per_item.<locals>.<dictcomp>mm_placeholders)infoget_mm_max_tokens_per_itemmax_model_lenget_dummy_mm_inputsitems)r
   r   r   r   max_tokens_per_item	mm_inputsr   r   r   get_mm_max_toks_per_item   s   

r#   c                       sr   e Zd ZdZdededdf fddZded	ede	eef fd
dZ
defddZdefddZdddZ  ZS )MultiModalBudgetzDHelper class to calculate budget information for multi-modal models.vllm_configr   r   Nc              	      s  t    |j | _}|j | _}|j| _|j| _t P ||}|j	||d}|| _
| }|d uo6|j|jj}|jj | _fdd|D }	fdd|D }
|	|
B }t|||t|ddW d    n1 sow   Y  |
r~tdt|
 fdd	|D   fd
d	|	D }t| \}}|| _|| _tttf  }tttf  }| D ]\}}| ||\||< ||< q|| _|| _|| _ d S )N)cachec                    s    h | ]}  |d d kr|qS r   getr   r   )	mm_limitsr   r   	<setcomp>H   s
    z,MultiModalBudget.__init__.<locals>.<setcomp>c                    s$   h | ]} r |d d kr|qS r'   r(   r*   )enable_mm_embedsr+   r   r   r,   N   s       )r   zBenable_mm_embeds is True; modalities handled as embedding-only: %sc                       i | ]}| v r| | qS r   r   r*   )all_mm_max_toks_per_itemr   r   r   g   
    z-MultiModalBudget.__init__.<locals>.<dictcomp>c                    r/   r   r   r*   )active_mm_max_toks_per_itemr   r   r   l   r1   )!super__init__r
   scheduler_configr   max_num_seqsmax_num_reqsr    processor_only_cache_from_configcreate_processorr&   get_multimodal_configr-   r   supported_mm_limitsallowed_mm_limitsr+   r#   dictfromkeyslogger	info_oncetupler	   encoder_compute_budgetencoder_cache_sizestrintr    _get_max_itemsmm_max_toks_per_itemmm_max_items_per_promptmm_max_items_per_batch)selfr%   r   r
   r5   r&   r   	mm_configr;   tower_modalitiesembed_only_modalitiesactive_modalitiestower_mm_max_toks_per_itemrB   rC   rH   rI   r   max_toks_per_item	__class__)r2   r0   r-   r+   r   r4   /   sl   



!
	


zMultiModalBudget.__init__r   r!   c                 C   s   |dkrdS |    }dkrdS || }| j| }tdt|| j| }| j}| j}|js5t||j| }|| }	tdt||	}
||
fS )Nr   )r   r   r.   )	get_encoder_budgetr+   maxminr   r5   r7   enable_chunked_prefillmax_num_batched_tokens)rJ   r   r!   encoder_budgetmax_encoder_items_per_batchmm_limitmax_items_per_promptr5   r7   max_decoder_items_per_batchmax_items_per_batchr   r   r   rF      s.   
zMultiModalBudget._get_max_itemsc                 C   s"   | j }t| dd d\}}|S )Nc                 S   s   | d S )Nr.   r   )xr   r   r   <lambda>   s    z?MultiModalBudget.get_modality_with_max_tokens.<locals>.<lambda>)key)rG   rT   r    )rJ   rG   r   _r   r   r   get_modality_with_max_tokens   s   z-MultiModalBudget.get_modality_with_max_tokensc                 C   s   t | j| jS r   )rU   rB   rC   rJ   r   r   r   rS      s   z#MultiModalBudget.get_encoder_budgetc                 C   s   | j d ur| j   d S d S r   )r&   clear_cacherc   r   r   r   reset_cache   s   
zMultiModalBudget.reset_cache)r   N)__name__
__module____qualname____doc__r   r   r4   rD   rE   rA   rF   rb   rS   re   __classcell__r   r   rQ   r   r$   ,   s&    \

*r$   N)collections.abcr   vllm.configr   r   vllm.loggerr   vllm.multimodal.processingr   vllm.multimodal.registryr   vllm.utils.torch_utilsr   "vllm.v1.core.encoder_cache_managerr	   rf   r?   rD   rE   r#   r$   r   r   r   r   <module>   s(   


