o
    -i                     @   s   d dl mZmZ d dlmZ d dlmZmZ d dlm	Z	m
Z
 d dlZd dlmZ d dlmZ d dlmZmZmZmZ d dlmZ d	d
lmZ ddlmZ e
dedZeeZeG dd dZG dd dee	e Z dS )    )ABCabstractmethod)Mapping)	dataclassfield)GenericTypeVarN)Image)AudioDummyOptionsBaseDummyOptionsImageDummyOptionsVideoDummyOptions)init_logger   )MultiModalDataDict   )BaseProcessingInfo_I)boundc                   @   s^   e Zd ZU dZeee B ed< eed< e	e
dZeeef ed< e	e
dZeeef ed< dS )ProcessorInputszq
    Represents the keyword arguments to
    [`vllm.multimodal.processing.BaseMultiModalProcessor.apply`][].
    promptmm_data)default_factoryhf_processor_mm_kwargstokenization_kwargsN)__name__
__module____qualname____doc__strlistint__annotations__r   r   dictr   r   objectr    r%   r%   d/home/ubuntu/veenaModal/venv/lib/python3.10/site-packages/vllm/multimodal/processing/dummy_inputs.pyr      s   
 r   c                       s:  e Zd ZdZdeddf fddZedeee	f defdd	Z
e	dd
e	deee	f deeef dB defddZ	dd
e	deee	f deeef dB defddZddde	de	dedB deej fddZddde	de	de	dedB deej f
ddZddde	de	de	de	dedB deej fddZ  ZS ) BaseDummyInputsBuilderz_
    Abstract base class that constructs the dummy data to profile
    multi-modal models.
    inforeturnNc                    s   t    || _d S N)super__init__r(   )selfr(   	__class__r%   r&   r,   /   s   

zBaseDummyInputsBuilder.__init__	mm_countsc                 C      t )zD
        Build the text input corresponding to `mm_counts`.
        NotImplementedError)r-   r0   r%   r%   r&   get_dummy_text4   s   z%BaseDummyInputsBuilder.get_dummy_textseq_len
mm_optionsc                 C   r1   )a  
        Build the multimodal input which, after processing, results in
        the maximum possible number of placeholder tokens.

        Args:
            seq_len: Sequence length
            mm_counts: Count of items per modality
            mm_options: Configurable options per modality (optional).
                       If None, use model defaults for backward compatibility.
                       If provided, models can use these to customize dummy
                       data generation.
        r2   )r-   r5   r0   r6   r%   r%   r&   get_dummy_mm_data;   s   z(BaseDummyInputsBuilder.get_dummy_mm_datac                 C   s.   |  |}| |||}ddi}t|||dS )a,  
        Build the input which, after processing, results in
        the maximum possible number of placeholder tokens.

        Args:
            seq_len: Sequence length
            mm_counts: Count of items per modality
            mm_options: Configurable options per modality (optional)
        
truncationF)r   r   r   )r4   r7   r   )r-   r5   r0   r6   
dummy_textdummy_mm_datar   r%   r%   r&   get_dummy_processor_inputsP   s   
z1BaseDummyInputsBuilder.get_dummy_processor_inputs)	overrideslength
num_audiosr<   c                C   sR   |dkrg S |r|j r|j |krtd|j | t||j }t|f}|g| S )Nr   zOaudio.length override (%d) exceeds model's maximum length (%d), will be ignored)r=   loggerwarningminnpzeros)r-   r=   r>   r<   audior%   r%   r&   _get_dummy_audiosl   s   


z(BaseDummyInputsBuilder._get_dummy_audioswidthheight
num_imagesc                C   s   |dkrg S |r4|j r|j |krtd|j | t||j }|jr4|j|kr.td|j| t||j}tjd||fdd}|g| S )Nr   zMimage.width override (%d) exceeds model's maximum width (%d), will be ignoredzOimage.height override (%d) exceeds model's maximum height (%d), will be ignoredRGB   )color)rF   r?   r@   rA   rG   r	   new)r-   rF   rG   rH   r<   imager%   r%   r&   _get_dummy_images   s*   


z(BaseDummyInputsBuilder._get_dummy_images
num_frames
num_videosc                C   s   |dkrg S |rJ|j r|j |krtd|j | t||j }|jr4|j|kr.td|j| t||j}|jrJ|j|krDtd|j| t||j}tj|||dfdtjd}|g| S )Nr   z]video.num_frames override (%d) exceeds model's maximum number of frames (%d), will be ignoredzMvideo.width override (%d) exceeds model's maximum width (%d), will be ignoredzOvideo.height override (%d) exceeds model's maximum height (%d), will be ignored   rJ   )dtype)	rO   r?   r@   rA   rF   rG   rB   fulluint8)r-   rF   rG   rO   rP   r<   videor%   r%   r&   _get_dummy_videos   s:   	



z(BaseDummyInputsBuilder._get_dummy_videosr*   )r   r   r   r   r   r,   r   r   r   r!   r4   r   r   r7   r   r;   r
   r    nptNDArrayrE   r   r	   rN   r   rV   __classcell__r%   r%   r.   r&   r'   )   s|    


!

'r'   )!abcr   r   collections.abcr   dataclassesr   r   typingr   r   numpyrB   numpy.typingrW   PILr	   vllm.config.multimodalr
   r   r   r   vllm.loggerr   inputsr   contextr   r   r   r?   r   r'   r%   r%   r%   r&   <module>   s    