o
    پiy                     @   s  d dl Z d dlZd dlmZmZ d dl mZ d dlZd dlmZ d dl	m
Z
 d dlmZ d dlmZ d dlmZ d	d
lmZmZ e jddG dd de
Ze jddG dd deZeddG dd deZedddG dd dZG dd dejjeZdS )    N)ABCabstractmethod)	dataclass)PretrainedConfig)MultimodalDataItem)EmbeddingResult)BaseMultimodalProcessor)logger   )compute_retention_mask%replace_offsets_with_tokens_per_frameT)kw_onlyc                   @   s$   e Zd ZU eeeeef  ed< dS )EVSDataItem	thw_gridsN)__name__
__module____qualname__listtupleint__annotations__ r   r   X/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/multimodal/evs/evs_module.pyr       s   
 r   c                   @   s    e Zd ZU ejed< dd ZdS )VideoEVSDataItempre_chunked_input_idsc                 C   s   |   sJ d S )N)is_videoselfr   r   r   __post_init__)   s   zVideoEVSDataItem.__post_init__N)r   r   r   torchTensorr   r   r   r   r   r   r   %   s   
 
r   c                   @   sd   e Zd ZU dZee ed< dejdee	eef  de
dedede	ejee	eef  f fd	d
ZdS )EVSEmbeddingResultam  
    Embedding result that includes per-frame token counts after EVS pruning.

    After pruning, each frame retains a different number of tokens based on its
    dissimilarity to the previous frame. This metadata is needed downstream to
    adjust the input_ids placeholder spans to match the actual embedding sizes.

    Attributes:
        embedding: The pruned video embeddings tensor.
        num_tokens_per_frame: Actual retained token count for each frame.
            For example, [256, 180, 195, 256] means frame 0 kept all 256 tokens
            (first frame is never pruned), while frames 1-2 were pruned.
    num_tokens_per_frame	input_idsoffsetsitemextend_prefix_lenextend_seq_lenreturnc          	      C   s   t ||ksJ t|tsJ dt| |j}|j}t|| j||d}tj	||j
|jd}t||}||||  }t ||ksOJ dt | d| ||fS )NzExpected VideoEVSDataItem, got )r   r"   frame_offsets_inclusivefiller_token_id)dtypedevicez3Input ids length changed after redistribution, got z != )len
isinstancer   typer   	pad_valuer   r"   r   tensorr+   r,   r   get_mm_items_offset)	r   r#   r$   r%   r&   r'   r   r*   input_ids_listr   r   r   'redistribute_pruned_frames_placeholders?   s2   	
z:EVSEmbeddingResult.redistribute_pruned_frames_placeholdersN)r   r   r   __doc__r   r   r   r   r    r   r   r4   r   r   r   r   r!   -   s    
 r!   )frozenr   c                   @   s*   e Zd ZU eed< dZeed< dd ZdS )	EVSConfigvideo_pruning_rater
   spatial_merge_sizec                 C   s(   | j dkr
| j dk sJ d| j d S )N        g      ?zLVideo pruning rate must be between 0.0 and 1.0, got self.video_pruning_rate=)r8   r   r   r   r   r   f   s   
zEVSConfig.__post_init__N)r   r   r   floatr   r9   r   r   r   r   r   r   r7   a   s   
 r7   c                       s   e Zd ZdZeededefddZede	e
 dejfddZded	ejd
ejddf fddZde	e
 defddZ  ZS )EVSa^  
    Base class for video models that support EVS pruning.

    Subclass this alongside your model class and implement the static `create_evs_config`.
    On initialization, if video_pruning_rate > 0, this mixin replaces the model's
    get_video_feature() method with a wrapper that applies EVS pruning.

    Example: See `NemotronH_Nano_VL_V2`
    configr(   c                 C      t zJExtract EVS parameters from model config. Must be implemented by subclass.NotImplementedError)r=   r   r   r   create_evs_configw   s   zEVS.create_evs_configitemsc                 C   r>   r?   r@   )r   rC   r   r   r   get_video_feature}   s   zEVS.get_video_featureargskwargsNc                    sv   t    | jj}| j| _| || _| jjdk| _	| j	r0t
d| d| j d | j| _d S t
d| d d S )Nr:   z[EVS] enabled for z []z[EVS] requested on model z) but is disabled for pruning_rate == 0.0.)super__init__	__class__r   rD   original_get_video_featurerB   
evs_configr8   evs_enabledr	   info	evs_video)r   r=   rE   rF   
model_namerJ   r   r   rI      s   

zEVS.__init__c                    s@  t d| jj d| jd t|dksJ dt| |d }t|ts.J d| | jj}| jj	 | 
|g}|jdkrG|dd}|jd	ksQJ |jg }g } fd
d|jD }t|||jddD ]*\}}	t||	 |d}
||
 }|| |	d }|
|djdd }|| qjt|}t||dS )a  
        Apply EVS pruning to video embeddings.

        Args:
            items: List containing a single VideoEVSDataItem with video features.

        Returns:
            EVSEmbeddingResult with pruned embeddings and actual token counts per frame.
        z[EVS] beginning for model z [evs_config=self.evs_config=rG   r
   zExpected 1 item, got r   z3Expected VideoEVSDataItem with modality VIDEO, got       c                    s&   g | ]\}}}|| |  d   qS )rS   r   ).0thwmerger   r   
<listcomp>   s   & z!EVS.evs_video.<locals>.<listcomp>T)strict)video_size_thwr9   q)dim)	embeddingr"   )r	   debugrJ   r   rL   r-   r.   r   r8   r9   rK   ndimflattenr   zipsplitr   appendreshapesumtolistextendr   catr!   )r   rC   r%   r]   videos_featuresfinal_embeddingsr"   sizessingle_videor\   retention_mask	preserved
num_framestokens_per_framefinal_embeddings_tensorr   rX   r   rO      sR   



zEVS.evs_video)r   r   r   r5   staticmethodr   r   r7   rB   r   r   r   r    rD   typingAnyrI   r!   rO   __classcell__r   r   rQ   r   r<   l   s"    
r<   )dataclassesrv   abcr   r   r   r   transformersr   "sglang.srt.managers.schedule_batchr   %sglang.srt.mem_cache.multimodal_cacher   /sglang.srt.multimodal.processors.base_processorr   sglang.utilsr	   evs_corer   r   r   r   r!   r7   nnModuler<   r   r   r   r   <module>   s(   


3
