o
    پi	                     @   sJ   d dl mZ d dlZd dlmZ d dlmZ eddG dd deZdS )	    )	dataclassN)ForwardMetadata)ForwardBatchT)kw_onlyc                   @   s   e Zd ZU eed< eed< eed< eed< ejed< ejed< edejdejd	ed
ejdd f
ddZe	dejdejde
dd fddZdS )BailingLinearMetadatanum_prefillsnum_prefill_tokensnum_decodes
batch_sizehas_initial_states	q_lengthsquery_start_locmamba_cache_indicesbsseq_lensreturnc              
   C   s(   t || ||jd ddt||  dS )zTThis path is run during CUDA graph capture, i.e. decode only, so `num_prefills` is 0r   )r
   r   r   r	   r   r   r   r   )r   shapetorch	ones_likediffr   r   r   r    r   f/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/layers/attention/linear/linear_metadata.pyprepare_decode   s   z$BailingLinearMetadata.prepare_decodeforward_batchc           	   
   C   s   |j du r| j|||j|jdS t|j}|j }t|j| }|j}|dus(J |dk}|d|d  }t|j||||||| dS )zEThis path cannot run with CUDA graph, as it contains extend requests.Nr   r      )r
   r   r   r   r   r	   r   r   )	extend_num_tokensr   r
   r   lenextend_seq_lensextend_prefix_lensr   r   )	clsr   r   r   r   r   r	   context_lens_tensorr   r   r   r   prepare_mixed%   s0   

z#BailingLinearMetadata.prepare_mixedN)__name__
__module____qualname__int__annotations__r   Tensorstaticmethodr   classmethodr   r"   r   r   r   r   r   	   s:   
 

r   )dataclassesr   r   1sglang.srt.layers.attention.mamba.mamba2_metadatar   ,sglang.srt.model_executor.forward_batch_infor   r   r   r   r   r   <module>   s    