o
    پi$                     @   s   d dl mZmZmZmZ d dlZd dlmZ d dlm	Z	 d dl
mZ er,d dlmZmZ G dd de	Z			dd	ed
edededejdejdeej dddee dededeej fddZdS )    )TYPE_CHECKINGCallableListOptionalN)two_batch_overlap)AttentionBackend)	SpecInput)ForwardBatchForwardModec                       sJ  e Zd Zdedee f fddZedeg ef fddZd.d
dZ	de
de
fddZde
de
dejdejdeej dddee fddZde
dejdejde
deej dddee deej fddZ			d/dede
dejdejdeej dddee d e
d!e
d"eej fd#d$Zd%d& Zd'd( Zd)d* Zd+e
dd	fd,d-Z  ZS )0TboAttnBackendprimarychildrenc                    s   t    || _|| _d S N)super__init__r   r   )selfr   r   	__class__ [/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/layers/attention/tbo_backend.pyr      s   

zTboAttnBackend.__init__creatorc                    s    |    fddt dD dS )Nc                    s   g | ]}  qS r   r   ).0_r   r   r   
<listcomp>   s    z+TboAttnBackend.init_new.<locals>.<listcomp>   )r   r   )range)clsr   r   r   r   init_new   s   zTboAttnBackend.init_newforward_batchr	   c                 C   sR   | j j|d |jd ur%t| j|jddD ]\}}|jdkr$|j|d qd S d S )N)r   T)strictr   )r   init_forward_metadatatbo_childrenzipr   
batch_size)r   r   childforward_batch_childr   r   r   r!      s   


z$TboAttnBackend.init_forward_metadatamax_bsmax_num_tokensc                 C   s.   | j j||d | jD ]	}|j||d qd S )N)r'   r(   )r   init_cuda_graph_stater   )r   r'   r(   itemr   r   r   r)   #   s   
z$TboAttnBackend.init_cuda_graph_statebs
num_tokensreq_pool_indicesseq_lensencoder_lensforward_moder
   	spec_infoc              
   C   s8   | j j|||||||d | jd|||||||d d S )N)r+   r,   r-   r.   r/   r0   r1   (init_forward_metadata_capture_cuda_graph)fn_namer+   r-   r.   r/   r0   r1   capture_num_tokens)r   r2   *_init_forward_metadata_cuda_graph_children)r   r+   r,   r-   r.   r/   r0   r1   r   r   r   r2   )   s&   


z7TboAttnBackend.init_forward_metadata_capture_cuda_graphseq_lens_sumseq_lens_cpuc	           	      C   s<   | j j||||||||d | jd||||||||d	 d S )N)r+   r-   r.   r6   r/   r0   r1   r7   'init_forward_metadata_replay_cuda_graph)	r3   r+   r-   r.   r/   r0   r1   replay_seq_lens_sumreplay_seq_lens_cpu)r   r8   r5   )	r   r+   r-   r.   r6   r/   r0   r1   r7   r   r   r   r8   H   s*   
z6TboAttnBackend.init_forward_metadata_replay_cuda_graphNr3   r4   r9   r:   c                 C   s  t j||d}|dkr||| ksJ d|| }t j|||d\}}|}|| }|}|| }|dkr7|dksGJ d|d|d|d	|t|||||||||	|
d

}td|td |d|}td|t|d d|}| j\}}t||di | t||di | d S )Nr0   r1   r2   zVFor target-verify or decode mode, num_tokens should be equal to token_num_per_seq * bs)r0   cuda_graph_num_tokensr1   r   znum_tokens_child_left=z num_tokens_child_right=z forward_mode=z num_tokens=)
r3   r+   r-   r.   r/   r0   r1   r4   r9   r:   )	output_bs	seq_slicer   )r   get_token_num_per_seq+compute_split_indices_for_cuda_graph_replaydict'_init_forward_metadata_cuda_graph_splitslicer   getattr)r   r3   r+   r-   r.   r/   r0   r1   r4   r9   r:   token_num_per_seqr,   tbo_split_seq_indextbo_split_token_indexnum_tokens_child_leftnum_tokens_child_rightbs_child_leftbs_child_rightcommon_pre_split_args	args_left
args_right
child_leftchild_rightr   r   r   r5   j   s`   
z9TboAttnBackend._init_forward_metadata_cuda_graph_childrenc                 C   s*   | j  }| jD ]
}|| ksJ q|S r   )r   !get_cuda_graph_seq_len_fill_valuer   )r   ansr%   r   r   r   rQ      s   

z0TboAttnBackend.get_cuda_graph_seq_len_fill_valuec                 O      | j j|i |S r   )r   forward_extendr   argskwargsr   r   r   rT         zTboAttnBackend.forward_extendc                 O   rS   r   )r   forward_decoderU   r   r   r   rY      rX   zTboAttnBackend.forward_decodelayer_idc                 C   s   | j ||S r   )r   get_indexer_metadata)r   rZ   r   r   r   r   r[      s   z#TboAttnBackend.get_indexer_metadata)r   r	   NNN)__name__
__module____qualname__r   r   r   classmethodr   r   r!   intr)   torchTensorr   r   r2   r8   strr5   rQ   rT   rY   r[   __classcell__r   r   r   r   r      s    
	
	
-	
Fr   r3   r>   r=   r+   r-   r.   r/   r0   r
   r1   r4   r9   r:   c                 C   s  t j||d}|d u sJ d|d urDt j||jd ur|jnd|jd ur'|jn||jd ur2|j| nd|jd ur=|j| n|| d}nd }t||| || |d |d}| dkrn|	|| ksbJ d|t|| d |S | d	kr|| }|t|  |d
 |S t	)Nr;   z!encoder_lens is not supported yetr   )r1   start_seq_indexend_seq_indexstart_token_indexend_token_index)r+   r-   r.   r0   r/   r1   r2   zPOnly support num_tokens==bs * token_num_per_seq for target-verify or decode mode)r,   r8   )r6   r7   )
r   r?   split_spec_infostartstoprA   updatesumr*   NotImplementedError)r3   r>   r=   r+   r-   r.   r/   r0   r1   r4   r9   r:   rE   output_spec_inforR   output_seq_lens_cpur   r   r   rB      sZ   




	rB   r\   )typingr   r   r   r   rb   sglang.srt.batch_overlapr   -sglang.srt.layers.attention.base_attn_backendr    sglang.srt.speculative.spec_infor   ,sglang.srt.model_executor.forward_batch_infor	   r
   r   rd   rC   ra   rc   rB   r   r   r   r   <module>   sJ     @	
