o
    پiN                     @   sF  d dl Z d dlmZ d dlmZmZ d dlZd dlmZm	Z	m
Z
 e Ze	 Ze
 Zes-er3d dlmZ deej deej deej d	efd
dZG dd deZejddfdejdeej dejdejdejdedededededeej deej fddZ	d'dejdejdejd ejd!ejd"ejd#ejd$ejdefd%d&ZdS )(    N)IntEnum)ListOptional)is_cudais_hipis_npu)build_tree_kernel_efficient
score_list
token_listparents_listnum_draft_tokenc           
      C   s   t j| ddd} t j|dd}t j| |d dd}|j}t |j}t j||dd}t|dkr>t j|d d dd}n|d j	d }	t j
|	d|d jd}|||fS )N   dim)indexr   r   device)torchcatflattentopkindicessortvaluesgatherlenshapeemptyr   )
r	   r
   r   r   ss_token_list
top_scorestop_scores_indexdraft_tokensparent_list
batch_size r%   V/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/speculative/eagle_utils.pyorganize_draft_results   s   
r'   c                   @   s   e Zd ZdZdZdZdS )TreeMaskModer   r      N)__name__
__module____qualname__	FULL_MASK	QLEN_ONLYQLEN_ONLY_BITPACKINGr%   r%   r%   r&   r(   )   s    r(   verified_idr#   r!   r"   seq_lensseq_lens_sumr   
spec_stepsnum_verify_tokenstree_mask_modetree_mask_bufposition_bufc                 C   s  t j| d|fdd }| }|j}|
d urC|
}|	tjkr&|d ny|	tj	kr1|d nn|	tj
kr<|d nctd|	|	tjkrXt j|| | fdt j|d}nG|	tj	krt jt jt jg}ttt|d d }t j|| f|| |d}n|	tj
krt j|| || |  fd|d	}ntd|	t jd
||fd|t jd}|\}}}|d ur|}nt j|| f|t jd}trt jj|jt jd|||||||||||	 nt||||||||||||	 ||||||fS )Nr   r   Tr   z"Invalid tree mask: tree_mask_mode=)dtyper         r      r   )r   r8   )r8   )r   r   	unsqueezer   numelr   r(   r.   fill_r/   r-   NotImplementedErrorfullbooluint8uint16uint32intmathceillog2zeroslongr   _is_npuopsnpur   toint64sgl_build_tree_kernel_efficient)r0   r#   r!   r"   r1   r2   r   r3   r4   r5   r6   r7   bsr   	tree_maskpacked_dtypespacked_dtype_idxretrive_bufretrive_indexretrive_next_tokenretrive_next_sibling	positionsr%   r%   r&   r   /   s   






	
r   r   predictsaccept_indexaccept_token_num
candidatesrV   rW   rX   target_predictc	           
   
   C   s`   t strddlm}	 |	| |||||||d ntr+ddlm}	 |	| |||||||d | ||fS )Nr   )verify_tree_greedy)rZ   r[   r\   r]   rV   rW   rX   r^   )_is_cuda_is_hip
sgl_kernelr_   rK   (sgl_kernel_npu.sample.verify_tree_greedy)
rZ   r[   r\   r]   rV   rW   rX   r^   r   r_   r%   r%   r&   verify_tree_greedy_func   s2   

rd   )r   )rF   enumr   typingr   r   r   sglang.srt.utilsr   r   r   r`   ra   rK   rb   r   rP   TensorrE   r'   r(   r-   rd   r%   r%   r%   r&   <module>   s    
	

{	