o
    پi                     @   s  d dl Z d dlZd dlmZmZ d dlZd dlmZ d dlm	Z	 d dl
mZ d dlmZ d dlmZ d dlmZmZ d d	lmZ d d
lmZ d dlmZmZmZ e rYd dlmZ eeZedZ de!dee"e j#f fddZ$G dd deZ%G dd deZ&dS )    N)OptionalTuple)envs)speculative_moe_backend_context)TpModelWorker)
ServerArgs)TreeMaskMode)EagleDraftWorkerEAGLEWorkerV2)SpeculativeAlgorithm)draft_tp_context)empty_contextget_bool_env_varis_cuda)segment_packbitsSGLANG_RETURN_ORIGINAL_LOGPROBdevicereturnc                 C   s<   t j rt|  }t| |}||fS d t fS )N)	r   !SGLANG_ENABLE_OVERLAP_PLAN_STREAMgettorchget_device_moduleStreamstream
contextlibnullcontext)r   plan_streamplan_stream_ctx r   _/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/speculative/standalone_worker_v2.py_get_plan_stream   s
   
r    c                   @   sF   e Zd ZdZdededededededed	ed
efddZdd ZdS )StandaloneDraftWorkerzPCustom EagleDraftWorker that doesn't share embeddings/lm_head with target model.server_argsgpu_idtp_rankdp_rankmoe_ep_rankattn_cp_rankmoe_dp_rank	nccl_porttarget_workerc
                 C   s  || _ || _|| _|| _|| _|| _|	| _|| _|| _|j	| _	|j
| _|j| _|j| _t|j| _ddlm}
 t| j| j | j|
_|j}d|_|	 \| _| _t  t|||d|||||d| j| jd| _W d    n1 srw   Y  | jj| _|   |   || jj _|j rt!nt| _!| !| jj"# t#  | $  | %  W d    n1 sw   Y  W d    n1 sw   Y  t&j'| _(t)| j	\| _*| _+d S )Nr   )EagleDraftInputT)r"   r#   r$   pp_rankr%   r&   r'   r(   r)   is_draft_workerreq_to_token_pooltoken_to_kv_pool_allocator),r"   r#   r$   r%   r&   r)   r*   r'   r(   r   speculative_eagle_topktopkspeculative_num_stepsspeculative_num_draft_tokensr   from_stringspeculative_algorithm!sglang.srt.speculative.eagle_infor+   maxALLOC_LEN_PER_DECODEdisable_cuda_graphget_memory_poolr.   r/   r   r   draft_workermodel_runnerdraft_runnerinit_token_mapinit_lm_headenable_dp_attentionr   tp_groupr   init_attention_backendinit_cuda_graphsr   	FULL_MASKtree_mask_moder    r   r   )selfr"   r#   r$   r%   r&   r'   r(   r)   r*   r+   backup_disable_cuda_graphr   r   r   __init__&   st   




 zStandaloneDraftWorker.__init__c                 C   s   dS )zEOverride to prevent sharing embeddings and lm_head with target model.Nr   )rF   r   r   r   r?   |   s   z"StandaloneDraftWorker.init_lm_headN)	__name__
__module____qualname____doc__r   intr   rH   r?   r   r   r   r   r!   #   s,    	

Vr!   c                   @   s>   e Zd Zdedededee dedededed	efd
dZdS )StandaloneWorkerV2r"   r#   r$   r%   r&   r'   r(   r)   r*   c
           
   
   C   s   || _ |j| _|j| _|j| _|j| _|| _|j| _|	| _|j	| _	t
|j| _|	 \| _| _|	jjj|_t|||||||||		| _tjdtj| jd| _tjdtj| jd| _t| j\| _| _d S )Nr   )dtyper   )r"   r0   r1   r2   r3   enable_nan_detectionr#   r   _target_worker	page_sizer   r4   r5   r:   r.   r/   r<   model_configcontext_lencontext_lengthr!   _draft_workerr   emptyint64num_new_pages_per_topkextend_lensr    r   r   )
rF   r"   r#   r$   r%   r&   r'   r(   r)   r*   r   r   r   rH      s>   

zStandaloneWorkerV2.__init__N)rI   rJ   rK   r   rM   r   r   rH   r   r   r   r   rN      s(    	
rN   )'r   loggingtypingr   r   r   sglang.srt.environr   sglang.srt.layers.moe.utilsr   sglang.srt.managers.tp_workerr   sglang.srt.server_argsr   "sglang.srt.speculative.eagle_utilsr   &sglang.srt.speculative.eagle_worker_v2r	   r
    sglang.srt.speculative.spec_infor   !sglang.srt.speculative.spec_utilsr   sglang.srt.utilsr   r   r   
sgl_kernelr   	getLoggerrI   loggerr   stranyAbstractContextManagerr    r!   rN   r   r   r   r   <module>   s0    

`