o
    پiN                     @   s   d dl Z d dlmZ d dlZd dlmZmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZ d dlmZmZ d d	lmZmZmZ e rId d
lmZ e eZedZG dd deZdS )    N)Optional)#speculative_moe_a2a_backend_contextspeculative_moe_backend_context)TpModelWorker)
ServerArgs)EAGLEWorker)SpeculativeAlgorithm)draft_tp_contextload_token_map)empty_contextget_bool_env_varis_cuda)segment_packbitsSGLANG_RETURN_ORIGINAL_LOGPROBc                   @   s>   e Zd Zdedededee dedededed	efd
dZdS )StandaloneWorkerserver_argsgpu_idtp_rankdp_rankmoe_ep_rankattn_cp_rankmoe_dp_rank	nccl_porttarget_workerc
                 C   s  || _ |j| _|j| _|j| _|j| _|| _|j| _|	| _|j	| _	t
|j| _|	jjj|_|j}
d|_|	 \| _| _|jd urRt|j| _dt| j d|_nd | _t B t / t  tj| |||d|||||d| j| jd W d    n1 sw   Y  W d    n1 sw   Y  W d    n1 sw   Y  |
| jj _|j rt!nt| _!| !| jj"6 t # t  | #  | $  W d    n1 sw   Y  W d    n1 sw   Y  W d    n1 sw   Y  t%j&dt%j'| jd| _(t%j&dt%j'| jd| _)d S )NTz{"hot_vocab_size": }r   )r   r   r   pp_rankr   r   r   r   r   is_draft_workerreq_to_token_pooltoken_to_kv_pool_allocator )dtypedevice)*r   speculative_eagle_topktopkspeculative_num_stepsspeculative_num_draft_tokensenable_nan_detectionr   r!   r   	page_sizer   from_stringspeculative_algorithmmodel_runnermodel_configcontext_lencontext_lengthdisable_cuda_graphget_memory_poolr   r   speculative_token_mapr
   hot_token_idlenjson_model_override_argsr   r   r   r   __init__draft_model_runnerenable_dp_attentionr	   tp_groupinit_attention_backendinit_cuda_graphstorchemptyint64num_new_pages_per_topkextend_lens)selfr   r   r   r   r   r   r   r   r   backup_disable_cuda_graphr   r   \/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/speculative/standalone_worker.pyr4      s   

  
  
zStandaloneWorker.__init__N)__name__
__module____qualname__r   intr   r   r4   r   r   r   rA   r      s(    	
r   )loggingtypingr   r:   sglang.srt.layers.moe.utilsr   r   sglang.srt.managers.tp_workerr   sglang.srt.server_argsr   #sglang.srt.speculative.eagle_workerr    sglang.srt.speculative.spec_infor   !sglang.srt.speculative.spec_utilsr	   r
   sglang.srt.utilsr   r   r   
sgl_kernelr   	getLoggerrB   loggerr   r   r   r   r   rA   <module>   s    
