o
    پi#                     @   s@   d dl Z d dlmZmZ d dlmZ e eZG dd dZ	dS )    N)
ServerArgsget_global_server_args)is_blackwellc                   @   s   e Zd ZdededefddZdededefd	d
Zdd Z	dd Z
dd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd  Zd!d" Zd#d$ Zd%d& Zd'd( Zd)d* Zd+d, Zd-d. Zd/d0 Zd1d2 Zd3S )4DraftBackendFactoryserver_argstopkspeculative_num_stepsc                 C   s$   || _ || _|| _|| _|j| _d S )N)r   draft_model_runnerr   r   #speculative_draft_attention_backenddraft_attn_backend)selfr   r	   r   r    r   V/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/speculative/draft_utils.py__init__
   s
   zDraftBackendFactory.__init__backend_namebackend_maperror_templatec                 C   sJ   | j r| j nt| j|}|d u r| jj}||vr t|j|d||  S )N)backend_type)r   getattrr   attention_backend
ValueErrorformat)r   r   r   r   r   r   r   r   _create_backend   s   

z#DraftBackendFactory._create_backendc                 C   sV   | j dkrd S | j| j| j| jt s| jn| j| j| j| j| j	| j
d
}| d|dS )N   

flashinfertritonaiterfa3hybrid_linear_attnflashmla
trtllm_mha
trtllm_mlansaascenddecode_attention_backendzAEAGLE is not supported in decode attention backend {backend_type})r   !_create_flashinfer_decode_backend_create_triton_decode_backend_create_aiter_decode_backend_create_fa3_decode_backendr   _create_flashmla_decode_backend!_create_trtllm_mha_decode_backend!_create_trtllm_mla_decode_backend_create_nsa_decode_backend_create_ascend_decode_backendr   )r   r   r   r   r   create_decode_backend'   s(   
z)DraftBackendFactory.create_decode_backendc                 C   s\   | j | j| j| jt s| jn| j| j| j| j| j| j	d
}| j
jdkr%dnd}| ||dS )Nr   decoder%   prefill_attention_backendz:EAGLE is not supported in attention backend {backend_type})"_create_flashinfer_prefill_backend_create_triton_prefill_backend_create_aiter_prefill_backend_create_fa3_prefill_backendr    _create_flashmla_prefill_backend"_create_trtllm_mha_prefill_backend"_create_trtllm_mla_prefill_backend_create_nsa_prefill_backend_create_ascend_prefill_backendr   speculative_attention_moder   )r   r   r   r   r   r   create_draft_extend_backendB   s,   z/DraftBackendFactory.create_draft_extend_backendc                 C      ddl m} || j| j| jS )Nr   ) NativeSparseAttnMultiStepBackend)'sglang.srt.layers.attention.nsa_backendr>   r	   r   r   )r   r>   r   r   r   r-   ^      z.DraftBackendFactory._create_nsa_decode_backendc                 C      ddl m} || jddS )Nr   )NativeSparseAttnBackendFskip_prefill)r?   rB   r	   )r   rB   r   r   r   r9   g      z/DraftBackendFactory._create_nsa_prefill_backendc                 C   sD   t  jsddlm} || j| j| jS ddlm} || j| j| jS )Nr   )FlashInferMultiStepDraftBackend)"FlashInferMLAMultiStepDraftBackend)	r   use_mla_backend.sglang.srt.layers.attention.flashinfer_backendrF   r	   r   r   2sglang.srt.layers.attention.flashinfer_mla_backendrG   )r   rF   rG   r   r   r   r&   l   s   z5DraftBackendFactory._create_flashinfer_decode_backendc                 C   r=   )Nr   )TritonMultiStepDraftBackend)*sglang.srt.layers.attention.triton_backendrK   r	   r   r   )r   rK   r   r   r   r'   ~   r@   z1DraftBackendFactory._create_triton_decode_backendc                 C   r=   )Nr   )AiterMultiStepDraftBackend))sglang.srt.layers.attention.aiter_backendrM   r	   r   r   )r   rM   r   r   r   r(      s   z0DraftBackendFactory._create_aiter_decode_backendc                 C   r=   )Nr   )FlashAttentionMultiStepBackend)2sglang.srt.layers.attention.flashattention_backendrO   r	   r   r   )r   rO   r   r   r   r)      r@   z.DraftBackendFactory._create_fa3_decode_backendc                 C   r=   )Nr   )FlashMLAMultiStepDraftBackend),sglang.srt.layers.attention.flashmla_backendrQ   r	   r   r   )r   rQ   r   r   r   r*      r@   z3DraftBackendFactory._create_flashmla_decode_backendc                 C   r=   )Nr   )!TRTLLMHAAttnMultiStepDraftBackend).sglang.srt.layers.attention.trtllm_mha_backendrS   r	   r   r   )r   rS   r   r   r   r+      r@   z5DraftBackendFactory._create_trtllm_mha_decode_backendc                 C   s.   t  jstdddlm} || j| j| jS )N=trtllm_mla backend requires MLA model (use_mla_backend=True).r   )TRTLLMMLAMultiStepDraftBackend)r   rH   r   .sglang.srt.layers.attention.trtllm_mla_backendrV   r	   r   r   )r   rV   r   r   r   r,      s   z5DraftBackendFactory._create_trtllm_mla_decode_backendc                 C   r=   )Nr   )AscendAttnMultiStepDraftBackend)8sglang.srt.hardware_backend.npu.attention.ascend_backendrX   r	   r   r   )r   rX   r   r   r   r.      r@   z1DraftBackendFactory._create_ascend_decode_backendc                 C   s<   t  jsddlm} || jddS ddlm} || jddS )Nr   )FlashInferAttnBackendFrC   )FlashInferMLAAttnBackend)r   rH   rI   rZ   r	   rJ   r[   )r   rZ   r[   r   r   r   r2      s
   z6DraftBackendFactory._create_flashinfer_prefill_backendc                 C   rA   )Nr   )TritonAttnBackendFrC   )rL   r\   r	   )r   r\   r   r   r   r3      rE   z2DraftBackendFactory._create_triton_prefill_backendc                 C   rA   )Nr   )AiterAttnBackendFrC   )rN   r]   r	   )r   r]   r   r   r   r4      rE   z1DraftBackendFactory._create_aiter_prefill_backendc                 C   rA   )Nr   )FlashAttentionBackendFrC   )rP   r^   r	   )r   r^   r   r   r   r5      s   z/DraftBackendFactory._create_fa3_prefill_backendc                 C   rA   )Nr   )TRTLLMHAAttnBackendFrC   )rT   r_   r	   )r   r_   r   r   r   r7      rE   z6DraftBackendFactory._create_trtllm_mha_prefill_backendc                 C   s*   t  jstdddlm} || jddS )NrU   r   )TRTLLMMLABackendFrC   )r   rH   r   rW   r`   r	   )r   r`   r   r   r   r8      s   z6DraftBackendFactory._create_trtllm_mla_prefill_backendc                 C   s   ddl m} || jS )Nr   )AscendAttnBackend)rY   ra   r	   )r   ra   r   r   r   r:      s   
z2DraftBackendFactory._create_ascend_prefill_backendc                 C   s   t d d S )Nz?flashmla prefill backend is not yet supported for draft extend.)loggerwarning)r   r   r   r   r6      s   z4DraftBackendFactory._create_flashmla_prefill_backendN)__name__
__module____qualname__r   intr   strdictr   r/   r<   r-   r9   r&   r'   r(   r)   r*   r+   r,   r.   r2   r3   r4   r5   r7   r8   r:   r6   r   r   r   r   r   	   sF    

						
r   )
loggingsglang.srt.server_argsr   r   sglang.srt.utils.commonr   	getLoggerrd   rb   r   r   r   r   r   <module>   s
    
