o
    ÔÙ¾iÚ  ã                   @   s:  d dl Z d dlZd dlmZ d dlZd dlmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZmZmZmZmZmZmZmZmZ eƒ Zeƒ Zeƒ Zeƒ ZedƒoQeZeƒ Zeƒ Zeƒ Zeƒ ZeoaeZ eƒ Z!e  "e#¡Z$d	gZ%g d
¢Z&dd„ Z'dee
 de(fdd„Z)dde*de*de*fdd„Z+de,de*dej-dej-fdd„Z.dS )é    N)ÚOptional)Úenvs)Úget_moe_runner_backend)ÚQuantizationConfig)Úis_fp8_fnuz)	Úcpu_has_amx_supportÚget_bool_env_varÚget_device_smÚis_cpuÚis_cudaÚis_gfx95_supportedÚis_hipÚis_npuÚ is_nvidia_cublas_version_ge_12_9ÚSGLANG_USE_AITERÚq_b_proj)Úfa3ÚnsaÚ
flashinferÚcutlass_mlaÚ
trtllm_mlaÚascendc                  C   s@   t r
ddlm}  | S trddlm}  | S trddlm}  | S dS )zÂ
    Get the AWQ dequantize function for the current device

    Return:
        - The AWQ dequantize function for the current device.
        - None if the current device is not supported.
    r   ©Úawq_dequantize)Úawq_dequantize_triton)Úawq_dequantize_decompositionN)Ú_is_cudaÚ
sgl_kernelr   Ú_is_hipÚ)sglang.srt.layers.quantization.awq_tritonr   Ú_is_npur   r   © r!   ú[/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/models/deepseek_common/utils.pyÚawq_dequantize_func@   s   r#   Úquant_configÚreturnc                 C   s(   t j ¡ o| d uo|  ¡ dkotƒ  ¡ S )NÚmodelopt_fp4)r   ÚSGLANG_NVFP4_CKPT_FP8_NEXTN_MOEÚgetÚget_namer   Úis_deep_gemm)r$   r!   r!   r"   Ú!enable_nextn_moe_bf16_cast_to_fp8\   s   
ÿ
þür+   é   ÚscaleÚmscalec                 C   s"   | dkrdS d| t  | ¡ d S )Nr,   g      ð?gš™™™™™¹?)ÚmathÚlog)r-   r.   r!   r!   r"   Úyarn_get_mscaleg   s   r1   Ú original_max_position_embeddingsÚscaling_betaÚ	positionsc              	   C   s(   d|t  dt  ||  ¡ ¡  }|d S )Nr,   ).NN)Útorchr0   Úfloor)r2   r3   r4   Úscalingr!   r!   r"   Ú_get_llama_4_scalingm   s   ÿr8   )r,   r,   )/Úloggingr/   Útypingr   r5   Úsglang.srt.environr   Ú,sglang.srt.layers.moe.fused_moe_triton.layerr   Ú*sglang.srt.layers.quantization.base_configr   Ú)sglang.srt.layers.quantization.fp8_kernelr   Úsglang.srt.utilsr   r   r	   r
   r   r   r   r   r   r   r   r    Ú_is_fp8_fnuzÚ
_use_aiterÚ_is_cpu_amx_availableÚ_is_cpuÚ
_device_smÚ_is_gfx95_supportedÚ_use_aiter_gfx95Ú_is_cublas_ge_129Ú	getLoggerÚ__name__ÚloggerÚ!NVFP4_CKPT_FP8_ATTN_QUANT_MODULESÚ&FORWARD_ABSORB_CORE_ATTENTION_BACKENDSr#   Úboolr+   Úfloatr1   ÚintÚTensorr8   r!   r!   r!   r"   Ú<module>   sN   ,

ÿ
þÿÿÿþ