o
    پii                  	   @  sL  U d dl mZ d dlZd dlZd dlmZmZmZmZ d dl	Z	G dd dZ
e
Zd dlmZ d dlmZmZ d dlmZ d d	lmZ d d
lmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZm Z  d dl!m"Z"m#Z# d dl$m%Z% d dl&m'Z' d dl(m)Z) d dl*m+Z+ d dl,m-Z- d dl.m/Z/ d dl0m1Z1 d dl2m3Z3 d dl4m5Z5 d dl6m7Z7 d dl8m9Z9m:Z:m;Z;m<Z< e< Z=erd dl>m?Z? i dededed e#d!e#d"e"d#e7d$e5d%ed&ed'ed(ed)ed*e d+e'd,ed-e-e3e+ee/ee%e1d.Z@d/eAd0< e9 se=re: re@Bd1e)i i e@ZCd8d6d7ZDejEZFdS )9    )annotationsN)TYPE_CHECKINGDictOptionalTypec                   @  s   e Zd Zdd ZdS )DummyConfigc                 O  s   d S )N )selfargskwargsr   r   [/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/layers/quantization/__init__.pyoverride_quantization_method   s   z(DummyConfig.override_quantization_methodN)__name__
__module____qualname__r   r   r   r   r   r      s    r   )AutoRoundConfig)	AWQConfigAWQMarlinConfig)QuantizationConfig)BitsAndBytesConfig)BlockInt8Config)CompressedTensorsConfig)	Fp8Config)FBGEMMFp8Config)
GGUFConfig)
GPTQConfigGPTQMarlinConfig)ModelOptFp4ConfigModelOptFp8Config)ModelSlimConfig)MoeWNA16Config)Mxfp4Config)PetitNvFp4Config)	QoQConfig)QuarkConfig)QuarkInt4Fp8Config)W4AFp8Config)W8A8Fp8Config)W8A8Int8Config)is_cudais_hipis_npumxfp_supported)
TopKOutputfp8mxfp8blockwise_int8modeloptmodelopt_fp8modelopt_fp4	w8a8_int8w8a8_fp8awq
awq_marlinbitsandbytesggufgptqgptq_marlin	moe_wna16zcompressed-tensorsqoq)w4afp8petit_nvfp4
fbgemm_fp8quarkz
auto-round	modelslimquark_int4fp8_moez#Dict[str, Type[QuantizationConfig]]BASE_QUANTIZATION_METHODSmxfp4quantizationstrreturnType[QuantizationConfig]c                 C  s,   | t vrtd|  dtt   t |  S )NzInvalid quantization method: z. Available methods: )QUANTIZATION_METHODS
ValueErrorlistkeys)rF   r   r   r   get_quantization_config[   s   
rN   )rF   rG   rH   rI   )G
__future__r   builtinsinspecttypingr   r   r   r   torchr   r   )sglang.srt.layers.quantization.auto_roundr   "sglang.srt.layers.quantization.awqr   r   *sglang.srt.layers.quantization.base_configr   +sglang.srt.layers.quantization.bitsandbytesr   -sglang.srt.layers.quantization.blockwise_int8r   Dsglang.srt.layers.quantization.compressed_tensors.compressed_tensors"sglang.srt.layers.quantization.fp8r   )sglang.srt.layers.quantization.fpgemm_fp8r   #sglang.srt.layers.quantization.ggufr   #sglang.srt.layers.quantization.gptqr   r   -sglang.srt.layers.quantization.modelopt_quantr   r   2sglang.srt.layers.quantization.modelslim.modelslimr   (sglang.srt.layers.quantization.moe_wna16r    $sglang.srt.layers.quantization.mxfp4r!   $sglang.srt.layers.quantization.petitr"   "sglang.srt.layers.quantization.qoqr#   *sglang.srt.layers.quantization.quark.quarkr$   0sglang.srt.layers.quantization.quark_int4fp8_moer%   %sglang.srt.layers.quantization.w4afp8r&   'sglang.srt.layers.quantization.w8a8_fp8r'   (sglang.srt.layers.quantization.w8a8_int8r(   sglang.srt.utilsr)   r*   r+   r,   _is_mxfp_supportedsglang.srt.layers.moe.topkr-   rD   __annotations__updaterJ   rN   
isinstanceoriginal_isinstancer   r   r   r   <module>   s   	



