o
    پi                     @  s   d dl mZ d dlZd dlZd dlmZ d dlmZ d dlZd dl	m
Z
 d dlmZ d dlmZ d dlmZ eeZeG d	d
 d
ZdS )    )annotationsN)	dataclass)Any)is_nunchaku_available)current_platform)init_logger)StoreBooleanc                   @  s~   e Zd ZU dZdZded< dZded< dZded< dZd	ed
< dZ	ded< dddZ
dddZedddZedddZdS )NunchakuSVDQuantArgszCLI-facing configuration for Nunchaku (SVDQuant) inference.

    This is intentionally lightweight and only contains arguments needed to
    construct `runtime.layers.quantization.nunchaku_config.NunchakuConfig`.
    Fboolenable_svdquantNz
str | Nonequantized_model_pathquantization_precisionz
int | Nonequantization_rankquantization_act_unsignedreturnNonec                 C  s   | j r	| js	d| _| jr| j sdS d}d}tj| j }td|}|r6| \}}|dkr0dnd}t|}| j	du rO|p>d| _	|rOt
d| j	 d| j   | jdu rj|pWd	| _|rlt
d
| j d| j   dS dS dS )z6infer precision and rank from filename if not providedTNzsvdq-(int4|fp4)_r(\d+)fp4nvfp4int4z#inferred --quantization-precision: z from --quantized-model-path:     zinferred --quantization-rank: )r   r   ospathbasenameresearchgroupsintr   loggerinfor   )selfinferred_precisioninferred_rankfilenamematchp_strr_str r&   ^/home/ubuntu/.local/lib/python3.10/site-packages/sglang/multimodal_gen/configs/quantization.py_adjust_config$   s<   





z#NunchakuSVDQuantArgs._adjust_configc              	   C  s
  |    | js	d S t stdtj }g }t|D ]/}tj	|\}}|dkr9|
d| d| | d q|dvrK|
d| d| | d q|rYtdd	| d
| js`tdt sgtd| jdvrutd| j d| jdkrtd| j dd S )NzONunchaku SVDQuant is only supported on NVIDIA CUDA GPUs (Ampere SM8x or SM12x).	   zcuda:z (SMz	, Hopper))      )zNunchaku SVDQuant is currently only supported on Ampere (SM8x) or SM12x GPUs; Hopper (SM90) is not supported. Unsupported devices: z, z*. Disable it with --enable-svdquant false.z;--enable-svdquant requires --quantized-model-path to be setzNunchaku is enabled, but not installed. Please refer to https://nunchaku.tech/docs/nunchaku/installation/installation.html for detailed installation methods.)r   r   z"Invalid --quantization-precision: z. Must be one of: int4, nvfp4r   zInvalid --quantization-rank: z. Must be > 0)r(   r   r   is_cuda
ValueErrortorchcudadevice_countrangeget_device_capabilityappendjoinr   r   r   r   )r   r1   unsupportedimajorminorr&   r&   r'   validateI   sN   


zNunchakuSVDQuantArgs.validatec                 C  sd   | j dttjdd | j dttjdd | j dtd dd | j d	td d
d | j dttjdd d S )Nz--enable-svdquantz0Enable Nunchaku SVDQuant (W4A4-style) inference.)actiondefaulthelpz--quantized-model-pathzqPath to pre-quantized Nunchaku weights. Can be a single .safetensors file or a directory containing .safetensors.)typer<   r=   z--quantization-precisionzfQuantization precision: int4 or nvfp4. If not specified, inferred from model path or defaults to int4.z--quantization-rankz`SVD low-rank dimension (e.g., 32). If not specified, inferred from model path or defaults to 32.z--quantization-act-unsignedz4Use unsigned activation quantization (if supported).)add_argumentr   r	   r   strr   r   r   )parserr&   r&   r'   add_cli_args}   s<   	
z!NunchakuSVDQuantArgs.add_cli_argskwargsdict[str, Any]'NunchakuSVDQuantArgs'c              
   C  sD   | t |d| j|d| j|d|dt |d| jdS )Nr   r   r   r   r   )r   r   r   r   r   )r
   getr   r   r   )clsrC   r&   r&   r'   	from_dict   s   zNunchakuSVDQuantArgs.from_dict)r   r   )rC   rD   r   rE   )__name__
__module____qualname____doc__r   __annotations__r   r   r   r   r(   r:   staticmethodrB   classmethodrH   r&   r&   r&   r'   r	      s   
 

%4#r	   )
__future__r   r   r   dataclassesr   typingr   r/   Isglang.multimodal_gen.runtime.layers.quantization.configs.nunchaku_configr   'sglang.multimodal_gen.runtime.platformsr   1sglang.multimodal_gen.runtime.utils.logging_utilsr   sglang.multimodal_gen.utilsr   rI   r   r	   r&   r&   r&   r'   <module>   s   