o
    پi                     @   s   d Z ddlZddlmZmZ ddlZeeZdej	j
defddZdej	j
defdd	Zefd
ej	j
dedee fddZdS )z
Common utilities for torchao.
    N)CallableOptionalmodulefqnc                 C   s   d|v S )z1Filter function for quantizing projection layers.proj r   r   r   r   S/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/layers/torchao_utils.pyproj_filter   s   r
   c                 C   s,   t | tjjrtd| d dS d|v S )NzQuantize: skipping z because it's a Conv3dFr   )
isinstancetorchnnConv3dloggerwarningr   r   r   r	   proj_filter_conv3d   s   r   modeltorchao_config	filter_fnc                 C   sN  |dks|du r
| S ddl m}m}m}m}m}m} ddlm}	m	}
 d|v r0|| | t
d | S d|v r>|| | |d | S d	|v rbt|d
d }|dv sVJ d| || ||d|d | S d|v rp|| | t
d | S d|v r|d
d }|	 |
 d}||v sJ d|  d| || ||| dt
d | S td| )a[  Quantize a modelwith torchao quantization specified by torchao_config

    Args:
       `model`: a model to be quantized based on torchao_config
       `torchao_config` (str): type of quantization and their arguments we want to use to
        quantize the model, e.g. int4wo-128 means int4 weight only quantization with group_size
        128
     Nr   )'float8_dynamic_activation_float8_weightfloat8_weight_onlyint4_weight_only#int8_dynamic_activation_int8_weightint8_weight_only	quantize_)PerRow	PerTensorint8wo)r   int8dqint4wo-)    @         z?int4wo groupsize needs to be one of [32, 64, 128, 256] but got )
group_sizefp8wofp8dq)per_row
per_tensorzSupported granularity are: z, got )granularityzUnexpected config: )torchao.quantizationr   r   r   r   r   r   torchao.quantization.observerr   r   r   intsplitkeys
ValueError)r   r   r   r   r   r   r   r   r   r   r   r'   r,   GRANULARITY_MAPr   r   r	   apply_torchao_config_to_model    sJ    #!


r4   )__doc__loggingtypingr   r   r   	getLogger__name__r   r   Modulestrr
   r   r4   r   r   r   r	   <module>   s.    

	
