o
    ٷi                     @   s   d dl Z d dlZd dlmZmZmZ ddlmZ ddlm	Z	 d dl
mZ eeZddd	d
gdfde jjdededee dee de jjfddZdS )    N)CallableOptionalList   )maybe_empty_cache)current_platform)init_loggerfloat8_weight_onlyTembedderembedmodule
quant_typeper_rowexclude_layers	filter_fnreturnc                    s  t tjjs	J t rtjdksJ dzdd l}W n ty'   tdw ddddddd	d
ddd
}t	
|}t| D ]}	|	drL||	 q@dd | D } |v rb|   dv sqJ  ddv rt dksJ ddddddtjjdtdtf fdd}
fdd}ddlm} || |d u r|
n|dd d t  |v r| tdjjd d!d d"d d#d d$d d%d  _d&_S )'NcudazRQuantization functionality with torchao backend is only supported on CUDA devices.r   z|Quantization functionality requires the 'quantization' extra dependencies. Install with: pip install cache-dit[quantization]fp8_w8a8_dqfp8_w8a16_woint8_w8a8_dqint8_w8a16_woint4_w4a8_dqint4_w4a4_dqint4_w4a16_wo)
float8r	   	float8_woint8int8_weight_onlyint8_woint4	int4_w4a4int4_weight_onlyint4_wo_woc                 S   s   i | ]\}}||qS  r$   ).0kvr$   r$   Z/home/ubuntu/.local/lib/python3.10/site-packages/cache_dit/quantize/torchao/quantize_ao.py
<dictcomp>8   s    zquantize_ao.<locals>.<dictcomp>)r   r   r   r   r   r   r   z* is not supported for torchao backend now!fp8)   	   z(FP8 is not supported for current device.mnamer   c                    s   d7 t | tjjrRd7  D ]}||v r*td| d| d d7  dS qrL| jjtjkrLdkrLtd| d| jj d d7 dS d7 d	S dS )
N   zSkip Quantization: z -> pattern<>Fr   z -> pattern<dtype(z)!=bfloat16>T)	
isinstancetorchnnLinearloggerinfoweightdtypebfloat16)r-   r.   exclude_name)r   
num_layersnum_linear_layersnum_quant_linearnum_skip_linearr   r   r$   r(   
_filter_fnT   s$   zquantize_ao.<locals>._filter_fnc               
      s  zdkr7ddl m} m}m} rtj |  dtj dtjr,| | fn| | fd}W |S dkrNddl m	} | dtjd	}W |S d
kr^ddl m
} | }W |S dkrtddl m} | dd d}W |S dkrddl m} | ddd}W |S dkrddl m} | }W |S dkrddl m}	 |	 ddd}W |S td d ty }
 z|
 j d7  _|
d }
~
ww )Nr   r   ))Float8DynamicActivationFloat8WeightConfig	PerTensorPerRowweight_dtypeactivation_dtype)rC   rD   granularityr   )Float8WeightOnlyConfig)rC   r   )%Int8DynamicActivationInt8WeightConfigr   )Int8WeightOnlyConfig
group_size)rI   r   )%Int8DynamicActivationInt4WeightConfig    r   )%Int4DynamicActivationInt4WeightConfigr   )Int4WeightOnlyConfigzquant_type: z is not supported now!zM is not supported in torchao backend now! Please upgrade the torchao library.)torchao.quantizationr@   rA   rB   tor2   r9   getfloat8_e4m3fnrF   rG   rH   rJ   rL   rM   
ValueErrorImportErrormsg)r@   rA   rB   quant_configrF   rG   rH   rJ   rL   rM   e)kwargsr   r   r   r$   r(   _quant_configm   sx   L;/
)

z"quantize_ao.<locals>._quant_config)	quantize_device)r   rZ   zQuantized        Module: z>5z
Quantized        Method: z
Quantized Linear Layers: z
Skipped   Linear Layers: z
Total     Linear Layers: z
Total     (all)  Layers: T)r1   r2   r3   Moduler   is_accelerator_availabledevice_typetorchaorS   copydeepcopylistkeysendswithpopitemslowerget_device_capabilitystrboolrN   rY   rP   r   r5   r6   	__class____name___quantize_type_is_quantized)r   r   r   r   r   rW   r^   	alias_mapalias_map_revkeyr?   rX   rY   r$   )	r   rW   r   r;   r<   r=   r>   r   r   r(   quantize_ao   s   




*Z
	rq   )r2   r_   typingr   r   r   utilsr   	platformsr   cache_dit.loggerr   rk   r5   r3   r[   rh   ri   rq   r$   r$   r$   r(   <module>   s4    
