o
    
۾i?                     @   s   d dl mZ d dlmZmZ d dlZd dlZd dlm	Z	 d dl
mZmZ d dlmZmZ d dlmZ d dlmZ d d	lmZ erFd d
lmZ e	eZG dd deZdS )    )Fraction)TYPE_CHECKINGAnyN)init_logger)
LinearBaseUnquantizedLinearMethod)QuantizationConfigQuantizationMethods)ParallelLMHead)current_platform)scalar_types)WeightsMapperc                       s  e Zd ZdZh dZdhZddhZh dZ							d8d
edede	de
de
ee
 B dB dee
ef dB de
de
ddf fddZde
fddZedefddZedeej fddZedefddZedee
 fddZedee
ef dd fd d!Zd"e
fd#d$Zd
ede	fd%d&Zd9d)d*Zd:d+e
de
fd,d-Zd:d+e
de
fd.d/Zd+e
fd0d1Zd2ejj d+e
fd3d4Z!e		5d;d6d7Z"  Z#S )<	INCConfigziConfig class for Intel Neural Compressor (INC).
    Repo: https://github.com/intel/neural-compressor
    >               intauto_round:auto_gptqzauto_round:auto_awq>   
awq:marlingptq:marlinawqautogptqmarlinTNr   weight_bits
group_sizesympacking_formatblock_name_to_quantizeextra_config	data_typebackendreturnc	           	         s   t    || jvrtd| d| j d|| jvr'td| d| j d|| jvr8td| d| j d|| jvrItd| d| j d|| _|| _|| _	|| _
t|tr_|d	n|| _|| _|| _|| _td
|| _d S )NzUnsupported weight_bits: z, currently only support .zUnsupported data_type: z, currently only support  zUnsupported packing_format: zUnsupported backend: z,  currently only support ,    )super__init__SUPPORTED_BITS
ValueErrorSUPPORTED_DTYPESSUPPORTED_FORMATSSUPPORTED_BACKENDSr   r   r   r   
isinstancestrsplitr   r    r!   r"   r   pack_factor)	selfr   r   r   r   r   r    r!   r"   	__class__ _/home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/quantization/inc.pyr(   +   sJ   




zINCConfig.__init__c                 C   s   d| j  d| j d| j dS )NzINCConfig(weight_bits=z, group_size=z, sym=))r   r   r   r2   r5   r5   r6   __repr__Z   s   
zINCConfig.__repr__c                 C      dS )Nincr5   clsr5   r5   r6   get_name`      zINCConfig.get_namec                 C   s   t jt jgS N)torchhalfbfloat16r<   r5   r5   r6   get_supported_act_dtypesd   s   z"INCConfig.get_supported_act_dtypesc                 C   r:   )N<   r5   r<   r5   r5   r6   get_min_capabilityh   r?   zINCConfig.get_min_capabilityc                 C   s   dgS )Nzquantization_config.jsonr5   r<   r5   r5   r6   get_config_filenamesl   s   zINCConfig.get_config_filenamesconfigc                 C   sv   | |  |dg|  |dg|  |dg| |dgd| |ddgd | |dgd | |d	gd
| |ddgddS )Nbitsr   r   r   r   r   to_quant_block_namesr    r!   r   r"   vllm_backendr   )r   r   r   r   r   r    r!   r"   )get_from_keysget_from_keys_or)r=   rH   r5   r5   r6   from_configp   s   
zINCConfig.from_config
layer_namec                    sF  ddt dtffddjrjv rS t|t jr-tfddjD jrZd|jj	 v rZfd	d
jD }|rZt
t|dkrR|d S td djrj D ];\ } v r dkr fdd
|D }fdd
|D }t
t|dkr|d   S td d| qbS )NTname	quantizedc              	      sd  j s|rjnd|rjnd|rjfS dfS | j v rDj |  }|d|r*jnd|d|r4jnd|d|r@jfS dfS td j  D ]O\}}t|trat	 fdd	|D sbqMz0t
t
|| d ur|d|rvjnd|d|rjnd|d|rjndfW   S W qM t
jy   Y qMw |rjnd|rjnd|rjfS dfS )
N   TrI   r   r   z*+?^$()[]{}|\\c                 3   s    | ]}| v V  qd S r@   r5   ).0cREGEX_SPECIAL_CHARSr5   r6   	<genexpr>   s    
zAINCConfig.get_layer_config.<locals>.get_config.<locals>.<genexpr>)r    r   r   r   getsetitemsr.   r/   anyresearchcompileerror)rP   rQ   cfgpatternr8   rV   r6   
get_config   sJ   


z.INCConfig.get_layer_config.<locals>.get_configc                 3   s    | ]}  |V  qd S r@   
startswithrT   rP   )rO   r5   r6   rX      s    

z-INCConfig.get_layer_config.<locals>.<genexpr>fusedmoec                    s    g | ]}| r |qS r5   rd   rf   )rc   rO   rQ   r5   r6   
<listcomp>   s    z.INCConfig.get_layer_config.<locals>.<listcomp>   r   zFused MoE layer 'z5' requires consistent quant config for all sub-layersc                    s   g | ]}  |qS r5   )replace)rT   sub_key)
fusion_keyrO   r5   r6   rh      s    c                    s   g | ]} |qS r5   r5   rf   )rc   rQ   r5   r6   rh      s    zFused module 'z'' requires consistent quant config for )T)r/   boolr    r.   r
   r   r\   r4   __name__lowerlenrZ   r*   packed_modules_mappingr[   count)r2   layerrO   moe_configssub_keys	sub_namessub_configsr5   )rl   rc   rO   rQ   r2   r6   get_layer_config   sD   *

zINCConfig.get_layer_configc                 C   s   |dk S )NrR   r5   )r2   r   r5   r5   r6   check_quantized   s   zINCConfig.check_quantizedhf_to_vllm_mapperr   c                 C   s8   | j d ur|| j | _ | jd ur|| j| _d S d S r@   )r   
apply_listr    
apply_dict)r2   rz   r5   r5   r6   apply_vllm_mapper   s   

zINCConfig.apply_vllm_mapperprefixc                 C   s~  ddl m} ddlm}m} | ||\}}}	| |s(t|tt	fr&t
 S d S td||jj|||	 |dks<d|v r]tjtjd}
||
v oO||
| ||	 }t||r\|o[|||}nd}|rwdd	lm}m}m} ||||	 di g d
}nddlm}m} ||||	 d}t||r|r|||jS ddlm} d|||	 dd}||||S t|tt	fr|r||S ||S d S )Nr   FusedMoEcheck_marlin_supportedcheck_moe_marlin_supports_layer0[%s] Type: %s, Bits: %s, Group Size: %s, Sym: %sr   r   )r   r   F)AWQMarlinConfigAWQMarlinLinearMethodAWQMarlinMoEMethod)r   r   
zero_pointlm_head_quantizedfull_configmodules_to_not_convert)	AWQConfigAWQLinearMethod)r   r   r   MoeWNA16Configr   )quant_methodrI   r   r   lm_head)$vllm.model_executor.layers.fused_moer   :vllm.model_executor.layers.quantization.utils.marlin_utilsr   r   rx   ry   r.   r   r
   r   loggerdebugr4   rn   r   uint4uint82vllm.model_executor.layers.quantization.awq_marlinr   r   r   +vllm.model_executor.layers.quantization.awqr   r   
moe_config1vllm.model_executor.layers.quantization.moe_wna16r   rN   get_quant_method)r2   rs   r~   r"   r   r   r   r   r   r   AWQ_TYPE_MAP
use_marlinr   r   r   quant_args_marlinr   r   
quant_argsr   rH   r5   r5   r6   apply_awq_quant_layer   sx   


	
zINCConfig.apply_awq_quant_layerc              	   C   s  ddl m} ddlm}m} | ||\}}}	| |s(t|tt	fr&t
 S d S td||jj|||	 |dks<d|v rbtjtjd}
||	f|
v oT||
||	f ||	 d}t||ra|o`|||}nd	}|r|dd
lm}m}m} ||||	d	d	i i d}nddlm}m} |||d	d	i d}t||r|r|||jS ddlm} d|||	d	d}||||S t|tt	fr|r||S ||S d S )Nr   r   r   r   r   r   ))r   T)r   T)has_zpF)GPTQMarlinConfigGPTQMarlinLinearMethodGPTQMarlinMoEMethod)r   r   is_symr   desc_actdynamicr   )
GPTQConfigGPTQLinearMethod)r   r   r   r   r   r   r   )r   rI   r   r   r   )r   r   r   r   r   rx   ry   r.   r   r
   r   r   r   r4   rn   r   uint4b8	uint8b1283vllm.model_executor.layers.quantization.gptq_marlinr   r   r   ,vllm.model_executor.layers.quantization.gptqr   r   r   r   r   rN   r   )r2   rs   r~   r"   r   r   r   r   r   r   GPTQ_TYPE_MAPr   r   r   r   r   r   r   r   r   rH   r5   r5   r6   apply_gptq_quant_layer8  s   




z INCConfig.apply_gptq_quant_layerc                 C   s<   |  ||\}}}| |st|ttfrt S d S td)Nz>INC quantization is not supported during xpu kernel migration.)rx   ry   r.   r   r
   r   NotImplementedError)r2   rs   r~   r   r   r   r5   r5   r6   apply_ipex_quant_layer  s   
z INCConfig.apply_ipex_quant_layerrs   c                 C   s   |r&| j r&| j D ]}||ks|d| kr%| j | dddkr%t   S qt s3t s3| jdkr9| ||S d| jv sCd| jv rI| 	||S d| jv sSd| jv rY| 
||S d S )Nzmodel.rI   rR   ipexr   r   )r    rY   r   r   is_cpuis_xpur"   r   r   r   r   )r2   rs   r~   rO   r5   r5   r6   r     s"   



zINCConfig.get_quant_methodQuantizationMethods | Nonec                 C   s    | dddk}|r|  S dS )z*Override the `auto-round` method to `inc`.r   Nz
auto-round)rY   r>   )r=   hf_quant_cfg
user_quantis_auto_round_formatr5   r5   r6   override_quantization_method  s   z&INCConfig.override_quantization_method)Tr   NNr   r   )rz   r   )r   )r#   r   )$rn   
__module____qualname____doc__r)   r+   r,   r-   r   rm   r/   listdictr   r(   r9   classmethodr	   r>   rA   dtyperD   rF   rG   rN   rx   ry   r}   r   r   r   nnModuler   r   __classcell__r5   r5   r3   r6   r      sj    	
/V
V]r   )	fractionsr   typingr   r   regexr]   rA   vllm.loggerr   !vllm.model_executor.layers.linearr   r   'vllm.model_executor.layers.quantizationr   r	   3vllm.model_executor.layers.vocab_parallel_embeddingr
   vllm.platformsr   vllm.scalar_typer    vllm.model_executor.models.utilsr   rn   r   r   r5   r5   r5   r6   <module>   s   