o
    i*                     @   s  d dl Z d dlmZ d dlmZmZ d dlZd dlmZ d dl	m
Z
mZmZmZmZ d dlmZmZmZ g dZG dd	 d	eZ	 d
edee defddZd
edefddZe Zeedejejddde
je
jddZeedejejdddde
je
jd	edejejdejdde
je
jd	dZ eedejej!dddej"ej"ddZ#eedejej!dddej"ej"dedejej!dddej"ej"ddZ$eedej%ej&dddedej%ej'dddddZ(eedej%ej&ddddZ)eedej%ej!dddd dZ*eedej%ej!dddd dZ+eedej%dej!ddd!edej%ej'dddddZ,eedejej-dddedejej-ddddZ.eedejej&dddedejej'dddddZ/eedejej0ddddgd"edejej!ddddd#dZ1ee)e*e+e(e(e,e.e/e1ee e#e$d$Z2dS )%    N)deepcopy)ListOptional)CompressionFormat)FP8_E4M3_DATADynamicTypeQuantizationArgsQuantizationStrategyQuantizationType)	BaseModel
ConfigDictmodel_validator)QuantizationSchemepreset_name_to_schemeis_preset_schemec                   @   s|   e Zd ZU dZee ed< dZee	 ed< dZ
ee	 ed< dZee	 ed< dZee ed< edd	dddZeddZdS )r   a  
    Set of QuantizationArgs defining how the weights, inputs and outputs of target list
    of modules should be quantized

    :param targets: list of modules to apply the QuantizationArgs to, can be layer
    names, layer types or a regular expression, typically ["Linear"]
    :param weights: quantization config for layer weights
    :param input_activations: quantization config for layer inputs
    :param output_activations: quantization config for layer outputs
    :param format: CompressionFormat for the layer
    targetsNweightsinput_activationsoutput_activationsformatafter)modemodelreturnc                 C   s   | j }| j}| j}| j}|d ur@|jtjtjtjtj	tj
fvr7|jtjkr.|jdu r.tdtd|j d|jd ur@td|d urM|jd urMtd|tjjkrWtd|r~|r~|jtjkr~|jtjkr~|j|jkr~tjd|j d	|j d
tdd | S )NTzDStatic and local group-wise activation quantization is not supportedzUsing z6 strategy is not supported for activation quantizationz*Cannot apply actorder to input activationsz+Cannot apply actorder to output activationszBmixed-precision cannot be set as a format for a QuantizationSchemezXUsing GROUP strategy for both weights and input_activations with different group sizes (z vs zu) may complicate fused kernel implementations. Consider using TENSOR_GROUP strategy for both or matching group sizes.   )
stacklevel)r   r   r   r   strategyr	   TOKENTENSORGROUPTENSOR_GROUP	ATTN_HEADdynamicNotImplementedErroractorder
ValueErrorr   mixed_precisionvalue
group_sizewarningswarnUserWarning)r   inputsoutputsr   r    r.   `/home/ubuntu/.local/lib/python3.10/site-packages/compressed_tensors/quantization/quant_scheme.pyvalidate_model_after8   s\   



z'QuantizationScheme.validate_model_afterforbid)extra)r   r   r   r   )__name__
__module____qualname____doc__r   str__annotations__r   r   r   r   r   r   r   r0   r   model_configr.   r.   r.   r/   r   %   s   
 ;r   namer   r   c                 C   sJ   |   } | tvrtd|  dtt  tt|  }tdd|i|S )a  
    :param name: preset quantization settings name. must exist in upper case in
        PRESET_SCHEMES
    :param targets: list of quantization targets to be passed to the Scheme
    :return: new QuantizationScheme for a given name with the given targets
    zUnknown preset scheme name z, available names: r   Nr.   )upperPRESET_SCHEMESKeyErrorlistkeysr   r   )r:   r   scheme_argsr.   r.   r/   r   |   s   
r   c                 C   s   |   tv S )zn
    :param name: preset quantization settings name
    :return: True if the name is a preset scheme name
    )r;   r<   )r:   r.   r.   r/   r      s   r      TF   )num_bitstyper   	symmetricr"   r(   scale_dtypezp_dtype)r   static_minmax)	rC   rD   r   rE   r"   r(   observerrF   rG   )r   r       )rC   rD   r   r"   rE   r(   rF   rG      )rC   rD   r   rE   r"   )rC   rD   r   rE   r"   rI      )rC   rD   r   r(   rE   r"   )rC   rD   r(   r   rE   r"   )rC   rD   r   rE   r"   block_structure)rC   rD   r   rE   r"   rI   r(   )UNQUANTIZEDW8A16W4A16
W4A16_ASYMW8A8INT8W4A8FP8FP8_DYNAMIC	FP8_BLOCKNVFP4A16NVFP4MXFP4A16MXFP4)3r)   copyr   typingr   r   torchcompressed_tensors.configr   *compressed_tensors.quantization.quant_argsr   r   r   r	   r
   pydanticr   r   r   __all__r   r7   r   boolr   dictrN   FLOATr    dtyperX   LOCALrY   r   uint8rZ   r[   INTCHANNELr   	INT8_W8A8rO   rP   rQ   	INT8_W4A8r   rU   rV   BLOCKrW   r<   r.   r.   r.   r/   <module>   s  R

