o
    i|O                     @   s@  d dl Z d dlmZ d dlmZmZmZmZ d dlZd dl	m
Z
 d dlmZ d dlmZmZ d dlmZmZmZmZmZmZ d dlmZmZmZmZmZ d d	lmZ d d
lm Z  ddl!m"Z" G dd de j#Z$eG dd de$Z%eG dd de$Z&eG dd de$Z'G dd de'Z(de
deee$ ee$ f fddZ)dS )    N)	dataclass)AnyOptionalTupleUnion)AOBaseConfig)
e4m3_dtype)FP8Granularity_normalize_granularity)GranularityPerAxisPerGroupPerRow	PerTensorPerToken)_SUB_BYTE_INT_BOUNDS_SUB_BYTE_UINT_BOUNDSMappingTypeTorchAODTypeZeroPointDomain)Int4PackingFormat)_is_float8_type   _log_deprecation_warningc                   @   s   e Zd ZdZdS )FakeQuantizeConfigBasez?
    Base class for representing fake quantization config.
    N)__name__
__module____qualname____doc__ r    r    a/home/ubuntu/.local/lib/python3.10/site-packages/torchao/quantization/qat/fake_quantize_config.pyr   (   s    r   c                   @   sV   e Zd ZU dZeZejed< e Z	e
ed< dZee ed< dZee ed< dd ZdS )	Float8FakeQuantizeConfiga  
    Config for float8 fake quantization, targeting :class:`~torchao.quantization.Float8Tensor`.

    Args:
       dtype (torch.dtype): the dtype for float8 Tensor
       granularity (FP8Granularity): the granularity for the Tensor, currently either PerRow() or PerTensor()
       hp_value_lb (Optional[float]): the lower bound for high precision floating point value for calculating scale
       hp_value_ub (Optional[float]): the upper bound for high precision floating point value for calculating scale
    dtypegranularityNhp_value_lbhp_value_ubc                 C   sT   t | jst| j dt| jtrtdt| jttfvr(td| j dS )zG
        Verify dtype and granularity are the ones we support.
        z is not a float8 dtypez[Please specify the granularity object instead of the class, e.g. PerRow() instead of PerRowz.Expected PerRow or PerTensor granularity, got N)r   r#   
ValueError
isinstancer$   typer   r   selfr    r    r!   __post_init__A   s   

z&Float8FakeQuantizeConfig.__post_init__)r   r   r   r   r   r#   torch__annotations__r   r$   r	   r%   r   floatr&   r,   r    r    r    r!   r"   0   s   
 
r"   c                   @   s4   e Zd ZU dZdZeed< eZe	j
ed< dd ZdS )Int4WeightFakeQuantizeConfiga  
    Config for pint4 weight fake quantization that targets the numerics in the following preshuffled kernel:
        torch.ops.mslk.f8i4bf16_shuffled
        torch.ops.mslk.bf16i4bf16_shuffled
        torch.ops.mslk.bf16i4bf16_rowwise

    Currently this only supports float8 input activations. It is expected to be used in conjunction with
    :class:`~torchao.quantization.Float8DynamicActivationInt4WeightConfig`. In the future, we may extend
    this to support bfloat16 as well.
       
group_sizeactivation_dtypec                 C   s$   | j ttjfvrtdt dd S )NzOnly z+ or torch.bfloat16 activation are supported)r3   r   r-   bfloat16r'   r*   r    r    r!   r,   a   s
   
z*Int4WeightFakeQuantizeConfig.__post_init__N)r   r   r   r   r2   intr.   r   r3   r-   r#   r,   r    r    r    r!   r0   Q   s
   
 r0   c                       s~  e Zd ZU dZeejef ed< e	ed< e
ed< ejed< ejed< eed< dZeed	< d
Zeed< dZee ed< ddejejejdd
dfddddeejef dee	edf dee
 dejdejded	ededee dee dee fddZdd Zdee	edf dee de	fddZdee
 dee de
fddZedefddZedefddZdedef fd d!Z  Z S )"IntxFakeQuantizeConfiga	  
    Config for how to fake quantize weights or activations,
    targeting integer dtypes up to torch.int8.

    Args:
        dtype: dtype to simulate during fake quantization, e.g. torch.int8.
            For PyTorch versions older than 2.6, you may use `TorchAODType` to represent
            torch.int1 to torch.int7 instead, e.g. TorchAODType.INT4.
        granularity: granularity of scales and zero points, e.g. PerGroup(32).
            We also support the following strings:
               1) 'per_token': equivalent to PerToken()
               2) 'per_channel': equivalent to PerAxis(0)
               3) 'per_group': equivalent to PerGroup(group_size), must be combined
                   with separate `group_size` kwarg, Alternatively, just set the
                   `group_size` kwarg and leave this field empty.
        mapping_type: whether to use symmetric (default) or asymmetric quantization
            Alternatively, set `is_symmetric` (bool) and leave this field empty.
        scale_precision: scale dtype (default torch.fp32)
        zero_point_precision: zero point dtype (default torch.int32)
        zero_point_domain: whether zero point is in integer (default) or float domain
        is_dynamic: whether to use dynamic (default) or static scale and zero points
        range_learning (prototype): whether to learn scale and zero points during training
            (default false), not compatible with `is_dynamic`.

    Keyword args:
        group_size: size of each group in per group fake quantization,
            can be set instead of `granularity`
        is_symmetric: whether to use symmetric or asymmetric quantization,
            can be set instead of `mapping_type`

    Example usage::

        # Per token asymmetric quantization
        IntxFakeQuantizeConfig(torch.int8, "per_token", is_symmetric=False)
        IntxFakeQuantizeConfig(torch.int8, PerToken(), MappingType.ASYMMETRIC)

        # Per channel symmetric quantization
        IntxFakeQuantizeConfig(torch.int4, "per_channel")
        IntxFakeQuantizeConfig(torch.int4, "per_channel", is_symmetric=True)
        IntxFakeQuantizeConfig(torch.int4, PerAxis(0), MappingType.SYMMETRIC)

        # Per group symmetric quantization
        IntxFakeQuantizeConfig(torch.int4, group_size=32)
        IntxFakeQuantizeConfig(torch.int4, group_size=32, is_symmetric=True)
        IntxFakeQuantizeConfig(torch.int4, "per_group", group_size=32, is_symmetric=True)
        IntxFakeQuantizeConfig(torch.int4, PerGroup(32), MappingType.SYMMETRIC)
    r#   r$   mapping_typescale_precisionzero_point_precisionzero_point_domainT
is_dynamicFrange_learningNeps)r2   is_symmetricr2   r>   c
                C   s   |d u rt d|| _| ||
| _| ||| _|| _|| _|| _|| _	|| _
|	| _tjtjg}|tt  |tt  ||vrOt d||f |rW|rWt d|   d S )Nz/Please use ZeroPointDomain.NONE instead of Nonez&Unsupported dtype '%s', choose from %sz4`is_dynamic` is not compatible with `range_learning`)r'   r#   _get_granularityr$   _get_mapping_typer7   r8   r9   r:   r;   r<   r=   r-   int8uint8extendlistr   keysr   r,   )r+   r#   r$   r7   r8   r9   r:   r;   r<   r=   r2   r>   
all_dtypesr    r    r!   __init__   s*   
zIntxFakeQuantizeConfig.__init__c                 C   s   dS )zc
        For deprecation only, can remove after https://github.com/pytorch/ao/issues/2630.
        Nr    r*   r    r    r!   r,      s   z$IntxFakeQuantizeConfig.__post_init__returnc                 C   s   |dur|dkr|durt d| t|tr5t|tttfs%t d| t|tr3|jdkr3t d|S |dkr<t S |dkrEtdd	S |dkrU|du rQt d
t|S t|trdt d|g df |durrt d|t|f |du rzt dt|S )ay  
        Parse the `Granularity` represented in the args.

        Granularity can be specified in one of three ways:
            1) `Granularity` object: one of PerToken(), PerAxis(), and PerGroup(group_size)
            2) str: one of 'per_token', 'per_channel', and 'per_group'
            3) None: `group_size` must be set instead, represents per group granularity
        N	per_groupz,`group_size` conflicts with granularity '%s'z!Granularity '%s' is not supportedr   z0Only axis=0 is supported for PerAxis granularity	per_tokenper_channel)axisz7Granularity was 'per_group' but no `group_size` was setz/Unexpected granularity: '%s', must be one of %s)rJ   rK   rI   z'Granularity '%s' has unexpected type %sz9At least one of `granularity` or `group_size` must be set)	r'   r(   r   r   r   r   rL   strr)   )r+   r$   r2   r    r    r!   r?      sN   




z'IntxFakeQuantizeConfig._get_granularityc                 C   sr   |dur|durt d|du r|du rtjS |dur+|tjtjfvr)t d| |S |dus1J |r6tjS tjS )z
        Parse the `MappingType` represented in the args.

        Mapping type can be specified in one of two ways:
            1): `MappingType` object: one of SYMMETRIC or ASYMMETRIC
            2): is_symmetric bool
        Nz1Cannot set both `mapping_type` and `is_symmetric`z!MappingType '%s' is not supported)r'   r   	SYMMETRIC
ASYMMETRIC)r+   r7   r>   r    r    r!   r@     s   z(IntxFakeQuantizeConfig._get_mapping_typec                 C   s"   t | jtr
| jjS td| j )zm
        If this is per group granularity, return the group size.
        Otherwise, throw an error.
        z,`group_size` is undefined for %s granularity)r(   r$   r   r2   r'   r*   r    r    r!   r2   0  s
   z!IntxFakeQuantizeConfig.group_sizec                 C   s   | j tjkS )zT
        Return True if mapping type is symmetric, else False (asymmetric).
        )r7   r   rN   r*   r    r    r!   r>   =  s   z#IntxFakeQuantizeConfig.is_symmetricnamevaluec                    sZ   |dkrt  dt| dS |dkr$|rtjntj}t  d| dS t  || dS )zB
        Support setting `group_size` and `is_symmetric`.
        r2   r$   r>   r7   N)super__setattr__r   r   rN   rO   )r+   rP   rQ   r7   	__class__r    r!   rS   D  s   z"IntxFakeQuantizeConfig.__setattr__)!r   r   r   r   r   r-   r#   r   r.   r   r   r   r;   boolr<   r=   r   r/   float32int32INTrM   r5   rG   r,   r?   r@   propertyr2   r>   r   rS   __classcell__r    r    rT   r!   r6   h   s   
 0

	

*
<
 r6   c                   @   s   e Zd ZdZdd ZdS )FakeQuantizeConfigzd
    (Deprecated) Please use :class:`~torchao.quantization.qat.IntxFakeQuantizeConfig` instead.
    c                 C   s   t |  d S )Nr   r*   r    r    r!   r,   W  s   z FakeQuantizeConfig.__post_init__N)r   r   r   r   r,   r    r    r    r!   r\   R  s    r\   base_configrH   c                 C   s  ddl m}m} ddlm}m} ddlm}m}m	}m
}m}	m}
 t| |rBttjd| jtjkd}ttj| j| jtjkd}||fS t| |rd}| jd	krktjtjg}| j|vr`td
| tdtjd}||fS | jdkrddlm } | j!t"j#kr|t$| j% d }n| j!}ttj&| jd|d}||fS tdt$|  t| |r| jd	krtdt$|  dt'| j(\}}t)| j*|| j+| j,d}t)| j-|d}||fS t| |rt)t.t/ d}tdt.d}||fS t| |r|| j0ddd}|| j0d| j1d}||fS t| |r$|| j*| j2| j3| j4d}|| j-| j2| j3| j4d}||fS t| |	r| jd	ks4J d| j5dks>J d| j-tj6ksIJ d| jtj7ksTJ d| j8tjks_J d| j9du siJ dttjdd| j9d }t| j-| j:| j8| j9d!}||fS t| |
r| jd	ksJ d| j5dksJ d| jtjksJ d"| j-tj6ksJ d| j;du sJ d#d}t| j-| j(| j| j;d!}||fS td$|  )%a&  
    Given a base post-training quantization (PTQ) config, infer the corresponding
    `FakeQuantizeConfigBase`s for both the activations and the weights.
    This is called during the prepare phase of QAT.

    Return a 2-tuple of (activation_config, weight_config) for fake quantization.
    r   )!MXDynamicActivationMXWeightConfig'NVFP4DynamicActivationNVFP4WeightConfig)MXFakeQuantizeConfigNVFP4FakeQuantizeConfig))Float8DynamicActivationFloat8WeightConfig'Float8DynamicActivationInt4WeightConfigInt4WeightOnlyConfig%Int8DynamicActivationInt4WeightConfig%Int8DynamicActivationIntxWeightConfigIntxWeightOnlyConfigrJ   )r#   r$   r>   )r#   r2   r>   N   zPacking format must be one of r1   )r2   r3   r   )LAYOUT_TO_ZERO_POINT_DOMAINF)r#   r2   r>   r:   zUnknown version on base config zOnly version 2 of z is supported)r#   r$   r%   r&   )r#   r$   )use_per_tensor_scaleuse_swizzled_scalesuse_triton_kernelT)r#   
block_sizescaling_modekernel_preferencezOnly version 2+ is supportedunpacked_to_int8z"Only unpacked_to_int8 is supportedzOnly int2+ is supportedz/Only asymmetric activation mapping is supportedz*Only symmetric weight mapping is supportedz.Specifying weight_scale_dtype is not supported)r>   r8   )r#   r$   r7   r8   z#Only symmetric mapping is supportedz'Specifying scale_dtype is not supportedzUnexpected base config: %s)<torchao.prototype.mx_formatsr^   r_   torchao.prototype.qatr`   ra   torchao.quantizationrb   rc   rd   re   rf   rg   r(   r6   r-   rA   act_mapping_typer   rN   int4r2   r7   versionr   PLAINPRESHUFFLEDint4_packing_formatr'   r0   r4   torchao.quantization.quant_apiri   r:   r   NONEr)   layoutuint4r
   r$   r"   r3   activation_value_lbactivation_value_ubweight_dtyper   r   use_dynamic_per_tensor_scalerl   rm   rn   ro   intx_packing_formatint1rO   weight_mapping_typeweight_scale_dtypeweight_granularityscale_dtype)r]   r^   r_   r`   ra   rb   rc   rd   re   rf   rg   
act_configweight_configsupported_packing_formatsri   	zp_domainact_granularityr   r    r    r!   _infer_fake_quantize_configs[  s&   
	

 
 


y
k

W
ND8r   )*abcdataclassesr   typingr   r   r   r   r-   torchao.core.configr   torchao.float8.configr   torchao.float8.inferencer	   r
    torchao.quantization.granularityr   r   r   r   r   r   %torchao.quantization.quant_primitivesr   r   r   r   r   (torchao.quantization.quantize_.workflowsr   torchao.utilsr   utilsr   ABCr   r"   r0   r6   r\   r   r    r    r    r!   <module>   s4      j	