o
    ̳i=                  	   @   s8  d dl mZmZ d dlmZ d dlmZmZ zd dlm	Z	 W n e
y-   d dlmZ	 Y nw d dlmZmZmZ zd dlmZmZ d dlmZmZmZmZ W n e
yg   d d	lmZmZmZmZmZmZ Y nw g d
ZdZzd dlmZ W n e
y   dZY nw i Zi Zi ZG dd dZdee< dee< eed< eed< G dd dZ dee < dee< eed< eed< G dd deZ!eZ"eZ#dee!< e"ed< e#ed< G dd deZ$eZ%eZ&dee$< e%ed< e&ed< dee dee' fddZ(d e'defd!d"Z)d e'defd#d$Z*	%	%d,d&ej+d'ed( d)ed( dd%fd*d+Z,d%S )-    )CallableOptional)nn)
LoRALinearQATLoRALinear)TensorCoreTiledLayout)TensorCoreTiledLayoutType)int4_weight_only#int8_dynamic_activation_int4_weight	quantize_)Int4WeightOnlyQATQuantizer Int8DynActInt4WeightQATQuantizer)disable_4w_fake_quantdisable_8da4w_fake_quantenable_4w_fake_quantenable_8da4w_fake_quant)r   r   r   r   r   r   )get_quantizer_modeInt4WeightOnlyQuantizerr   $Int4WeightOnlyQATQuantizerModuleSwapInt8DynActInt4WeightQuantizerr   *Int8DynActInt4WeightQATQuantizerModuleSwapT)qatFc                   @   s(   e Zd ZdZd	defddZdd ZdS )
r   z
    Quantizer for applying int8 per token dynamic activation + int4
    per group weight quantization to linear layers in the model.
       	groupsizec                 C   s
   || _ d S N)r   )selfr    r   S/home/ubuntu/.local/lib/python3.10/site-packages/torchtune/training/quantization.py__init__S   s   
z&Int8DynActInt4WeightQuantizer.__init__c                 C   s   t | j}t|| |S r   )r
   r   r   )r   modelquantize_fnr   r   r   quantizeV   s   

z&Int8DynActInt4WeightQuantizer.quantizeN)r   __name__
__module____qualname____doc__intr   r!   r   r   r   r   r   M   s    r   8da4wz	8da4w-qatc                   @   s,   e Zd ZdZddedefddZdd	 Zd
S )r   z
    Quantizer for applying int4 per group weight only quantization
    to linear layers in the model using the efficient tinygemm kernel.
          r   inner_k_tilesc                 C   s   || _ || _d S r   )r   r+   )r   r   r+   r   r   r   r   m   s   
z Int4WeightOnlyQuantizer.__init__c                 C   s$   t | j}t| j|}t|| |S r   )r   r+   r	   r   r   )r   r   layout_typer    r   r   r   r!   q   s   

z Int4WeightOnlyQuantizer.quantizeN)r)   r*   r"   r   r   r   r   r   g   s    r   4wz4w-qatc                   @      e Zd ZdS )r   Nr#   r$   r%   r   r   r   r   r          r   z4w-qat-module-swapc                   @   r.   )r   Nr/   r   r   r   r   r      r0   r   z8da4w-qat-module-swap	quantizerreturnc                 C   s   t t| dS )a  Given a quantizer object, returns a string that specifies the type of quantization.

    For example, in the case of int4 weight only quantization, we'll return "4w".
    If the quantizer is not recognized as a known quantizer, we'll return None.

    Currently supported:

    - :class:`~torchtune.training.quantization.Int8DynActInt4WeightQuantizer`: "8da4w"
    - :class:`~torchtune.training.quantization.Int4WeightOnlyQuantizer`: "4w"
    - :class:`~torchao.quantization.qat.Int8DynActInt4WeightQATQuantizer`: "8da4w-qat"
    - :class:`~torchao.quantization.qat.Int4WeightOnlyQATQuantizer`: "4w-qat"

    Args:
        quantizer (Optional[Callable]): A callable object that implements the `quantize` method.

    Returns:
        Optional[str]: The quantization mode.
    N)_quantizer_to_modegettype)r1   r   r   r   r      s   r   quantizer_modec                 C      t | dS )zGiven a quantizer mode, return the corresponding function for disabling fake
    quantize in a model prepared by the quantizer.
    If the quantizer is not recognized as a known QAT quantizer, return None.
    N)%_quantizer_mode_to_disable_fake_quantr4   r6   r   r   r   _get_disable_fake_quant      r:   c                 C   r7   )zGiven a quantizer mode, return the corresponding function for enabling fake
    quantize in a model prepared by the quantizer.
    If the quantizer is not recognized as a known QAT quantizer, return None.
    N)$_quantizer_mode_to_enable_fake_quantr4   r9   r   r   r   _get_enable_fake_quant   r;   r=   Nmoduleactivation_qat_configFakeQuantizeConfigweight_qat_configc                 C   sH   |   D ]\}}t|trt|||}t| || qt||| qdS )a`  
    Swap all `LoRALinear` in the model with `QATLoRALinear`.

    This is used for combining QAT + LoRA during finetuning. The resulting linear layers
    will apply the following transformation instead:

        x -> fake_quantize(W_frozen) @ fake_quantize(x) + BAx

    Fake quantization here refers to simulating the quantization numerics without actual
    dtype casting, with the goal of providing improved accuracies when the model is
    ultimately quantized after finetuning.

    Args:
        module (nn.Module): The model to swap linear layers on
        activation_qat_config (Optional[FakeQuantizeConfig]): The config for specifying
            how to fake quantize input activations in the base linear layer
        weight_qat_config (Optional[FakeQuantizeConfig]): The config for specifying
            how to fake quantize base linear weights
    N)named_children
isinstancer   r   from_lora_linearsetattrswap_lora_linear_with_qat)r>   r?   rA   namechild
new_linearr   r   r   rF      s   
rF   )NN)-typingr   r   torchr   torchtune.modules.peft.lorar   r   torchao.dtypesr   ImportErrorr   torchao.quantizationr	   r
   r   torchao.quantization.qatr   r   torchao.quantization.qat.linearr   r   r   r   "torchao.quantization.prototype.qat__all___torchao_0_7_supportedr   r3   r8   r<   r   r   r   !disable_4w_fake_quant_module_swap enable_4w_fake_quant_module_swapr   $disable_8da4w_fake_quant_module_swap#enable_8da4w_fake_quant_module_swapstrr   r:   r=   ModulerF   r   r   r   r   <module>   s   $	