o
    io6                     @   s   d dl mZmZ d dlZd dlm  mZ d dlm	Z	 d dl
mZ d dlmZ ddlmZmZ ddlmZ dd	lmZ G d
d dejjZG dd deZG dd deZG dd dejjZdS )    )AnyOptionalN)TorchAODType)TwoStepQuantizer)get_group_qparams_symmetric   )FakeQuantizeConfigBaseIntxFakeQuantizeConfig)FakeQuantizerBase)_get_qmin_qmaxc                       s   e Zd ZdZ						ddededee dee d	ed
ededee ddf fddZ	de
jde
jfddZde
jjfddZe	dde
jjdee fddZ  ZS )FakeQuantizedEmbeddinga  
    General embedding layer with fake quantized weights.

    Specific target dtypes, granularity, schemes etc. are specified
    through separate configs for weights and activations.

    Example usage::

        weight_config = IntxFakeQuantizeConfig(
            dtype=torch.int4,
            group_size=8,
            symmetric=True,
        )
        fq_embedding = FakeQuantizedEmbedding(5, 10, weight_config)
        fq_embedding(torch.LongTensor([3]))
    N       @Fnum_embeddingsembedding_dimpadding_idxmax_norm	norm_typescale_grad_by_freqsparseweight_configreturnc	                    sT   t  j|||||||g|	R i |
 tjd |d ur%t|| _d S d | _d S )Nz/torchao.quantization.qat.FakeQuantizedEmbedding)super__init__torch_C_log_api_usage_oncer
   from_configweight_fake_quantizer)selfr   r   r   r   r   r   r   r   argskwargs	__class__ V/home/ubuntu/.local/lib/python3.10/site-packages/torchao/quantization/qat/embedding.pyr   ,   s"   	
zFakeQuantizedEmbedding.__init__xc              	   C   s>   | j d ur|  | j}n| j}t||| j| j| j| j| jS N)	r   weightF	embeddingr   r   r   r   r   )r   r%   wr#   r#   r$   forwardJ   s   
zFakeQuantizedEmbedding.forwardc                 C   sR   t jj| j| j| j| j| j| j| j	| j
j| j
jd	}| j
jt dkr'| j
|_
|S )N)devicedtypemeta)r   nn	Embeddingr   r   r   r   r   r   r   r'   r,   r-   )r   new_embeddingr#   r#   r$   to_embeddingY   s   z#FakeQuantizedEmbedding.to_embeddingmodc                 C   sP   t |j|j|j|j|j|j|j||jj	|jj
d
}|jj	t	dkr&|j|_|S )N)r   r,   r-   r.   )r   r   r   r   r   r   r   r   r'   r,   r-   r   )clsr3   r   r1   r#   r#   r$   from_embeddingl   s   z%FakeQuantizedEmbedding.from_embedding)NNr   FFNr&   )__name__
__module____qualname____doc__intr   floatboolr   r   r   Tensorr+   r/   r0   r2   classmethodr5   __classcell__r#   r#   r!   r$   r      sH    	r   c                	       s   e Zd ZdZdejejfdedejdejddf fdd	Z	d
ej
jdededej
jfddZd
ej
jdededej
jfddZdej
jfddZ  ZS )#Int4WeightOnlyEmbeddingQATQuantizerz
    Quantizer for performing QAT on a model, where embedding layers have
    int4 fake quantized grouped per channel weights.
       
group_sizescale_precisionzero_point_precisionr   Nc                    s2   t    tjd d| _|| _|| _|| _d S )Nz<torchao.quantization.qat.Int4WeightOnlyEmbeddingQATQuantizer   )	r   r   r   r   r   	bit_widthrB   rC   rD   )r   rB   rC   rD   r!   r#   r$   r      s   

z,Int4WeightOnlyEmbeddingQATQuantizer.__init__modelr   r    c                    sT   ddl m} dtjjdtdtfdd}dtjjdtjjf fdd	}|||| |S )
zP
        Swap `nn.Embedding` modules with `Int4WeightOnlyQATEmbedding`.
        r   ))_replace_with_custom_fn_if_matches_filterchildcur_fqnr   c                 S   s   t | tjjS r&   )
isinstancer   r/   r0   )rI   rJ   r#   r#   r$   	filter_fn      z>Int4WeightOnlyEmbeddingQATQuantizer.prepare.<locals>.filter_fnc                    sZ   t | j| j| j| j| j| j| j j j	 j
| jj| jjd}| jjtdkr+| j|_|S )N)r   r   r   r   r   r   r   rB   rC   rD   r,   r-   r.   )Int4WeightOnlyQATEmbeddingr   r   r   r   r   r   r   rB   rC   rD   r'   r,   r-   r   )rI   r1   r   r#   r$   replacement_fn   s"   zCInt4WeightOnlyEmbeddingQATQuantizer.prepare.<locals>.replacement_fn)torchao.quantization.quant_apirH   r   r/   Modulestrr<   )r   rG   r   r    rH   rL   rP   r#   rO   r$   prepare   s
   z+Int4WeightOnlyEmbeddingQATQuantizer.preparec                 O   s   |  | |S )z_
        Swap all `Int4WeightOnlyQATEmbedding` modules with `Int4WeightOnlyEmbedding`.
        )_convert_helper)r   rG   r   r    r#   r#   r$   convert   s   
z+Int4WeightOnlyEmbeddingQATQuantizer.convertmodulec                 C   s   ddl m} | D ]r\}}t|trw|jjj}|jjj}|jjj	}t
|j|j|j|j|j|j|j||||jj|jjd}t||| t| j\}	}
t|j| j||d\}}||}||j|||	|
tj|}||_|||_|||_q
| | q
dS )z
        Helper function to recursively swap `Int4WeightOnlyQATEmbedding`
        modules with `Int4WeightOnlyEmbedding`
        r   )8_quantized_decomposed_quantize_per_channel_group_wrapper)r   r   r   r   r   r   r   rB   rC   rD   r,   output_dtype)	precisionN)torchao._executorch_opsrX   named_childrenrK   rN   r   configrB   rC   rD   Int4WeightOnlyEmbeddingr   r   r   r   r   r   r   r'   r,   r-   setattrr   rF   r   tor   int8scale
zero_pointrU   )r   rW   rX   namerI   rB   rC   rD   quantized_embeddingqminqmaxszpq_weightr#   r#   r$   rU      sX   




	z3Int4WeightOnlyEmbeddingQATQuantizer._convert_helper)r6   r7   r8   r9   r   float32int32r:   r-   r   r/   rR   r   rT   rV   rU   r?   r#   r#   r!   r$   r@      sB    
)
	r@   c                       s   e Zd ZdZddddddejejfdededee d	ee	 d
e	de
de
dedejdejf fddZdde
fddZdd Z  ZS )rN   a>  
    This module implements a embedding layer with int4 fake quantized
    grouped per channel weights.

    args:
        group_size: the number of elements in each quantized group for weights
        scale_precision: precision of per group scales
        zero_point_precision: precision of per group zero points
    Nr   F    r   r   r   r   r   r   r   rB   rC   rD   c              	      sB   t tj|dd|	|
d}t j||||||||g|R i | d S )NT)r-   rB   is_symmetric
is_dynamicrC   rD   )r	   r   INT4r   r   )r   r   r   r   r   r   r   r   rB   rC   rD   r   r    r   r!   r#   r$   r     s,   	

z#Int4WeightOnlyQATEmbedding.__init__Tenabledc                 C   s   || j _d S r&   )r   rq   )r   rq   r#   r#   r$   enable_fake_quant<  s   z,Int4WeightOnlyQATEmbedding.enable_fake_quantc                 C   s   |  d d S )NF)rr   rO   r#   r#   r$   disable_fake_quant?  rM   z-Int4WeightOnlyQATEmbedding.disable_fake_quant)T)r6   r7   r8   r9   r   rk   rl   r:   r   r;   r<   r-   r   rr   rs   r?   r#   r#   r!   r$   rN     sB    	
$rN   c                       s   e Zd ZdZddddddejejdejf
dededee d	ee	 d
e	de
de
dedejdejdejdejf fddZdd Z  ZS )r^   zg
    This module implements a embedding layer with int4 quantized
    grouped per channel weights.
    Nr   Frm   r   r   r   r   r   r   r   rB   rC   rD   r,   rY   c                    s   t    || _|| _|| _|| _|| _|| _|| _d| _	|| _
|	| _|
| _|| _| dtj||ftj|d | dtj||| f|	|d | dtj||| f|
|d d S )NrE   r'   )r-   r,   rb   rc   )r   r   r   r   r   r   r   r   r   rF   rB   rC   rD   rY   register_bufferr   emptyra   )r   r   r   r   r   r   r   r   rB   rC   rD   r,   rY   r!   r#   r$   r   I  sF   


z Int4WeightOnlyEmbedding.__init__c              
   C   sb   ddl m} t| j\}}|| jd| jg| j| jtj	||| j
d}t||| j| j| j| j| jS )Nr   )dequantize_affiner   )rY   )%torchao.quantization.quant_primitivesrv   r   rF   r'   rB   rb   rc   r   ra   rY   r(   r)   r   r   r   r   r   )r   r%   rv   rf   rg   w_dqr#   r#   r$   r+     s*   
zInt4WeightOnlyEmbedding.forward)r6   r7   r8   r9   r   rk   rl   r:   r   r;   r<   r-   r,   r   r+   r?   r#   r#   r!   r$   r^   C  sL    		
9r^   )typingr   r   r   torch.nn.functionalr/   
functionalr(   rw   r   torchao.quantization.unifiedr   torchao.quantization.utilsr   fake_quantize_configr   r	   fake_quantizerr
   utilsr   r0   r   r@   rN   rR   r^   r#   r#   r#   r$   <module>   s   q 6