o
    iRS                     @   s  d dl Z d dlmZmZmZmZmZ d dlZd dlm	Z	 d dl
m	  mZ d dlmZ d dlmZ ddlmZmZ ddlmZ ddlmZmZmZ ejjZg d	Zd.d
dZejejfdejdejdejde de dej!dej!fddZ"G dd dej	j#Z$dejeje$dfdej	j#de dee  de%dee dej!dej!deej	j# de%fddZ&	d/d d!Z'G d"d# d#eZ(d$d% Z)G d&d' d'ej	j#Z*	d0dej	j#de de%dej!dej!deej	j# de%fd(d)Z+dej	j#de de%dej!dej!f
d*d+Z,G d,d- d-eZ-dS )1    N)AnyCallableDictOptionalType)	is_device)find_multiple   )MappingTypedequantize_affine)	Quantizer)group_quantize_tensor_symmetric groupwise_affine_quantize_tensorper_token_dynamic_quant)WeightOnlyInt4LinearInt4WeightOnlyQuantizerInt8DynActInt4WeightQuantizerc                 C   s0   | | dk}|d ur| |d  dk}|o|S |S )Nr       )k	groupsizeinner_k_tilesk_divisible_by_groupsize%k_divisible_by_16_times_inner_k_tilesr   r   ]/home/ubuntu/.local/lib/python3.10/site-packages/torchao/quantization/linear_quant_modules.py_check_linear_int4_k)   s
   r   xweight_int4packscales_and_zerosout_featuresr   	precisionscales_precisionc           
      C   s   |   }| d|d } t| jjdr)tjj| 	||||	|j	| j
d}ntjj| 	||||	|j	| j
d}|d d |f }	||	}|S )Ncpudtype)sizereshaper   devicetypetorchopsaten_weight_int4pack_mm_for_cputor%   _weight_int4pack_mm)
r   r   r   r   r   r    r!   origin_x_sizec	new_shaper   r   r   linear_forward_int41   s,   	
r3   c                       s   e Zd ZU ddgZeed< eed< ejed< dddddejejfdededed	ed
ej	dej	ddf fddZ
dejdejfddZ  ZS )r   in_featuresr   weightFN      r   r   r    r!   returnc
           
   	      s2  t    t||| | _| jr|| _t|d}|| _|| _|r$J d|| _|| _	|| _
|| _|	| _|d ur;td|d dksEJ d||d  dksQJ dt|jd	ri| d
tj||d ftj|d n| d
tj|d ||d  d|d ftj|d || _| dtj|| |df| j|d d S )N   zrequire bias=False-Please specify 'precision' instead of 'dtype'r7   r   zrequire out_features % 8 == 0r   z-require in_features % (innerKTiles * 16) == 0r#   r5      )r%   r(       r   )super__init__r   paddingorigin_in_featuresr   r4   r   r(   r   r   r    r!   
ValueErrorr   r)   register_bufferr*   zerosuint8int32r%   )
selfr4   r   biasr(   r%   r   r   r    r!   	__class__r   r   r>   U   sf   


zWeightOnlyInt4Linear.__init__inputc                 C   s@   | j rtj|d| j| j fd}t|| j| j| j| j	| j
| jS )Nr   pad)r?   FrL   r4   r@   r3   r5   r   r   r   r    r!   rF   rJ   r   r   r   forward   s   zWeightOnlyInt4Linear.forward)__name__
__module____qualname____constants__int__annotations__r*   Tensorbfloat16r%   r>   rO   __classcell__r   r   rH   r   r   O   s8   
 
	
Gr   Fmoduler   padding_allowedskip_layer_funclinear_classcopy_weightsc	                 C   s   |   D ]S\}	}
t|
tjrK|
jd u rK|d u s||
jsKt|
j||s%|rJ||
j|
jd|
jj	||||d}|rD|
jj	t
	dkrD|
j|_t| |	| qt|
||||||||	 qd S )NF)rG   r(   r   r   r    r!   meta)named_children
isinstancennLinearrG   r5   r   r4   r   r(   r*   setattr_replace_linear_int4)rY   r   r   rZ   r[   r    r!   r\   r]   namechild
new_linearr   r   r   rd      sF   

rd   c                 C   s   t | ||||td d S )N)r\   )rd   r   )rY   r   r   rZ   r[   r   r   r   replace_linear_int4   s   
rh   c                       s   e Zd Zdddedejfdededee dejd	ej	d
df fddZ
e dejjd
eeejf fddZdejjd
ejjfddZdejjdeded
ejjfddZ  ZS )r      Tr7   cudar   rZ   r   r(   r    r8   Nc                    sD   t    |dv sJ |dv sJ || _|| _|| _|| _|| _d S )N)r;      r7   )r<   @   r6   ri   )r=   r>   r   r   rZ   r(   r    )rF   r   rZ   r   r(   r    rH   r   r   r>      s   

z Int4WeightOnlyQuantizer.__init__modelc              	   C   s  |  }| D ]\}}t|tjjr|jd u r|j}|j}t	
d| d| d|  || j dks?J d| d| j d|jj}t|| j| js}| jrqdd lm  m} t	d| d	 t|d
}	|j|d|	| fd}nt	d| dd  qt|d| j| j\}
}t|
jjdrtjj|
| j| j}ntjj|
| j| j}|| j|| d< || j|| d< q|S )Nlinear: , in=, out=r   require in_features: % self.groupsize: == 0	warning: - is padded to satisfy in_features % 1024 == 0r9   rK   P is skipped, int4 requires that in_features is 32, 64, or is divisible by 1024, =and that groupsize and inner_k_tiles*16 evenly divide into itrk   r#   .weightz.scales_and_zeros) 
state_dictnamed_modulesr`   r*   ra   rb   rG   r   r4   logginginfor   r5   datar   r   rZ   torch.nn.functional
functionalwarningr   rL   r   r    r   r(   r)   r+   r,   #_convert_weight_to_int4pack_for_cpur.   _convert_weight_to_int4pack)rF   rm   cur_state_dictfqnmodr   r4   r5   rM   padded_in_featuresw_int4x8r   r   r   r   r   _create_quantized_state_dict   s`   



z4Int4WeightOnlyQuantizer._create_quantized_state_dictc              	   C   s$   t || j| j| jd | j| jd |S )N)r[   r    r!   )rd   r   r   rZ   r    rF   rm   r   r   r   _convert_for_runtime;  s   	z,Int4WeightOnlyQuantizer._convert_for_runtimeargskwargsc                 O   &   |  |}| |}|j|dd |S NF)strictr   r   load_state_dictrF   rm   r   r   ry   r   r   r   quantizeG     

z Int4WeightOnlyQuantizer.quantize)rP   rQ   rR   r*   r(   rW   rT   boolr   r%   r>   no_gradra   Moduler   strrV   r   r   r   r   rX   r   r   rH   r   r      sF    :r   c              
   C   sx   t | tjtjttjjd} d}d|d   }	d|d  d }
d|f}t||||tj|	|
|d}tjj	| ||}|S )N)scale_dtypezero_point_dtypeepsrk   r;   r	   )output_dtype)
r   r*   float32finfor   r   int8ra   r   linear)r   weight_int8rG   scalesrC   r   r   output_precisionn_bit	quant_min	quant_max
block_sizew_dqr1   r   r   r   linear_forward_8da4wQ  s,   r   c                       s   e Zd ZU ddgZeed< eed< ejed< ejed< 	 ddddejejfdededed	ej	d
ej	ddf fddZ
dejdejfddZ  ZS )Int8DynActInt4WeightLinearr4   r   r5   rG   TNri   r   r    r!   r8   c	           	         s   t    || dksJ d| d| d|| _|| _|| _|| _|d ur*td| dtj	||ftj
d | dtj	||| f|d | d	tj	||| f|d |re| d
tj	||d d S d | _d S )Nr   rq   z % groupsize:rs   r:   r5   r$   r   rC   rG   )r=   r>   r4   r   r   r    rA   rB   r*   rC   r   rG   )	rF   r4   r   rG   r(   r%   r   r    r!   rH   r   r   r>     s>   



z#Int8DynActInt4WeightLinear.__init__rJ   c              	   C   s0   | | j}t|| j| j| j| j| j| j| jS N)	r.   r    r   r5   rG   r   rC   r   r   rN   r   r   r   rO     s   z"Int8DynActInt4WeightLinear.forward)rP   rQ   rR   rS   rT   rU   r*   rV   r   r%   r>   rO   rX   r   r   rH   r   r     s6   
 

	
:r   c           
         sb   ddl m} dtjjdtdtffdd}dtjjdtjjf fdd	}	|| |	| d S )
Nr   ))_replace_with_custom_fn_if_matches_filterrf   cur_fqnr8   c                    s   t | tjot| j pS r   )r`   ra   rb   r   r4   )rf   r   )r   rZ   r   r   	filter_fn  s   z(_replace_linear_8da4w.<locals>.filter_fnc              	      sN   | j | j| jd u| jjd} r%| jjtdkr%| j|_| j|_|S )N)rG   r(   r   r    r!   r^   )r4   r   rG   r5   r(   r*   )rf   rg   )r]   r   r\   r    r!   r   r   replacement_fn  s   z-_replace_linear_8da4w.<locals>.replacement_fn)torchao.quantization.quant_apir   r*   ra   r   r   r   )
rY   r   rZ   r    r!   r\   r]   r   r   r   r   )r]   r   r\   rZ   r    r!   r   _replace_linear_8da4w  s   
 &r   c                 C   s   t | ||||t d S r   )r   r   )rY   r   rZ   r    r!   r   r   r   replace_linear_8da4w  s   r   c                       s   e Zd Zddejejedejfdede	dej
dej
dejd	ed
df fddZe dejjd
eeejf fddZdejjd
ejjfddZdejjdeded
ejjfddZ  ZS )r   ri   Fr#   r   rZ   r    r!   r(   mapping_typer8   Nc                    s2   t    || _|| _|| _|| _|| _|| _d S r   )r=   r>   r   rZ   r    r!   r(   r   )rF   r   rZ   r    r!   r(   r   rH   r   r   r>     s   
	
z&Int8DynActInt4WeightQuantizer.__init__rm   c              	   C   sZ  |  }| D ]\}}t|tjjr|j}|j}t	d| d| d|  || j
 dks:J d| d| j
 d|jj}t|| j
sv| jrjdd lm  m} td| d	 t|d
}	|j|d|	| fd}ntd| dd  qt|| jd| j
| j| jd\}
}}|
| j|| d< || j|| d< || j|| d< q|S )Nrn   ro   rp   r   rq   rr   rs   rt   ru   r9   rK   rv   rw   rk   )r   rx   z.scalesz.zeros)ry   rz   r`   r*   ra   rb   r   r4   r{   r|   r   r5   r}   r   rZ   r~   r   r   r   rL   r   r.   r    r!   r   r(   )rF   rm   r   r   r   r   r4   r5   rM   r   r   r   rC   r   r   r   r   *  sT   



z:Int8DynActInt4WeightQuantizer._create_quantized_state_dictc                 C   s   t || j| j| j| j |S r   )r   r   rZ   r    r   r   r   r   r   ]  s   z2Int8DynActInt4WeightQuantizer._convert_for_runtimer   r   c                 O   r   r   r   r   r   r   r   r   h  r   z&Int8DynActInt4WeightQuantizer.quantize)rP   rQ   rR   r*   r   r(   r
   	SYMMETRICrT   r   r%   r>   r   ra   r   r   r   rV   r   r   r   r   rX   r   r   rH   r   r     sL    2r   )r	   Nr   )F).r{   typingr   r   r   r   r   r*   torch.nnra   r~   r   rM   torchao.dtypes.utilsr   torchao.utilsr   quant_primitivesr
   r   unifiedr   utilsr   r   r   r+   r,   __all__r   rW   rV   rT   r%   r3   r   r   r   rd   rh   r   r   r   r   r   r   r   r   r   r   <module>   s   


`
	
6
e5b

&
