o
    Tis                     @   s8   d dl Z ddlmZmZ d dlmZ G dd deZdS )    N   )HFBertLayerPolicyreplace_policies)get_acceleratorc                   @   sd   e Zd ZdddZdddZddd	Zd
d ZdddZdd Zdd Z	dd Z
dd Zdd ZdS )WeightQuantizationT   c                 C   s(   g | _ g | _g | _g | _|| _|| _d S N)dense_scales
qkv_scalesmlp4hh_scalesmlph4h_scalesmlp_extra_groupingmp_size)selfr   r    r   V/home/ubuntu/.local/lib/python3.10/site-packages/deepspeed/runtime/weight_quantizer.py__init__   s   
zWeightQuantization.__init__Nc           	         s   t | d| | }dd |D } fdd|D }dd t||D } fdd|D }t ||j}|	t j
}t dd |D }||fS )Nc                 S   s"   g | ]}t |  |  qS r   )maxminabs).0gr   r   r   
<listcomp>   s   " z4WeightQuantization.quantize_data.<locals>.<listcomp>c                    s$   g | ]}t d  > d| d  qS )r   r   gh㈵>)float)r   mxquantize_bitsr   r   r      s   $ c                 S   s   g | ]\}}|| qS r   r   )r   r   sr   r   r   r      s    c                    s2   g | ]}|  d  d  >  d  d  > d  qS r   )roundclamp)r   dir   r   r   r      s    &c                 S   s   g | ]
}| d  d qS r   )	unsqueezer   r   r   r   r   r      s    )torchsplitr   viewnumelzipcatreshapeshapetoint8)	r   datar   groupskeydata_groupsmax_d
data_scaledata_intr   r   r   quantize_data   s   
z WeightQuantization.quantize_datac                 C   sD   | j |jd  | |jd  dkp!| j |jd  | |jd  dkS )Nr   r      r   r-   )r   r0   merge_countr   r   r   is_mlp"   s   " zWeightQuantization.is_mlpc                 C   s<   | j |jd  |jd  dkp| j |jd  |jd  dkS )Nr   r      r9   )r   r0   r   r   r   is_qkv&   s   zWeightQuantization.is_qkvr   c                 C   s   | j r| j|d t|dr|d9 }g }d}|D ]}| ||||\}	}
||
 |	||< |d7 }qdtj||dt 	 
dd }d|v rS| j| |S d|v r_| j| |S d	|v rk| j| |S | j| |S )
Nr   )r:   r   r   dimr   mlp.dense_4h_to_h.weightmlp.dense_h_to_4h.weight attention.query_key_value.weight)r   r;   lenr7   appendr&   r+   r.   r   current_device_namer(   r$   r   r   r
   r	   )r   
value_listr   r1   r2   	merge_dimq_scaleindexr0   r6   r5   r   r   r   Quantize*   s.   

$zWeightQuantization.Quantizec                    s4   t dd |D   fdd|D }t|dS )Nc                 S   s   g | ]}|j d  qS )r   )r-   r%   r   r   r   r   A   s    z9WeightQuantization.merge_layer_scales.<locals>.<listcomp>c              	      sN   g | ]#}|j d   k r#tj|tjd |j d   ft  dfd dn|qS )r   r   )devicer>   )r-   r&   r+   zerosr   rE   r%   max_dimr   r   r   B   s    (r   )r   r&   r+   r$   )r   layer_scalesr   rM   r   merge_layer_scales@   s
   
z%WeightQuantization.merge_layer_scalesc              	   C   sJ   g }t | j| j| j| jD ]\}}}}|| ||||g qt|S r   )	r*   r	   r
   r   r   rD   rP   r&   r+   )r   
all_scalesdense_scale	qkv_scale
m4hh_scale
mh4h_scaler   r   r   merge_scalesH   s
   
zWeightQuantization.merge_scalesc           	      C   s  dd t |D }t| j| j| j| jD ]p\}}}}t|| | }t|| | }t|| | }t|| | }t |D ]3}|| 	t
tj
|| t|| fddtj
|| t|| fdd|| || gd qF|D ]}t
| q|q|S )Nc                 S   s   g | ]}g qS r   r   )r   _r   r   r   r   P   s    z9WeightQuantization.merge_scales_split.<locals>.<listcomp>r   r>   r   )ranger*   r	   r
   r   r   r&   r'   r)   rD   r+   
zeros_liker$   )	r   split_countrQ   rR   rS   rT   rU   r   scales_ar   r   r   merge_scales_splitO   s(   $z%WeightQuantization.merge_scales_splitc                 C   sj   |  }|D ]&}|| g}d|v sd|v sd|v sd|v r&| j||||d}|d ||< q|  }||fS )Nzattention.dense.weightr@   rA   rB   )r2   r   )keysrJ   rV   )r   sdr   r1   r]   r2   rF   rQ   r   r   r   sd_quantize_megatronb   s   
z'WeightQuantization.sd_quantize_megatronc                    s   g fdd} fdd i }|d ur*|  D ]\}}||||fi qntD ]}	||	j||	fi q, ||}
|
tfS )Nc                    s  || }|  \}}}}}}| \}}}}}||||g}g }	tt|D ]X}
jr?||
 r?||
 d \}}n$|tu rX||
 rX||
 d \}}n||
 \}}||
 	| |	
d|t  dd  q% 
|	 | S )Nr   r<   r   r   r   )	attentionmlprX   rC   r   r;   r7   r   r=   copy_rD   r.   r   rE   r(   r$   rP   )layer
policy_clspolicyrW   qkvwdense_w_h4h_w_4hh_wr]   rO   r2   data_quantizedr5   )rQ   r1   r   r   r   r   quantize_fnq   s   (z6WeightQuantization.model_quantize.<locals>.quantize_fnc                    sL   |   D ]\}}|j|v r||j \}}t| |||| q || q| S r   )named_children	__class__setattr)modelpoliciesnamechildrk   replace_policy)_quantize_moduler   r   rt      s   
z;WeightQuantization.model_quantize.<locals>._quantize_module)itemsupdater   _orig_layer_classr&   r+   )r   ro   quantize_policyr   r1   rk   re   
layer_namers   plcyquantized_moduler   )rt   rQ   r1   r   r   r   model_quantizen   s   

z!WeightQuantization.model_quantize)Tr   r   r   r#   )__name__
__module____qualname__r   r7   r;   r=   rJ   rP   rV   r\   r_   r|   r   r   r   r   r      s    



r   )r&   module_inject.replace_policyr   r   deepspeed.acceleratorr   objectr   r   r   r   r   <module>   s   