o
    Ti                     @   sn   d dl Z d dlZd dlmZ d dlmZ d dlmZ d dl	m
Z
 ddlmZmZmZmZ G dd dejZdS )	    N)comm)GATED_ACTIVATION_TYPES)get_accelerator   )	MLPGemmOpVectorMatMulOp
GELUGemmOpResidualAddOpc                       s2   e Zd Zg Zd
 fdd	Zdd Zdd	 Z  ZS )DeepSpeedMLPNr   Fc                    s\  t t|   || _| jjtjkrtjn| jj}| jjtjkr"tjn| jj}t 	 }	| jj
tv r3dnd}
| jjdkr?| jjnd| jj | j_| jj|
 | jj | _| jj| jj | _| jjr~d | _d | _d | _d | _d | _d | _d | _d | _d | _d | _nctjtj| jj||	ddd| _tjtj| jj||	ddd| _tjtj| jj| j||	ddd| _tjtj| j||	ddd| _tjtj| j| jj||	ddd| _tjtj| jj||	ddd| _|| _|r|d n|| _ t!t"#|| _$|| _%t&|| _'t(|| _)t*|| _+t,|| _-t.tj/dkr,tj| j| jj||	dtj| j||	dgt_/d S d S )N   r   r      )dtypedeviceF)requires_grad)0superr
   __init__configr   torchint8halfr   current_device_namemlp_act_func_typer   intermediate_sizehidden_sizemp_sizeintm_w_sz_per_partitionintm_o_sz_per_partitionset_empty_paramsattn_nwattn_nbinter_winter_b
inter_up_w
inter_up_binter_gate_winter_gate_boutput_woutput_bnn	Parameteremptyq_scalesq_groupsintmathlog2merge_countmp_groupr   mlp_gemm_funcr   vector_matmul_funcr   fused_gemm_gelur	   residual_add_funclen_inter_w_buffers)selfr   r1   r+   r,   r0   mlp_extra_grouping	data_typedata_type_fpr   proj_factor	__class__ ^/home/ubuntu/.local/lib/python3.10/site-packages/deepspeed/ops/transformer/inference/ds_mlp.pyr      sv   
$




zDeepSpeedMLP.__init__c                 C   s   t jd }| j|d | jd d d f< | j|| jd d d d f< | jd ur?t jd }| j|d | jd < | j|| jd d < t jS )Nr   r   r   )r
   r7   r"   r   r$   r#   r%   )r8   r    r!   r?   r?   r@   _merge_inter_wQ   s   


zDeepSpeedMLP._merge_inter_wc              
   C   s   | j d u r|  \| _| _n| j | _| j| _d }| jd u r*| j|| j| j| jd}n| j||| j| j|| j| j| j	d\}}| j
|||d u||d urL|n| j| j|d}| jd urjtj| jddkrjtj|| jd |S )N)inputweightbias
weight_out)rB   residualweight_intermrE   
input_biasrD   gammabeta)hidden_staterF   add_biasattention_outputattention_bias
final_biasresidual_add)groupr   )r    rA   _inter_w_inter_br!   r   r4   r&   r2   r   r5   r'   r1   distget_world_size
all_reduce)r8   rB   rF   residual_normrD   rP   outputr?   r?   r@   forward[   s>   


	zDeepSpeedMLP.forward)NNr   r   F)__name__
__module____qualname__r7   r   rA   rY   __classcell__r?   r?   r=   r@   r
      s
    ?
r
   )r.   r   torch.nnr(   	deepspeedr   rT   deepspeed.utils.typesr   deepspeed.acceleratorr   
op_bindingr   r   r   r	   Moduler
   r?   r?   r?   r@   <module>   s   