o
    Ti                     @   sb   d dl Z d dlZd dlmZ d dlmZ d dlmZ ddl	m
Z
mZmZmZ G dd dejZdS )    N)get_accelerator)comm   )	MLPGemmOpVectorMatMulOp
GELUGemmOpResidualAddOpc                       s&   e Zd Zd fdd	Zdd Z  ZS )		TritonMLPN   Fc                    sl  t t|   || _| jj}| jjtjkrtjn| jj}t 	 }	t
jtj| jj||	ddd| _t
jtj| jj||	ddd| _| jj| jj }
t
jtj| jj|
||	ddd| _t
jtj|
||	ddd| _t
jtj|
| jj||	ddd| _t
jtj| jj||	ddd| _|| _|r|d n|| _tt|| _|| _t|| _t|| _ t!|| _"t#|| _$d S )N)dtypedeviceF)requires_gradr   )%superr	   __init__configr   torchint8halfr   current_device_namenn	Parameteremptyhidden_sizeattn_nwattn_nbintermediate_sizemp_sizeinter_winter_boutput_woutput_bq_scalesq_groupsintmathlog2merge_countmp_groupr   mlp_gemm_funcr   vector_matmul_funcr   fused_gemm_gelur   residual_add_func)selfr   r'   r!   r"   r&   mlp_extra_grouping	data_typedata_type_fpr   intm_size_per_partition	__class__ b/home/ubuntu/.local/lib/python3.10/site-packages/deepspeed/ops/transformer/inference/triton/mlp.pyr      sP   




zTritonMLP.__init__c              
   C   s   d }| j d u r| j|| j| j| jd}n| j|||| j| j| j| j | jd\}}| j||||d ur3|n| j| j|d u|d}| j	d urTt
j| j	ddkrTt
j|| j	d |S )N)inputweightbias
weight_out)r5   residual
input_biasweight_intermr8   r7   gammabeta)hidden_stater9   attention_outputattention_bias
final_biasadd_biasresidual_add)groupr
   )r   r*   r   r   r   r(   r   r+   r    r'   distget_world_size
all_reduce)r,   r5   r9   residual_normr7   rC   outputr3   r3   r4   forward6   s6   

zTritonMLP.forward)NNr
   r
   F)__name__
__module____qualname__r   rJ   __classcell__r3   r3   r1   r4   r	      s    &r	   )r   r$   torch.nnr   deepspeed.acceleratorr   	deepspeedr   rE   
op_bindingr   r   r   r   Moduler	   r3   r3   r3   r4   <module>   s   