o
    Ti                     @   s,   d dl mZ ddlmZ G dd deZdS )    )abstractmethod   )HybridEngineContainerc                       s`   e Zd ZdZ fddZedd ZdddZ fd	d
Zdd Z	dddZ
 fddZ  ZS )HybridGatedMLPContainerz
    The HybridGatedMLPContainer supports models for which the first MLP layer
    is represented with two separate weights, one for the activation function
    and one for the gating function.
    c                    s   t  |||| |   d S N)superset_mlpset_mlp_gate)self_h4h_w_h4h_b_4hh_w_4hh_b	__class__ i/home/ubuntu/.local/lib/python3.10/site-packages/deepspeed/module_inject/containers/features/gated_mlp.pyr      s   zHybridGatedMLPContainer.set_mlpc                 C   s   t d)a  
        In `set_mlp_gate`, it is necessary to populate the following variables (where appropriate)
        for the given model:
            self.inter_up_w: inter up weight
            self.inter_up_b: inter up bias
            self.inter_gate_w: inter gate weight
            self.inter_gate_b: inter gate bias
        If the parameter does not exist in the original model, set the attribute to None.
        zA set_mlp_gate() function must be defined in the model container                                     in order to set the unfused inter up and gate tensors.)NotImplementedError)r
   r   r   r   r	      s   z$HybridGatedMLPContainer.set_mlp_gateFc                 C   s   | j jjd u rH| j jj| jf| j jj| jf| j jj| jf| j jj| jfg}|D ]\}}|d urC|j|d | jjd |j	  |||dnd }q'd S |j
| j jj| jd|d| j j_|j
| j jj| jd|d| j j_d S )Nr   )int8allocate_tensor   )
num_splitsr   )modulemlpinter_w
inter_up_w
inter_up_binter_gate_winter_gate_bcopyshapemp_sizestrided_copyr   inter_br   )r
   
mp_replacereversed_dimparamsdstsrcr   r   r   mlp_inter_mp$   s4   z$HybridGatedMLPContainer.mlp_inter_mpc                    sT   t    | jjj| jf| jjj| jf| jjj| jf| jjj| jfg}| | d S r   )	r   release_mlpr   r   r   r   r   r   _release_params)r
   gated_mlp_paramsr   r   r   r*   <   s   
z#HybridGatedMLPContainer.release_mlpc                 C   s4  | j j| jjd | j jd < | jj| jj| j jd d < | jd ur=| jj| jjd | jjd < | jj| jj| jjd d < | j j| jjg}| jd urU|| jj| jjg | jjd | j jd  | j _| jj| j jd d  | j_| jd ur| jjd | jjd  | j_| jj| jjd d  | j_|D ]}~qd S Nr   )	r   datar   r    r   r   r   r   extend)r
   
inter_datar.   r   r   r   	reset_mlpG   s    


z!HybridGatedMLPContainer.reset_mlpc                 C   s  | j | jj_| j| jj_|sm| j| jj_| j| jj_	| jd | j
jd d d f | j
_| j| j
jd d d d f | j_| jd urk| jd urQ| jd | j
jd  nd | j_| jd ure| j| j
jd d  nd | j_d S d S | j
| jj_
| j| jj_| j| jj_| j| jj_d S r-   )r   r   r   output_wr   output_br   r   r   r#   r   r    r.   r   r   r   )r
   
Z3_enabledr   r   r   set_mlp_params_wo_copy]   s   ""
(,z.HybridGatedMLPContainer.set_mlp_params_wo_copyc                    s(   t   }|| j| j| j| jg |S r   )r   get_mlp_paramsr/   r   r   r   r   )r
   r&   r   r   r   r6   s   s   
z&HybridGatedMLPContainer.get_mlp_params)F)__name__
__module____qualname____doc__r   r   r	   r)   r*   r1   r5   r6   __classcell__r   r   r   r   r      s    


r   N)abcr   hybrid_enginer   r   r   r   r   r   <module>   s   