o
    Ti!                     @   s6   d dl Z ddlmZ ddlmZ G dd deeZdS )    N   )HybridEngineContainer)MegatronContainerc                   @   s>   e Zd ZdejfddZdddZdejfdd	Zd
d ZdS )HybridMegatronContainerxc           	   
   C   s  |j d | j }| j|f| dd  }|j| }t|j dkr)t| d nd}tj||j | d |d\}}}t|j dkrk|jtj	|
d|j d |
d|j d |
d|j d fdd
|j  dS |jtj	|
d|
d|
dfdd
|j  dS )z
        Internal helper for accepting the head-contiguous weight matrix and chunking
        the query, key, and value components.
        r   r   N      dim)shapenum_attention_headssizeviewlentorchsplitdatacopy_catreshape)	selfr   attention_head_sizenew_x_shapex_1div_dimqkv r   o/home/ubuntu/.local/lib/python3.10/site-packages/deepspeed/module_inject/containers/features/hybrid_megatron.py
_align_qkv   s   
" 6:z"HybridMegatronContainer._align_qkvreturnNc                       t | jdrDddlm} ddlm  | j| jg} fdd|D }|| | | j | | j W d   dS 1 s=w   Y  dS | | j | | j dS )a  
        Overrides the HybridEngineContainer implementation.

        The alternative layout of the QKV matrix for Megatron is such that each head's Q, K, and V
        are sequential in memory. This is different from the default layout in which all of the Qs
        are sequential, followed by all of the Ks, and then all of the Vs. Here, we take the default
        layout and transform it to the inference layout.
        ds_idr   GatheredParametersZeroParamStatusc                    &   g | ]}t |d r|j jkr|qS r$   hasattr	ds_statusNOT_AVAILABLE.0paramr'   r   r    
<listcomp>,       zCHybridMegatronContainer.transform_for_inference.<locals>.<listcomp>N)r,   qkvwdeepspeed.runtime.zeror&   +deepspeed.runtime.zero.partition_parametersr(   qkvbr!   r   r&   
param_listnon_active_paramsr   r'   r    transform_for_inference      	
"z/HybridMegatronContainer.transform_for_inferencec                    s   t j||jd d dd}|d jd | j }| j|f| dd    fdd|D \}}}t|jdkrN|jt j|||fd	d	d
|jd
  dS |jt j|||fd
d	d
 dS )zm
        Internal helper for taking contiguous QKV and partitioning it for contiguous
        heads.
        r   r	   r
   r   Nc                    s   g | ]}|j   qS r   )r   )r0   r   r   r   r    r2   =   s    z:HybridMegatronContainer._partition_qkv.<locals>.<listcomp>r   r   )
r   r   r   r   r   r   r   r   r   r   )r   r   q_k_vr   r   r   r   r   r=   r    _partition_qkv5   s   .&z&HybridMegatronContainer._partition_qkvc                    r#   )a  
        Overrides the HybridEngineContainer implementation.

        The alternative layout of the QKV matrix for Megatron is such that each head's Q, K, and V
        are sequential in memory. This is different from the default layout in which all of the Qs
        are sequential, followed by all of the Ks, and then all of the Vs. This function takes the inference format and reverts it back to the default format.
        r$   r   r%   r'   c                    r)   r*   r+   r/   r'   r   r    r2   P   r3   zBHybridMegatronContainer.transform_for_training.<locals>.<listcomp>N)r,   r4   r5   r&   r6   r(   r7   r@   r8   r   r'   r    transform_for_trainingC   r<   z.HybridMegatronContainer.transform_for_training)r"   N)	__name__
__module____qualname__r   Tensorr!   r;   r@   rA   r   r   r   r    r      s
    
r   )r   hybrid_enginer   megatronr   r   r   r   r   r    <module>   s   