o
    Ti                     @   s(   d dl Z d dlmZ G dd deZdS )    N)ABCc                       s0   e Zd Z fddZdd Z fddZ  ZS )MegatronContainerc                    s    t  jdi | | jj| _d S )N )super__init__policyis_megatron_v2megatron_v2)selfkwargs	__class__r   h/home/ubuntu/.local/lib/python3.10/site-packages/deepspeed/module_inject/containers/features/megatron.pyr      s   zMegatronContainer.__init__c                 C   s   |j d | j }| d d | j|f }|j| }tj||j d d | d d\}}}t|j dkrXtj|	|j d d|	|j d d|	|j d dfdd	|j S tj|	d|	d|	dfdd	|j S )N      )dim   r   )
shapenum_attention_headssizeviewtorchsplitr   lencatreshape)r
   xattention_head_sizenew_x_shapex_1qkvr   r   r   _align_qkv_transposed   s   
(6.z'MegatronContainer._align_qkv_transposedc                    sP   t    | jr&tjj| | j	 | _tjj| | j
	 | _
d S d S )N)r   	transposer	   r   nn	parameter	Parameterr$   qkvw
contiguousqkvb)r
   r   r   r   r%      s
   
 zMegatronContainer.transpose)__name__
__module____qualname__r   r$   r%   __classcell__r   r   r   r   r   
   s    r   )r   abcr   r   r   r   r   r   <module>   s   