o
    ߥi[                     @   s  d dl Z d dlZd dlZd dlZd dlZd dlZd dlm  m	Z	 d dl
mZ d dlm  mZ d dlmZ G dd dejZedkrdZedddd	ed
 Zejedddgejd Zejeddgejd Ze  e	jdd2 edD ]ZeeeZqzej  e Z edD ]ZeeeZqej  e Z!W d   n1 sw   Y  e"de!e  d d  d dS dS )    N)FlashAttentionc                       s8   e Zd Z				d	 fdd	Zdd Zd
ddZ  ZS )FlashAttentionBlockN   c                    s   |r|| n|}|| }|| |ksJ t t|   || _|| _|| _|| _t|d| _	t
d|| _t
||d d| _|d urLt
||d | _t
||d| _| jdkrg| jd dkrgtd d	d
| _t
j| jj d S )Ng      п                   r           )softmax_scaleattention_dropout)superr   __init__dimcontext_dim	num_headshead_dimmathpowscalenn	GroupNormnormConv2dto_qkvLinear
context_kvprojr   
flash_attninitzeros_weight)selfr   r   r   r   
batch_size	__class__ i/home/ubuntu/.local/lib/python3.10/site-packages/modelscope/models/multi_modal/videocomposer/mha_flash.pyr      s&   zFlashAttentionBlock.__init__c                 C   s|   t |tjr|jjjddd |jd ur|jj  d S d S t |tjr:|jjjddd |jd ur<|jj  d S d S d S )Nr   g333333?)meanstd)	
isinstancer   r   r"   datanormal_biaszero_r   )r#   moduler'   r'   r(   _init_weight1   s   

z FlashAttentionBlock._init_weightc                 C   s  |}g |  | j| jR \}}}}}}	| |}| |||d |	|| jddd\}
}}|durx| ||d|d |		ddddjddd\}}t
j||gdd}t
j||gdd}t
j|||	dg|
j|
jd	}t
j|
|gdd}
t
j|
||gdd}|j}|	dddd|dd||	  }| |\}}|| |dur|dddd
ddddf }|	dddd||||}| |}|| S )zGx:       [B, C, H, W].
            context: [B, L, C] or None.
        r   r   )r   Nr   r   r   )dtypedevice)sizer   r   r   r   viewchunkr   reshapepermutetorchcatzerosr3   r4   half
contiguousr   tor   )r#   xcontextidentitybchwndqkvckcvcqqkvorigin_dtypeout_r'   r'   r(   forward;   s@   &
.

 
zFlashAttentionBlock.forward)NNNr   )N)__name__
__module____qualname__r   r1   rT   __classcell__r'   r'   r%   r(   r      s    
r   __main__r
   i   i   @   )r   r   r   r   r$   r   )r3   r   T)enabled   
   zAverage cost time i  z ms)#r   osrandomtimenumpynpr;   torch.cuda.ampcudaamptorch.nnr   torch.nn.functional
functionalFflash_attn.flash_attentionr   Moduler   rU   r$   	flash_netrandnfloat32rA   rB   evalautocastrangeiysynchronizes1s2printr'   r'   r'   r(   <module>   sJ   O


 