o
    }oiT                     @   s  d dl Z d dlZd dlmZ d dlZd dlm  mZ d dl	m
Z
mZ d dlmZmZ d dlmZ ejdddkrAd d	lmZ nz
d d
lmZ dZW n eyZ   ed dZY nw d dlmZ d dlmZ d dlmZmZ d dlm Z  d dl!m"Z" d dl#m$Z$ e$dd\Z%Z&e$dd\Z'Z(e$dd\Z)Z*e&oe(oe*Z+dd Z,zd dl-mZ d dl.m/Z/m0Z0 e, Z1ee0j2e0_2ee/j2e/_2W n e3y   dZ1Y nw dd Z4dd Z5d d! Z6d"d# Z7d$d% Z8G d&d' d'ej9Z:G d(d) d)ej9Z;d*d+ Z<dCd.d/Z=G d0d1 d1ej9Z>G d2d3 d3ej9Z?d4ej@d5eAd6ej@fd7d8ZBd4ej@d5eAd6ej@fd9d:ZCG d;d< d<ejDe jEZFG d=d> d>ej9ZGG d?d@ d@ej9ZHG dAdB dBej9ZIdS )D    N)
isfunction)	rearrangerepeat)einsumnn)disableUSE_NATIVE_GROUP_NORM01)GroupNormNormlization	GroupNormTz2Fused optimized group norm has not been installed.F)
fast_geglu)
checkpoint)AdapterNameParallelLinearAdapterConfig)adapter_mixins)logging)safe_import_fromz$transformer_engine.pytorch.attentionDotProductAttentionz!transformer_engine.pytorch.moduleLayerNormLinearLayerNormMLPc                  C   sN   t j s	tdt j } t j| }|jdko|jdk}|jdk}|p&|S )NzCUDA is not available         )torchcudais_availableImportErrorcurrent_deviceget_device_propertiesmajorminor)
cur_devicedpropsis_sm75is_sm8x_or_later r'   r/home/ubuntu/.local/lib/python3.10/site-packages/nemo/collections/multimodal/modules/stable_diffusion/attention.py
check_cuda3   s   


r)   )FlashCrossAttentionFlashSelfAttentionc                 C   s   | d uS Nr'   )valr'   r'   r(   existsL   s   r.   c                 C   s   dd | D   S )Nc                 S   s   i | ]}|d qS )Tr'   ).0elr'   r'   r(   
<dictcomp>Q   s    zuniq.<locals>.<dictcomp>)keys)arrr'   r'   r(   uniqP      r4   c                 C   s4   t | r| S t|tjttfr|S t|r| S |S r,   )r.   
isinstancer   Tensorfloatintr   )r-   dr'   r'   r(   defaultT   s
   r;   c                 C   s   t | jj S r,   )r   finfodtypemax)tr'   r'   r(   max_neg_value\   s   r@   c                 C   s*   | j d }dt| }| | | | S )N   )shapemathsqrtuniform_)tensordimstdr'   r'   r(   init_`   s   
rJ   c                       $   e Zd Z fddZdd Z  ZS )GEGLUc                    s   t    t||d | _d S )N   )super__init__LinearWrapperproj)selfdim_indim_out	__class__r'   r(   rO   i   s   
zGEGLU.__init__c                 C   s   |  |}t|S r,   )rQ   r   geglurR   xr'   r'   r(   forwardm   s   

zGEGLU.forward__name__
__module____qualname__rO   rZ   __classcell__r'   r'   rU   r(   rL   h   s    rL   c                       s&   e Zd Zd	 fdd	Zdd Z  ZS )
FeedForwardN   F        c                    s   t    t|| }t||}|r"|sdnd}t|||d| _d S t|}	|s4tt	||t
 nt||}
t|	|
t|t	||| _d S )NgelurW   )hidden_sizeffn_hidden_size
activation)rN   rO   r9   r;   r   netr   	LayerNorm
SequentialrP   GELUrL   Dropout)rR   rH   rT   multgludropoutuse_te	inner_dimrf   norm
project_inrU   r'   r(   rO   s   s   


$"zFeedForward.__init__c                 C   s
   |  |S r,   )rg   rX   r'   r'   r(   rZ      s   
zFeedForward.forward)Nra   Frb   Fr[   r'   r'   rU   r(   r`   r   s    r`   c                 C   s   |   D ]}|   q| S )z<
    Zero out the parameters of a module and return it.
    )
parametersdetachzero_)modulepr'   r'   r(   zero_module   s   rx        c                 C   s   t || dd|dS )Ngư>T)
num_groupsnum_channelsepsaffineactr   )in_channelsr{   r   r'   r'   r(   	Normalize   r5   r   c                       s&   e Zd Zd fdd	Zdd Z  ZS )LinearAttentionra   ry   c                    sD   t    || _|| }tj||d ddd| _t||d| _d S )N   rB   Fbias)rN   rO   headsr   Conv2dto_qkvto_out)rR   rH   r   dim_head
hidden_dimrU   r'   r(   rO      s
   
zLinearAttention.__init__c                 C   sv   |j \}}}}| |}t|d| jdd\}}}	|jdd}td||	}
td|
|}t|d| j||d	}| |S )
Nz*b (qkv heads c) h w -> qkv b heads c (h w)r   )r   qkvrA   rH   zbhdn,bhen->bhdezbhde,bhdn->bhenz"b heads c (h w) -> b (heads c) h w)r   hw)rC   r   r   r   softmaxr   r   r   )rR   rY   bcr   r   r   qkvcontextoutr'   r'   r(   rZ      s   

zLinearAttention.forward)ra   ry   r[   r'   r'   rU   r(   r      s    r   c                       rK   )SpatialSelfAttentionc                    s~   t    || _t|| _tjj||dddd| _tjj||dddd| _	tjj||dddd| _
tjj||dddd| _d S )NrB   r   kernel_sizestridepadding)rN   rO   r   r   rq   r   r   r   r   r   r   proj_out)rR   r   rU   r'   r(   rO      s   

zSpatialSelfAttention.__init__c                 C   s   |}|  |}| |}| |}| |}|j\}}}}	t|d}t|d}td||}
|
t|d  }
tj	j
j|
dd}
t|d}t|
d}
td||
}t|d|d	}| |}|| S )
Nzb c h w -> b (h w) czb c h w -> b c (h w)zbij,bjk->bik      rM   r   zb i j -> b j izb c (h w) -> b c h w)r   )rq   r   r   r   rC   r   r   r   r9   r   
functionalr   r   )rR   rY   h_r   r   r   r   r   r   r   w_r'   r'   r(   rZ      s"   








zSpatialSelfAttention.forwardr[   r'   r'   rU   r(   r      s    
r   r?   r   returnc                 C   s2   | j \}}}| |||ddd|| |dS )NrA   rB   rM   rC   view	transposereshape)r?   r   r   nchr'   r'   r(   rearrange_heads_outer   s   &r   c                 C   s:   | j d | }| j d }| |||ddd||dS )Nr   rB   rA   rM   r   )r?   r   r   r   r'   r'   r(   rearrange_heads_inner   s   
"r   c                       s2   e Zd Zd	 fdd	Z fddZdd Z  ZS )
rP   TNc                    s(   t  ||| | tjg || _d S r,   )rN   rO   set_accepted_adapter_typesr   _target_lora_network_alpha)rR   in_featuresout_featuresr   r   rU   r'   r(   rO      s   
zLinearWrapper.__init__c                    sj   t  |}|  r3| tj}|d s|S | tj}||}| jr/||| j|j   }|S || }|S )Nenabled)	rN   rZ   is_adapter_availableget_adapter_cfgr   PARALLEL_LINEAR_ADAPTERget_adapter_moduler   rH   )rR   rY   mixed_xcfglora_linear_adapterlora_mixed_xrU   r'   r(   rZ      s   zLinearWrapper.forwardc                 K   s(   |j | _i }tjj| ||fi | d S r,   )network_alphar   r   AdapterModuleMixinadd_adapter)rR   namer   kwargsr'   r'   r(   r      s   zLinearWrapper.add_adapter)TN)r\   r]   r^   rO   rZ   r   r_   r'   r'   rU   r(   rP      s    rP   c                       sB   e Zd Z								d fdd	Zdd	d
ZdddZ  ZS )CrossAttentionNr   @   rb   Fc
                    s  t    |r|rJ d|rtsJ d|rtsJ d|| | _|d u r*d| _nd| _t||}|| _|| _|| _	|d | _
|| _t|| jd|d| _t|| jd|d| _|| _|	| _|	rq| jrddnd}
t|| jd|
d| _nt|| _t|| jdd	| _tt| j||d
t|| _|| _|dkr|d dkr| jr||krt| j
d| _d S t| j
d| _d S | jrt|| j| d||krdndd| j
d| _d S d S d S d S )Nziuse_te_dpa and use_flash_attention cannot be True together. Please specify the attention you want to use.z"Flash-attention must be installed.z1TransformerEngine is required to run with TE DPA.TFr   )r   r   )r   return_layernorm_outputr   )r      r   r   )softmax_scaleno_maskrR   crossbshd)kv_channelsnum_attention_headsattn_mask_typeattention_type
qkv_formatr   ) rN   rO   flash_attn_installedHAVE_TErp   is_self_attnr;   context_dim	query_dimr   scaler   rP   to_kto_v
use_te_dparo   r   	norm_to_qr   rh   rq   to_qri   rk   r   use_flash_attentionr+   
flash_attnr*   r   te_dpa)rR   r   r   r   r   rn   r   r   r   ro   r   rU   r'   r(   rO      sh   





zCrossAttention.__init__r   c                 C   s  | j }|d ur|jd }tj||gdd}| jr2| |}| jr*|\}	}
t||
}n|}	t||}n| |}| 	|}	t||}| 
|}| |}|rw|jd | dksXJ |jd | }t|d d | d|d}t|d d | d|d}| j|	|||d d}| |S )NrB   r   r   zb ... -> (b n) ...)r   )additional_tokens)r   rC   r   catro   r   r   r;   rq   r   r   r   r   
_attentionr   )rR   rY   r   maskr   n_times_crossframe_attn_in_selfr   n_tokens_to_maskq_outr   ln_outr   r   n_cpr   r'   r'   r(   rZ   A  s.   







zCrossAttention.forwardc              
   C   s<  | j }tr!| js| jr!|jtjks!| jdks!| jd dks!|d uryt||}t||}t||}t	d||| j
 }t|rg||jd d}|j\}}	|d|||	|| d|	}|| | j|j  |jdd}
t	d|
|}t||}n| jr|j\}}}|jd }|| }| |||||||||||||||}nh| j| jkrtj|||gd	d}|j\}}}}|| }||||||}| |}||||}n7tj||gd	d}|jd }|j\}}}}|| }|||||}||||||}| ||}||||}|d ur|d d td f }|S )
Nr   r   r   zb i d, b j d -> b i jrA   rB   r   zb i j, b j d -> b i drM   )r   r   r   r   r=   r   float32r   r   r   r   r.   r   rC   	unsqueezeexpandr   masked_fill_max_negr   r   r   r   r   stackr   r   )rR   r   r   r   r   r   r   simr   jattnr   s_kvhds_qr:   r   sr?   kvr'   r'   r(   r   d  sZ   



"
4


zCrossAttention._attention)Nr   r   rb   FFNF)NNNr   )NN)r\   r]   r^   rO   rZ   r   r_   r'   r'   rU   r(   r      s    
H#r   c                       sD   e Zd Z									d fdd	Zddd	Zdd
dZ  ZS )BasicTransformerBlockrb   NTFc                    sn   t    |
| _t||||||	| jr|nd ||d	| _t||||d| _t|||||||	||d	| _|| _d S )N)	r   r   r   rn   r   r   r   r   ro   )rn   rm   ro   )	r   r   r   r   rn   r   r   r   ro   )	rN   rO   disable_self_attnr   attn1r`   ffattn2use_checkpoint)rR   rH   n_headsd_headrn   r   gated_ffr   r   r   r   r   ro   rU   r'   r(   rO     s4   

zBasicTransformerBlock.__init__r   c                 C   sr   d|i}|d ur| d|i |d ur| d|i |r#| d|i | jr3t| j||f|  | jS | ||S )NrY   r   r   r   )updater   r   _forwardrs   )rR   rY   r   r   r   r   r'   r'   r(   rZ     s   zBasicTransformerBlock.forwardc                 C   sP   | j || jr|nd || js|ndd| }| j|||d| }| || }|S )Nr   )r   r   r   )r   r   )r   r   r   r   )rR   rY   r   r   r   r'   r'   r(   r     s   	zBasicTransformerBlock._forward)	rb   NTFFFFNF)NNr   )r\   r]   r^   rO   rZ   r   r_   r'   r'   rU   r(   r     s    
*r   c                       s@   e Zd ZdZ										d
 fdd	Zddd	Z  ZS )SpatialTransformerz
    Transformer block for image-like data.
    First, project the input (aka embedding)
    and reshape to b, t, d.
    Then apply standard transformer action.
    Finally, reshape to image
    rB   rb   NFc                    s  t    td| jj d| d| d d	 ddlm} t r-t	 t
|fs- g t rqt	 t
rq|t krptd| jj d	  dt  d
| d| d g  d tt fdd siJ d| d g  n	 d u rzd g|  || _ t|| _|stj|dddd| _nt|| _t 	
fddt|D | _|sttj|dddd| _n	tt|| _|| _d S )Nzconstructing z
 of depth z w/ z channels and z headsr   )
ListConfigz	WARNING: z: Found context dims z0, which does not match the specified 'depth' of z. Setting context_dim to z now.c                    s   |  d kS )Nr   r'   )rY   )r   r'   r(   <lambda>  s    z-SpatialTransformer.__init__.<locals>.<lambda>z8need homogenous context_dim to match depth automaticallyrB   r   c                    s.   g | ]}t  | 
	d qS ))rn   r   r   r   r   r   r   ro   )r   )r/   r:   r   r   r   rn   rp   r   r   r   r   ro   r   r'   r(   
<listcomp>(  s     z/SpatialTransformer.__init__.<locals>.<listcomp>)rN   rO   r   inforV   r\   	omegaconfr   r.   r6   listlenallmapr   r   rq   r   r   proj_inLinear
ModuleListrangetransformer_blocksrx   r   
use_linear)rR   r   r   r   depthrn   r   r   r  r   r   r   r   ro   r   rU   r   r(   rO     sP   
 


zSpatialTransformer.__init__c           
      C   s   t |ts|g}|j\}}}}|}| |}| js| |}|||ddd}| jr1| |}t| j	D ]\}}	|dkrFt
|dkrFd}|	||| d}q6| jrW| |}|dd||||}| jsk| |}|| S )NrA   rB   rM   r   )r   )r6   r  rC   rq   r  r  r   r   	enumerater
  r  r   )
rR   rY   r   r   r   r   r   x_iniblockr'   r'   r(   rZ   B  s(   





zSpatialTransformer.forward)
rB   rb   NFFFFFNFr,   )r\   r]   r^   __doc__rO   rZ   r_   r'   r'   rU   r(   r     s    Hr   )ry   rz   )JrD   osinspectr   r   torch.nn.functionalr   r   Feinopsr   r   r   torch._dynamor   environgetnemo.gn_nativer   r   apex.contrib.group_normOPT_GROUP_NORM	Exceptionprint4nemo.collections.multimodal.modules.stable_diffusionr   Jnemo.collections.multimodal.modules.stable_diffusion.diffusionmodules.utilr   Gnemo.collections.nlp.modules.common.megatron.adapters.parallel_adaptersr   r   	nemo.corer   
nemo.utilsr   nemo.utils.import_utilsr   r   HAVE_DPAr   HAVE_LN_LINEARr   HAVE_LN_MLPr   r)   torch.nnflash_attn.modules.mhar*   r+   r   rZ   r   r.   r4   r;   r@   rJ   ModulerL   r`   rx   r   r   r   r7   r9   r   r   r  r   rP   r   r   r   r'   r'   r'   r(   <module>   sn   

	& 0J