o
    ۷iڦ                    @  sL  d dl mZ d dlZd dlZd dlmZ d dlZd dlm  m	Z
 d dlmZ ddlmZ ddlmZmZmZ ddlmZmZmZ dd	lmZmZ eeZe rUd dlZe rad dlZd dlZndZe rzed
drwd dlmZ d dl m!Z! dZ"ndZ"eG dd dej#Z$G dd dej#Z%G dd dej#Z&G dd dej#Z'G dd dZ(G dd dZ)G dd dej#Z*G dd dZ+G d d! d!Z,G d"d# d#Z-G d$d% d%Z.G d&d' d'Z/G d(d) d)Z0G d*d+ d+Z1G d,d- d-Z2G d.d/ d/Z3G d0d1 d1Z4G d2d3 d3Z5G d4d5 d5Z6G d6d7 d7Z7G d8d9 d9Z8G d:d; d;Z9G d<d= d=Z:G d>d? d?Z;G d@dA dAZ<G dBdC dCZ=G dDdE dEZ>G dFdG dGZ?G dHdI dIZ@G dJdK dKZAG dLdM dMZBG dNdO dOZCG dPdQ dQej#ZDG dRdS dSej#ZEG dTdU dUZFG dVdW dWZGG dXdY dYej#ZHG dZd[ d[ej#ZIG d\d] d]ejj#ZJG d^d_ d_ejj#ZKG d`da daejj#ZLG dbdc dcZMG ddde deZNG dfdg dgZOG dhdi diZPG djdk dkZQG dldm dmZRG dndo doZSG dpdq dqZTG drds dsZUG dtdu duZVG dvdw dwZWG dxdy dyZXG dzd{ d{ZYG d|d} d}ZZG d~d dZ[G dd dZ\G dd dZ]e+eGe,e7fZ^e)e:e8eFeIeJeZfZ_e)e*B e+B e,B e-B e.B e/B e0B e2B e3B e4B eWB e[B eYB e\B e5B e6B e7B e8B e;B e9B e:B e<B e(B e=B e>B e?B e@B eAB eBB eCB eDB eEB eFB eGB eTB eUB eVB e&B eOB e%B eIB eJB eKB eLB eMB eNB ePB eQB eRB eSB Z`dS )    )annotationsN)Callable)nn   )IPAdapterMaskProcessor)	deprecateis_torch_xla_availablelogging)is_torch_npu_availableis_torch_xla_versionis_xformers_available)is_torch_versionmaybe_allow_in_graph>z2.2)flash_attention)is_spmdTFc                      s  e Zd ZdZ																					
									dbdc fd.d/Z		ddded5d6Zdfd8d9Z	dgdhd=d>Zdid@dAZdjdCdDZ	dkdldGdHZ
		dmdndNdOZdodQdRZdpdqdTdUZ	dgdrdXdYZ	Sdpdsd\d]Zdtd^d_Ze dud`daZ  ZS )v	Attentiona  
    A cross attention layer.

    Parameters:
        query_dim (`int`):
            The number of channels in the query.
        cross_attention_dim (`int`, *optional*):
            The number of channels in the encoder_hidden_states. If not given, defaults to `query_dim`.
        heads (`int`,  *optional*, defaults to 8):
            The number of heads to use for multi-head attention.
        kv_heads (`int`,  *optional*, defaults to `None`):
            The number of key and value heads to use for multi-head attention. Defaults to `heads`. If
            `kv_heads=heads`, the model will use Multi Head Attention (MHA), if `kv_heads=1` the model will use Multi
            Query Attention (MQA) otherwise GQA is used.
        dim_head (`int`,  *optional*, defaults to 64):
            The number of channels in each head.
        dropout (`float`, *optional*, defaults to 0.0):
            The dropout probability to use.
        bias (`bool`, *optional*, defaults to False):
            Set to `True` for the query, key, and value linear layers to contain a bias parameter.
        upcast_attention (`bool`, *optional*, defaults to False):
            Set to `True` to upcast the attention computation to `float32`.
        upcast_softmax (`bool`, *optional*, defaults to False):
            Set to `True` to upcast the softmax computation to `float32`.
        cross_attention_norm (`str`, *optional*, defaults to `None`):
            The type of normalization to use for the cross attention. Can be `None`, `layer_norm`, or `group_norm`.
        cross_attention_norm_num_groups (`int`, *optional*, defaults to 32):
            The number of groups to use for the group norm in the cross attention.
        added_kv_proj_dim (`int`, *optional*, defaults to `None`):
            The number of channels to use for the added key and value projections. If `None`, no projection is used.
        norm_num_groups (`int`, *optional*, defaults to `None`):
            The number of groups to use for the group norm in the attention.
        spatial_norm_dim (`int`, *optional*, defaults to `None`):
            The number of channels to use for the spatial normalization.
        out_bias (`bool`, *optional*, defaults to `True`):
            Set to `True` to use a bias in the output linear layer.
        scale_qk (`bool`, *optional*, defaults to `True`):
            Set to `True` to scale the query and key by `1 / sqrt(dim_head)`.
        only_cross_attention (`bool`, *optional*, defaults to `False`):
            Set to `True` to only use cross attention and not added_kv_proj_dim. Can only be set to `True` if
            `added_kv_proj_dim` is not `None`.
        eps (`float`, *optional*, defaults to 1e-5):
            An additional value added to the denominator in group normalization that is used for numerical stability.
        rescale_output_factor (`float`, *optional*, defaults to 1.0):
            A factor to rescale the output by dividing it with this value.
        residual_connection (`bool`, *optional*, defaults to `False`):
            Set to `True` to add the residual connection to the output.
        _from_deprecated_attn_block (`bool`, *optional*, defaults to `False`):
            Set to `True` if the attention block is loaded from a deprecated state dict.
        processor (`AttnProcessor`, *optional*, defaults to `None`):
            The attention processor to use. If `None`, defaults to `AttnProcessor2_0` if `torch 2.x` is used and
            `AttnProcessor` otherwise.
    N   @           F    Th㈵>      ?	query_dimintcross_attention_dim
int | Noneheadskv_headsdim_headdropoutfloatbiasboolupcast_attentionupcast_softmaxcross_attention_norm
str | Nonecross_attention_norm_num_groupsqk_normadded_kv_proj_dimadded_proj_biasbool | Nonenorm_num_groupsspatial_norm_dimout_biasscale_qkonly_cross_attentionepsrescale_output_factorresidual_connection_from_deprecated_attn_block	processor'AttnProcessor' | Noneout_dimout_context_dimelementwise_affine	is_causalc           #        sv  t    ddlm}m} m}! |d ur|n|| | _|d u r!| jn|| | _|| _|| _	|d u| _
|d ur7|n|| _|| _|	| _|| _|| _|| _d| _|d urR|n|| _|d ur[|n|| _|| _|| _|| _|| _|| _| jrt|d nd| _|d ur|| n|| _|| _|| _|| _| jd u r| jrtd|d urtj |||dd| _!nd | _!|d urt"||d	| _#nd | _#|d u rd | _$d | _%n|d
krtj&|||d| _$tj&|||d| _%n}|dkr||dd|d| _$||dd|d| _%nf|dkrtj&|| |d| _$tj&|| |d| _%nL|dkr!|!|||d| _$|!|||d| _%n6|dkr9|!|| |d| _$|!|| |d| _%n|dkrO| dd|d| _$| dd|d| _%ntd| d|
d u r`d | _'n1|
d
krmt&| j| _'n$|
dkr| jd ur{|}"n| j}"tj |"|ddd| _'ntd|
 dtj(|| j|d| _)| jstj(| j| j|d| _*tj(| j| j|d| _+nd | _*d | _+|| _,| jd urtj(|| j|d| _-tj(|| j|d| _.| jd urtj(|| j|d| _/n	d | _/d | _-d | _.| jst0g | _1| j12tj(| j| j|d | j12t3| nd | _1| jd ur.| js.tj(| j| j|d| _4nd | _4|d ur|d ur|d
krStj&|||d| _5tj&|||d| _6nL|dkrk||dd|d| _5||dd|d| _6n4|dkr|!||d| _5|!||d| _6n |dkrd | _5|!|| |d| _6ntd| dd | _5d | _6|d u rt7t8dr| jrt9 nt: }| ;| d S )N   )FP32LayerNormLpNormRMSNormFg      r   z`only_cross_attention` can only be set to True if `added_kv_proj_dim` is not None. Make sure to set either `only_cross_attention=False` or define `added_kv_proj_dim`.Tnum_channels
num_groupsr2   affine)
f_channelszq_channels
layer_norm)r2   r:   fp32_layer_norm)r:   r"   r2   layer_norm_across_heads)r2   rms_normrms_norm_across_headsl2r   )pdimr2   zunknown qk_norm: z. Should be one of None, 'layer_norm', 'fp32_layer_norm', 'layer_norm_across_heads', 'rms_norm', 'rms_norm_across_heads', 'l2'.
group_normr   zunknown cross_attention_norm: z.. Should be None, 'layer_norm' or 'group_norm'r"   zC. Should be one of `None,'layer_norm','fp32_layer_norm','rms_norm'`scaled_dot_product_attention)<super__init__normalizationr=   r>   r?   	inner_diminner_kv_dimr   use_biasis_cross_attentionr   r$   r%   r3   r4   r    fused_projectionsr8   r9   context_pre_onlypre_onlyr;   r5   r0   scaler   sliceable_head_dimr*   r1   
ValueErrorr   	GroupNormrO   SpatialNormspatial_normnorm_qnorm_k	LayerNorm
norm_crossLinearto_qto_kto_vr+   
add_k_proj
add_v_proj
add_q_proj
ModuleListto_outappendDropout
to_add_outnorm_added_qnorm_added_khasattrFAttnProcessor2_0AttnProcessorset_processor)#selfr   r   r   r   r   r    r"   r$   r%   r&   r(   r)   r*   r+   r-   r.   r/   r0   r1   r2   r3   r4   r5   r6   r8   r9   rZ   r[   r:   r;   r=   r>   r?   norm_cross_num_channels	__class__ Z/home/ubuntu/vllm_env/lib/python3.10/site-packages/diffusers/models/attention_processor.pyrS   k   s   
!
















zAttention.__init__use_xla_flash_attentionpartition_spectuple[str | None, ...] | NonereturnNonec                 C  sp   |r#t sdtddrdt rtddrd|rt|}nt|}nttdr.| jr.t nt	 }| 
| dS )	ak  
        Set whether to use xla flash attention from `torch_xla` or not.

        Args:
            use_xla_flash_attention (`bool`):
                Whether to use pallas flash attention kernel from `torch_xla` or not.
            partition_spec (`tuple[]`, *optional*):
                Specify the partition specification if using SPMD. Otherwise None.
        ztorch_xla is not available<2.3zEflash attention pallas kernel is supported from torch_xla version 2.32.4zPflash attention pallas kernel using SPMD is supported from torch_xla version 2.4rQ   N)r   r   r   XLAFluxFlashAttnProcessor2_0XLAFlashAttnProcessor2_0rt   ru   r0   rv   rw   rx   )ry   r   r   is_fluxr6   r}   r}   r~   set_use_xla_flash_attention7  s   


z%Attention.set_use_xla_flash_attentionuse_npu_flash_attentionc                 C  s6   |rt  }nttdr| jrt nt }| | dS )zR
        Set whether to use npu flash attention from `torch_npu` or not.

        rQ   N)AttnProcessorNPUrt   ru   r0   rv   rw   rx   )ry   r   r6   r}   r}   r~   set_use_npu_flash_attentionX  s
   z%Attention.set_use_npu_flash_attention'use_memory_efficient_attention_xformersattention_opCallable | Nonec              
   C  s  t | dot| jtttf}t | dot| jtttt	f}t | do*t| jt
ttf}t | do7t| jttf}|r|rG|rGtd| j t sPtdddtj sYtdz!d}|duri|\}}	|j^}}
tjdd	|d
}tj|||}
W n ty } z|d}~ww |rt| jj| jj| jj| jj|d}| | j!  t | jdr|"| jj#j$j% n|rt&'d t	|d}n|rt| jj| jj| jj(| jj)|d}| | j!  t | jdr|j"| jj*d j$j%| jj*d j$j+d
 n|rt|d}nt,|d}n||r:t t-drtnt}|| jj| jj| jj| jjd}| | j!  t | jdr9|"| jj#j$j% nF|rpt| jj| jj| jj(| jj)d}| | j!  t | jdro|j"| jj*d j$j%| jj*d j$j+d
 nt t-dr}| j.r}t/ nt0 }| 1| dS )a  
        Set whether to use memory efficient attention from `xformers` or not.

        Args:
            use_memory_efficient_attention_xformers (`bool`):
                Whether to use memory efficient attention from `xformers` or not.
            attention_op (`Callable`, *optional*):
                The attention operation to use. Defaults to `None` which uses the default attention operation from
                `xformers`.
        r6   zhMemory efficient attention is currently not supported for custom diffusion for attention processor type zeRefer to https://github.com/facebookresearch/xformers for more information on how to install xformersxformers)namezvtorch.cuda.is_available() should be True but is False. xformers' memory efficient attention is only available for GPU N)r<   r   (   cudadevicedtype)train_kvtrain_q_outhidden_sizer   r   to_k_custom_diffusionzMemory efficient attention with `xformers` might currently not work correctly if an attention mask is required for the attention operation.r   )r   r   
num_tokensr\   r   to_k_ipr   rQ   )r   r   r   r   )r   r   r   r\   )2rt   
isinstancer6   CustomDiffusionAttnProcessor$CustomDiffusionXFormersAttnProcessorCustomDiffusionAttnProcessor2_0AttnAddedKVProcessorAttnAddedKVProcessor2_0SlicedAttnAddedKVProcessorXFormersAttnAddedKVProcessorIPAdapterAttnProcessorIPAdapterAttnProcessor2_0IPAdapterXFormersAttnProcessorJointAttnProcessor2_0XFormersJointAttnProcessorNotImplementedErrorr   ModuleNotFoundErrortorchr   is_availabler^   SUPPORTED_DTYPESrandnr   opsmemory_efficient_attention	Exceptionr   r   r   r   load_state_dict
state_dicttor   weightr   loggerinfor   r\   r   r   XFormersAttnProcessorru   r0   rv   rw   rx   )ry   r   r   is_custom_diffusionis_added_kv_processoris_ip_adapteris_joint_processorr   op_fwop_bw_qer6   attn_processor_classr}   r}   r~   +set_use_memory_efficient_attention_xformersi  s   	




z5Attention.set_use_memory_efficient_attention_xformers
slice_sizec                 C  s   |dur|| j krtd| d| j  d|dur#| jdur#t|}n |dur,t|}n| jdur5t }nttdr@| jr@t	 nt
 }| | dS )z
        Set the slice size for attention computation.

        Args:
            slice_size (`int`):
                The slice size for attention computation.
        Nzslice_size z has to be smaller or equal to .rQ   )r]   r^   r*   r   SlicedAttnProcessorr   rt   ru   r0   rv   rw   rx   )ry   r   r6   r}   r}   r~   set_attention_slice  s   


zAttention.set_attention_slice'AttnProcessor'c                 C  sV   t | dr&t| jtjjr&t|tjjs&td| j d|  | j	d || _dS )z
        Set the attention processor to use.

        Args:
            processor (`AttnProcessor`):
                The attention processor to use.
        r6   z-You are removing possibly trained weights of z with N)
rt   r   r6   r   r   Moduler   r   _modulespop)ry   r6   r}   r}   r~   rx     s   
zAttention.set_processorreturn_deprecated_lora'AttentionProcessor'c                 C  s   |s| j S dS )a7  
        Get the attention processor in use.

        Args:
            return_deprecated_lora (`bool`, *optional*, defaults to `False`):
                Set to `True` to return the deprecated LoRA attention processor.

        Returns:
            "AttentionProcessor": The attention processor in use.
        Nr6   )ry   r   r}   r}   r~   get_processor+  s   zAttention.get_processorhidden_statestorch.Tensorencoder_hidden_statestorch.Tensor | Noneattention_maskc                   s   t t| jjj  ddh fdd| D }t|dkr1t	
d| d| jjj d  fd	d
| D }| j| |f||d|S )ah  
        The forward method of the `Attention` class.

        Args:
            hidden_states (`torch.Tensor`):
                The hidden states of the query.
            encoder_hidden_states (`torch.Tensor`, *optional*):
                The hidden states of the encoder.
            attention_mask (`torch.Tensor`, *optional*):
                The attention mask to use. If `None`, no mask is applied.
            **cross_attention_kwargs:
                Additional keyword arguments to pass along to the cross attention.

        Returns:
            `torch.Tensor`: The output of the attention layer.
        ip_adapter_masksip_hidden_statesc                   s$   g | ]\}}| vr|vr|qS r}   r}   ).0kr   attn_parametersquiet_attn_parametersr}   r~   
<listcomp>V  s    z%Attention.forward.<locals>.<listcomp>r   zcross_attention_kwargs z are not expected by z and will be ignored.c                   s   i | ]\}}| v r||qS r}   r}   )r   r   w)r   r}   r~   
<dictcomp>]  s    z%Attention.forward.<locals>.<dictcomp>r   r   )setinspect	signaturer6   __call__
parameterskeysitemslenr   warningr|   __name__)ry   r   r   r   cross_attention_kwargsunused_kwargsr}   r   r~   forward9  s&   zAttention.forwardtensorc                 C  sL   | j }|j\}}}||| |||}|dddd|| ||| }|S )ac  
        Reshape the tensor from `[batch_size, seq_len, dim]` to `[batch_size // heads, seq_len, dim * heads]`. `heads`
        is the number of heads initialized while constructing the `Attention` class.

        Args:
            tensor (`torch.Tensor`): The tensor to reshape.

        Returns:
            `torch.Tensor`: The reshaped tensor.
        r   r   r<      )r   shapereshapepermute)ry   r   	head_size
batch_sizeseq_lenrN   r}   r}   r~   batch_to_head_dimg  s
   "zAttention.batch_to_head_dimr   c                 C  s~   | j }|jdkr|j\}}}d}n|j\}}}}|||| ||| }|dddd}|dkr=||| || || }|S )a   
        Reshape the tensor from `[batch_size, seq_len, dim]` to `[batch_size, seq_len, heads, dim // heads]` `heads` is
        the number of heads initialized while constructing the `Attention` class.

        Args:
            tensor (`torch.Tensor`): The tensor to reshape.
            out_dim (`int`, *optional*, defaults to `3`): The output dimension of the tensor. If `3`, the tensor is
                reshaped to `[batch_size * heads, seq_len, dim // heads]`.

        Returns:
            `torch.Tensor`: The reshaped tensor.
        r   r<   r   r   )r   ndimr   r   r   )ry   r   r8   r   r   r   rN   	extra_dimr}   r}   r~   head_to_batch_dimx  s   
zAttention.head_to_batch_dimquerykeyc           	      C  s   |j }| jr| }| }|du r*tj|jd |jd |jd |j |jd}d}n|}d}tj|||dd|| j	d}~| j
rE| }|jdd}~||}|S )	ak  
        Compute the attention scores.

        Args:
            query (`torch.Tensor`): The query tensor.
            key (`torch.Tensor`): The key tensor.
            attention_mask (`torch.Tensor`, *optional*): The attention mask to use. If `None`, no mask is applied.

        Returns:
            `torch.Tensor`: The attention probabilities/scores.
        Nr   r<   r   r   rL   )betaalpharN   )r   r$   r!   r   emptyr   r   baddbmm	transposer\   r%   softmaxr   )	ry   r   r   r   r   baddbmm_inputr   attention_scoresattention_probsr}   r}   r~   get_attention_scores  s2    

zAttention.get_attention_scorestarget_lengthr   c           	      C  s   | j }|du r	|S |jd }||krA|jjdkr7|jd |jd |f}tj||j|jd}tj||gdd}n
tj	|d|fd	d
}|dkr]|jd || k r[|j
|d|jd | d}|S |dkrs|d}|j
|d|jd | d}|S )a  
        Prepare the attention mask for the attention computation.

        Args:
            attention_mask (`torch.Tensor`):
                The attention mask to prepare.
            target_length (`int`):
                The target length of the attention mask. This is the length of the attention mask after padding.
            batch_size (`int`):
                The batch size, which is used to repeat the attention mask.
            out_dim (`int`, *optional*, defaults to `3`):
                The output dimension of the attention mask. Can be either `3` or `4`.

        Returns:
            `torch.Tensor`: The prepared attention mask.
        NrL   mpsr   r<   r   r   r  r   )valuer   rN   output_size   )r   r   r   typer   zerosr   catru   padrepeat_interleave	unsqueeze)	ry   r   r  r   r8   r   current_lengthpadding_shapepaddingr}   r}   r~   prepare_attention_mask  s,   
	
z Attention.prepare_attention_maskc                 C  sf   | j dus	J dt| j tjr|  |}|S t| j tjr1|dd}|  |}|dd}|S J )aG  
        Normalize the encoder hidden states. Requires `self.norm_cross` to be specified when constructing the
        `Attention` class.

        Args:
            encoder_hidden_states (`torch.Tensor`): Hidden states of the encoder.

        Returns:
            `torch.Tensor`: The normalized encoder hidden states.
        NzGself.norm_cross must be defined to call self.norm_encoder_hidden_statesr<   r   )re   r   r   rd   r_   r  )ry   r   r}   r}   r~   norm_encoder_hidden_states  s   

z$Attention.norm_encoder_hidden_statesc                 C  s   | j jjj}| j jjj}| jsYt| j jj| jjj| j	jjg}|j
d }|j
d }tj||| j||d| _| jj| | jrXt| j jj| jjj| j	jjg}| jj| nAt| jjj| j	jjg}|j
d }|j
d }tj||| j||d| _| jj| | jrt| jjj| j	jjg}| jj| t| dd d urt| dd d urt| dd d urt| jjj| jjj| jjjg}|j
d }|j
d }tj||| j||d| _| jj| | jrt| jjj| jjj| jjjg}| jj| || _d S )Nr<   r   )r"   r   r   rl   rj   rk   )rg   r   datar   r   rX   r   r  rh   ri   r   r   rf   rW   to_qkvcopy_r"   to_kvgetattrrl   rj   rk   r+   to_added_qkvrY   )ry   fuser   r   concatenated_weightsin_featuresout_featuresconcatenated_biasr}   r}   r~   fuse_projections  sL   "

"




zAttention.fuse_projections)Nr   Nr   r   FFFNr   NNTNNTTFr   r   FFNNNNFTF)8r   r   r   r   r   r   r   r   r   r   r    r!   r"   r#   r$   r#   r%   r#   r&   r'   r(   r   r)   r'   r*   r   r+   r,   r-   r   r.   r   r/   r#   r0   r#   r1   r#   r2   r!   r3   r!   r4   r#   r5   r#   r6   r7   r8   r   r9   r   r:   r#   r;   r#   )NF)r   r#   r   r   r   r   )r   r#   r   r   N)r   r#   r   r   r   r   )r   r   r   r   )r6   r   r   r   )F)r   r#   r   r   NN)r   r   r   r   r   r   r   r   )r   r   r   r   )r   )r   r   r8   r   r   r   )r   r   r   r   r   r   r   r   )
r   r   r  r   r   r   r8   r   r   r   )r   r   r   r   )T)r   
__module____qualname____doc__rS   r   r   r   r   rx   r   r   r   r   r
  r  r  r   no_gradr'  __classcell__r}   r}   r{   r~   r   3   sn    9 P
! 


.0
3r   c                      s(   e Zd Zd fddZdddZ  ZS )!SanaMultiscaleAttentionProjectionin_channelsr   num_attention_headskernel_sizer   r   c              	     sP   t    d| }tj||||d |dd| _tj||dddd| dd| _d S )Nr   r   F)r  groupsr"   r<   r   )r3  r"   )rR   rS   r   Conv2dproj_inproj_out)ry   r0  r1  r2  channelsr{   r}   r~   rS   I  s   
"z*SanaMultiscaleAttentionProjection.__init__r   r   c                 C  s   |  |}| |}|S r(  )r5  r6  ry   r   r}   r}   r~   r   \  s   

z)SanaMultiscaleAttentionProjection.forward)r0  r   r1  r   r2  r   r   r   r   r   r   r   r   r*  r+  rS   r   r.  r}   r}   r{   r~   r/  H  s    r/  c                      sP   e Zd ZdZ							d&d' fddZd(dd Zd(d!d"Zd)d$d%Z  ZS )*SanaMultiscaleLinearAttentionz(Lightweight multi-scale linear attentionNr   r   
batch_norm   V瞯<Fr0  r   out_channelsr1  r   attention_head_dimmultr!   	norm_typestrkernel_sizestuple[int, ...]r2   r4   r#   c
                   s   t    ddlm}
 || _|| _|| _|	| _|d u r#t|| | n|}|| }t	j
||dd| _t	j
||dd| _t	j
||dd| _t	 | _|D ]}| jt||| qKt	 | _t	j
|dt|  |dd| _|
||d| _t | _d S )Nr<   )get_normalizationFrP   )num_features)rR   rS   rT   rG  r2   rA  rC  r4   r   r   rf   rg   rh   ri   rm   to_qkv_multiscalero   r/  ReLUnonlinearityr   rn   norm_outSanaMultiscaleAttnProcessor2_0r6   )ry   r0  r@  r1  rA  rB  rC  rE  r2   r4   rG  rU   r2  r{   r}   r~   rS   e  s*   



z&SanaMultiscaleLinearAttention.__init__r   r   r   r  r   c                 C  s~   t j|dddd}t||dd}t||}|jtjd}|d d d d d df |d d d d dd f | j  }|S )Nr   r   r   r<   constantr<   moder  rL   r   r   )ru   r  r   matmulr  r   float32r2   ry   r   r   r  scoresr   r}   r}   r~   apply_linear_attention  s   :z4SanaMultiscaleLinearAttention.apply_linear_attentionc                 C  sT   t |dd|}|jt jd}|t j|ddd| j  }t |||j}|S )NrL   r   rR  r   T)rN   keepdim)r   rS  r  r   rT  sumr2   r   rU  r}   r}   r~   apply_quadratic_attention  s
   z7SanaMultiscaleLinearAttention.apply_quadratic_attentionr   c                 C  s   |  | |S r(  r   r8  r}   r}   r~   r     s   z%SanaMultiscaleLinearAttention.forward)Nr   r   r<  r=  r?  F)r0  r   r@  r   r1  r   rA  r   rB  r!   rC  rD  rE  rF  r2   r!   r4   r#   )r   r   r   r   r  r   r   r   r9  )	r   r*  r+  r,  rS   rW  rZ  r   r.  r}   r}   r{   r~   r;  b  s    
+
	r;  c                      sD   e Zd Z										d#d$ fddZ		d%d&d!d"Z  ZS )'MochiAttentionr   r   r   FTNr   r   r   r*   r6   'MochiAttnProcessor2_0'r   r   r    r!   r"   r#   r+   r8   r   r9   r/   rZ   r2   c                   s  t    ddlm} |	d ur|	n|| | _|	d ur|	n|| _|
r#|
n|| _|| _|	d ur1|	| n|| _|||d| _	|||d| _
|||d| _|||d| _tj|| j|d| _tj|| j|d| _tj|| j|d| _tj|| j|d| _tj|| j|d| _| jd urtj|| j|d| _tg | _| jtj| j| j|d | jt| | jstj| j| j|d| _|| _d S )Nr<   )MochiRMSNormTrP   )rR   rS   rT   r]  rU   r8   r9   rZ   r   rb   rc   rr   rs   r   rf   rg   rh   ri   rj   rk   rl   rm   rn   ro   rp   rq   r6   )ry   r   r*   r6   r   r   r    r"   r+   r8   r9   r/   rZ   r2   r]  r{   r}   r~   rS     s0   


zMochiAttention.__init__r   r   r   r   r   c                 K  s   | j | |f||d|S )Nr   r   )ry   r   r   r   kwargsr}   r}   r~   r     s   zMochiAttention.forward)
r   r   r   FTNNTFr   )r   r   r*   r   r6   r\  r   r   r   r   r    r!   r"   r#   r+   r#   r8   r   r9   r   r/   r#   rZ   r#   r2   r!   r)  )r   r   r   r   r   r   r:  r}   r}   r{   r~   r[    s    4r[  c                   @  s&   e Zd ZdZdd Z	ddddZdS )MochiAttnProcessor2_0z"Attention processor used in Mochi.c                 C     t tds	tdd S )NrQ   zUMochiAttnProcessor2_0 requires PyTorch 2.0. To use it, please upgrade PyTorch to 2.0.rt   ru   ImportErrorry   r}   r}   r~   rS        
zMochiAttnProcessor2_0.__init__Nattn'MochiAttention'r   r   r   r   image_rotary_embr   r   c               	   C  sJ  | |}||}||}|d|jdf}|d|jdf}|d|jdf}|jd ur4||}|jd ur>||}||}	||}
|	|}|	d|jdf}	|
d|jdf}
|d|jdf}|j
d urr|
|	}	|jd ur|||
}
|d urdd }||g|R  }||g|R  }|dd|dd|dd}}}|	dd|
dd|dd}	}
}|d}|	d}|| }|j\}}}}g }t|D ]}|| d d d f }tj| dd }|	||d d d |d d f }|
||d d d |d d f }|||d d d |d d f }tj|||d  |gdd}tj|||d  |gdd}tj|||d  |gdd}tj|||d	dd
}|d}t|ddd|| f}|| qtj|dd}|dddd}|j||fdd\}}|jd |}|jd |}t|dr||}||fS )Nr   rL   c                 S  st   | ddd df   }| ddd df   }|| ||  | j}|| ||  | j}tj||gdddS )N.r   r   r<   rL   r  r   )r!   r   r   r   stackflatten)x	freqs_cos	freqs_sinx_evenx_oddcossinr}   r}   r~   apply_rotary_emb  s
   z8MochiAttnProcessor2_0.__call__.<locals>.apply_rotary_embr<   F)as_tupler  r   	dropout_pr;   r   r   rq   )rg   rh   ri   	unflattenr   rb   rc   rl   rj   rk   rr   rs   r  sizer   ranger   nonzerori  r  ru   rQ   r  ro   split_with_sizesrn   rt   rq   ) ry   re  r   r   r   rg  r   r   r  encoder_queryencoder_keyencoder_valuerq  sequence_lengthencoder_sequence_lengthtotal_lengthr   r   r   rN   attn_outputsidxmaskvalid_prompt_token_indicesvalid_encoder_queryvalid_encoder_keyvalid_encoder_valuevalid_query	valid_keyvalid_valueattn_outputvalid_sequence_lengthr}   r}   r~   r     sv   













	(





   



zMochiAttnProcessor2_0.__call__r(  )re  rf  r   r   r   r   r   r   rg  r   r   r   r   r*  r+  r,  rS   r   r}   r}   r}   r~   r_    s
    
r_  c                   @  s"   e Zd ZdZ			ddddZdS )rw   zJ
    Default processor for performing attention-related computations.
    Nre  r   r   r   r   r   r   tembr   c                 O  s  t |dks|dd d urd}tdd| |}	|jd ur#|||}|j}
|
dkr>|j\}}}}||||| dd}|d u rE|jn|j\}}}||||}|j	d urd|	|dddd}|
|}|d u rp|}n|jrx||}||}||}||}||}||}||||}t||}||}|jd |}|jd |}|
dkr|dd	||||}|jr||	 }||j }|S )
Nr   r\   The `scale` argument is deprecated and will be ignored. Please remove it, as passing it will raise an error in the future. `scale` should directly be passed while calling the underlying pipeline component i.e., via `cross_attention_kwargs`.1.0.0r  r<   r   rL   r   )r   getr   ra   r   r   viewr  r  rO   rg   re   r  rh   ri   r   r
  r   bmmr   rn   r   r4   r3   )ry   re  r   r   r   r  argsr^  deprecation_messageresidual
input_ndimr   channelheightwidthr}  r   r   r   r  r	  r}   r}   r~   r   T  sH   











zAttnProcessor.__call__NNNre  r   r   r   r   r   r   r   r  r   r   r   r   r*  r+  r,  r   r}   r}   r}   r~   rw   O  s    rw   c                      @   e Zd ZdZ						dd fddZ		ddddZ  ZS )r   aK  
    Processor for implementing attention for the Custom Diffusion method.

    Args:
        train_kv (`bool`, defaults to `True`):
            Whether to newly train the key and value matrices corresponding to the text features.
        train_q_out (`bool`, defaults to `True`):
            Whether to newly train query matrices corresponding to the latent image features.
        hidden_size (`int`, *optional*, defaults to `None`):
            The hidden size of the attention layer.
        cross_attention_dim (`int`, *optional*, defaults to `None`):
            The number of channels in the `encoder_hidden_states`.
        out_bias (`bool`, defaults to `True`):
            Whether to include the bias parameter in `train_q_out`.
        dropout (`float`, *optional*, defaults to 0.0):
            The dropout probability to use.
    TNr   r   r#   r   r   r   r   r/   r    r!   c                      t    || _|| _|| _|| _| jr*tj|p||dd| _tj|p$||dd| _	| jrStj||dd| _
tg | _| jtj|||d | jt| d S d S NFrP   rR   rS   r   r   r   r   r   rf   r   to_v_custom_diffusionto_q_custom_diffusionrm   to_out_custom_diffusionro   rp   ry   r   r   r   r   r/   r    r{   r}   r~   rS        
	z%CustomDiffusionAttnProcessor.__init__re  r   r   r   r   r   r   r   c                 C  s  |j \}}}||||}| jr| ||jjj}n|||jjj}|d u r0d}	|}n
d}	|jr:|	|}| j
rd| || jjj}
| || jjj}|
|jjj}
||jjj}n
||}
||}|	rt|
}|d d d dd d f d |d d d dd d f< ||
 d| |
   }
|| d| |   }||}||
}
||}|||
|}t||}||}| jr| jd |}| jd |}|S |jd |}|jd |}|S )NFTr<   r   r   )r   r  r   r  r   rg   r   r   re   r  r   r   r  rh   ri   r   	ones_likedetachr   r
  r  r   r  rn   )ry   re  r   r   r   r   r}  r   r   	crossattnr   r  r  r	  r}   r}   r~   r     sH   



4



z%CustomDiffusionAttnProcessor.__call__TTNNTr   r   r#   r   r#   r   r   r   r   r/   r#   r    r!   r)  
re  r   r   r   r   r   r   r   r   r   r   r*  r+  r,  rS   r   r.  r}   r}   r{   r~   r     s    r   c                   @      e Zd ZdZ		ddddZdS )r   z
    Processor for performing attention-related computations with extra learnable key and value matrices for the text
    encoder.
    Nre  r   r   r   r   r   r   r   c                 O  s  t |dks|dd d urd}tdd| |}||jd |jd ddd}|j\}	}
}|||
|	}|d u r=|}n|jrE||}|	|dddd}|
|}||}||}||}||}||}|js||}||}||}||}tj||gdd}tj||gdd}n|}|}||||}t||}||}|jd |}|jd |}|dd	|j}|| }|S )
Nr   r\   r  r  r<   rL   r   r  r   )r   r  r   r  r   r  r  re   r  rO   rg   r   rj   rk   r1   rh   ri   r   r  r
  r  r   rn   r   )ry   re  r   r   r   r  r^  r  r  r   r}  r   r   encoder_hidden_states_key_proj encoder_hidden_states_value_projr   r  r	  r}   r}   r~   r     sF   	"











zAttnAddedKVProcessor.__call__r)  r  r  r}   r}   r}   r~   r     s
    	r   c                   @  (   e Zd ZdZdd Z		ddddZdS )r   z
    Processor for performing scaled dot-product attention (enabled by default if you're using PyTorch 2.0), with extra
    learnable key and value matrices for the text encoder.
    c                 C  r`  )NrQ   zWAttnAddedKVProcessor2_0 requires PyTorch 2.0, to use it, please upgrade PyTorch to 2.0.ra  rc  r}   r}   r~   rS   H  
   
z AttnAddedKVProcessor2_0.__init__Nre  r   r   r   r   r   r   r   c                 O  s  t |dks|dd d urd}tdd| |}||jd |jd ddd}|j\}	}
}|j||
|	dd	}|d u r?|}n|jrG||}|	|dddd}|
|}|j|dd	}||}||}|j|dd	}|j|dd	}|js||}||}|j|dd	}|j|dd	}tj||gdd
}tj||gdd
}n|}|}tj||||ddd}|dd|	d|jd }|jd |}|jd |}|dd|j}|| }|S )Nr   r\   r  r  r<   rL   r   r  )r8   r  r   F	attn_maskrt  r;   r   )r   r  r   r  r   r  r  re   r  rO   rg   r   rj   rk   r1   rh   ri   r   r  ru   rQ   r   rn   )ry   re  r   r   r   r  r^  r  r  r   r}  r   r   r  r  r   r  r}   r}   r~   r   N  sH   	"





z AttnAddedKVProcessor2_0.__call__r)  r  r  r}   r}   r}   r~   r   B  s    
r   c                   @  r  )r   YAttention processor used typically in processing the SD3-like self-attention projections.c                 C  r`  )NrQ   zUJointAttnProcessor2_0 requires PyTorch 2.0, to use it, please upgrade PyTorch to 2.0.ra  rc  r}   r}   r~   rS     rd  zJointAttnProcessor2_0.__init__Nre  r   r   torch.FloatTensorr   r   torch.FloatTensor | Noner   c                 O  sH  |}|j d }||}	||}
||}|
j d }||j }|	|d|j|dd}	|
|d|j|dd}
||d|j|dd}|jd urQ||	}	|jd ur[||
}
|d ur|	|}|
|}||}||d|j|dd}||d|j|dd}||d|j|dd}|jd ur||}|jd ur||}tj|	|gdd}	tj|
|gdd}
tj||gdd}tj|	|
|ddd}|dd|d|j| }||	j}|d ur|d d d |j d f |d d |j d d f }}|js||}|jd |}|jd |}|d ur"||fS |S )	Nr   rL   r<   r   r  r   Frs  )r   rg   rh   ri   r   r  r  rb   rc   rl   rj   rk   rr   rs   r   r  ru   rQ   r   r   r   rZ   rq   rn   )ry   re  r   r   r   r  r^  r  r   r   r   r  rU   head_dim encoder_hidden_states_query_projr  r  r}   r}   r~   r     sl   	






















zJointAttnProcessor2_0.__call__r)  
re  r   r   r  r   r  r   r  r   r  r  r}   r}   r}   r~   r         r   c                   @  r  )PAGJointAttnProcessor2_0r  c                 C  r`  )NrQ   zXPAGJointAttnProcessor2_0 requires PyTorch 2.0, to use it, please upgrade PyTorch to 2.0.ra  rc  r}   r}   r~   rS     r  z!PAGJointAttnProcessor2_0.__init__Nre  r   r   r  r   r   r  r   c           !      C  s  |}|j }|dkr|j\}}}	}
||||	|
 dd}|j }|dkr8|j\}}}	}
||||	|
 dd}|jd }|d\}}|d\}}|jd }||}||}||}||}|	|}|
|}tj||gdd}tj||gdd}tj||gdd}|jd }||j }||d|j|dd}||d|j|dd}||d|j|dd}tj|||ddd	}|dd|d|j| }||j}|d d d |jd f |d d |jd d f }}|jd |}|jd |}|js
||}|dkr|dd
|||	|
}|dkr,|dd
|||	|
}|jd }||}||}||}||}|	|}|
|}tj||gdd}tj||gdd}tj||gdd}|jd }||j }||d|j|dd}||d|j|dd}||d|j|dd}|d}tj||f|j|jd} td| d |d |f< | d |d |f d | dd} tj|||| ddd}|dd|d|j| }||j}|d d d |jd f |d d |jd d f }}|jd |}|jd |}|js||}|dkr/|dd
|||	|
}|dkr@|dd
|||	|
}t||g}t||g}||fS )Nr  r<   r   r   r  rL   r   Frs  r   r   -infr  )r   r   r  r  chunkrg   rh   ri   rl   rj   rk   r   r  r   ru   rQ   r   r   r   rn   rZ   rq   rv  r  r   r!   fill_diagonal_r  )!ry   re  r   r   r   r  r  r   r  r  r  context_input_ndimidentity_block_sizehidden_states_orghidden_states_ptbencoder_hidden_states_orgencoder_hidden_states_ptb	query_orgkey_org	value_org$encoder_hidden_states_org_query_proj"encoder_hidden_states_org_key_proj$encoder_hidden_states_org_value_projrU   r  	query_ptbkey_ptb	value_ptb$encoder_hidden_states_ptb_query_proj"encoder_hidden_states_ptb_key_proj$encoder_hidden_states_ptb_value_projr   	full_maskr}   r}   r~   r     s   


























z!PAGJointAttnProcessor2_0.__call__r)  r  r  r}   r}   r}   r~   r        
r  c                   @  r  )PAGCFGJointAttnProcessor2_0r  c                 C  r`  )NrQ   z[PAGCFGJointAttnProcessor2_0 requires PyTorch 2.0, to use it, please upgrade PyTorch to 2.0.ra  rc  r}   r}   r~   rS     r  z$PAGCFGJointAttnProcessor2_0.__init__Nre  r   r   r  r   r   r  r   c           %      O  s  |}|j }|dkr|j\}	}
}}||	|
|| dd}|j }|dkr8|j\}	}
}}||	|
|| dd}|jd }|d\}}}t||g}|d\}}}t||g}|jd }	||}||}|	|}|
|}||}||}tj||gdd}tj||gdd}tj||gdd}|jd }||j }||	d|j|dd}||	d|j|dd}||	d|j|dd}tj|||dd	d
}|dd|	d|j| }||j}|d d d |jd f |d d |jd d f }}|jd |}|jd |}|js||}|dkr+|dd|	|
||}|dkr<|dd|	|
||}|jd }	||}||}|	|}|
|} ||}!||}"tj|| gdd}tj||!gdd}tj||"gdd}|jd }||j }||	d|j|dd}||	d|j|dd}||	d|j|dd}|d}#tj|#|#f|j|jd}$td|$d |d |f< |$d |d |f d |$dd}$tj||||$dd	d}|dd|	d|j| }||j}|d d d |jd f |d d |jd d f }}|jd |}|jd |}|js.||}|dkr?|dd|	|
||}|dkrP|dd|	|
||}t||g}t||g}||fS )Nr  r<   r   r   r   r  rL   r   Frs  r   r   r  r  )r   r   r  r  r  r   r  rg   rh   ri   rl   rj   rk   r   ru   rQ   r   r   r   rn   rZ   rq   rv  r  r   r!   r  r  )%ry   re  r   r   r   r  r^  r  r  r   r  r  r  r  r  hidden_states_uncondr  r  encoder_hidden_states_uncondr  r  r  r  r  r  r  r  rU   r  r  r  r  r  r  r  r   r  r}   r}   r~   r     s   	

























z$PAGCFGJointAttnProcessor2_0.__call__r)  r  r  r}   r}   r}   r~   r    r  r  c                   @  r  )FusedJointAttnProcessor2_0r  c                 C  r`  NrQ   zPAttnProcessor2_0 requires PyTorch 2.0, to use it, please upgrade PyTorch to 2.0.ra  rc  r}   r}   r~   rS   (  rd  z#FusedJointAttnProcessor2_0.__init__Nre  r   r   r  r   r   r  r   c                 O  sZ  |}|j }|dkr|j\}	}
}}||	|
|| dd}|j }|dkr8|j\}	}
}}||	|
|| dd}|jd }	||}|jd d }tj||dd\}}}||}|jd d }tj||dd\}}}tj||gdd}tj||gdd}tj||gdd}|jd }||j	 }||	d|j	|dd}||	d|j	|dd}||	d|j	|dd}t
j|||dd	d
}|dd|	d|j	| }||j}|d d d |jd f |d d |jd d f }}|jd |}|jd |}|js||}|dkr|dd|	|
||}|dkr)|dd|	|
||}||fS )Nr  r<   r   r   rL   r   r  r   Frs  r   )r   r   r  r  r  r   splitr!  r  r   ru   rQ   r   r   r   rn   rZ   rq   )ry   re  r   r   r   r  r^  r  r  r   r  r  r  r  qkv
split_sizer   r   r  encoder_qkvr  r  r  rU   r  r}   r}   r~   r   ,  sV   	







z#FusedJointAttnProcessor2_0.__call__r)  r  r  r}   r}   r}   r~   r  %  r  r  c                   @  ,   e Zd ZdZddddZ		ddddZdS )r     
    Processor for implementing memory efficient attention using xFormers.

    Args:
        attention_op (`Callable`, *optional*, defaults to `None`):
            The base
            [operator](https://facebookresearch.github.io/xformers/components/ops.html#xformers.ops.AttentionOpBase) to
            use as the attention operator. It is recommended to set to `None`, and allow xFormers to choose the best
            operator.
    Nr   r   c                 C  
   || _ d S r(  r   ry   r   r}   r}   r~   rS        
z#XFormersJointAttnProcessor.__init__re  r   r   r  r   r   r  r   c                 O  s  |}| |}||}	||}
|| }||	 }	||
 }
|jd ur0||}|jd ur:||	}	|d ur||}||}|	|}|| }|| }|| }|j
d url|
|}|jd urv||}tj||gdd}tj|	|gdd}	tj|
|gdd}
tjj||	|
|| j|jd}||j}||}|d ur|d d d |jd f |d d |jd d f }}|js||}|jd |}|jd |}|d ur||fS |S )Nr<   r  	attn_biasopr\   r   )rg   rh   ri   r   
contiguousrb   rc   rl   rj   rk   rr   rs   r   r  r   r   r   r   r\   r   r   r   r   rZ   rq   rn   )ry   re  r   r   r   r  r^  r  r   r   r  r  r  r  r}   r}   r~   r     sR   	















z#XFormersJointAttnProcessor.__call__r(  r   r   r)  r  r  r}   r}   r}   r~   r   t      r   c                   @  ,   e Zd ZdZdd Z				ddddZdS )AllegroAttnProcessor2_0z
    Processor for implementing scaled dot-product attention (enabled by default if you're using PyTorch 2.0). This is
    used in the Allegro model. It applies a normalization layer and rotary embedding on the query and key vector.
    c                 C  r`  )NrQ   zWAllegroAttnProcessor2_0 requires PyTorch 2.0, to use it, please upgrade PyTorch to 2.0.ra  rc  r}   r}   r~   rS     r  z AllegroAttnProcessor2_0.__init__Nre  r   r   r   r   r   r   r  rg  r   c                 C  s6  |}|j d ur| ||}|j}|dkr(|j\}	}
}}||	|
|| dd}|d u r/|jn|j\}	}}|d urL||||	}||	|jd|jd }|jd ur^||dddd}||}|d u rj|}n|j	rr|
|}||}||}|jd }||j }||	d|j|dd}||	d|j|dd}||	d|j|dd}|d ur|jsddlm} |||d |d }|||d |d }tj||||ddd	}|dd|	d|j| }||j}|jd |}|jd |}|dkr|dd
|	|
||}|jr|| }||j }|S )Nr  r<   r   rL   )apply_rotary_emb_allegror   r   Fr  r   )ra   r   r   r  r  r  r   rO   rg   re   r  rh   ri   rX   
embeddingsr  ru   rQ   r   r   r   rn   r4   r3   )ry   re  r   r   r   r  rg  r  r  r   r  r  r  r}  r   r   r   r  rU   r  r  r}   r}   r~   r     sV   	









z AllegroAttnProcessor2_0.__call__NNNNre  r   r   r   r   r   r   r   r  r   rg  r   r   r   r  r}   r}   r}   r~   r    s    
r  c                   @  &   e Zd ZdZdd Z	ddddZdS )AuraFlowAttnProcessor2_0z;Attention processor used typically in processing Aura Flow.c                 C  $   t tdstddrtdd S d S )NrQ   r   2.1zAuraFlowAttnProcessor2_0 requires PyTorch 2.0, to use it, please upgrade PyTorch to at least 2.1 or above as we use `scale` in `F.scaled_dot_product_attention()`. rt   ru   r   rb  rc  r}   r}   r~   rS   *  
   z!AuraFlowAttnProcessor2_0.__init__Nre  r   r   r  r   r   c                 O  sD  |j d }||}||}||}	|d ur'||}
||}||}|j d }||j }||d|j|}||d|j|}|	|d|j|}	|j	d urV|	|}|j
d ur`|
|}|d ur|
|d|j|}
||d|j|}||d|j|}|jd ur||
}
|jd ur||}tj|
|gdd}tj||gdd}tj||	gdd}	|dd}|dd}|	dd}	tj|||	d|jdd}|dd|d|j| }||j}|d ur|d d |j d d f |d d d |j d f }}|jd |}|jd |}|d ur||}|d ur ||fS |S )	Nr   rL   r<   r  r   r   Frt  r\   r;   )r   rg   rh   ri   rl   rj   rk   r   r  rb   rc   rr   rs   r   r  r  ru   rQ   r\   r   r   r   rn   rq   )ry   re  r   r   r  r^  r   r   r   r  r  r  r  rU   r  r}   r}   r~   r   0  sf   





















z!AuraFlowAttnProcessor2_0.__call__r(  re  r   r   r  r   r  r   r  r  r}   r}   r}   r~   r  '  
    
r  c                   @  r  )FusedAuraFlowAttnProcessor2_0zRAttention processor used typically in processing Aura Flow with fused projections.c                 C  r  )NrQ   r   r  zFusedAuraFlowAttnProcessor2_0 requires PyTorch 2.0, to use it, please upgrade PyTorch to at least 2.1 or above as we use `scale` in `F.scaled_dot_product_attention()`. r  rc  r}   r}   r~   rS     r  z&FusedAuraFlowAttnProcessor2_0.__init__Nre  r   r   r  r   r   c                 O  sf  |j d }||}|j d d }tj||dd\}	}
}|d ur7||}|j d d }tj||dd\}}}|
j d }||j }|	|d|j|}	|
|d|j|}
||d|j|}|jd urf||	}	|jd urp||
}
|d ur||d|j|}||d|j|}||d|j|}|j	d ur|	|}|j
d ur|	|}tj||	gdd}	tj||
gdd}
tj||gdd}|	dd}	|
dd}
|dd}tj|	|
|d|jdd	}|dd|d|j| }||	j}|d ur|d d |j d d f |d d d |j d f }}|jd |}|jd |}|d ur(||}|d ur1||fS |S )
Nr   rL   r   r  r<   r   r   Fr  )r   r  r   r  r!  r   r  rb   rc   rr   rs   r  r  ru   rQ   r\   r   r   r   rn   rq   )ry   re  r   r   r  r^  r   r  r  r   r   r  r  r  r  r  rU   r  r}   r}   r~   r     sn   


















z&FusedAuraFlowAttnProcessor2_0.__call__r(  r  r  r}   r}   r}   r~   r    r  r  c                   @  (   e Zd ZdZdd Z		ddddZdS )CogVideoXAttnProcessor2_0
    Processor for implementing scaled dot-product attention for the CogVideoX model. It applies a rotary embedding on
    query and key vectors, but does not include spatial normalization.
    c                 C  r`  NrQ   zVCogVideoXAttnProcessor requires PyTorch 2.0, to use it, please upgrade PyTorch to 2.0.ra  rc  r}   r}   r~   rS     rd  z"CogVideoXAttnProcessor2_0.__init__Nre  r   r   r   r   r   r   rg  r   c                 C  s  | d}tj||gdd}|j\}}}	|d ur+||||}|||jd|jd }||}
||}|	|}|jd }||j }|
|d|j|
dd}
||d|j|
dd}||d|j|
dd}|jd uru||
}
|jd ur||}|d urddlm} ||
d d d d |d f ||
d d d d |d f< |js||d d d d |d f ||d d d d |d f< tj|
|||ddd}|
dd|d|j| }|jd	 |}|jd |}|j|| d| gdd\}}||fS )
Nr<   r  rL   r   rq  r   Fr  r   )rv  r   r  r   r  r  r   rg   rh   ri   r  rb   rc   r  rq  rX   ru   rQ   r   rn   r  )ry   re  r   r   r   rg  text_seq_lengthr   r}  r   r   r   r  rU   r  rq  r}   r}   r~   r     sB   









66
z"CogVideoXAttnProcessor2_0.__call__r)  re  r   r   r   r   r   r   r   rg  r   r   r   r  r}   r}   r}   r~   r        	r  c                   @  r  )FusedCogVideoXAttnProcessor2_0r  c                 C  r`  r  ra  rc  r}   r}   r~   rS   0	  rd  z'FusedCogVideoXAttnProcessor2_0.__init__Nre  r   r   r   r   r   r   rg  r   c                 C  s  | d}tj||gdd}|d u r|jn|j\}}}	|d ur2||||}|||jd|jd }||}
|
jd d }tj|
|dd\}}}|jd }||j }||d|j|	dd}||d|j|	dd}||d|j|	dd}|j
d ur|
|}|jd ur||}|d urddlm} ||d d d d |d f ||d d d d |d f< |js||d d d d |d f ||d d d d |d f< tj||||ddd	}|	dd|d|j| }|jd
 |}|jd |}|j|| d| gdd\}}||fS )Nr<   r  rL   r   r   r  r   Fr  r   )rv  r   r  r   r  r  r   r  r  r  rb   rc   r  rq  rX   ru   rQ   r   rn   )ry   re  r   r   r   rg  r  r   r}  r   r  r  r   r   r  rU   r  rq  r}   r}   r~   r   4	  sD   







66
z'FusedCogVideoXAttnProcessor2_0.__call__r)  r  r  r}   r}   r}   r~   r  *	  r  r  c                   @  r  )r   r  Nr   r   c                 C  r  r(  r   r  r}   r}   r~   rS   }	  r  z%XFormersAttnAddedKVProcessor.__init__re  r   r   r   r   r   r   r   c                 C  s~  |}| |jd |jd ddd}|j\}}}||||}|d u r'|}n|jr/||}||dddd}||}	||	}	|	|}
|
|}||
}
||}|js||}||}||}||}tj|
|gdd}tj||gdd}n|
}|}tjj|	|||| j|jd}||	j}||}|jd |}|jd |}|dd|j}|| }|S )Nr   r<   rL   r   r  r  r   )r  r   r  r  re   r  rO   rg   r   rj   rk   r1   rh   ri   r   r  r   r   r   r   r\   r   r   r   rn   r   )ry   re  r   r   r   r  r   r}  r   r   r  r  r   r  r}   r}   r~   r   	  sD   "











z%XFormersAttnAddedKVProcessor.__call__r(  r  r)  r  r  r}   r}   r}   r~   r   q	  r  r   c                   @  .   e Zd ZdZddddZ			ddddZdS )r   r  Nr   r   c                 C  r  r(  r   r  r}   r}   r~   rS   	  r  zXFormersAttnProcessor.__init__re  r   r   r   r   r   r   r  r   c                 O  s  t |dks|dd d urd}tdd| |}	|jd ur#|||}|j}
|
dkr>|j\}}}}||||| dd}|d u rE|jn|j\}}}||||}|d urc|j\}}}|	d|d}|j
d uru|
|dddd}||}|d u r|}n|jr||}||}||}|| }|| }|| }tjj||||| j|jd	}||j}||}|jd |}|jd |}|
dkr|dd
||||}|jr||	 }||j }|S )Nr   r\   r  r  r  r<   r   rL   r  r   )r   r  r   ra   r   r   r  r  r  expandrO   rg   re   r  rh   ri   r   r  r   r   r   r   r\   r   r   r   rn   r   r4   r3   )ry   re  r   r   r   r  r  r^  r  r  r  r   r  r  r  
key_tokensr   query_tokensr   r   r  r}   r}   r~   r   	  sR   








zXFormersAttnProcessor.__call__r(  r  r  r  r  r}   r}   r}   r~   r   	  s    r   c                   @  *   e Zd ZdZdd Z			ddddZdS )r   a  
    Processor for implementing flash attention using torch_npu. Torch_npu supports only fp16 and bf16 data types. If
    fp32 is used, F.scaled_dot_product_attention will be used for computation, but the acceleration effect on NPU is
    not significant.

    c                 C  s   t  stdd S )NzTAttnProcessorNPU requires torch_npu extensions and is supported only on npu devices.)r
   rb  rc  r}   r}   r~   rS   
  s   zAttnProcessorNPU.__init__Nre  r   r   r   r   r   r   r  r   c                 O  s  t |dks|dd d urd}tdd| |}	|jd ur#|||}|j}
|
dkr>|j\}}}}||||| dd}|d u rE|jn|j\}}}|d ur||||}|||j	d|jd }|
dd|jd d}|jtjkr{t| }n| }|jd ur||dddd}||}|d u r|}n|jr||}||}||}|jd }||j	 }||d|j	|dd}||d|j	|dd}||d|j	|dd}|jtjtjfv rtj||||j	d	d |d
t|jd  ddd
dddd }ntj||||ddd}|dd|d|j	| }||j}|jd |}|jd |}|
dkrF|dd||||}|j rN||	 }||j! }|S )Nr   r\   r  r  r  r<   r   rL   BNSDr   i   F)	input_layoutpse
atten_maskr\   pre_tockensnext_tockens	keep_probsyncinner_preciser   r  r   )"r   r  r   ra   r   r   r  r  r  r   repeatr   r   r#   logical_notrO   rg   re   r  rh   ri   float16bfloat16	torch_npunpu_fusion_attentionmathsqrtru   rQ   r   r   rn   r4   r3   ry   re  r   r   r   r  r  r^  r  r  r  r   r  r  r  r}  r   r   r   r  rU   r  r}   r}   r~   r   "
  s   










zAttnProcessorNPU.__call__r  r  r  r}   r}   r}   r~   r   
  s    r   c                   @  r  )rv   s
    Processor for implementing scaled dot-product attention (enabled by default if you're using PyTorch 2.0).
    c                 C  r`  r  ra  rc  r}   r}   r~   rS   
  rd  zAttnProcessor2_0.__init__Nre  r   r   r   r   r   r   r  r   c                 O  sH  t |dks|dd d urd}tdd| |}	|jd ur#|||}|j}
|
dkr>|j\}}}}||||| dd}|d u rE|jn|j\}}}|d urb||||}|||j	d|jd }|j
d urt|
|dddd}||}|d u r|}n|jr||}||}||}|jd }||j	 }||d|j	|dd}||d|j	|dd}||d|j	|dd}|jd ur||}|jd ur||}tj||||d	d
d}|dd|d|j	| }||j}|jd |}|jd |}|
dkr|dd||||}|jr||	 }||j }|S )Nr   r\   r  r  r  r<   r   rL   r   Fr  r   )r   r  r   ra   r   r   r  r  r  r   rO   rg   re   r  rh   ri   rb   rc   ru   rQ   r   r   r   rn   r4   r3   r  r}   r}   r~   r   
  s\   














zAttnProcessor2_0.__call__r  r  r  r}   r}   r}   r~   rv   
  s    rv   c                   @  r   )r   z
    Processor for implementing scaled dot-product attention with pallas flash attention kernel if using `torch_xla`.
    Nr   r   c                 C  sF   t tds	tdtddrtdt rtddrtd|| _d S )NrQ   zXXLAFlashAttnProcessor2_0 requires PyTorch 2.0, to use it, please upgrade PyTorch to 2.0.r   r   6XLA flash attention requires torch_xla version >= 2.3.r   DSPMD support for XLA flash attention needs torch_xla version >= 2.4.)rt   ru   rb  r   r   r   )ry   r   r}   r}   r~   rS   
  s   


z!XLAFlashAttnProcessor2_0.__init__re  r   r   r   r   r   r   r  r   c                 O  s  |}|j d ur| ||}|j}	|	dkr(|j\}
}}}||
||| dd}|d u r/|jn|j\}
}}|d urL||||
}||
|jd|jd }|jd ur^||dddd}||}|d u rj|}n|j	rr|
|}||}||}|jd }||j }||
d|j|dd}||
d|j|dd}||
d|j|dd}|jd ur||}|jd ur||}tdd |||fD r|d ur||
dd|jd }| |dktd|dktd	}|| }|t|jd
  }t r| jnd }t|||d|d}ntd tj||||d	dd}|dd|
d|j| }||j}|jd |}|jd |}|	dkrU|dd|
|||}|jr]|| }||j  }|S )Nr  r<   r   rL   c                 s  s    | ]
}|j d  dkV  qdS )r   i   N)r   )r   r   r}   r}   r~   	<genexpr>0  s    z4XLAFlashAttnProcessor2_0.__call__.<locals>.<genexpr>r   r  r   r   F)causalr   z[Unable to use the flash attention pallas kernel API call due to QKV sequence length < 4096.r  r   )!ra   r   r   r  r  r  r   rO   rg   re   r  rh   ri   rb   rc   allr!   masked_fillr  r  r   r   r   r   r   ru   rQ   r   r   r   rn   r4   r3   )ry   re  r   r   r   r  r  r^  r  r  r   r  r  r  r}  r   r   r   r  rU   r  r   r}   r}   r~   r   
  sr   














z!XLAFlashAttnProcessor2_0.__call__r(  )r   r   r  r  r  r}   r}   r}   r~   r   
  s    r   c                   @  r  )MochiVaeAttnProcessor2_0z0
    Attention processor used in Mochi VAE.
    c                 C  r`  r  ra  rc  r}   r}   r~   rS   _  rd  z!MochiVaeAttnProcessor2_0.__init__Nre  r   r   r   r   r   r   r   c                 C  s  |}|j d dk}|d u r|j n|j \}}}	|d ur-||||}|||jd|j d }|rP||}|jd |}|jd |}|jrI|| }||j }|S ||}
|d u r[|}|	|}||}|j d }||j }|
|d|j|
dd}
||d|j|
dd}||d|j|
dd}|jd ur||
}
|jd ur||}tj|
|||d|jd}|
dd|d|j| }||
j}|jd |}|jd |}|jr|| }||j }|S )Nr<   rL   r   r   r   r  )r   r  r  r   ri   rn   r4   r3   rg   rh   r  rb   rc   ru   rQ   r;   r   r   r   )ry   re  r   r   r   r  is_single_framer   r}  r   r   r   r  rU   r  r}   r}   r~   r   c  sP   











z!MochiVaeAttnProcessor2_0.__call__r)  r  r  r}   r}   r}   r~   r  Z  s    r  c                   @  s4   e Zd ZdZdd Zdd	d
Z			ddddZdS )StableAudioAttnProcessor2_0z
    Processor for implementing scaled dot-product attention (enabled by default if you're using PyTorch 2.0). This is
    used in the Stable Audio model. It applies rotary embedding on query and key vector, and allows MHA, GQA or MQA.
    c                 C  r`  )NrQ   z[StableAudioAttnProcessor2_0 requires PyTorch 2.0, to use it, please upgrade PyTorch to 2.0.ra  rc  r}   r}   r~   rS     r  z$StableAudioAttnProcessor2_0.__init__rj  r   	freqs_cistuple[torch.Tensor]r   c           	      C  sb   ddl m} |d jd }|dd |f |d|d f }}|||ddd}tj||fdd	}|S )
Nr<   r  r   rL   .Tr   use_realuse_real_unbind_dimr  )r  rq  r   r   r  )	ry   rj  r"  rq  rot_dimx_to_rotatex_unrotated	x_rotatedoutr}   r}   r~   apply_partial_rotary_emb  s   "z4StableAudioAttnProcessor2_0.apply_partial_rotary_embNre  r   r   r   r   r   
rotary_embc                 C  s   ddl m} |}|j}|dkr#|j\}	}
}}||	|
|| dd}|d u r*|jn|j\}	}}|d urG||||	}||	|jd|jd }||}|d u rS|}n|j	r[|
|}||}||}|jd |j }|jd | }||	d|j|dd}||	d||dd}||	d||dd}||jkr|j| }tj||d|jd | d}tj||d|jd | d}|jd ur||}|jd ur||}|d urC|j}|j}|tj}|tj}|d jd }|dd |f |d|d f }}|||d	d
d}tj||fdd}|js9|dd |f |d|d f }}|||d	d
d}tj||fdd}||}||}tj||||ddd}|dd|	d|j| }||j}|jd |}|jd |}|dkr|dd
|	|
||}|jr|| }||j }|S )Nr<   r  r  r   rL   r  r   .Tr   r$  r  r   Fr  )r  rq  r   r   r  r  r  r   rg   re   r  rh   ri   r   r  rb   rc   r   r   rT  r  rX   ru   rQ   r   rn   r4   r3   )ry   re  r   r   r   r-  rq  r  r  r   r  r  r  r}  r   r   r   r  r  r   heads_per_kv_headquery_dtype	key_dtyper'  query_to_rotatequery_unrotatedquery_rotatedkey_to_rotatekey_unrotatedkey_rotatedr}   r}   r~   r     sz   










""



z$StableAudioAttnProcessor2_0.__call__)rj  r   r"  r#  r   r   r  )re  r   r   r   r   r   r   r   r-  r   r   r   )r   r*  r+  r,  rS   r,  r   r}   r}   r}   r~   r!    s    
r!  c                   @  r  )HunyuanAttnProcessor2_0z
    Processor for implementing scaled dot-product attention (enabled by default if you're using PyTorch 2.0). This is
    used in the HunyuanDiT model. It applies a s normalization layer and rotary embedding on query and key vector.
    c                 C  r`  r  ra  rc  r}   r}   r~   rS   :  rd  z HunyuanAttnProcessor2_0.__init__Nre  r   r   r   r   r   r   r  rg  r   c                 C  sJ  ddl m} |}|jd ur|||}|j}	|	dkr.|j\}
}}}||
||| dd}|d u r5|jn|j\}
}}|d urR||||
}||
|jd|jd }|j	d urd|	|dddd}|
|}|d u rp|}n|jrx||}||}||}|jd }||j }||
d|j|dd}||
d|j|dd}||
d|j|dd}|jd ur||}|jd ur||}|d ur|||}|js|||}tj||||ddd}|dd|
d|j| }||j}|jd	 |}|jd |}|	dkr|dd
|
|||}|jr|| }||j }|S Nr<   r  r  r   rL   r   Fr  r   r   )r  rq  ra   r   r   r  r  r  r   rO   rg   re   r  rh   ri   rb   rc   rX   ru   rQ   r   r   r   rn   r4   r3   )ry   re  r   r   r   r  rg  rq  r  r  r   r  r  r  r}  r   r   r   r  rU   r  r}   r}   r~   r   >  s`   	















z HunyuanAttnProcessor2_0.__call__r  r  r  r}   r}   r}   r~   r7  4  s    r7  c                   @  r  )FusedHunyuanAttnProcessor2_0a  
    Processor for implementing scaled dot-product attention (enabled by default if you're using PyTorch 2.0) with fused
    projection layers. This is used in the HunyuanDiT model. It applies a s normalization layer and rotary embedding on
    query and key vector.
    c                 C  r`  )NrQ   z\FusedHunyuanAttnProcessor2_0 requires PyTorch 2.0, to use it, please upgrade PyTorch to 2.0.ra  rc  r}   r}   r~   rS     r  z%FusedHunyuanAttnProcessor2_0.__init__Nre  r   r   r   r   r   r   r  rg  r   c                 C  s  ddl m} |}|jd ur|||}|j}	|	dkr.|j\}
}}}||
||| dd}|d u r5|jn|j\}
}}|d urR||||
}||
|jd|jd }|j	d urd|	|dddd}|d u r|
|}|jd d }tj||dd\}}}n#|jr||}||}||}|jd d }tj||dd\}}|jd }||j }||
d|j|dd}||
d|j|dd}||
d|j|dd}|jd ur||}|jd ur||}|d ur|||}|js|||}tj||||dd	d
}|dd|
d|j| }||j}|jd |}|jd |}|	dkr7|dd|
|||}|jr?|| }||j }|S )Nr<   r  r  r   rL   r   r  r   Fr  r   r   )r  rq  ra   r   r   r  r  r  r   rO   r  r   r  re   r  rg   r  rb   rc   rX   ru   rQ   r   r   r   rn   r4   r3   )ry   re  r   r   r   r  rg  rq  r  r  r   r  r  r  r}  r   r  r  r   r   r  kvrU   r  r}   r}   r~   r     sf   	















z%FusedHunyuanAttnProcessor2_0.__call__r  r  r  r}   r}   r}   r~   r9        
r9  c                   @  r  )PAGHunyuanAttnProcessor2_0_  
    Processor for implementing scaled dot-product attention (enabled by default if you're using PyTorch 2.0). This is
    used in the HunyuanDiT model. It applies a normalization layer and rotary embedding on query and key vector. This
    variant of the processor employs [Pertubed Attention Guidance](https://huggingface.co/papers/2403.17377).
    c                 C  r`  )NrQ   zZPAGHunyuanAttnProcessor2_0 requires PyTorch 2.0, to use it, please upgrade PyTorch to 2.0.ra  rc  r}   r}   r~   rS     r  z#PAGHunyuanAttnProcessor2_0.__init__Nre  r   r   r   r   r   r   r  rg  r   c                 C  s  ddl m} |}|jd ur|||}|j}	|	dkr.|j\}
}}}||
||| dd}|d\}}|d u r<|jn|j\}
}}|d urY||||
}||
|j	d|jd }|j
d urk|
|dddd}||}|d u rw|}n|jr||}||}||}|jd }||j	 }||
d|j	|dd}||
d|j	|dd}||
d|j	|dd}|jd ur||}|jd ur||}|d ur|||}|js|||}tj||||ddd}|dd|
d|j	| }||j}|jd	 |}|jd |}|	dkr|dd
|
|||}|j
d ur0|
|dddd}||}||j}|jd	 |}|jd |}|	dkrZ|dd
|
|||}t||g}|jri|| }||j }|S r8  )r  rq  ra   r   r   r  r  r  r  r   rO   rg   re   r  rh   ri   rb   rc   rX   ru   rQ   r   r   r   rn   r   r  r4   r3   )ry   re  r   r   r   r  rg  rq  r  r  r   r  r  r  r  r  r}  r   r   r   r  rU   r  r}   r}   r~   r   
  st   	

















z#PAGHunyuanAttnProcessor2_0.__call__r  r  r  r}   r}   r}   r~   r<    r;  r<  c                   @  r  )PAGCFGHunyuanAttnProcessor2_0r=  c                 C  r`  )NrQ   z]PAGCFGHunyuanAttnProcessor2_0 requires PyTorch 2.0, to use it, please upgrade PyTorch to 2.0.ra  rc  r}   r}   r~   rS     r  z&PAGCFGHunyuanAttnProcessor2_0.__init__Nre  r   r   r   r   r   r   r  rg  r   c                 C  s  ddl m} |}|jd ur|||}|j}	|	dkr.|j\}
}}}||
||| dd}|d\}}}t	||g}|d u rD|jn|j\}
}}|d ura|
|||
}||
|jd|jd }|jd urs||dddd}||}|d u r|}n|jr||}||}||}|jd }||j }||
d|j|dd}||
d|j|dd}||
d|j|dd}|jd ur||}|jd ur||}|d ur|||}|js|||}tj||||ddd	}|dd|
d|j| }||j}|jd
 |}|jd |}|	dkr%|dd|
|||}|jd ur8||dddd}||}||j}|jd
 |}|jd |}|	dkrb|dd|
|||}t	||g}|jrq|| }||j }|S )Nr<   r  r  r   r   rL   r   Fr  r   r   )r  rq  ra   r   r   r  r  r  r   r  r  r   rO   rg   re   r  rh   ri   rb   rc   rX   ru   rQ   r   r   r   rn   r4   r3   )ry   re  r   r   r   r  rg  rq  r  r  r   r  r  r  r  r  r  r}  r   r   r   r  rU   r  r}   r}   r~   r     sv   	

















z&PAGCFGHunyuanAttnProcessor2_0.__call__r  r  r  r}   r}   r}   r~   r>  x  r;  r>  c                   @  s,   e Zd ZdZdd Z				ddddZdS )LuminaAttnProcessor2_0z
    Processor for implementing scaled dot-product attention (enabled by default if you're using PyTorch 2.0). This is
    used in the LuminaNextDiT model. It applies a s normalization layer and rotary embedding on query and key vector.
    c                 C  r`  r  ra  rc  r}   r}   r~   rS     rd  zLuminaAttnProcessor2_0.__init__Nre  r   r   r   r   r   r   query_rotary_embkey_rotary_embbase_sequence_lengthr   r   c                 C  s0  ddl m} |j}	|	dkr!|j\}
}}}||
||| dd}|j\}
}}||}||}||}|jd }|jd }||j	 }|j
}|| }|jd urV||}|jd ur`||}||
d|j	|}||
d||}||
d||}|d ur|||dd}|d ur|||dd}||||}}|d u rd }n|d urtt|||j }n|j}|j	| }|dkr|dddd|ddd}|dddd|ddd}| |
ddd}|d|j	|d}|dd}|dd}|dd}tj|||||d	}|dd|}|S )
Nr<   r  r  r   rL   F)r%  r   )r  r\   )r  rq  r   r   r  r  rg   rh   ri   r   r   rb   rc   r   r  r  logr\   r  r  ri  r#   r  ru   rQ   )ry   re  r   r   r   r@  rA  rB  rq  r  r   r  r  r  r}  r   r   r   r  r   rU   r  r   r   softmax_scalen_repr}   r}   r~   r     sZ   











  
zLuminaAttnProcessor2_0.__call__r  )re  r   r   r   r   r   r   r   r@  r   rA  r   rB  r   r   r   r  r}   r}   r}   r~   r?    s    	r?  c                   @  r  )FusedAttnProcessor2_0u  
    Processor for implementing scaled dot-product attention (enabled by default if you're using PyTorch 2.0). It uses
    fused projection layers. For self-attention modules, all projection matrices (i.e., query, key, value) are fused.
    For cross-attention modules, key and value projection matrices are fused.

    > [!WARNING] > This API is currently 🧪 experimental in nature and can change in future.
    c                 C  r`  )NrQ   z`FusedAttnProcessor2_0 requires at least PyTorch 2.0, to use it. Please upgrade PyTorch to > 2.0.ra  rc  r}   r}   r~   rS   ]  r  zFusedAttnProcessor2_0.__init__Nre  r   r   r   r   r   r   r  r   c                 O  s  t |dks|dd d urd}tdd| |}	|jd ur#|||}|j}
|
dkr>|j\}}}}||||| dd}|d u rE|jn|j\}}}|d urb||||}|||j	d|jd }|j
d urt|
|dddd}|d u r||}|jd d	 }tj||dd
\}}}n#|jr||}||}||}|jd d }tj||dd
\}}|jd }||j	 }||d|j	|dd}||d|j	|dd}||d|j	|dd}|jd ur||}|jd ur||}tj||||ddd}|dd|d|j	| }||j}|jd |}|jd |}|
dkr6|dd||||}|jr>||	 }||j }|S )Nr   r\   r  r  r  r<   r   rL   r   r  r   Fr  r   )r   r  r   ra   r   r   r  r  r  r   rO   r  r   r  re   r  rg   r  rb   rc   ru   rQ   r   r   r   rn   r4   r3   )ry   re  r   r   r   r  r  r^  r  r  r  r   r  r  r  r}  r   r  r  r   r   r  r:  rU   r  r}   r}   r~   r   c  sb   














zFusedAttnProcessor2_0.__call__r  r  r  r}   r}   r}   r~   rF  T  s    
rF  c                      sB   e Zd ZdZ							dd fddZ		dd ddZ  ZS )!r   az  
    Processor for implementing memory efficient attention using xFormers for the Custom Diffusion method.

    Args:
    train_kv (`bool`, defaults to `True`):
        Whether to newly train the key and value matrices corresponding to the text features.
    train_q_out (`bool`, defaults to `True`):
        Whether to newly train query matrices corresponding to the latent image features.
    hidden_size (`int`, *optional*, defaults to `None`):
        The hidden size of the attention layer.
    cross_attention_dim (`int`, *optional*, defaults to `None`):
        The number of channels in the `encoder_hidden_states`.
    out_bias (`bool`, defaults to `True`):
        Whether to include the bias parameter in `train_q_out`.
    dropout (`float`, *optional*, defaults to 0.0):
        The dropout probability to use.
    attention_op (`Callable`, *optional*, defaults to `None`):
        The base
        [operator](https://facebookresearch.github.io/xformers/components/ops.html#xformers.ops.AttentionOpBase) to use
        as the attention operator. It is recommended to set to `None`, and allow xFormers to choose the best operator.
    TFNr   r   r#   r   r   r   r   r/   r    r!   r   r   c                   s   t    || _|| _|| _|| _|| _| jr-tj|p||dd| _	tj|p'||dd| _
| jrVtj||dd| _tg | _| jtj|||d | jt| d S d S r  )rR   rS   r   r   r   r   r   r   rf   r   r  r  rm   r  ro   rp   )ry   r   r   r   r   r/   r    r   r{   r}   r~   rS     s   

z-CustomDiffusionXFormersAttnProcessor.__init__re  r   r   r   r   r   r   r   c                 C  s  |d u r|j n|j \}}}||||}| jr#| ||jjj}n|||jjj}|d u r7d}	|}n
d}	|jrA|	|}| j
rk| || jjj}
| || jjj}|
|jjj}
||jjj}n
||}
||}|	rt|
}|d d d dd d f d |d d d dd d f< ||
 d| |
   }
|| d| |   }|| }||
 }
|| }tjj||
||| j|jd}||j}||}| jr| jd |}| jd |}|S |jd |}|jd |}|S )NFTr<   r   r  r   )r   r  r   r  r   rg   r   r   re   r  r   r   r  rh   ri   r   r  r  r   r  r   r   r   r   r\   r   r  rn   )ry   re  r   r   r   r   r}  r   r   r  r   r  r  r}   r}   r~   r     sN   



4
z-CustomDiffusionXFormersAttnProcessor.__call__)TFNNTr   N)r   r#   r   r#   r   r   r   r   r/   r#   r    r!   r   r   r)  r  r  r}   r}   r{   r~   r     s     r   c                      r  )r   u  
    Processor for implementing attention for the Custom Diffusion method using PyTorch 2.0’s memory-efficient scaled
    dot-product attention.

    Args:
        train_kv (`bool`, defaults to `True`):
            Whether to newly train the key and value matrices corresponding to the text features.
        train_q_out (`bool`, defaults to `True`):
            Whether to newly train query matrices corresponding to the latent image features.
        hidden_size (`int`, *optional*, defaults to `None`):
            The hidden size of the attention layer.
        cross_attention_dim (`int`, *optional*, defaults to `None`):
            The number of channels in the `encoder_hidden_states`.
        out_bias (`bool`, defaults to `True`):
            Whether to include the bias parameter in `train_q_out`.
        dropout (`float`, *optional*, defaults to 0.0):
            The dropout probability to use.
    TNr   r   r#   r   r   r   r   r/   r    r!   c                   r  r  r  r  r{   r}   r~   rS   B  r  z(CustomDiffusionAttnProcessor2_0.__init__re  r   r   r   r   r   r   r   c                 C  s  |j \}}}||||}| jr| |}n||}|d u r$d}	|}n
d}	|jr.||}| jrX| |	| jj
j}
| |	| jj
j}|
	|jj
j}
|	|jj
j}n
||}
||}|	rt|
}|d d d dd d f d |d d d dd d f< ||
 d| |
   }
|| d| |   }|j d }||j }||d|j|dd}|
|d|j|dd}
||d|j|dd}tj||
||ddd}|dd|d|j| }|	|j}| jr| jd |}| jd |}|S |jd |}|jd |}|S )	NFTr<   r   rL   r   r  r   )r   r  r   r  rg   re   r  r   r   r   r   r   r  rh   ri   r   r  r  r   r  r  ru   rQ   r   r  rn   )ry   re  r   r   r   r   r}  r   r   r  r   r  r  rU   r  r}   r}   r~   r   \  sP   




4

z(CustomDiffusionAttnProcessor2_0.__call__r  r  r)  r  r  r}   r}   r{   r~   r   .  s    r   c                   @  s*   e Zd ZdZdddZ		ddddZdS )r   a'  
    Processor for implementing sliced attention.

    Args:
        slice_size (`int`, *optional*):
            The number of steps to compute attention. Uses as many slices as `attention_head_dim // slice_size`, and
            `attention_head_dim` must be a multiple of the `slice_size`.
    r   r   c                 C  r  r(  r   ry   r   r}   r}   r~   rS     r  zSlicedAttnProcessor.__init__Nre  r   r   r   r   r   r   r   c                 C  s
  |}|j }|dkr|j\}}}	}
||||	|
 dd}|d u r$|jn|j\}}}||||}|jd urC||dddd}||}|jd }||}|d u rY|}n|jra|	|}|
|}||}||}||}|j\}}}tj||||j f|j|jd}t|d | j d D ]=}|| j }|d | j }||| }||| }|d ur||| nd }||||}t|||| }||||< q||}|jd |}|jd |}|dkr|dd|||	|
}|jr|| }||j }|S )Nr  r<   r   rL   r   r   r   )r   r   r  r  r  rO   rg   r   re   r  rh   ri   r   r  r   r   r   rw  r   r
  r  r   rn   r   r4   r3   )ry   re  r   r   r   r  r  r   r  r  r  r}  r   r   rN   r   r  batch_size_attentionr  i	start_idxend_idxquery_slice	key_sliceattn_mask_slice
attn_slicer}   r}   r~   r     sV   











zSlicedAttnProcessor.__call__)r   r   r)  r  r  r}   r}   r}   r~   r     s    
	r   c                   @  r  )r   ah  
    Processor for implementing sliced attention with extra learnable key and value matrices for the text encoder.

    Args:
        slice_size (`int`, *optional*):
            The number of steps to compute attention. Uses as many slices as `attention_head_dim // slice_size`, and
            `attention_head_dim` must be a multiple of the `slice_size`.
    c                 C  r  r(  rG  rH  r}   r}   r~   rS     r  z#SlicedAttnAddedKVProcessor.__init__Nre  'Attention'r   r   r   r   r   r  r   c                 C  s4  |}|j d ur| ||}||jd |jd ddd}|j\}}}	||||}|d u r2|}n|jr:||}||dddd}||}
|
jd }|	|
}
|
|}||}|	|}|	|}|js||}||}|	|}|	|}tj||gdd}tj||gdd}n|}|}|
j\}}}	tj||||j f|
j|
jd}t|d | j d D ]=}|| j }|d | j }|
|| }||| }|d ur||| nd }||||}t|||| }||||< q||}|jd |}|jd |}|dd|j}|| }|S )Nr   r<   rL   r   r  r   r   )ra   r  r   r  r  re   r  rO   rg   r   rj   rk   r1   rh   ri   r   r  r  r   r   r   rw  r   r
  r  r   rn   r   )ry   re  r   r   r   r  r  r   r}  r   r   rN   r  r  r   r  rI  r  rJ  rK  rL  rM  rN  rO  rP  r}   r}   r~   r     s\   
"













z#SlicedAttnAddedKVProcessor.__call__r  )re  rQ  r   r   r   r   r   r   r  r   r   r   r  r}   r}   r}   r~   r     s    	r   c                      s,   e Zd ZdZd fddZdddZ  ZS )r`   aq  
    Spatially conditioned normalization as defined in https://huggingface.co/papers/2209.09002.

    Args:
        f_channels (`int`):
            The number of channels for input to group normalization layer, and output of the spatial norm layer.
        zq_channels (`int`):
            The number of channels for the quantized vector as described in the paper.
    rD   r   rE   c                   sN   t    tj|dddd| _tj||dddd| _tj||dddd| _d S )Nr   ư>Tr@   r<   r   )r2  strider  )rR   rS   r   r_   
norm_layerr4  conv_yconv_b)ry   rD   rE   r{   r}   r~   rS   ^  s   
zSpatialNorm.__init__fr   zqr   c                 C  sD   |j dd  }tj||dd}| |}|| | | | }|S )Nr   nearest)rv  rQ  )r   ru   interpolaterT  rU  rV  )ry   rW  rX  f_sizenorm_fnew_fr}   r}   r~   r   h  s
   
zSpatialNorm.forward)rD   r   rE   r   )rW  r   rX  r   r   r   )r   r*  r+  r,  rS   r   r.  r}   r}   r{   r~   r`   S  s    

r`   c                      8   e Zd ZdZd fdd	Z					ddddZ  ZS )r   a  
    Attention processor for Multiple IP-Adapters.

    Args:
        hidden_size (`int`):
            The hidden size of the attention layer.
        cross_attention_dim (`int`):
            The number of channels in the `encoder_hidden_states`.
        num_tokens (`int`, `tuple[int]` or `list[int]`, defaults to `(4,)`):
            The context length of the image features.
        scale (`float` or list[`float`], defaults to 1.0):
            the weight scale of image prompt.
    Nr  r   c                   s   t    | _ | _t|ttfs|g}|| _t|ts$|gt| }t|t|kr0t	d|| _
t fddtt|D | _t fddtt|D | _d S )NJ`scale` should be a list of integers with the same length as `num_tokens`.c                      g | ]
}t j d dqS FrP   r   rf   r   r   r   r   r}   r~   r         z3IPAdapterAttnProcessor.__init__.<locals>.<listcomp>c                   ra  rb  rc  rd  re  r}   r~   r     rf  )rR   rS   r   r   r   tuplelistr   r   r^   r\   r   rm   rw  r   to_v_ipry   r   r   r   r\   r{   re  r~   rS     s"   


zIPAdapterAttnProcessor.__init__re  r   r   r   r   r   r   r  r\   r!   r   c           %   
   C  s  |}|d ur@t |tr|\}}	n0d}
tdd|
dd |jd | jd  }|d d d |d d f |d d |d d d f g}}	|jd urK|||}|j}|dkrf|j\}}}}||||| dd	}|d u rm|jn|j\}}}|	|||}|j
d ur|
|dd	dd	}||}|d u r|}n|jr||}||}||}||}||}||}||||}t||}||}|d urot |tst|d}t|t| j  krt|	ksn td
t| dt| j dt|	 dtt|| j|	D ]`\}\}}}|d u rqt |tjr(|jdkr,td|jd |jd krJtd|jd  d|jd  d| t |trlt||jd ksltd|jd  dt| d| qnd gt| j }t|	| j| j| j|D ]\}}}}}d}t |trtdd |D rd}n|dkrd}|s]|d ur3t |ts|g|jd  }|jd }t |D ]l}||d d |d d d d f } ||d d |d d d d f }!|| } ||!}!||| d }"t|"|!}#||#}#t!"|d d |d d d d f ||#jd |#jd	 }$|$j#|j$|j%d}$||| |#|$   }qĐq||} ||}!|| } ||!}!||| d }"t|"|!}||}|||  }q|j&d |}|j&d |}|dkr~|dd'||||}|j(r|| }||j) }|S )NYou have passed a tensor as `encoder_hidden_states`. This is deprecated and will be removed in a future release. Please make sure to update your script to pass `encoder_hidden_states` as a tuple to suppress this warning.!encoder_hidden_states not a tupler  Fstandard_warnr<   r   r  r   "Length of ip_adapter_masks array ()) must match length of self.scale array (") and number of ip_hidden_states ()Each element of the ip_adapter_masks array should be a tensor with shape [1, num_images_for_ip_adapter, height, width]. Please use `IPAdapterMaskProcessor` to preprocess your maskNumber of masks (&) does not match number of ip images () at index #) does not match number of scales (c                 s      | ]}|d kV  qdS r   Nr}   r   sr}   r}   r~   r        z2IPAdapterAttnProcessor.__call__.<locals>.<genexpr>Tr   rL   r   )*r   rg  r   r   r   ra   r   r  r  r  rO   rg   re   r  rh   ri   r   r
  r   r  r   rh  r  r   r\   r^   	enumeratezipTensorr   ri  r  rw  r   
downsampler   r   r   rn   r   r4   r3   )%ry   re  r   r   r   r  r\   r   r  r   r  end_posr  r   r  r  r  r}  r   r   r   r  r	  indexr  ip_statecurrent_ip_hidden_statesr   ri  skipcurrent_num_imagesrJ  ip_keyip_valueip_attention_probs_current_ip_hidden_statesmask_downsampler}   r}   r~   r     s   














& 
 


  







zIPAdapterAttnProcessor.__call__Nr_  r   NNNr   Nre  r   r   r   r   r   r   r   r  r   r\   r!   r   r   r  r}   r}   r{   r~   r   p  s    r   c                      r^  )r   a  
    Attention processor for IP-Adapter for PyTorch 2.0.

    Args:
        hidden_size (`int`):
            The hidden size of the attention layer.
        cross_attention_dim (`int`):
            The number of channels in the `encoder_hidden_states`.
        num_tokens (`int`, `tuple[int]` or `list[int]`, defaults to `(4,)`):
            The context length of the image features.
        scale (`float` or `list[float]`, defaults to 1.0):
            the weight scale of image prompt.
    Nr_  r   c                   s   t    ttdst| jj d| _ | _t	|t
tfs#|g}|| _t	|ts2|gt| }t|t|kr>td|| _t fddtt|D | _t fddtt|D | _d S )NrQ   z@ requires PyTorch 2.0, to use it, please upgrade PyTorch to 2.0.r`  c                   ra  rb  rc  rd  re  r}   r~   r   ]  rf  z6IPAdapterAttnProcessor2_0.__init__.<locals>.<listcomp>c                   ra  rb  rc  rd  re  r}   r~   r   `  rf  )rR   rS   rt   ru   rb  r|   r   r   r   r   rg  rh  r   r   r^   r\   r   rm   rw  r   ri  rj  r{   re  r~   rS   G  s*   



z"IPAdapterAttnProcessor2_0.__init__re  r   r   r   r   r   r   r  r\   r!   r   c           %   
   C  s  |}|d ur@t |tr|\}}	n0d}
tdd|
dd |jd | jd  }|d d d |d d f |d d |d d d f g}}	|jd urK|||}|j}|dkrf|j\}}}}||||| dd	}|d u rm|jn|j\}}}|d ur|	|||}|||j
d
|jd
 }|jd ur||dd	dd	}||}|d u r|}n|jr||}||}||}|jd
 }||j
 }||d
|j
|dd	}||d
|j
|dd	}||d
|j
|dd	}tj||||ddd}|dd	|d
|j
| }||j}|d urt |tst|d}t|t| j  kr/t|	ksEn tdt| dt| j dt|	 dtt|| j|	D ]`\}\}}}|d u r\qNt |tjri|jdkrmtd|jd |jd krtd|jd  d|jd  d| t |trt||jd kstd|jd  dt| d| qNnd gt| j }t|	| j| j| j |D ]\}}}}}d}t |trt!dd |D rd}n|dkrd}|s|d urt |ts|g|jd  }|jd }t"|D ]} ||d d | d d d d f }!||d d | d d d d f }"|!|d
|j
|dd	}!|"|d
|j
|dd	}"tj||!|"d ddd}#|#dd	|d
|j
| }#|#|j}#t#$|d d | d d d d f ||#jd |#jd	 }$|$j|j|j%d}$|||  |#|$   }qq||}!||}"|!|d
|j
|dd	}!|"|d
|j
|dd	}"tj||!|"d ddd}|dd	|d
|j
| }||j}|||  }q|j&d |}|j&d |}|dkr|d
d||||}|j'r|| }||j( }|S )Nrk  rl  r  Frm  r<   r   r  r   rL   r   r  ro  rp  rq  rr  rs  rt  ru  rv  rw  c                 s  rx  ry  r}   rz  r}   r}   r~   r    r|  z5IPAdapterAttnProcessor2_0.__call__.<locals>.<genexpr>Tr   r   ))r   rg  r   r   r   ra   r   r  r  r  r   rO   rg   re   r  rh   ri   ru   rQ   r   r   r   rh  r  r   r\   r^   r}  r~  r   r  r   ri  r  rw  r   r  r   rn   r4   r3   )%ry   re  r   r   r   r  r\   r   r  r   r  r  r  r   r  r  r  r}  r   r   r   r  rU   r  r  r  r  r  r   ri  r  r  rJ  r  r  r  r  r}   r}   r~   r   c  s  











( 
 


  

z"IPAdapterAttnProcessor2_0.__call__r  r  r  r  r}   r}   r{   r~   r   8  s     r   c                      sB   e Zd ZdZ				dd fddZ					ddddZ  ZS )r   aG  
    Attention processor for IP-Adapter using xFormers.

    Args:
        hidden_size (`int`):
            The hidden size of the attention layer.
        cross_attention_dim (`int`):
            The number of channels in the `encoder_hidden_states`.
        num_tokens (`int`, `tuple[int]` or `list[int]`, defaults to `(4,)`):
            The context length of the image features.
        scale (`float` or `list[float]`, defaults to 1.0):
            the weight scale of image prompt.
        attention_op (`Callable`, *optional*, defaults to `None`):
            The base
            [operator](https://facebookresearch.github.io/xformers/components/ops.html#xformers.ops.AttentionOpBase) to
            use as the attention operator. It is recommended to set to `None`, and allow xFormers to choose the best
            operator.
    Nr_  r   r   r   c                   s   t    | _ | _|| _t|ttfs|g}|| _t|ts'|gt	| }t	|t	|kr3t
d|| _t fddtt	|D | _t fddtt	|D | _d S )Nr`  c                       g | ]}t j p	d dqS rb  rc  rd  re  r}   r~   r   M       z;IPAdapterXFormersAttnProcessor.__init__.<locals>.<listcomp>c                   r  rb  rc  rd  re  r}   r~   r   P  r  )rR   rS   r   r   r   r   rg  rh  r   r   r^   r\   r   rm   rw  r   ri  )ry   r   r   r   r\   r   r{   re  r~   rS   4  s$   


z'IPAdapterXFormersAttnProcessor.__init__re  r   r   r  r   r  r   r  r\   r!   r   c           $   
   C  s  |}|d ur@t |tr|\}}	n0d}
tdd|
dd |jd | jd  }|d d d |d d f |d d |d d d f g}}	|jd urK|||}|j}|dkrf|j\}}}}||||| dd	}|d u rm|jn|j\}}}|d ur|	|||}|j\}}}|
d
|d
}|jd ur||dd	dd	}||}|d u r|}n|jr||}||}||}|| }|| }|| }tjj||||| jd}||j}||}|	r|d urt |tst|d}t|t| j  krt|	ks$n tdt| dt| j dt|	 dtt || j|	D ]`\}\}}}|d u r;q-t |t!j"rH|jdkrLtd|jd |jd krjtd|jd  d|jd  d| t |trt||jd kstd|jd  dt| d| q-nd gt| j }t |	| j| j#| j$|D ]\}}}}}d}t |trt%dd |D rd}n|dkrd}|s|d ura|t!j&}t |ts|g|jd  }|jd }t'|D ]t}||d d |d d d d f } ||d d |d d d d f }!||  } ||! }!tjj|| |!| jd}"|"|j}"||"}"t()|d d |d d d d f ||"jd |"jd	 }#|#j|j|j*d}#||| |"|#   }qq||} ||}!||  } ||! }!tjj|| |!| jd}||j}||}|||  }q|j+d |}|j+d |}|dkr|d
d,||||}|j-r|| }||j. }|S )Nrk  rl  r  Frm  r<   r   r  r   rL   )r  r  ro  rp  rq  rr  rs  rt  ru  rv  rw  c                 s  rx  ry  r}   rz  r}   r}   r~   r    r|  z:IPAdapterXFormersAttnProcessor.__call__.<locals>.<genexpr>T)r  r   r   )/r   rg  r   r   r   ra   r   r  r  r  r  rO   rg   re   r  rh   ri   r   r  r   r   r   r   r   r   r   rh  r  r   r\   r^   r}  r~  r   r  r   ri  r  r  rw  r   r  r   rn   r   r4   r3   )$ry   re  r   r   r   r  r\   r   r  r   r  r  r  r   r  r  r  r}  r   r  r   r   r  r  r  r  r  r   ri  r  r  rJ  r  r  r  r  r}   r}   r~   r   S  s  











(
 


  





z'IPAdapterXFormersAttnProcessor.__call__)Nr_  r   Nr  r  )re  r   r   r  r   r  r   r  r  r  r\   r!   r   r  r  r}   r}   r{   r~   r      s    #r   c                      s<   e Zd ZdZ		dd fddZ				ddddZ  ZS )!SD3IPAdapterJointAttnProcessor2_0aZ  
    Attention processor for IP-Adapter used typically in processing the SD3-like self-attention projections, with
    additional image-based information and timestep embeddings.

    Args:
        hidden_size (`int`):
            The number of hidden channels.
        ip_hidden_states_dim (`int`):
            The image feature dimension.
        head_dim (`int`):
            The number of head channels.
        timesteps_emb_dim (`int`, defaults to 1280):
            The number of input channels for timestep embedding.
        scale (`float`, defaults to 0.5):
            IP-Adapter scale.
             ?r   r   ip_hidden_states_dimr  timesteps_emb_dimr\   r!   c                   s   t    ddlm}m} |||d ddd| _tj||dd| _tj||dd| _	||d| _
||d| _||d| _|| _d S )Nr<   )AdaLayerNormr?   r   rR  )
output_dimnorm_eps	chunk_dimFrP   )rR   rS   rT   r  r?   norm_ipr   rf   r   ri  rb   rc   	norm_ip_kr\   )ry   r   r  r  r  r\   r  r?   r{   r}   r~   rS     s   

z*SD3IPAdapterJointAttnProcessor2_0.__init__Nre  r   r   r  r   r   r  r   r  r   c                 C  sL  |}|j d }||}	||}
||}|
j d }||j }|	|d|j|dd}	|
|d|j|dd}
||d|j|dd}|	}|
}|}|jdurW||	}	|jdura||
}
|dur|	|}|
|}||}||d|j|dd}||d|j|dd}||d|j|dd}|jdur||}|jdur||}tj|	|gdd}	tj|
|gdd}
tj||gdd}tj|	|
|ddd	}|dd|d|j| }||	j}|dur|ddd|j d f |dd|j d df }}|js||}| jdkr|dur| j||d
}| |}| |}||d|j|dd}||d|j|dd}| |}	| |}| |}tj||gdd}
tj||gdd}tj|	|
|ddd	}|dd|d|j| }||	j}||| j  }|jd |}|jd |}|dur||fS |S )aP  
        Perform the attention computation, integrating image features (if provided) and timestep embeddings.

        If `ip_hidden_states` is `None`, this is equivalent to using JointAttnProcessor2_0.

        Args:
            attn (`Attention`):
                Attention instance.
            hidden_states (`torch.FloatTensor`):
                Input `hidden_states`.
            encoder_hidden_states (`torch.FloatTensor`, *optional*):
                The encoder hidden states.
            attention_mask (`torch.FloatTensor`, *optional*):
                Attention mask.
            ip_hidden_states (`torch.FloatTensor`, *optional*):
                Image embeddings.
            temb (`torch.FloatTensor`, *optional*):
                Timestep embeddings.

        Returns:
            `torch.FloatTensor`: Output hidden states.
        r   rL   r<   r   Nr  r   Frs  )r  )r   rg   rh   ri   r   r  r  rb   rc   rl   rj   rk   rr   rs   r   r  ru   rQ   r   r   r   rZ   rq   r\   r  r   ri  r  rn   )ry   re  r   r   r   r   r  r  r   r   r   r  rU   r  	img_queryimg_key	img_valuer  r  r  norm_ip_hidden_statesr  r  r}   r}   r~   r   /  s   



























z*SD3IPAdapterJointAttnProcessor2_0.__call__)r  r  )
r   r   r  r   r  r   r  r   r\   r!   r  )re  r   r   r  r   r  r   r  r   r  r  r  r   r  r  r}   r}   r{   r~   r    s    r  c                   @  *   e Zd ZdZdd Z			ddddZdS )PAGIdentitySelfAttnProcessor2_0
    Processor for implementing PAG using scaled dot-product attention (enabled by default if you're using PyTorch 2.0).
    PAG reference: https://huggingface.co/papers/2403.17377
    c                 C  r`  )NrQ   z_PAGIdentitySelfAttnProcessor2_0 requires PyTorch 2.0, to use it, please upgrade PyTorch to 2.0.ra  rc  r}   r}   r~   rS     r  z(PAGIdentitySelfAttnProcessor2_0.__init__Nre  r   r   r  r   r  r   r  r   r   c                 C  sf  |}|j d ur| ||}|j}|dkr(|j\}}	}
}|||	|
| dd}|d\}}|j\}}}|d urL||||}|||jd|jd }|jd ur^||dddd}|	|}|
|}||}|jd }||j }||d|j|dd}||d|j|dd}||d|j|dd}tj||||ddd}|dd|d|j| }||j}|jd |}|jd |}|dkr|dd	||	|
|}|j\}}}|jd ur||dddd}||}||j}|jd |}|jd |}|dkr|dd	||	|
|}t||g}|jr,|| }||j }|S )
Nr  r<   r   rL   r   Fr  r   r   )ra   r   r   r  r  r  r  r   rO   rg   rh   ri   ru   rQ   r   r   r   rn   r   r  r4   r3   )ry   re  r   r   r   r  r  r  r   r  r  r  r  r  r}  r   r   r   r  rU   r  r}   r}   r~   r     sZ   










z(PAGIdentitySelfAttnProcessor2_0.__call__r  re  r   r   r  r   r  r   r  r  r  r   r   r  r}   r}   r}   r~   r        
r  c                   @  r  )"PAGCFGIdentitySelfAttnProcessor2_0r  c                 C  r`  )NrQ   zbPAGCFGIdentitySelfAttnProcessor2_0 requires PyTorch 2.0, to use it, please upgrade PyTorch to 2.0.ra  rc  r}   r}   r~   rS     r  z+PAGCFGIdentitySelfAttnProcessor2_0.__init__Nre  r   r   r  r   r  r   r  r   r   c                 C  sz  |}|j d ur| ||}|j}|dkr(|j\}}	}
}|||	|
| dd}|d\}}}t||g}|j\}}}|d urT||||}|||j	d|jd }|j
d urf|
|dddd}||}||}||}|jd }||j	 }||d|j	|dd}||d|j	|dd}||d|j	|dd}tj||||ddd}|dd|d|j	| }||j}|jd	 |}|jd |}|dkr|dd
||	|
|}|j\}}}|j
d ur|
|dddd}||}|}||j}|jd	 |}|jd |}|dkr'|dd
||	|
|}t||g}|jr6|| }||j }|S )Nr  r<   r   r   rL   r   Fr  r   r   )ra   r   r   r  r  r  r   r  r  r   rO   rg   rh   ri   ru   rQ   r   r   r   rn   r4   r3   )ry   re  r   r   r   r  r  r  r   r  r  r  r  r  r  r}  r   r   r   r  rU   r  r}   r}   r~   r   "  s^   










z+PAGCFGIdentitySelfAttnProcessor2_0.__call__r  r  r  r}   r}   r}   r~   r    r  r  c                   @  s   e Zd ZdZd
ddZd	S )rM  zD
    Processor for implementing multiscale quadratic attention.
    re  r;  r   r   r   c                 C  s  |j dd  \}}|| |jkrd}nd}|}t| \}}}}|j}	|dd}||}
||}||}t	j
|
||gdd}|dd}|g}|jD ]	}||| qOt	j
|dd}|ri|jt	jd}||dd|j || }|jdd	d\}
}}||
}
||}|r||
||}|j|	d}n||
||}t	||d||f}||dddd}|jd
kr||dddd}n||}|jr|| }|S )Nr   TFr<   rL   r   r  rR  r   rI   )r   rA  rh  rv  r   movedimrg   rh   ri   r   r  rI  ro   r   rT  r   r  rK  rW  rZ  rn   rC  rL  r4   )ry   re  r   r  r  use_linear_attentionr  r   r   original_dtyper   r   r  multi_scale_qkvblockr}   r}   r~   r     sF   







z'SanaMultiscaleAttnProcessor2_0.__call__N)re  r;  r   r   r   r   r  r}   r}   r}   r~   rM  }  s    rM  c                   @     e Zd ZdZdd ZdS )LoRAAttnProcessorz9
    Processor for implementing attention with LoRA.
    c                 C     d S r(  r}   rc  r}   r}   r~   rS        zLoRAAttnProcessor.__init__Nr   r*  r+  r,  rS   r}   r}   r}   r~   r        r  c                   @  r  )LoRAAttnProcessor2_0zj
    Processor for implementing attention with LoRA (enabled by default if you're using PyTorch 2.0).
    c                 C  r  r(  r}   rc  r}   r}   r~   rS     r  zLoRAAttnProcessor2_0.__init__Nr  r}   r}   r}   r~   r    r  r  c                   @  r  )LoRAXFormersAttnProcessorzH
    Processor for implementing attention with LoRA using xFormers.
    c                 C  r  r(  r}   rc  r}   r}   r~   rS     r  z"LoRAXFormersAttnProcessor.__init__Nr  r}   r}   r}   r~   r    r  r  c                   @  r  )LoRAAttnAddedKVProcessorzz
    Processor for implementing attention with LoRA with extra learnable key and value matrices for the text encoder.
    c                 C  r  r(  r}   rc  r}   r}   r~   rS     r  z!LoRAAttnAddedKVProcessor.__init__Nr  r}   r}   r}   r~   r    r  r  c                   @  r  )SanaLinearAttnProcessor2_0I
    Processor for implementing scaled dot-product linear attention.
    Nre  r   r   r   r   r   r   r   c           
      C  s  |j }|d u r	|}||}||}||}|jd ur"||}|jd ur,||}|ddd|jdf}|ddd|jdfdd}|ddd|jdf}t	
|}t	
|}| | | }}}t	j|dddd}t||}	t|	|}|d d d d d df |d d d d dd f d	  }|dddd}||}|jd
 |}|jd |}|tjkr|dd}|S )Nr<   r   rL   r   rN  rO  r   rP  r?  r       )r   rg   rh   ri   rb   rc   r  ru  r   ru   relur!   r  r   rS  ri  r   rn   r  clip)
ry   re  r   r   r   r  r   r   r  rV  r}   r}   r~   r     s6   






"

8

z#SanaLinearAttnProcessor2_0.__call__r)  r  r  r}   r}   r}   r~   r    
    r  c                   @  r  ) PAGCFGSanaLinearAttnProcessor2_0r  Nre  r   r   r   r   r   r   r   c                 C  s  |j }|d\}}}t||g}||}	||}
||}|	ddd|j	df}	|
ddd|j	dfdd}
|ddd|j	df}t
|	}	t
|
}
|	 |
 | }	}
}t
j|dddd}t||
}t||	}|d d d d d df |d d d d dd f d	  }|dddd}||}|jd
 |}|jd |}|||}|jd
 |}|jd |}t||g}|tjkr|dd}|S )Nr   r<   r   rL   rN  rO  r   rP  r?  r   r  r  )r   r  r   r  rg   rh   ri   r  ru  r   ru   r  r!   r  rS  ri  r   rn   r  r  )ry   re  r   r   r   r  r  r  r  r   r   r  rV  r}   r}   r~   r     s6   


"

8

z)PAGCFGSanaLinearAttnProcessor2_0.__call__r)  r  r  r}   r}   r}   r~   r    r  r  c                   @  r  )%PAGIdentitySanaLinearAttnProcessor2_0r  Nre  r   r   r   r   r   r   r   c                 C  s  |j }|d\}}||}||}	||}
|ddd|jdf}|	ddd|jdfdd}	|
ddd|jdf}
t	|}t	|	}	|
 |	
 |

 }}	}
tj|
dddd}
t|
|	}t||}|j tjtjfv r~|
 }|d d d d d df |d d d d dd f d	  }|dddd}||}|jd
 |}|jd |}|||}|jd
 |}|jd |}t||g}|tjkr|dd}|S )Nr   r<   rL   r   rN  rO  r   rP  r?  r   r  r  )r   r  rg   rh   ri   r  ru  r   ru   r  r!   r  r   rS  r  r  ri  r   rn   r  r  )ry   re  r   r   r   r  r  r  r   r   r  rV  r}   r}   r~   r   M  s8   


"

8

z.PAGIdentitySanaLinearAttnProcessor2_0.__call__r)  r  r  r}   r}   r}   r~   r  H  r  r  c                   @     e Zd Zdd ZdS )FluxAttnProcessor2_0c                 O  *   d}t dd| ddlm} ||i |S )Nzq`FluxAttnProcessor2_0` is deprecated and this will be removed in a future version. Please use `FluxAttnProcessor`r  r  r<   FluxAttnProcessorr   transformers.transformer_fluxr  clsr  r^  r  r  r}   r}   r~   __new__     zFluxAttnProcessor2_0.__new__Nr   r*  r+  r  r}   r}   r}   r~   r        r  c                   @  r  )FluxSingleAttnProcessor2_0r  c                 O  r  )Nz|`FluxSingleAttnProcessor` is deprecated and will be removed in a future version. Please use `FluxAttnProcessorSDPA` instead.r  r  r<   r  r  r  r}   r}   r~   r    r  z"FluxSingleAttnProcessor2_0.__new__Nr   r*  r+  r,  r  r}   r}   r}   r~   r    r  r  c                   @  r  )FusedFluxAttnProcessor2_0c                 O  r  )Nzv`FusedFluxAttnProcessor2_0` is deprecated and this will be removed in a future version. Please use `FluxAttnProcessor`r  r  r<   r  r  r  r}   r}   r~   r    r  z!FusedFluxAttnProcessor2_0.__new__Nr  r}   r}   r}   r~   r    r  r  c                   @  r  )"FluxIPAdapterJointAttnProcessor2_0c                 O  r  )Nz`FluxIPAdapterJointAttnProcessor2_0` is deprecated and this will be removed in a future version. Please use `FluxIPAdapterAttnProcessor`r  r  r<   )FluxIPAdapterAttnProcessor)r   r  r  )r  r  r^  r  r  r}   r}   r~   r    r  z*FluxIPAdapterJointAttnProcessor2_0.__new__Nr  r}   r}   r}   r~   r    r  r  c                   @  r  )FluxAttnProcessor2_0_NPUc                 O  s0   d}t dd|dd ddlm} | }d|_|S )	NzFluxAttnProcessor2_0_NPU is deprecated and will be removed in a future version. An alternative solution to use NPU Flash Attention will be provided in the future.r  r  Frm  r<   r  _native_npur   r  r  _attention_backendr  r  r^  r  r  r6   r}   r}   r~   r       z FluxAttnProcessor2_0_NPU.__new__Nr  r}   r}   r}   r~   r    r  r  c                   @  r  )FusedFluxAttnProcessor2_0_NPUc                 C  s0   d}t dd|dd ddlm} | }d|_|S )	NzFusedFluxAttnProcessor2_0_NPU is deprecated and will be removed in a future version. An alternative solution to use NPU Flash Attention will be provided in the future.r  r  Frm  r<   r  
_fused_npur  )ry   r  r  r6   r}   r}   r~   r    r  z%FusedFluxAttnProcessor2_0_NPU.__new__Nr  r}   r}   r}   r~   r    r  r  c                   @  r  )r   r  c                 O  s   d}t dd|dd tddrtdt rtdd	rtd
ddlm} t|dks3|dd d ur;d}t dd| ||i |}d|_|S )NzXLAFluxFlashAttnProcessor2_0 is deprecated and will be removed in diffusers 1.0.0. An alternative solution to using XLA Flash Attention will be provided in the future.r   r  Frm  r   r   r  r   r  r<   r  r   r   zpartition_spec was not used in the processor implementation when it was added. Passing it is a no-op and support for it will be removed._native_xla)	r   r   rb  r   r  r  r   r  r  r  r}   r}   r~   r    s   
z$XLAFluxFlashAttnProcessor2_0.__new__Nr  r}   r}   r}   r~   r     r  r   )a
__future__r   r   r  typingr   r   torch.nn.functionalr   
functionalru   image_processorr   utilsr   r   r	   utils.import_utilsr
   r   r   utils.torch_utilsr   r   
get_loggerr   r   r  r   xformers.ops$torch_xla.experimental.custom_kernelr   torch_xla.runtimer   XLA_AVAILABLEr   r   r/  r;  r[  r_  rw   r   r   r   r   r  r  r  r   r  r  r  r  r  r   r   r   rv   r   r  r!  r7  r9  r<  r>  r?  rF  r   r   r   r   r`   r   r   r   r  r  r  rM  r  r  r  r  r  r  r  r  r  r  r  r  r  r   ADDED_KV_ATTENTION_PROCESSORSCROSS_ATTENTION_PROCESSORSAttentionProcessorr}   r}   r}   r~   <module>   s  


      BBiHhCLV  &OU^]aEGG^r^tU bg{|`ftrW\ I i i ,cg<				479


 	
 !"#$%&'()*+,-./012