o
    pi                    @   s:  d dl Z d dlZd dlmZmZmZmZmZ d dlZd dl	m
  mZ d dlm
Z
 ddlmZ ddlmZmZ ddlmZmZ ddlmZmZ eeZe rSd dlZe r_d dlZd dlZndZeG d	d
 d
e
jZG dd dZ G dd de
jZ!G dd dZ"G dd dZ#G dd dZ$G dd dZ%G dd dZ&G dd dZ'G dd dZ(G dd dZ)dd  Z*G d!d" d"Z+G d#d$ d$Z,G d%d& d&Z-G d'd( d(Z.G d)d* d*Z/G d+d, d,Z0G d-d. d.Z1G d/d0 d0Z2G d1d2 d2Z3G d3d4 d4Z4G d5d6 d6Z5G d7d8 d8Z6G d9d: d:Z7G d;d< d<e
jZ8G d=d> d>e
jZ9G d?d@ d@Z:G dAdB dBZ;G dCdD dDe
jZ<G dEdF dFe
jZ=G dGdH dHej
jZ>G dIdJ dJZ?G dKdL dLZ@G dMdN dNZAG dOdP dPZBG dQdR dRZCG dSdT dTZDe"e;e#e-fZEe e0e.e:e=e>fZFee e0e7e.e:e"e;e#e-e!e8e9e@e?e5e4f ZGdS )U    N)CallableListOptionalTupleUnion)nn   )IPAdapterMaskProcessor)	deprecatelogging)is_torch_npu_availableis_xformers_available)is_torch_versionmaybe_allow_in_graphc                3       s  e Zd ZdZ																					
						dOdedee dedee dedededededee dedee dee dee dee dee dededededed ed!ed"ed# d$ef2 fd%d&Z	d'ed(dfd)d*Z
	dPd+ed,ee d(dfd-d.Zd/ed(dfd0d1ZdQd2d3ZdRd4ed(d5fd6d7Z		dSd8ejd9eej d:eej d(ejfd;d<Zd=ejd(ejfd>d?ZdTd=ejd$ed(ejfdAdBZ	dPdCejdDejd:eej d(ejfdEdFZ	@dTd:ejdGedHed$ed(ejf
dIdJZd9ejd(ejfdKdLZe dUdMdNZ  ZS )V	Attentiona  
    A cross attention layer.

    Parameters:
        query_dim (`int`):
            The number of channels in the query.
        cross_attention_dim (`int`, *optional*):
            The number of channels in the encoder_hidden_states. If not given, defaults to `query_dim`.
        heads (`int`,  *optional*, defaults to 8):
            The number of heads to use for multi-head attention.
        kv_heads (`int`,  *optional*, defaults to `None`):
            The number of key and value heads to use for multi-head attention. Defaults to `heads`. If
            `kv_heads=heads`, the model will use Multi Head Attention (MHA), if `kv_heads=1` the model will use Multi
            Query Attention (MQA) otherwise GQA is used.
        dim_head (`int`,  *optional*, defaults to 64):
            The number of channels in each head.
        dropout (`float`, *optional*, defaults to 0.0):
            The dropout probability to use.
        bias (`bool`, *optional*, defaults to False):
            Set to `True` for the query, key, and value linear layers to contain a bias parameter.
        upcast_attention (`bool`, *optional*, defaults to False):
            Set to `True` to upcast the attention computation to `float32`.
        upcast_softmax (`bool`, *optional*, defaults to False):
            Set to `True` to upcast the softmax computation to `float32`.
        cross_attention_norm (`str`, *optional*, defaults to `None`):
            The type of normalization to use for the cross attention. Can be `None`, `layer_norm`, or `group_norm`.
        cross_attention_norm_num_groups (`int`, *optional*, defaults to 32):
            The number of groups to use for the group norm in the cross attention.
        added_kv_proj_dim (`int`, *optional*, defaults to `None`):
            The number of channels to use for the added key and value projections. If `None`, no projection is used.
        norm_num_groups (`int`, *optional*, defaults to `None`):
            The number of groups to use for the group norm in the attention.
        spatial_norm_dim (`int`, *optional*, defaults to `None`):
            The number of channels to use for the spatial normalization.
        out_bias (`bool`, *optional*, defaults to `True`):
            Set to `True` to use a bias in the output linear layer.
        scale_qk (`bool`, *optional*, defaults to `True`):
            Set to `True` to scale the query and key by `1 / sqrt(dim_head)`.
        only_cross_attention (`bool`, *optional*, defaults to `False`):
            Set to `True` to only use cross attention and not added_kv_proj_dim. Can only be set to `True` if
            `added_kv_proj_dim` is not `None`.
        eps (`float`, *optional*, defaults to 1e-5):
            An additional value added to the denominator in group normalization that is used for numerical stability.
        rescale_output_factor (`float`, *optional*, defaults to 1.0):
            A factor to rescale the output by dividing it with this value.
        residual_connection (`bool`, *optional*, defaults to `False`):
            Set to `True` to add the residual connection to the output.
        _from_deprecated_attn_block (`bool`, *optional*, defaults to `False`):
            Set to `True` if the attention block is loaded from a deprecated state dict.
        processor (`AttnProcessor`, *optional*, defaults to `None`):
            The attention processor to use. If `None`, defaults to `AttnProcessor2_0` if `torch 2.x` is used and
            `AttnProcessor` otherwise.
    N   @           F    Th㈵>      ?	query_dimcross_attention_dimheadskv_headsdim_headdropoutbiasupcast_attentionupcast_softmaxcross_attention_normcross_attention_norm_num_groupsqk_normadded_kv_proj_dimadded_proj_biasnorm_num_groupsspatial_norm_dimout_biasscale_qkonly_cross_attentionepsrescale_output_factorresidual_connection_from_deprecated_attn_block	processorAttnProcessorout_dimc                    sl  t    ddlm}m} |d ur|n|| | _|d u r| jn|| | _|| _|| _|d u| _	|d ur5|n|| _
|| _|	| _|| _|| _|| _d| _|d urP|n|| _|| _|| _|| _|| _| jrf|d nd| _|d urq|| n|| _|| _|| _|| _| jd u r| jrtd|d urtj|||dd| _nd | _|d urt||d	| _ nd | _ |d u rd | _!d | _"na|d
krtj#||d| _!tj#||d| _"nL|dkr||dd|d| _!||dd|d| _"n5|dkrtj#|| |d| _!tj#|| |d| _"n|dkr|||d| _!|||d| _"ntd| d|
d u rd | _$n1|
d
kr,t#| j
| _$n$|
dkrH| jd ur:|}n| j
}tj||ddd| _$ntd|
 dtj%|| j|d| _&| jsutj%| j
| j|d| _'tj%| j
| j|d| _(nd | _'d | _(|| _)| jd urtj%|| j|d| _*tj%|| j|d| _+| jd urtj%|| j|d| _,| jst-g | _.| j./tj%| j| j|d | j./t0| | jd ur| jstj%| j| j|d| _1|d ur|d ur|dkr ||dd|d| _2||dd|d| _3n|dkr|||d| _2|||d| _3nd | _2d | _3|d u r/t4t5dr,| jr,t6 nt7 }| 8| d S )N   )FP32LayerNormRMSNormFg      r   z`only_cross_attention` can only be set to True if `added_kv_proj_dim` is not None. Make sure to set either `only_cross_attention=False` or define `added_kv_proj_dim`.Tnum_channels
num_groupsr*   affine)
f_channelszq_channels
layer_norm)r*   fp32_layer_norm)elementwise_affiner   r*   layer_norm_across_headsrms_normzunknown qk_norm: z . Should be None or 'layer_norm'
group_normr   zunknown cross_attention_norm: z.. Should be None, 'layer_norm' or 'group_norm'r   scaled_dot_product_attention)9super__init__normalizationr2   r3   	inner_diminner_kv_dimr   use_biasis_cross_attentionr   r   r   r+   r,   r   fused_projectionsr0   context_pre_onlypre_onlyr-   r(   scaler   sliceable_head_dimr#   r)   
ValueErrorr   	GroupNormr?   SpatialNormspatial_normnorm_qnorm_k	LayerNorm
norm_crossLinearto_qto_kto_vr$   
add_k_proj
add_v_proj
add_q_proj
ModuleListto_outappendDropout
to_add_outnorm_added_qnorm_added_khasattrFAttnProcessor2_0r/   set_processor)selfr   r   r   r   r   r   r   r   r   r    r!   r"   r#   r$   r%   r&   r'   r(   r)   r*   r+   r,   r-   r.   r0   rJ   rK   r2   r3   norm_cross_num_channels	__class__ b/home/ubuntu/SoloSpeech/.venv/lib/python3.10/site-packages/diffusers/models/attention_processor.pyrC   `   s   










zAttention.__init__use_npu_flash_attentionreturnc                 C   s6   |rt  }nttdr| jrt nt }| | dS )zR
        Set whether to use npu flash attention from `torch_npu` or not.

        rA   N)AttnProcessorNPUrd   re   r(   rf   r/   rg   )rh   rn   r.   rl   rl   rm   set_use_npu_flash_attention
  s
   z%Attention.set_use_npu_flash_attention'use_memory_efficient_attention_xformersattention_opc           	   
   C   s  t | dot| jtttf}t | dot| jtttt	f}|r|r+|r+t
d| j t s4tdddtj s=tdztjtjddd	tjddd	tjddd	}W n tyd } z|d
}~ww |rt| jj| jj| jj| jj|d}|| j  t | jdr|| jjjj nT|rt !d t	|d}nGt"|d}nA|rt t#drtnt}|| jj| jj| jj| jjd}|| j  t | jdr|| jjjj nt t#dr| j$rt% nt& }| '| d
S )a  
        Set whether to use memory efficient attention from `xformers` or not.

        Args:
            use_memory_efficient_attention_xformers (`bool`):
                Whether to use memory efficient attention from `xformers` or not.
            attention_op (`Callable`, *optional*):
                The attention operation to use. Defaults to `None` which uses the default attention operation from
                `xformers`.
        r.   zhMemory efficient attention is currently not supported for custom diffusion for attention processor type zeRefer to https://github.com/facebookresearch/xformers for more information on how to install xformersxformers)namezvtorch.cuda.is_available() should be True but is False. xformers' memory efficient attention is only available for GPU )r1   r   (   cuda)deviceN)train_kvtrain_q_outhidden_sizer   rs   to_k_custom_diffusionzMemory efficient attention with `xformers` might currently not work correctly if an attention mask is required for the attention operation.rs   rA   )ry   rz   r{   r   )(rd   
isinstancer.   CustomDiffusionAttnProcessor$CustomDiffusionXFormersAttnProcessorCustomDiffusionAttnProcessor2_0AttnAddedKVProcessorAttnAddedKVProcessor2_0SlicedAttnAddedKVProcessorXFormersAttnAddedKVProcessorNotImplementedErrorr   ModuleNotFoundErrortorchrw   is_availablerN   rt   opsmemory_efficient_attentionrandn	Exceptionry   rz   r{   r   load_state_dict
state_dicttor|   weightrx   loggerinfoXFormersAttnProcessorre   r(   rf   r/   rg   )	rh   rr   rs   is_custom_diffusionis_added_kv_processor_er.   attn_processor_classrl   rl   rm   +set_use_memory_efficient_attention_xformers  s   


z5Attention.set_use_memory_efficient_attention_xformers
slice_sizec                 C   s   |dur|| j krtd| d| j  d|dur#| jdur#t|}n |dur,t|}n| jdur5t }nttdr@| jr@t	 nt
 }| | dS )z
        Set the slice size for attention computation.

        Args:
            slice_size (`int`):
                The slice size for attention computation.
        Nzslice_size z has to be smaller or equal to .rA   )rM   rN   r#   r   SlicedAttnProcessorr   rd   re   r(   rf   r/   rg   )rh   r   r.   rl   rl   rm   set_attention_slice  s   


zAttention.set_attention_slicec                 C   sV   t | dr&t| jtjjr&t|tjjs&td| j d|  | j	d || _dS )z
        Set the attention processor to use.

        Args:
            processor (`AttnProcessor`):
                The attention processor to use.
        r.   z-You are removing possibly trained weights of z with N)
rd   r~   r.   r   r   Moduler   r   _modulespop)rh   r.   rl   rl   rm   rg     s   
zAttention.set_processorreturn_deprecated_loraAttentionProcessorc                 C   s   |s| j S dS )a7  
        Get the attention processor in use.

        Args:
            return_deprecated_lora (`bool`, *optional*, defaults to `False`):
                Set to `True` to return the deprecated LoRA attention processor.

        Returns:
            "AttentionProcessor": The attention processor in use.
        N)r.   )rh   r   rl   rl   rm   get_processor  s   zAttention.get_processorhidden_statesencoder_hidden_statesattention_maskc                    s   t t| jjj  dh fdd| D }t|dkr0t	
d| d| jjj d  fdd	| D }| j| |f||d
|S )ah  
        The forward method of the `Attention` class.

        Args:
            hidden_states (`torch.Tensor`):
                The hidden states of the query.
            encoder_hidden_states (`torch.Tensor`, *optional*):
                The hidden states of the encoder.
            attention_mask (`torch.Tensor`, *optional*):
                The attention mask to use. If `None`, no mask is applied.
            **cross_attention_kwargs:
                Additional keyword arguments to pass along to the cross attention.

        Returns:
            `torch.Tensor`: The output of the attention layer.
        ip_adapter_masksc                    s$   g | ]\}}| vr|vr|qS rl   rl   ).0kr   attn_parametersquiet_attn_parametersrl   rm   
<listcomp>  s    z%Attention.forward.<locals>.<listcomp>r   zcross_attention_kwargs z are not expected by z and will be ignored.c                    s   i | ]\}}| v r||qS rl   rl   )r   r   w)r   rl   rm   
<dictcomp>  s    z%Attention.forward.<locals>.<dictcomp>)r   r   )setinspect	signaturer.   __call__
parameterskeysitemslenr   warningrk   __name__)rh   r   r   r   cross_attention_kwargsunused_kwargsrl   r   rm   forward  s&   zAttention.forwardtensorc                 C   sL   | j }|j\}}}||| |||}|dddd|| ||| }|S )ac  
        Reshape the tensor from `[batch_size, seq_len, dim]` to `[batch_size // heads, seq_len, dim * heads]`. `heads`
        is the number of heads initialized while constructing the `Attention` class.

        Args:
            tensor (`torch.Tensor`): The tensor to reshape.

        Returns:
            `torch.Tensor`: The reshaped tensor.
        r   r   r1      )r   shapereshapepermute)rh   r   	head_size
batch_sizeseq_lendimrl   rl   rm   batch_to_head_dim  s
   "zAttention.batch_to_head_dimr   c                 C   s~   | j }|jdkr|j\}}}d}n|j\}}}}|||| ||| }|dddd}|dkr=||| || || }|S )a   
        Reshape the tensor from `[batch_size, seq_len, dim]` to `[batch_size, seq_len, heads, dim // heads]` `heads` is
        the number of heads initialized while constructing the `Attention` class.

        Args:
            tensor (`torch.Tensor`): The tensor to reshape.
            out_dim (`int`, *optional*, defaults to `3`): The output dimension of the tensor. If `3`, the tensor is
                reshaped to `[batch_size * heads, seq_len, dim // heads]`.

        Returns:
            `torch.Tensor`: The reshaped tensor.
        r   r1   r   r   )r   ndimr   r   r   )rh   r   r0   r   r   r   r   	extra_dimrl   rl   rm   head_to_batch_dim  s   
zAttention.head_to_batch_dimquerykeyc           	      C   s   |j }| jr| }| }|du r*tj|jd |jd |jd |j |jd}d}n|}d}tj|||dd|| j	d}~| j
rE| }|jdd}~||}|S )	ak  
        Compute the attention scores.

        Args:
            query (`torch.Tensor`): The query tensor.
            key (`torch.Tensor`): The key tensor.
            attention_mask (`torch.Tensor`, *optional*): The attention mask to use. If `None`, no mask is applied.

        Returns:
            `torch.Tensor`: The attention probabilities/scores.
        Nr   r1   dtyperx   )betaalphar   )r   r   floatr   emptyr   rx   baddbmm	transposerL   r   softmaxr   )	rh   r   r   r   r   baddbmm_inputr   attention_scoresattention_probsrl   rl   rm   get_attention_scores  s2    

zAttention.get_attention_scorestarget_lengthr   c           	      C   s   | j }|du r	|S |jd }||krA|jjdkr7|jd |jd |f}tj||j|jd}tj||gdd}n
tj	|d|fd	d
}|dkrW|jd || k rU|j
|dd}|S |dkrg|d}|j
|dd}|S )a  
        Prepare the attention mask for the attention computation.

        Args:
            attention_mask (`torch.Tensor`):
                The attention mask to prepare.
            target_length (`int`):
                The target length of the attention mask. This is the length of the attention mask after padding.
            batch_size (`int`):
                The batch size, which is used to repeat the attention mask.
            out_dim (`int`, *optional*, defaults to `3`):
                The output dimension of the attention mask. Can be either `3` or `4`.

        Returns:
            `torch.Tensor`: The prepared attention mask.
        Nr   mpsr   r1   r   r   r   r   )valuer      )r   r   rx   typer   zerosr   catre   padrepeat_interleave	unsqueeze)	rh   r   r   r   r0   r   current_lengthpadding_shapepaddingrl   rl   rm   prepare_attention_maskM  s$   

z Attention.prepare_attention_maskc                 C   sf   | j dus	J dt| j tjr|  |}|S t| j tjr1|dd}|  |}|dd}|S J )aG  
        Normalize the encoder hidden states. Requires `self.norm_cross` to be specified when constructing the
        `Attention` class.

        Args:
            encoder_hidden_states (`torch.Tensor`): Hidden states of the encoder.

        Returns:
            `torch.Tensor`: The normalized encoder hidden states.
        NzGself.norm_cross must be defined to call self.norm_encoder_hidden_statesr1   r   )rU   r~   r   rT   rO   r   )rh   r   rl   rl   rm   norm_encoder_hidden_states|  s   

z$Attention.norm_encoder_hidden_statesc                 C   s  | j jjj}| j jjj}| jsYt| j jj| jjj| j	jjg}|j
d }|j
d }tj||| j||d| _| jj| | jrXt| j jj| jjj| j	jjg}| jj| nAt| jjj| j	jjg}|j
d }|j
d }tj||| j||d| _| jj| | jrt| jjj| j	jjg}| jj| t| drt| drt| drt| jjj| jjj| jjjg}|j
d }|j
d }tj||| j||d| _| jj| | jrt| jjj| jjj| jjjg}| jj| || _d S )Nr1   r   )r   rx   r   r\   rZ   r[   )rW   r   datarx   r   rH   r   r   rX   rY   r   r   rV   rG   to_qkvcopy_r   to_kvrd   r\   rZ   r[   r$   to_added_qkvrI   )rh   fuserx   r   concatenated_weightsin_featuresout_featuresconcatenated_biasrl   rl   rm   fuse_projections  sH   "

"




zAttention.fuse_projections)Nr   Nr   r   FFFNr   NNTNNTTFr   r   FFNNNFN)r.   r/   ro   N)FNN)r   )T)r   
__module____qualname____doc__intr   r   boolstrrC   rq   r   r   r   rg   r   r   Tensorr   r   r   r   r   r   no_gradr   __classcell__rl   rl   rj   rm   r   (   s   9	
 +
k

.
0
/r   c                   @   sP   e Zd ZdZ			ddedejdeej deej deej dejfd	d
ZdS )r/   zJ
    Default processor for performing attention-related computations.
    Nattnr   r   r   tembro   c                 O   s  t |dks|dd d urd}tdd| |}	|jd ur#|||}|j}
|
dkr>|j\}}}}||||| dd}|d u rE|jn|j\}}}||||}|j	d urd|	|dddd}|
|}|d u rp|}n|jrx||}||}||}||}||}||}||||}t||}||}|jd |}|jd |}|
dkr|dd	||||}|jr||	 }||j }|S )
Nr   rL   The `scale` argument is deprecated and will be ignored. Please remove it, as passing it will raise an error in the future. `scale` should directly be passed while calling the underlying pipeline component i.e., via `cross_attention_kwargs`.1.0.0r   r1   r   r   r   )r   getr
   rQ   r   r   viewr   r   r?   rW   rU   r   rX   rY   r   r   r   bmmr   r^   r   r,   r+   )rh   r  r   r   r   r  argskwargsdeprecation_messageresidual
input_ndimr   channelheightwidthsequence_lengthr   r   r   r   r   rl   rl   rm   r     sH   











zAttnProcessor.__call__NNN	r   r   r   r   r   r   r   r   r   rl   rl   rl   rm   r/     s$    	r/   c                          e Zd ZdZ						ddededee dee d	ed
ef fddZ		dde	de
jdee
j dee
j de
jf
ddZ  ZS )r   aK  
    Processor for implementing attention for the Custom Diffusion method.

    Args:
        train_kv (`bool`, defaults to `True`):
            Whether to newly train the key and value matrices corresponding to the text features.
        train_q_out (`bool`, defaults to `True`):
            Whether to newly train query matrices corresponding to the latent image features.
        hidden_size (`int`, *optional*, defaults to `None`):
            The hidden size of the attention layer.
        cross_attention_dim (`int`, *optional*, defaults to `None`):
            The number of channels in the `encoder_hidden_states`.
        out_bias (`bool`, defaults to `True`):
            Whether to include the bias parameter in `train_q_out`.
        dropout (`float`, *optional*, defaults to 0.0):
            The dropout probability to use.
    TNr   ry   rz   r{   r   r'   r   c                       t    || _|| _|| _|| _| jr*tj|p||dd| _tj|p$||dd| _	| jrStj||dd| _
tg | _| jtj|||d | jt| d S d S NFr@   rB   rC   ry   rz   r{   r   r   rV   r|   to_v_custom_diffusionto_q_custom_diffusionr]   to_out_custom_diffusionr_   r`   rh   ry   rz   r{   r   r'   r   rj   rl   rm   rC   &     
	z%CustomDiffusionAttnProcessor.__init__r  r   r   r   ro   c                 C   s  |j \}}}||||}| jr| ||jjj}n|||jjj}|d u r0d}	|}n
d}	|jr:|	|}| j
rd| || jjj}
| || jjj}|
|jjj}
||jjj}n
||}
||}|	rt|
}|d d d dd d f d |d d d dd d f< ||
 d| |
   }
|| d| |   }||}||
}
||}|||
|}t||}||}| jr| jd |}| jd |}|S |jd |}|jd |}|S )NFTr1   r   r   )r   r   rz   r  r   rW   r   r   rU   r   ry   r|   r  rX   rY   r   	ones_likedetachr   r   r  r   r  r^   )rh   r  r   r   r   r   r  r   r   	crossattnr   r   r  r   rl   rl   rm   r   @  sH   



4



z%CustomDiffusionAttnProcessor.__call__TTNNTr   r   r   r   r   r   r   r   r   r   rC   r   r   r   r   r  rl   rl   rj   rm   r     sD    r   c                   @   sD   e Zd ZdZ		d
dedejdeej deej dejf
dd	ZdS )r   z
    Processor for performing attention-related computations with extra learnable key and value matrices for the text
    encoder.
    Nr  r   r   r   ro   c                 O   s  t |dks|dd d urd}tdd| |}||jd |jd ddd}|j\}	}
}|||
|	}|d u r=|}n|jrE||}|	|dddd}|
|}||}||}||}||}||}|js||}||}||}||}tj||gdd}tj||gdd}n|}|}||||}t||}||}|jd |}|jd |}|dd	|j}|| }|S )
Nr   rL   r  r  r1   r   r   r   r   )r   r  r
   r  r   r   r   rU   r   r?   rW   r   rZ   r[   r)   rX   rY   r   r   r   r  r   r^   r   )rh   r  r   r   r   r	  r
  r  r  r   r  r   r   encoder_hidden_states_key_proj encoder_hidden_states_value_projr   r   r   rl   rl   rm   r     sF   	"











zAttnAddedKVProcessor.__call__r   r  rl   rl   rl   rm   r   {  s    	r   c                   @   sL   e Zd ZdZdd Z		ddedejdeej deej d	ejf
d
dZ	dS )r   z
    Processor for performing scaled dot-product attention (enabled by default if you're using PyTorch 2.0), with extra
    learnable key and value matrices for the text encoder.
    c                 C      t tds	tdd S )NrA   zWAttnAddedKVProcessor2_0 requires PyTorch 2.0, to use it, please upgrade PyTorch to 2.0.rd   re   ImportErrorrh   rl   rl   rm   rC     
   
z AttnAddedKVProcessor2_0.__init__Nr  r   r   r   ro   c                 O   s  t |dks|dd d urd}tdd| |}||jd |jd ddd}|j\}	}
}|j||
|	dd	}|d u r?|}n|jrG||}|	|dddd}|
|}|j|dd	}||}||}|j|dd	}|j|dd	}|js||}||}|j|dd	}|j|dd	}tj||gdd
}tj||gdd
}n|}|}tj||||ddd}|dd|	d|jd }|jd |}|jd |}|dd|j}|| }|S )Nr   rL   r  r  r1   r   r   r   )r0   r   r   F	attn_mask	dropout_p	is_causalr   )r   r  r
   r  r   r   r   rU   r   r?   rW   r   rZ   r[   r)   rX   rY   r   r   re   rA   r   r^   )rh   r  r   r   r   r	  r
  r  r  r   r  r   r   r"  r#  r   r   rl   rl   rm   r     sH   	"





z AttnAddedKVProcessor2_0.__call__r   
r   r   r   r   rC   r   r   r   r   r   rl   rl   rl   rm   r     s     
r   c                   @   H   e Zd ZdZdd Z		ddedejdejdeej d	ejf
d
dZ	dS )JointAttnProcessor2_0YAttention processor used typically in processing the SD3-like self-attention projections.c                 C   r$  NrA   zPAttnProcessor2_0 requires PyTorch 2.0, to use it, please upgrade PyTorch to 2.0.r%  r'  rl   rl   rm   rC        
zJointAttnProcessor2_0.__init__Nr  r   r   r   ro   c                 O   s8  |}|j }|dkr|j\}	}
}}||	|
|| dd}|j }|dkr8|j\}	}
}}||	|
|| dd}|jd }	||}||}||}||}||}|	|}t
j||gdd}t
j||gdd}t
j||gdd}|jd }||j }||	d|j|dd}||	d|j|dd}||	d|j|dd}tj|||ddd	}|dd|	d|j| }||j}|d d d |jd f |d d |jd d f }}|jd |}|jd |}|js||}|dkr|dd
|	|
||}|dkr|dd
|	|
||}||fS )Nr   r1   r   r   r   r   r   Fr+  r,  r   )r   r   r  r   rW   rX   rY   r\   rZ   r[   r   r   r   re   rA   r   r   r   r^   rJ   ra   )rh   r  r   r   r   r	  r
  r  r  r   r  r  r  context_input_ndimr   r   r    encoder_hidden_states_query_projr"  r#  rE   head_dimrl   rl   rm   r     sN   	











zJointAttnProcessor2_0.__call__r   
r   r   r   r   rC   r   r   FloatTensorr   r   rl   rl   rl   rm   r/  
       r/  c                	   @   <   e Zd ZdZdd Z	ddedejdejdejfd	d
ZdS )PAGJointAttnProcessor2_0r0  c                 C   r$  )NrA   zXPAGJointAttnProcessor2_0 requires PyTorch 2.0, to use it, please upgrade PyTorch to 2.0.r%  r'  rl   rl   rm   rC   X  r(  z!PAGJointAttnProcessor2_0.__init__Nr  r   r   ro   c                  C   s  |}|j }|dkr|j\}}}}	|||||	 dd}|j }
|
dkr8|j\}}}}	|||||	 dd}|jd }|d\}}|d\}}|jd }||}||}||}||}|	|}|
|}tj||gdd}tj||gdd}tj||gdd}|jd }||j }||d|j|dd}||d|j|dd}||d|j|dd}tj|||ddd	}|dd|d|j| }||j}|d d d |jd f |d d |jd d f }}|jd |}|jd |}|js
||}|dkr|dd
||||	}|
dkr,|dd
||||	}|jd }||}||}||}||}|	|}|
|}tj||gdd}tj||gdd}tj||gdd}|jd }||j }||d|j|dd}||d|j|dd}||d|j|dd}|d}tj||f|j|jd}td|d |d |f< |d |d |f d |dd}tj||||ddd}|dd|d|j| }||j}|d d d |jd f |d d |jd d f }}|jd |}|jd |}|js||}|dkr/|dd
||||	}|
dkr@|dd
||||	}t||g}t||g}||fS )Nr   r1   r   r   r   r   r   Fr3  r   rx   r   -infr)  )r   r   r  r   chunkrW   rX   rY   r\   rZ   r[   r   r   r   re   rA   r   r   r   r^   rJ   ra   sizer   rx   r   fill_diagonal_r   ) rh   r  r   r   r  r  r   r  r  r  r4  identity_block_sizehidden_states_orghidden_states_ptbencoder_hidden_states_orgencoder_hidden_states_ptb	query_orgkey_org	value_org$encoder_hidden_states_org_query_proj"encoder_hidden_states_org_key_proj$encoder_hidden_states_org_value_projrE   r6  	query_ptbkey_ptb	value_ptb$encoder_hidden_states_ptb_query_proj"encoder_hidden_states_ptb_key_proj$encoder_hidden_states_ptb_value_projr   	full_maskrl   rl   rm   r   ^  s   


























z!PAGJointAttnProcessor2_0.__call__r   	r   r   r   r   rC   r   r   r8  r   rl   rl   rl   rm   r;  U  s    
r;  c                   @   r.  )PAGCFGJointAttnProcessor2_0r0  c                 C   r$  )NrA   z[PAGCFGJointAttnProcessor2_0 requires PyTorch 2.0, to use it, please upgrade PyTorch to 2.0.r%  r'  rl   rl   rm   rC     r(  z$PAGCFGJointAttnProcessor2_0.__init__Nr  r   r   r   ro   c           %      O   s  |}|j }|dkr|j\}	}
}}||	|
|| dd}|j }|dkr8|j\}	}
}}||	|
|| dd}|jd }|d\}}}t||g}|d\}}}t||g}|jd }	||}||}|	|}|
|}||}||}tj||gdd}tj||gdd}tj||gdd}|jd }||j }||	d|j|dd}||	d|j|dd}||	d|j|dd}tj|||dd	d
}|dd|	d|j| }||j}|d d d |jd f |d d |jd d f }}|jd |}|jd |}|js||}|dkr+|dd|	|
||}|dkr<|dd|	|
||}|jd }	||}||}|	|}|
|} ||}!||}"tj|| gdd}tj||!gdd}tj||"gdd}|jd }||j }||	d|j|dd}||	d|j|dd}||	d|j|dd}|d}#tj|#|#f|j|jd}$td|$d |d |f< |$d |d |f d |$dd}$tj||||$dd	d}|dd|	d|j| }||j}|d d d |jd f |d d |jd d f }}|jd |}|jd |}|js.||}|dkr?|dd|	|
||}|dkrP|dd|	|
||}t||g}t||g}||fS )Nr   r1   r   r   r   r   r   r   Fr3  r   r<  r=  r)  )r   r   r  r   r>  r   r   rW   rX   rY   r\   rZ   r[   r   re   rA   r   r   r   r^   rJ   ra   r?  r   rx   r   r@  r   )%rh   r  r   r   r   r	  r
  r  r  r   r  r  r  r4  rA  hidden_states_uncondrB  rC  encoder_hidden_states_uncondrD  rE  rF  rG  rH  rI  rJ  rK  rE   r6  rL  rM  rN  rO  rP  rQ  r   rR  rl   rl   rm   r     s   	

























z$PAGCFGJointAttnProcessor2_0.__call__r   r7  rl   rl   rl   rm   rT    s     
rT  c                   @   r.  )FusedJointAttnProcessor2_0r0  c                 C   r$  r1  r%  r'  rl   rl   rm   rC     r2  z#FusedJointAttnProcessor2_0.__init__Nr  r   r   r   ro   c                 O   sZ  |}|j }|dkr|j\}	}
}}||	|
|| dd}|j }|dkr8|j\}	}
}}||	|
|| dd}|jd }	||}|jd d }tj||dd\}}}||}|jd d }tj||dd\}}}tj||gdd}tj||gdd}tj||gdd}|jd }||j	 }||	d|j	|dd}||	d|j	|dd}||	d|j	|dd}t
j|||dd	d
}|dd|	d|j	| }||j}|d d d |jd f |d d |jd d f }}|jd |}|jd |}|js||}|dkr|dd|	|
||}|dkr)|dd|	|
||}||fS )Nr   r1   r   r   r   r   r   r   Fr3  r   )r   r   r  r   r   r   splitr   r   r   re   rA   r   r   r   r^   rJ   ra   )rh   r  r   r   r   r	  r
  r  r  r   r  r  r  r4  qkv
split_sizer   r   r   encoder_qkvr5  r"  r#  rE   r6  rl   rl   rm   r     sV   	







z#FusedJointAttnProcessor2_0.__call__r   r7  rl   rl   rl   rm   rW    r9  rW  c                	   @   r:  )AuraFlowAttnProcessor2_0z;Attention processor used typically in processing Aura Flow.c                 C   $   t tdstddrtdd S d S )NrA   <2.1zAuraFlowAttnProcessor2_0 requires PyTorch 2.0, to use it, please upgrade PyTorch to at least 2.1 or above as we use `scale` in `F.scaled_dot_product_attention()`. rd   re   r   r&  r'  rl   rl   rm   rC     
   z!AuraFlowAttnProcessor2_0.__init__Nr  r   r   ro   c                 O   sD  |j d }||}||}||}	|d ur'||}
||}||}|j d }||j }||d|j|}||d|j|}|	|d|j|}	|j	d urV|	|}|j
d ur`|
|}|d ur|
|d|j|}
||d|j|}||d|j|}|jd ur||
}
|jd ur||}tj|
|gdd}tj||gdd}tj||	gdd}	|dd}|dd}|	dd}	tj|||	d|jdd}|dd|d|j| }||j}|d ur|d d |j d d f |d d d |j d f }}|jd |}|jd |}|d ur||}|d ur ||fS |S )	Nr   r   r1   r   r   r   Fr+  rL   r,  )r   rW   rX   rY   r\   rZ   r[   r   r  rR   rS   rb   rc   r   r   r   re   rA   rL   r   r   r   r^   ra   )rh   r  r   r   r	  r
  r   r   r   r   r5  r"  r#  rE   r6  rl   rl   rm   r     sf   





















z!AuraFlowAttnProcessor2_0.__call__r   rS  rl   rl   rl   rm   r\        
r\  c                	   @   r:  )FusedAuraFlowAttnProcessor2_0zRAttention processor used typically in processing Aura Flow with fused projections.c                 C   r]  )NrA   r^  r_  zFusedAuraFlowAttnProcessor2_0 requires PyTorch 2.0, to use it, please upgrade PyTorch to at least 2.1 or above as we use `scale` in `F.scaled_dot_product_attention()`. r`  r'  rl   rl   rm   rC   D  ra  z&FusedAuraFlowAttnProcessor2_0.__init__Nr  r   r   ro   c                 O   sf  |j d }||}|j d d }tj||dd\}	}
}|d ur7||}|j d d }tj||dd\}}}|
j d }||j }|	|d|j|}	|
|d|j|}
||d|j|}|jd urf||	}	|jd urp||
}
|d ur||d|j|}||d|j|}||d|j|}|j	d ur|	|}|j
d ur|	|}tj||	gdd}	tj||
gdd}
tj||gdd}|	dd}	|
dd}
|dd}tj|	|
|d|jdd	}|dd|d|j| }||	j}|d ur|d d |j d d f |d d d |j d f }}|jd |}|jd |}|d ur(||}|d ur1||fS |S )
Nr   r   r   r   r1   r   r   Frb  )r   r   r   rX  r   r   r  rR   rS   rb   rc   r   r   re   rA   rL   r   r   r   r^   ra   )rh   r  r   r   r	  r
  r   rY  rZ  r   r   r   r[  r5  r"  r#  rE   r6  rl   rl   rm   r   J  sn   


















z&FusedAuraFlowAttnProcessor2_0.__call__r   rS  rl   rl   rl   rm   rd  A  rc  rd  c                 C   s   |   jg | jd d dddR  }|  jg |jd d dddR  }|d |d  |d |d   }|d |d  |d |d   }|j| j | |j|j |fS )Nr   r1   r   ).r   ).r1   )r   r   r   type_as)xqxk	freqs_cisxq_xk_xq_outxk_outrl   rl   rm   
apply_rope  s
   **  $rm  c                   @   sX   e Zd ZdZdd Z			ddedejdeej deej	 d	eej d
ejfddZ
dS )FluxSingleAttnProcessor2_0s
    Processor for implementing scaled dot-product attention (enabled by default if you're using PyTorch 2.0).
    c                 C   r$  r1  r%  r'  rl   rl   rm   rC     r2  z#FluxSingleAttnProcessor2_0.__init__Nr  r   r   r   image_rotary_embro   c                 C   s|  |j }|dkr|j\}}}	}
||||	|
 dd}|d u r"|jn|j\}}}||}|d u r3|}||}||}|jd }||j }||d|j|dd}||d|j|dd}||d|j|dd}|jd urx||}|j	d ur|	|}|d urt
|||\}}tj|||ddd}|dd|d|j| }||j}|dkr|dd|||	|
}|S )	Nr   r1   r   r   r   Fr3  r   )r   r   r  r   rW   rX   rY   r   rR   rS   rm  re   rA   r   r   r   )rh   r  r   r   r   rp  r  r   r  r  r  r   r   r   r   rE   r6  rl   rl   rm   r     s6   








z#FluxSingleAttnProcessor2_0.__call__r  )r   r   r   r   rC   r   r   r   r   r8  r   rl   rl   rl   rm   rn    s&    rn  c                   @   sT   e Zd ZdZdd Z			ddedejdejdeej d	eej	 d
ejfddZ
dS )FluxAttnProcessor2_0r0  c                 C   r$  )NrA   zTFluxAttnProcessor2_0 requires PyTorch 2.0, to use it, please upgrade PyTorch to 2.0.r%  r'  rl   rl   rm   rC     r2  zFluxAttnProcessor2_0.__init__Nr  r   r   r   rp  ro   c                 C   s  |j }|dkr|j\}}}	}
||||	|
 dd}|j }|dkr6|j\}}}	}
||||	|
 dd}|jd }||}||}||}|jd }||j }||d|j|dd}||d|j|dd}||d|j|dd}|jd ur||}|j	d ur|	|}|
|}||}||}||d|j|dd}||d|j|dd}||d|j|dd}|jd ur||}|jd ur||}tj||gdd}tj||gdd}tj||gdd}|d urt|||\}}tj|||ddd	}|dd|d|j| }||j}|d d d |jd f |d d |jd d f }}|jd |}|jd |}||}|dkr^|dd
|||	|
}|dkro|dd
|||	|
}||fS )Nr   r1   r   r   r   r   r   Fr3  r   )r   r   r  r   rW   rX   rY   r   rR   rS   r\   rZ   r[   rb   rc   r   r   rm  re   rA   r   r   r   r^   ra   )rh   r  r   r   r   rp  r  r   r  r  r  r4  r   r   r   rE   r6  r5  r"  r#  rl   rl   rm   r     s|   























zFluxAttnProcessor2_0.__call__r  r   r   r   r   rC   r   r   r8  r   r   r   rl   rl   rl   rm   rq    s&    rq  c                   @   sX   e Zd ZdZddee fddZ		ddedej	deej	 d	eej	 d
ej	f
ddZ
dS )r     
    Processor for implementing memory efficient attention using xFormers.

    Args:
        attention_op (`Callable`, *optional*, defaults to `None`):
            The base
            [operator](https://facebookresearch.github.io/xformers/components/ops.html#xformers.ops.AttentionOpBase) to
            use as the attention operator. It is recommended to set to `None`, and allow xFormers to choose the best
            operator.
    Nrs   c                 C   
   || _ d S r   r}   rh   rs   rl   rl   rm   rC   [     
z%XFormersAttnAddedKVProcessor.__init__r  r   r   r   ro   c                 C   s~  |}| |jd |jd ddd}|j\}}}||||}|d u r'|}n|jr/||}||dddd}||}	||	}	|	|}
|
|}||
}
||}|js||}||}||}||}tj|
|gdd}tj||gdd}n|
}|}tjj|	|||| j|jd}||	j}||}|jd |}|jd |}|dd|j}|| }|S )Nr   r1   r   r   r   	attn_biasoprL   r   )r  r   r   r   rU   r   r?   rW   r   rZ   r[   r)   rX   rY   r   r   rt   r   r   rs   rL   r   r   r   r^   r   )rh   r  r   r   r   r  r   r  r   r   r"  r#  r   r   rl   rl   rm   r   ^  sD   "











z%XFormersAttnAddedKVProcessor.__call__r   r   r   r   r   r   r   r   rC   r   r   r   r   rl   rl   rl   rm   r   O  s     r   c                   @   sd   e Zd ZdZddee fddZ			ddedej	deej	 d	eej	 d
eej	 dej	fddZ
dS )r   rs  Nrs   c                 C   rt  r   r}   ru  rl   rl   rm   rC     rv  zXFormersAttnProcessor.__init__r  r   r   r   r  ro   c                 O   s  t |dks|dd d urd}tdd| |}	|jd ur#|||}|j}
|
dkr>|j\}}}}||||| dd}|d u rE|jn|j\}}}||||}|d urc|j\}}}|	d|d}|j
d uru|
|dddd}||}|d u r|}n|jr||}||}||}|| }|| }|| }tjj||||| j|jd	}||j}||}|jd |}|jd |}|
dkr|dd
||||}|jr||	 }||j }|S )Nr   rL   r  r  r   r1   r   r   rw  r   )r   r  r
   rQ   r   r   r  r   r   expandr?   rW   rU   r   rX   rY   r   
contiguousrt   r   r   rs   rL   r   r   r   r^   r   r,   r+   )rh   r  r   r   r   r  r	  r
  r  r  r  r   r  r  r  
key_tokensr   query_tokensr   r   r   rl   rl   rm   r     sR   








zXFormersAttnProcessor.__call__r   r  rz  rl   rl   rl   rm   r     s&    	r   c                   @   X   e Zd ZdZdd Z			ddedejdeej deej d	eej d
ejfddZ	dS )rp   a  
    Processor for implementing flash attention using torch_npu. Torch_npu supports only fp16 and bf16 data types. If
    fp32 is used, F.scaled_dot_product_attention will be used for computation, but the acceleration effect on NPU is
    not significant.

    c                 C   s   t  stdd S )NzTAttnProcessorNPU requires torch_npu extensions and is supported only on npu devices.)r   r&  r'  rl   rl   rm   rC     s   zAttnProcessorNPU.__init__Nr  r   r   r   r  ro   c                 O   sn  t |dks|dd d urd}tdd| |}	|jd ur#|||}|j}
|
dkr>|j\}}}}||||| dd}|d u rE|jn|j\}}}|d urb||||}|||j	d|jd }|j
d urt|
|dddd}||}|d u r|}n|jr||}||}||}|jd }||j	 }||d|j	|dd}||d|j	|dd}||d|j	|dd}|jtjtjfv rtj||||j	d	d |d
t|jd  ddd
dddd }ntj||||ddd}|dd|d|j	| }||j}|jd |}|jd |}|
dkr(|dd||||}|jr0||	 }||j }|S )Nr   rL   r  r  r   r1   r   r   BNSDr   i   F)	input_layoutpse
atten_maskrL   pre_tockensnext_tockens	keep_probsyncinner_preciser   r)  r   )r   r  r
   rQ   r   r   r  r   r   r   r?   rW   rU   r   rX   rY   r   r   float16bfloat16	torch_npunpu_fusion_attentionmathsqrtre   rA   r   r   r^   r,   r+   rh   r  r   r   r   r  r	  r
  r  r  r  r   r  r  r  r  r   r   r   r   rE   r6  rl   rl   rm   r      sx   










zAttnProcessorNPU.__call__r  r-  rl   rl   rl   rm   rp     s&    	rp   c                   @   r  )rf   ro  c                 C   r$  r1  r%  r'  rl   rl   rm   rC   f  r2  zAttnProcessor2_0.__init__Nr  r   r   r   r  ro   c                 O   sH  t |dks|dd d urd}tdd| |}	|jd ur#|||}|j}
|
dkr>|j\}}}}||||| dd}|d u rE|jn|j\}}}|d urb||||}|||j	d|jd }|j
d urt|
|dddd}||}|d u r|}n|jr||}||}||}|jd }||j	 }||d|j	|dd}||d|j	|dd}||d|j	|dd}|jd ur||}|jd ur||}tj||||d	d
d}|dd|d|j	| }||j}|jd |}|jd |}|
dkr|dd||||}|jr||	 }||j }|S )Nr   rL   r  r  r   r1   r   r   r   Fr)  r   )r   r  r
   rQ   r   r   r  r   r   r   r?   rW   rU   r   rX   rY   rR   rS   re   rA   r   r   r   r^   r,   r+   r  rl   rl   rm   r   j  s\   














zAttnProcessor2_0.__call__r  r-  rl   rl   rl   rm   rf   a  s&    	rf   c                   @   sx   e Zd ZdZdd Zdejdeej dejfddZ						dd
e	dejde
ej de
ej de
ej dejfddZd	S )StableAudioAttnProcessor2_0z
    Processor for implementing scaled dot-product attention (enabled by default if you're using PyTorch 2.0). This is
    used in the Stable Audio model. It applies rotary embedding on query and key vector, and allows MHA, GQA or MQA.
    c                 C   r$  )NrA   z[StableAudioAttnProcessor2_0 requires PyTorch 2.0, to use it, please upgrade PyTorch to 2.0.r%  r'  rl   rl   rm   rC     r(  z$StableAudioAttnProcessor2_0.__init__xrh  ro   c           	      C   sb   ddl m} |d jd }|dd |f |d|d f }}|||ddd}tj||fdd	}|S )
Nr1   apply_rotary_embr   r   .Tr   use_realuse_real_unbind_dimr   )
embeddingsr  r   r   r   )	rh   r  rh  r  rot_dimx_to_rotatex_unrotated	x_rotatedoutrl   rl   rm   apply_partial_rotary_emb  s   "z4StableAudioAttnProcessor2_0.apply_partial_rotary_embNr  r   r   r   
rotary_embc                 C   s  ddl m} |}|j}|dkr#|j\}	}
}}||	|
|| dd}|d u r*|jn|j\}	}}|d urG||||	}||	|jd|jd }||}|d u rS|}n|j	r[|
|}||}||}|jd |j }|jd | }||	d|j|dd}||	d||dd}||	d||dd}||jkr|j| }tj||dd}tj||dd}|jd ur||}|jd ur||}|d ur7|j}|j}|tj}|tj}|d jd }|dd |f |d|d f }}|||d	d
d}tj||fdd}|js-|dd |f |d|d f }}|||d	d
d}tj||fdd}||}||}tj||||ddd}|dd|	d|j| }||j}|jd |}|jd |}|dkru|dd
|	|
||}|jr}|| }||j }|S )Nr1   r  r   r   r   r   r   .Tr   r  r   Fr)  )r  r  r   r   r  r   r   r   rW   rU   r   rX   rY   r   r   rR   rS   r   r   float32r   rH   re   rA   r   r^   r,   r+   )rh   r  r   r   r   r  r  r  r  r   r  r  r  r  r   r   r   r   r6  r   heads_per_kv_headquery_dtype	key_dtyper  query_to_rotatequery_unrotatedquery_rotatedkey_to_rotatekey_unrotatedkey_rotatedrl   rl   rm   r     sv   










""



z$StableAudioAttnProcessor2_0.__call__r  )r   r   r   r   rC   r   r   r   r  r   r   r   rl   rl   rl   rm   r    s4    
r  c                   @   d   e Zd ZdZdd Z				ddedejdeej deej d	eej d
eej dejfddZ	dS )HunyuanAttnProcessor2_0z
    Processor for implementing scaled dot-product attention (enabled by default if you're using PyTorch 2.0). This is
    used in the HunyuanDiT model. It applies a s normalization layer and rotary embedding on query and key vector.
    c                 C   r$  r1  r%  r'  rl   rl   rm   rC   H	  r2  z HunyuanAttnProcessor2_0.__init__Nr  r   r   r   r  rp  ro   c                 C   sJ  ddl m} |}|jd ur|||}|j}	|	dkr.|j\}
}}}||
||| dd}|d u r5|jn|j\}
}}|d urR||||
}||
|jd|jd }|j	d urd|	|dddd}|
|}|d u rp|}n|jrx||}||}||}|jd }||j }||
d|j|dd}||
d|j|dd}||
d|j|dd}|jd ur||}|jd ur||}|d ur|||}|js|||}tj||||ddd}|dd|
d|j| }||j}|jd	 |}|jd |}|	dkr|dd
|
|||}|jr|| }||j }|S Nr1   r  r   r   r   r   Fr)  r   r   )r  r  rQ   r   r   r  r   r   r   r?   rW   rU   r   rX   rY   rR   rS   rH   re   rA   r   r   r   r^   r,   r+   )rh   r  r   r   r   r  rp  r  r  r  r   r  r  r  r  r   r   r   r   rE   r6  rl   rl   rm   r   L	  s`   	















z HunyuanAttnProcessor2_0.__call__NNNNr-  rl   rl   rl   rm   r  B	  s,    r  c                   @   r  )FusedHunyuanAttnProcessor2_0a  
    Processor for implementing scaled dot-product attention (enabled by default if you're using PyTorch 2.0) with fused
    projection layers. This is used in the HunyuanDiT model. It applies a s normalization layer and rotary embedding on
    query and key vector.
    c                 C   r$  )NrA   z\FusedHunyuanAttnProcessor2_0 requires PyTorch 2.0, to use it, please upgrade PyTorch to 2.0.r%  r'  rl   rl   rm   rC   	  r(  z%FusedHunyuanAttnProcessor2_0.__init__Nr  r   r   r   r  rp  ro   c                 C   s  ddl m} |}|jd ur|||}|j}	|	dkr.|j\}
}}}||
||| dd}|d u r5|jn|j\}
}}|d urR||||
}||
|jd|jd }|j	d urd|	|dddd}|d u r|
|}|jd d }tj||dd\}}}n#|jr||}||}||}|jd d }tj||dd\}}|jd }||j }||
d|j|dd}||
d|j|dd}||
d|j|dd}|jd ur||}|jd ur||}|d ur|||}|js|||}tj||||dd	d
}|dd|
d|j| }||j}|jd |}|jd |}|	dkr7|dd|
|||}|jr?|| }||j }|S )Nr1   r  r   r   r   r   r   r   Fr)  r   r   )r  r  rQ   r   r   r  r   r   r   r?   r   r   rX  rU   r   rW   r   rR   rS   rH   re   rA   r   r   r   r^   r,   r+   )rh   r  r   r   r   r  rp  r  r  r  r   r  r  r  r  r   rY  rZ  r   r   r   kvrE   r6  rl   rl   rm   r   	  sf   	















z%FusedHunyuanAttnProcessor2_0.__call__r  r-  rl   rl   rl   rm   r  	  ,    
r  c                   @   r  )PAGHunyuanAttnProcessor2_0W  
    Processor for implementing scaled dot-product attention (enabled by default if you're using PyTorch 2.0). This is
    used in the HunyuanDiT model. It applies a normalization layer and rotary embedding on query and key vector. This
    variant of the processor employs [Pertubed Attention Guidance](https://arxiv.org/abs/2403.17377).
    c                 C   r$  )NrA   zZPAGHunyuanAttnProcessor2_0 requires PyTorch 2.0, to use it, please upgrade PyTorch to 2.0.r%  r'  rl   rl   rm   rC   
  r(  z#PAGHunyuanAttnProcessor2_0.__init__Nr  r   r   r   r  rp  ro   c                 C   s  ddl m} |}|jd ur|||}|j}	|	dkr.|j\}
}}}||
||| dd}|d\}}|d u r<|jn|j\}
}}|d urY||||
}||
|j	d|jd }|j
d urk|
|dddd}||}|d u rw|}n|jr||}||}||}|jd }||j	 }||
d|j	|dd}||
d|j	|dd}||
d|j	|dd}|jd ur||}|jd ur||}|d ur|||}|js|||}tj||||ddd}|dd|
d|j	| }||j}|jd	 |}|jd |}|	dkr|dd
|
|||}|j
d ur0|
|dddd}||}||j}|jd	 |}|jd |}|	dkrZ|dd
|
|||}t||g}|jri|| }||j }|S r  )r  r  rQ   r   r   r  r   r>  r   r   r?   rW   rU   r   rX   rY   rR   rS   rH   re   rA   r   r   r   r^   r   r   r,   r+   )rh   r  r   r   r   r  rp  r  r  r  r   r  r  r  rB  rC  r  r   r   r   r   rE   r6  rl   rl   rm   r   
  st   	

















z#PAGHunyuanAttnProcessor2_0.__call__r  r-  rl   rl   rl   rm   r  
  r  r  c                   @   r  )PAGCFGHunyuanAttnProcessor2_0r  c                 C   r$  )NrA   z]PAGCFGHunyuanAttnProcessor2_0 requires PyTorch 2.0, to use it, please upgrade PyTorch to 2.0.r%  r'  rl   rl   rm   rC   
  r(  z&PAGCFGHunyuanAttnProcessor2_0.__init__Nr  r   r   r   r  rp  ro   c                 C   s  ddl m} |}|jd ur|||}|j}	|	dkr.|j\}
}}}||
||| dd}|d\}}}t	||g}|d u rD|jn|j\}
}}|d ura|
|||
}||
|jd|jd }|jd urs||dddd}||}|d u r|}n|jr||}||}||}|jd }||j }||
d|j|dd}||
d|j|dd}||
d|j|dd}|jd ur||}|jd ur||}|d ur|||}|js|||}tj||||ddd	}|dd|
d|j| }||j}|jd
 |}|jd |}|	dkr%|dd|
|||}|jd ur8||dddd}||}||j}|jd
 |}|jd |}|	dkrb|dd|
|||}t	||g}|jrq|| }||j }|S )Nr1   r  r   r   r   r   r   Fr)  r   r   )r  r  rQ   r   r   r  r   r>  r   r   r   r   r?   rW   rU   r   rX   rY   rR   rS   rH   re   rA   r   r   r   r^   r,   r+   )rh   r  r   r   r   r  rp  r  r  r  r   r  r  r  rU  rB  rC  r  r   r   r   r   rE   r6  rl   rl   rm   r   
  sv   	

















z&PAGCFGHunyuanAttnProcessor2_0.__call__r  r-  rl   rl   rl   rm   r  
  r  r  c                   @   sh   e Zd ZdZdd Z				ddedejdejdeej d	eej d
eej dee	 dejfddZ
dS )LuminaAttnProcessor2_0z
    Processor for implementing scaled dot-product attention (enabled by default if you're using PyTorch 2.0). This is
    used in the LuminaNextDiT model. It applies a s normalization layer and rotary embedding on query and key vector.
    c                 C   r$  r1  r%  r'  rl   rl   rm   rC     r2  zLuminaAttnProcessor2_0.__init__Nr  r   r   r   query_rotary_embkey_rotary_embbase_sequence_lengthro   c                 C   s0  ddl m} |j}	|	dkr!|j\}
}}}||
||| dd}|j\}
}}||}||}||}|jd }|jd }||j	 }|j
}|| }|jd urV||}|jd ur`||}||
d|j	|}||
d||}||
d||}|d ur|||dd}|d ur|||dd}||||}}|d u rd }n|d urtt|||j }n|j}|j	| }|dkr|dddd|ddd}|dddd|ddd}| |
ddd}|d|j	|d}|dd}|dd}|dd}tj|||||d	}|dd|}|S )
Nr1   r  r   r   r   F)r  r   )r*  rL   )r  r  r   r   r  r   rW   rX   rY   r   r   rR   rS   r   r  r  logrL   r   repeatflattenr   r{  re   rA   )rh   r  r   r   r   r  r  r  r  r  r   r  r  r  r  r   r   r   r   r   rE   r6  r   r   softmax_scalen_reprl   rl   rm   r     sZ   











  
zLuminaAttnProcessor2_0.__call__r  )r   r   r   r   rC   r   r   r   r   r   r   rl   rl   rl   rm   r    s0    		r  c                   @   r  )FusedAttnProcessor2_0u  
    Processor for implementing scaled dot-product attention (enabled by default if you're using PyTorch 2.0). It uses
    fused projection layers. For self-attention modules, all projection matrices (i.e., query, key, value) are fused.
    For cross-attention modules, key and value projection matrices are fused.

    <Tip warning={true}>

    This API is currently 🧪 experimental in nature and can change in future.

    </Tip>
    c                 C   r$  )NrA   z`FusedAttnProcessor2_0 requires at least PyTorch 2.0, to use it. Please upgrade PyTorch to > 2.0.r%  r'  rl   rl   rm   rC   o  r(  zFusedAttnProcessor2_0.__init__Nr  r   r   r   r  ro   c                 O   s  t |dks|dd d urd}tdd| |}	|jd ur#|||}|j}
|
dkr>|j\}}}}||||| dd}|d u rE|jn|j\}}}|d urb||||}|||j	d|jd }|j
d urt|
|dddd}|d u r||}|jd d	 }tj||dd
\}}}n#|jr||}||}||}|jd d }tj||dd
\}}|jd }||j	 }||d|j	|dd}||d|j	|dd}||d|j	|dd}|jd ur||}|jd ur||}tj||||ddd}|dd|d|j	| }||j}|jd |}|jd |}|
dkr6|dd||||}|jr>||	 }||j }|S )Nr   rL   r  r  r   r1   r   r   r   r   r   Fr)  r   )r   r  r
   rQ   r   r   r  r   r   r   r?   r   r   rX  rU   r   rW   r   rR   rS   re   rA   r   r   r   r^   r,   r+   )rh   r  r   r   r   r  r	  r
  r  r  r  r   r  r  r  r  r   rY  rZ  r   r   r   r  rE   r6  rl   rl   rm   r   u  sb   














zFusedAttnProcessor2_0.__call__r  r-  rl   rl   rl   rm   r  b  s&    
	r  c                       s   e Zd ZdZ							ddededee d	ee d
ededee f fddZ			dde
dejdeej deej dejf
ddZ  ZS )r   az  
    Processor for implementing memory efficient attention using xFormers for the Custom Diffusion method.

    Args:
    train_kv (`bool`, defaults to `True`):
        Whether to newly train the key and value matrices corresponding to the text features.
    train_q_out (`bool`, defaults to `True`):
        Whether to newly train query matrices corresponding to the latent image features.
    hidden_size (`int`, *optional*, defaults to `None`):
        The hidden size of the attention layer.
    cross_attention_dim (`int`, *optional*, defaults to `None`):
        The number of channels in the `encoder_hidden_states`.
    out_bias (`bool`, defaults to `True`):
        Whether to include the bias parameter in `train_q_out`.
    dropout (`float`, *optional*, defaults to 0.0):
        The dropout probability to use.
    attention_op (`Callable`, *optional*, defaults to `None`):
        The base
        [operator](https://facebookresearch.github.io/xformers/components/ops.html#xformers.ops.AttentionOpBase) to use
        as the attention operator. It is recommended to set to `None`, and allow xFormers to choose the best operator.
    TFNr   ry   rz   r{   r   r'   r   rs   c                    s   t    || _|| _|| _|| _|| _| jr-tj|p||dd| _	tj|p'||dd| _
| jrVtj||dd| _tg | _| jtj|||d | jt| d S d S r  )rB   rC   ry   rz   r{   r   rs   r   rV   r|   r  r  r]   r  r_   r`   )rh   ry   rz   r{   r   r'   r   rs   rj   rl   rm   rC     s   

z-CustomDiffusionXFormersAttnProcessor.__init__r  r   r   r   ro   c                 C   s  |d u r|j n|j \}}}||||}| jr#| ||jjj}n|||jjj}|d u r7d}	|}n
d}	|jrA|	|}| j
rk| || jjj}
| || jjj}|
|jjj}
||jjj}n
||}
||}|	rt|
}|d d d dd d f d |d d d dd d f< ||
 d| |
   }
|| d| |   }|| }||
 }
|| }tjj||
||| j|jd}||j}||}| jr| jd |}| jd |}|S |jd |}|jd |}|S )NFTr1   r   rw  r   )r   r   rz   r  r   rW   r   r   rU   r   ry   r|   r  rX   rY   r   r  r  r   r|  rt   r   r   rs   rL   r   r  r^   )rh   r  r   r   r   r   r  r   r   r  r   r   r  rl   rl   rm   r     sN   



4
z-CustomDiffusionXFormersAttnProcessor.__call__)TFNNTr   Nr   )r   r   r   r   r   r   r   r   r   rC   r   r   r   r   r  rl   rl   rj   rm   r     sJ     r   c                       r  )r   u  
    Processor for implementing attention for the Custom Diffusion method using PyTorch 2.0’s memory-efficient scaled
    dot-product attention.

    Args:
        train_kv (`bool`, defaults to `True`):
            Whether to newly train the key and value matrices corresponding to the text features.
        train_q_out (`bool`, defaults to `True`):
            Whether to newly train query matrices corresponding to the latent image features.
        hidden_size (`int`, *optional*, defaults to `None`):
            The hidden size of the attention layer.
        cross_attention_dim (`int`, *optional*, defaults to `None`):
            The number of channels in the `encoder_hidden_states`.
        out_bias (`bool`, defaults to `True`):
            Whether to include the bias parameter in `train_q_out`.
        dropout (`float`, *optional*, defaults to 0.0):
            The dropout probability to use.
    TNr   ry   rz   r{   r   r'   r   c                    r  r  r  r  rj   rl   rm   rC   T  r  z(CustomDiffusionAttnProcessor2_0.__init__r  r   r   r   ro   c                 C   s  |j \}}}||||}| jr| |}n||}|d u r$d}	|}n
d}	|jr.||}| jrX| |	| jj
j}
| |	| jj
j}|
	|jj
j}
|	|jj
j}n
||}
||}|	rt|
}|d d d dd d f d |d d d dd d f< ||
 d| |
   }
|| d| |   }|j d }||j }||d|j|dd}|
|d|j|dd}
||d|j|dd}tj||
||ddd}|dd|d|j| }|	|j}| jr| jd |}| jd |}|S |jd |}|jd |}|S )	NFTr1   r   r   r   r)  r   )r   r   rz   r  rW   rU   r   ry   r|   r   r   r   r  rX   rY   r   r  r  r   r  r   re   rA   r   r  r^   )rh   r  r   r   r   r   r  r   r   r  r   r   r  rE   r6  rl   rl   rm   r   n  sP   




4

z(CustomDiffusionAttnProcessor2_0.__call__r   r   r!  rl   rl   rj   rm   r   @  sD    r   c                   @   sR   e Zd ZdZdefddZ		ddedejde	ej d	e	ej d
ejf
ddZ
dS )r   a'  
    Processor for implementing sliced attention.

    Args:
        slice_size (`int`, *optional*):
            The number of steps to compute attention. Uses as many slices as `attention_head_dim // slice_size`, and
            `attention_head_dim` must be a multiple of the `slice_size`.
    r   c                 C   rt  r   r   rh   r   rl   rl   rm   rC     rv  zSlicedAttnProcessor.__init__Nr  r   r   r   ro   c                 C   s
  |}|j }|dkr|j\}}}	}
||||	|
 dd}|d u r$|jn|j\}}}||||}|jd urC||dddd}||}|jd }||}|d u rY|}n|jra|	|}|
|}||}||}||}|j\}}}tj||||j f|j|jd}t|d | j d D ]=}|| j }|d | j }||| }||| }|d ur||| nd }||||}t|||| }||||< q||}|jd |}|jd |}|dkr|dd|||	|
}|jr|| }||j }|S )Nr   r1   r   r   r<  r   r   )r   r   r  r   r   r?   rW   r   rU   r   rX   rY   r   r   r   rx   r   ranger   r   r  r   r^   r   r,   r+   )rh   r  r   r   r   r  r  r   r  r  r  r  r   r   r   r   r   batch_size_attentionr~  i	start_idxend_idxquery_slice	key_sliceattn_mask_slice
attn_slicerl   rl   rm   r     sV   











zSlicedAttnProcessor.__call__r   )r   r   r   r   r   rC   r   r   r   r   r   rl   rl   rl   rm   r     s     	r   c                   @   sX   e Zd ZdZdd Z			ddddejdeej d	eej d
eej dejfddZdS )r   ah  
    Processor for implementing sliced attention with extra learnable key and value matrices for the text encoder.

    Args:
        slice_size (`int`, *optional*):
            The number of steps to compute attention. Uses as many slices as `attention_head_dim // slice_size`, and
            `attention_head_dim` must be a multiple of the `slice_size`.
    c                 C   rt  r   r  r  rl   rl   rm   rC     rv  z#SlicedAttnAddedKVProcessor.__init__Nr  r   r   r   r   r  ro   c                 C   s4  |}|j d ur| ||}||jd |jd ddd}|j\}}}	||||}|d u r2|}n|jr:||}||dddd}||}
|
jd }|	|
}
|
|}||}|	|}|	|}|js||}||}|	|}|	|}tj||gdd}tj||gdd}n|}|}|
j\}}}	tj||||j f|
j|
jd}t|d | j d D ]=}|| j }|d | j }|
|| }||| }|d ur||| nd }||||}t|||| }||||< q||}|jd |}|jd |}|dd|j}|| }|S )Nr   r1   r   r   r   r<  r   )rQ   r  r   r   r   rU   r   r?   rW   r   rZ   r[   r)   rX   rY   r   r   r   r   rx   r   r  r   r   r  r   r^   r   )rh   r  r   r   r   r  r  r   r  r   r   r   r"  r#  r   r   r  r~  r  r  r  r  r  r  r  rl   rl   rm   r     s\   
"













z#SlicedAttnAddedKVProcessor.__call__r  )	r   r   r   r   rC   r   r   r   r   rl   rl   rl   rm   r   	  s&    	r   c                       sF   e Zd ZdZdedef fddZdejdejdejfd	d
Z  Z	S )rP   ai  
    Spatially conditioned normalization as defined in https://arxiv.org/abs/2209.09002.

    Args:
        f_channels (`int`):
            The number of channels for input to group normalization layer, and output of the spatial norm layer.
        zq_channels (`int`):
            The number of channels for the quantized vector as described in the paper.
    r8   r9   c                    sN   t    tj|dddd| _tj||dddd| _tj||dddd| _d S )Nr   gư>Tr4   r1   r   )kernel_sizestrider   )rB   rC   r   rO   
norm_layerConv2dconv_yconv_b)rh   r8   r9   rj   rl   rm   rC   p  s   
zSpatialNorm.__init__fzqro   c                 C   sD   |j dd  }tj||dd}| |}|| | | | }|S )Nr   nearest)r?  mode)r   re   interpolater  r  r  )rh   r  r  f_sizenorm_fnew_frl   rl   rm   r   z  s
   
zSpatialNorm.forward)
r   r   r   r   r   rC   r   r   r   r  rl   rl   rj   rm   rP   e  s    
$
rP   c                       n   e Zd ZdZd fdd	Z					ddedejd	eej d
eej deej de	deej fddZ
  ZS )IPAdapterAttnProcessora  
    Attention processor for Multiple IP-Adapters.

    Args:
        hidden_size (`int`):
            The hidden size of the attention layer.
        cross_attention_dim (`int`):
            The number of channels in the `encoder_hidden_states`.
        num_tokens (`int`, `Tuple[int]` or `List[int]`, defaults to `(4,)`):
            The context length of the image features.
        scale (`float` or List[`float`], defaults to 1.0):
            the weight scale of image prompt.
    Nr   r   c                    s   t    | _ | _t|ttfs|g}|| _t|ts$|gt| }t|t|kr0t	d|| _
t fddtt|D | _t fddtt|D | _d S )NJ`scale` should be a list of integers with the same length as `num_tokens`.c                       g | ]
}t j d dqS Fr@   r   rV   r   r   r   r{   rl   rm   r         z3IPAdapterAttnProcessor.__init__.<locals>.<listcomp>c                    r  r  r  r  r  rl   rm   r     r  )rB   rC   r{   r   r~   tuplelist
num_tokensr   rN   rL   r   r]   r  to_k_ipto_v_iprh   r{   r   r  rL   rj   r  rm   rC     s"   


zIPAdapterAttnProcessor.__init__r  r   r   r   r  rL   r   c           %   
   C   s  |}|d ur@t |tr|\}}	n0d}
tdd|
dd |jd | jd  }|d d d |d d f |d d |d d d f g}}	|jd urK|||}|j}|dkrf|j\}}}}||||| dd	}|d u rm|jn|j\}}}|	|||}|j
d ur|
|dd	dd	}||}|d u r|}n|jr||}||}||}||}||}||}||||}t||}||}|d urht |tst|d}t|t| j  krt|	ksn td
t| dt| j dt|	 dtt|| j|	D ]Y\}\}}}t |tjr!|jdkr%td|jd |jd krCtd|jd  d|jd  d| t |tret||jd ksetd|jd  dt| d| qnd gt| j }t|	| j| j| j|D ]\}}}}}d}t |trt dd |D rd}n|dkrd}|sV|d ur,t |ts|g|jd  }|jd }t!|D ]l}||d d |d d d d f } ||d d |d d d d f }!|| } ||!}!||| d }"t|"|!}#||#}#t"#|d d |d d d d f ||#jd |#jd	 }$|$j$|j%|j&d}$||| |#|$   }qq{||} ||}!|| } ||!}!||| d }"t|"|!}||}|||  }q{|j'd |}|j'd |}|dkrw|dd(||||}|j)r|| }||j* }|S )NYou have passed a tensor as `encoder_hidden_states`. This is deprecated and will be removed in a future release. Please make sure to update your script to pass `encoder_hidden_states` as a tuple to suppress this warning.!encoder_hidden_states not a tupler  Fstandard_warnr1   r   r   r   "Length of ip_adapter_masks array ()) must match length of self.scale array (") and number of ip_hidden_states ()Each element of the ip_adapter_masks array should be a tensor with shape [1, num_images_for_ip_adapter, height, width]. Please use `IPAdapterMaskProcessor` to preprocess your maskNumber of masks (&) does not match number of ip images () at index #) does not match number of scales (c                 s       | ]}|d kV  qdS r   Nrl   r   srl   rl   rm   	<genexpr>      z2IPAdapterAttnProcessor.__call__.<locals>.<genexpr>Tr   r   r   )+r~   r  r
   r   r  rQ   r   r  r   r   r?   rW   rU   r   rX   rY   r   r   r   r  r   r   r  r   r   rL   rN   	enumeratezipr   r  r  allr  r	   
downsampler   r   rx   r^   r   r,   r+   )%rh   r  r   r   r   r  rL   r   r  ip_hidden_statesr  end_posr  r   r  r  r  r  r   r   r   r   r   indexmaskip_statecurrent_ip_hidden_statesr  r  skipcurrent_num_imagesr  ip_keyip_valueip_attention_probs_current_ip_hidden_statesmask_downsamplerl   rl   rm   r     s   














&  


  







zIPAdapterAttnProcessor.__call__Nr  r   NNNr   Nr   r   r   r   rC   r   r   r   r   r   r   r  rl   rl   rj   rm   r    s.    r  c                       r  )IPAdapterAttnProcessor2_0a  
    Attention processor for IP-Adapter for PyTorch 2.0.

    Args:
        hidden_size (`int`):
            The hidden size of the attention layer.
        cross_attention_dim (`int`):
            The number of channels in the `encoder_hidden_states`.
        num_tokens (`int`, `Tuple[int]` or `List[int]`, defaults to `(4,)`):
            The context length of the image features.
        scale (`float` or `List[float]`, defaults to 1.0):
            the weight scale of image prompt.
    Nr  r   c                    s   t    ttdst| jj d| _ | _t	|t
tfs#|g}|| _t	|ts2|gt| }t|t|kr>td|| _t fddtt|D | _t fddtt|D | _d S )NrA   z@ requires PyTorch 2.0, to use it, please upgrade PyTorch to 2.0.r  c                    r  r  r  r  r  rl   rm   r   m  r  z6IPAdapterAttnProcessor2_0.__init__.<locals>.<listcomp>c                    r  r  r  r  r  rl   rm   r   p  r  )rB   rC   rd   re   r&  rk   r   r{   r   r~   r  r  r  r   rN   rL   r   r]   r  r  r  r  rj   r  rm   rC   W  s*   



z"IPAdapterAttnProcessor2_0.__init__r  r   r   r   r  rL   r   c           %   
   C   s  |}|d ur@t |tr|\}}	n0d}
tdd|
dd |jd | jd  }|d d d |d d f |d d |d d d f g}}	|jd urK|||}|j}|dkrf|j\}}}}||||| dd	}|d u rm|jn|j\}}}|d ur|	|||}|||j
d
|jd
 }|jd ur||dd	dd	}||}|d u r|}n|jr||}||}||}|jd
 }||j
 }||d
|j
|dd	}||d
|j
|dd	}||d
|j
|dd	}tj||||ddd}|dd	|d
|j
| }||j}|d urt |tst|d}t|t| j  kr/t|	ksEn tdt| dt| j dt|	 dtt|| j|	D ]Y\}\}}}t |tjrb|jdkrftd|jd |jd krtd|jd  d|jd  d| t |trt||jd kstd|jd  dt| d| qNnd gt| j }t|	| j| j | j!|D ]\}}}}}d}t |trt"dd |D rd}n|dkrd}|s|d urt |ts|g|jd  }|jd }t#|D ]} ||d d | d d d d f }!||d d | d d d d f }"|!|d
|j
|dd	}!|"|d
|j
|dd	}"tj||!|"d ddd}#|#dd	|d
|j
| }#|#|j}#t$%|d d | d d d d f ||#jd |#jd	 }$|$j|j|j&d}$|||  |#|$   }qq||}!||}"|!|d
|j
|dd	}!|"|d
|j
|dd	}"tj||!|"d ddd}|dd	|d
|j
| }||j}|||  }q|j'd |}|j'd |}|dkr|d
d||||}|j(r|| }||j) }|S )Nr  r  r  Fr  r1   r   r   r   r   r   r)  r  r  r  r  r  r  r  r  r  c                 s   r  r  rl   r  rl   rl   rm   r    r  z5IPAdapterAttnProcessor2_0.__call__.<locals>.<genexpr>Tr   r   )*r~   r  r
   r   r  rQ   r   r  r   r   r   r?   rW   rU   r   rX   rY   re   rA   r   r   r   r   r  r   r   rL   rN   r  r  r   r   r  r  r  r  r	   r  rx   r^   r,   r+   )%rh   r  r   r   r   r  rL   r   r  r  r  r  r  r   r  r  r  r  r   r   r   r   rE   r6  r  r   r  r  r  r  r  r  r  r  r  r  r	  rl   rl   rm   r   s  s
  











(  


  

z"IPAdapterAttnProcessor2_0.__call__r
  r  r  rl   rl   rj   rm   r  H  s.     r  c                   @   X   e Zd ZdZdd Z			ddedejdeej deej d	eej d
ej	fddZ
dS )PAGIdentitySelfAttnProcessor2_0
    Processor for implementing PAG using scaled dot-product attention (enabled by default if you're using PyTorch 2.0).
    PAG reference: https://arxiv.org/abs/2403.17377
    c                 C   r$  )NrA   z_PAGIdentitySelfAttnProcessor2_0 requires PyTorch 2.0, to use it, please upgrade PyTorch to 2.0.r%  r'  rl   rl   rm   rC   4  r(  z(PAGIdentitySelfAttnProcessor2_0.__init__Nr  r   r   r   r  ro   c                 C   sf  |}|j d ur| ||}|j}|dkr(|j\}}	}
}|||	|
| dd}|d\}}|j\}}}|d urL||||}|||jd|jd }|jd ur^||dddd}|	|}|
|}||}|jd }||j }||d|j|dd}||d|j|dd}||d|j|dd}tj||||ddd}|dd|d|j| }||j}|jd |}|jd |}|dkr|dd	||	|
|}|j\}}}|jd ur||dddd}||}||j}|jd |}|jd |}|dkr|dd	||	|
|}t||g}|jr,|| }||j }|S )
Nr   r1   r   r   r   Fr)  r   r   )rQ   r   r   r  r   r>  r   r   r?   rW   rX   rY   re   rA   r   r   r   r^   r   r   r,   r+   )rh   r  r   r   r   r  r  r  r   r  r  r  rB  rC  r  r   r   r   r   rE   r6  rl   rl   rm   r   :  sZ   










z(PAGIdentitySelfAttnProcessor2_0.__call__r  rr  rl   rl   rl   rm   r  .  &    
r  c                   @   r  )"PAGCFGIdentitySelfAttnProcessor2_0r  c                 C   r$  )NrA   zbPAGCFGIdentitySelfAttnProcessor2_0 requires PyTorch 2.0, to use it, please upgrade PyTorch to 2.0.r%  r'  rl   rl   rm   rC     r(  z+PAGCFGIdentitySelfAttnProcessor2_0.__init__Nr  r   r   r   r  ro   c                 C   sz  |}|j d ur| ||}|j}|dkr(|j\}}	}
}|||	|
| dd}|d\}}}t||g}|j\}}}|d urT||||}|||j	d|jd }|j
d urf|
|dddd}||}||}||}|jd }||j	 }||d|j	|dd}||d|j	|dd}||d|j	|dd}tj||||ddd}|dd|d|j	| }||j}|jd	 |}|jd |}|dkr|dd
||	|
|}|j\}}}|j
d ur|
|dddd}||}|}||j}|jd	 |}|jd |}|dkr'|dd
||	|
|}t||g}|jr6|| }||j }|S )Nr   r1   r   r   r   r   Fr)  r   r   )rQ   r   r   r  r   r>  r   r   r   r   r?   rW   rX   rY   re   rA   r   r   r   r^   r,   r+   )rh   r  r   r   r   r  r  r  r   r  r  r  rU  rB  rC  r  r   r   r   r   rE   r6  rl   rl   rm   r     s^   










z+PAGCFGIdentitySelfAttnProcessor2_0.__call__r  rr  rl   rl   rl   rm   r    r  r  c                   @      e Zd Zdd ZdS )LoRAAttnProcessorc                 C      d S r   rl   r'  rl   rl   rm   rC        zLoRAAttnProcessor.__init__Nr   r   r   rC   rl   rl   rl   rm   r        r  c                   @   r  )LoRAAttnProcessor2_0c                 C   r  r   rl   r'  rl   rl   rm   rC     r  zLoRAAttnProcessor2_0.__init__Nr  rl   rl   rl   rm   r    r  r  c                   @   r  )LoRAXFormersAttnProcessorc                 C   r  r   rl   r'  rl   rl   rm   rC     r  z"LoRAXFormersAttnProcessor.__init__Nr  rl   rl   rl   rm   r    r  r  c                   @   r  )LoRAAttnAddedKVProcessorc                 C   r  r   rl   r'  rl   rl   rm   rC     r  z!LoRAAttnAddedKVProcessor.__init__Nr  rl   rl   rl   rm   r    r  r  )Hr   r  typingr   r   r   r   r   r   torch.nn.functionalr   
functionalre   image_processorr	   utilsr
   r   utils.import_utilsr   r   utils.torch_utilsr   r   
get_loggerr   r   r  rt   xformers.opsr   r   r/   r   r   r   r/  r;  rT  rW  r\  rd  rm  rn  rq  r   r   rp   rf   r  r  r  r  r  r  r  r   r   r   r   rP   r  r  r  r  r  r  r  r  ADDED_KV_ATTENTION_PROCESSORSCROSS_ATTENTION_PROCESSORSr   rl   rl   rl   rm   <module>   s   

     'HhCLK  &O]bBbG^m^ bg{|`jtrW\ G gcg	