o
    Gis                     @   s  d dl Z d dlmZ d dlZd dlZd dlmZ d dlm  m	Z
 ddlmZmZ ddlmZmZ ddlmZmZ ddlmZ dd	lmZmZ dd
lmZ ddlmZ ddlmZmZm Z  ddl!m"Z" ddl#m$Z$ ddl%m&Z&m'Z'm(Z( e)e*Z+d2d3ddZ,d2d3ddZ-d2d3ddZ.dddddej/fde0dej1e0B de2fdd Z3G d!d" d"Z4G d#d dejj5eZ6G d$d% d%ejj5Z7G d&d' d'ej5Z8G d(d) d)ej5Z9G d*d+ d+ejj5Z:eG d,d- d-ej5Z;eG d.d/ d/ej5Z<G d0d1 d1e$eeeeZ=dS )4    N)Any   )ConfigMixinregister_to_config)FromOriginalModelMixinPeftAdapterMixin)apply_lora_scalelogging)maybe_allow_in_graph   )AttentionModuleMixinFeedForward)dispatch_attention_fn)
CacheMixin)TimestepEmbeddingapply_rotary_embget_timestep_embedding)Transformer2DModelOutput)
ModelMixin)AdaLayerNormContinuousAdaLayerNormZeroAdaLayerNormZeroSingleattnBriaAttentionc           	      C   sj   |  |}| |}| |}d  } }}|d ur-| jd ur-| |}| |}| |}||||||fS N)to_qto_kto_vadded_kv_proj_dim
add_q_proj
add_k_proj
add_v_proj	r   hidden_statesencoder_hidden_statesquerykeyvalueencoder_queryencoder_keyencoder_value r+   b/home/ubuntu/.local/lib/python3.10/site-packages/diffusers/models/transformers/transformer_bria.py_get_projections   s   





r-   c           	      C   sb   |  |jddd\}}}d } }}|d ur)t| dr)| |jddd\}}}||||||fS )Nr   dimr   to_added_qkv)to_qkvchunkhasattrr1   r"   r+   r+   r,   _get_fused_projections'   s
   r5   c                 C   s   | j r	t| ||S t| ||S r   )fused_projectionsr5   r-   )r   r#   r$   r+   r+   r,   _get_qkv_projections1   s   r7   g     @F      ?Tr0   posthetac              	   C   s  | d dksJ t |trt|}t |tjrt|}|| }d|tjd| d||jdd| d  |    | }t||}|r^|r^|	 j
ddd }	| j
ddd }
|	|
fS |rtj|	 |	 gdd }	tj| | gdd }
|	|
fS tt||}|S )	a  
    Precompute the frequency tensor for complex exponentials (cis) with given dimensions.

    This function calculates a frequency tensor with complex exponentials using the given dimension 'dim' and the end
    index 'end'. The 'theta' parameter scales the frequencies. The returned tensor contains complex values in complex64
    data type.

    Args:
        dim (`int`): Dimension of the frequency tensor.
        pos (`np.ndarray` or `int`): Position indices for the frequency tensor. [S] or scalar
        theta (`float`, *optional*, defaults to 10000.0):
            Scaling factor for frequency computation. Defaults to 10000.0.
        use_real (`bool`, *optional*):
            If True, return real part and imaginary part separately. Otherwise, return complex numbers.
        linear_factor (`float`, *optional*, defaults to 1.0):
            Scaling factor for the context extrapolation. Defaults to 1.0.
        ntk_factor (`float`, *optional*, defaults to 1.0):
            Scaling factor for the NTK-Aware RoPE. Defaults to 1.0.
        repeat_interleave_real (`bool`, *optional*, defaults to `True`):
            If `True` and `use_real`, real part and imaginary part are each interleaved with themselves to reach `dim`.
            Otherwise, they are concateanted with themselves.
        freqs_dtype (`torch.float32` or `torch.float64`, *optional*, defaults to `torch.float32`):
            the dtype of the frequency tensor.
    Returns:
        `torch.Tensor`: Precomputed frequency tensor with complex exponentials. [S, D/2]
    r   r   r8   )dtypedeviceN   r/   r.   )
isinstanceinttorcharangenpndarray
from_numpyr<   outercosrepeat_interleavefloatsincatpolar	ones_like)r0   r9   r:   use_reallinear_factor
ntk_factorrepeat_interleave_realfreqs_dtypefreqs	freqs_cos	freqs_sin	freqs_cisr+   r+   r,   get_1d_rotary_pos_embed7   s.   $


(rV   c                   @   sX   e Zd ZdZdZdd Z			ddddejdejdejdB d	ejdB d
ejfddZdS )BriaAttnProcessorNc                 C   s    t tdst| jj dd S )Nscaled_dot_product_attentionz; requires PyTorch 2.0. Please upgrade your pytorch version.)r4   FImportError	__class____name__selfr+   r+   r,   __init__}   s   
zBriaAttnProcessor.__init__r   r   r#   r$   attention_maskimage_rotary_embreturnc                 C   s  t |||\}}}}	}
}|d|jdf}|d|jdf}|d|jdf}||}||}|jd urv|	d|jdf}	|
d|jdf}
|d|jdf}||	}	||
}
tj	|	|gdd}tj	|
|gdd}tj	||gdd}|d urt
||dd}t
||dd}t||||| j| jd}|dd}||j}|d ur|j|jd |jd |jd  gdd\}}|jd |}|jd |}||}||fS |S )	Nr.   r=   r/   )sequence_dim)	attn_maskbackendparallel_configr   r   r   )r7   	unflattenheadsnorm_qnorm_kr   norm_added_qnorm_added_kr@   rJ   r   r   _attention_backend_parallel_configflattentor;   split_with_sizesshapeto_out
to_add_out)r^   r   r#   r$   r`   ra   r%   r&   r'   r(   r)   r*   r+   r+   r,   __call__   sN   






zBriaAttnProcessor.__call__NNN)	r\   
__module____qualname__rm   rn   r_   r@   Tensorru   r+   r+   r+   r,   rW   y   s(    rW   c                       s   e Zd ZeZegZ													dded	ed
ededededB dedB dededededB dedef fddZ				dde
jde
jdB de
jdB de
jdB de
jf
ddZ  ZS ) r      @           FNTh㈵>	query_dimrh   dim_headdropoutbiasr   added_proj_biasout_biasepsout_dimcontext_pre_onlypre_onlyelementwise_affinec                    s  t    || _|
d ur|
n|| | _|| _|| _|| _|
d ur"|
n|| _|| _|| _	|
d ur3|
| n|| _
|| _|| _tjj||	|d| _tjj||	|d| _tjj|| j|d| _tjj|| j|d| _tjj|| j|d| _| j	stjg | _| jtjj| j| j|d | jtj| |d urtjj||	d| _tjj||	d| _tjj|| j|d| _tjj|| j|d| _tjj|| j|d| _tjj| j||d| _|d u r|   }| !| d S )N)r   r   r   )r   )"superr_   head_dim	inner_dimr~   use_biasr   r   r   r   rh   r   r   r@   nnRMSNormri   rj   Linearr   r   r   
ModuleListrs   appendDropoutrk   rl   r   r    r!   rt   _default_processor_clsset_processor)r^   r~   rh   r   r   r   r   r   r   r   r   r   r   r   	processorr[   r+   r,   r_      s>   
zBriaAttention.__init__r#   r$   r`   ra   rb   c                    s   t t| jjj  ddh fdd| D }t|dkr1t	
d| d| jjj d  fd	d
| D }| j| ||||fi |S )Nip_adapter_masksip_hidden_statesc                    s$   g | ]\}}| vr|vr|qS r+   r+   ).0k_attn_parametersquiet_attn_parametersr+   r,   
<listcomp>  s   $ z)BriaAttention.forward.<locals>.<listcomp>r   zattention_kwargs z are not expected by z and will be ignored.c                    s   i | ]\}}| v r||qS r+   r+   )r   r   w)r   r+   r,   
<dictcomp>  s    z)BriaAttention.forward.<locals>.<dictcomp>)setinspect	signaturer   ru   
parameterskeysitemslenloggerwarningr[   r\   )r^   r#   r$   r`   ra   kwargsunused_kwargsr+   r   r,   forward   s   zBriaAttention.forward)rz   r{   r|   FNTTr}   NNFTNrv   )r\   rw   rx   rW   r   _available_processorsr?   rH   boolr_   r@   ry   r   __classcell__r+   r+   r   r,   r      st    	
9c                       @   e Zd Zdedee f fddZdejdejfddZ  Z	S )	BriaEmbedNDr:   axes_dimc                       t    || _|| _d S r   r   r_   r:   r   r^   r:   r   r   r+   r,   r_        

zBriaEmbedND.__init__idsrb   c              	   C      |j d }g }g }| }|jjdk}|rtjntj}t|D ]"}t| j	| |d d |f | j
dd|d\}	}
||	 ||
 qtj|dd|j}tj|dd|j}||fS Nr.   mpsT)r:   rP   rM   rQ   r/   rr   rH   r<   typer@   float32float64rangerV   r   r:   r   rJ   rp   r^   r   n_axescos_outsin_outr9   is_mpsrQ   irF   rI   rS   rT   r+   r+   r,   r     (   


zBriaEmbedND.forward
r\   rw   rx   r?   listr_   r@   ry   r   r   r+   r+   r   r,   r         r   c                	       s:   e Zd Z	ddedededef fddZd	d
 Z  ZS )BriaTimestepsr=   '  num_channelsflip_sin_to_cosdownscale_freq_shiftscalec                    s,   t    || _|| _|| _|| _|| _d S r   )r   r_   r   r   r   r   
time_theta)r^   r   r   r   r   r   r   r+   r,   r_   *  s   

zBriaTimesteps.__init__c                 C   s"   t || j| j| j| j| jd}|S )N)r   r   r   
max_period)r   r   r   r   r   r   )r^   	timestepst_embr+   r+   r,   r   4  s   zBriaTimesteps.forward)r=   r   )	r\   rw   rx   r?   r   rH   r_   r   r   r+   r+   r   r,   r   )  s    
r   c                       s$   e Zd Z fddZdd Z  ZS )BriaTimestepProjEmbeddingsc                    s.   t    tddd|d| _td|d| _d S )N   Tr   )r   r   r   r   )in_channelstime_embed_dim)r   r_   r   	time_projr   timestep_embedder)r^   embedding_dimr   r   r+   r,   r_   A  s
   
z#BriaTimestepProjEmbeddings.__init__c                 C   s    |  |}| |j|d}|S )Nr;   )r   r   rp   )r^   timestepr;   timesteps_projtimesteps_embr+   r+   r,   r   I  s   
z"BriaTimestepProjEmbeddings.forward)r\   rw   rx   r_   r   r   r+   r+   r   r,   r   @  s    r   c                       r   )	BriaPosEmbedr:   r   c                    r   r   r   r   r   r+   r,   r_   Q  r   zBriaPosEmbed.__init__r   rb   c              	   C   r   r   r   r   r+   r+   r,   r   V  r   zBriaPosEmbed.forwardr   r+   r+   r   r,   r   O  r   r   c                       s   e Zd Z	ddededededef
 fdd	Z	
	
ddejdejdejde	ejejf d
B de
eef d
B de	ejejf fddZ  ZS )BriaTransformerBlockrms_normư>r0   num_attention_headsattention_head_dimqk_normr   c                    s   t    t|| _t|| _t|||||ddt |d	| _tj	|ddd| _
t||dd| _tj	|ddd| _t||dd| _d S )NFT)	r~   r   r   rh   r   r   r   r   r   r   r   r   zgelu-approximate)r0   dim_outactivation_fn)r   r_   r   norm1norm1_contextr   rW   r   r   	LayerNormnorm2r   ffnorm2_context
ff_context)r^   r0   r   r   r   r   r   r+   r,   r_   o  s$   


zBriaTransformerBlock.__init__Nr#   r$   tembra   attention_kwargsrb   c                 C   s|  | j ||d\}}}}	}
| j||d\}}}}}|pi }| jd|||d|}t|dkr3|\}}nt|dkr>|\}}}|d| }|| }| |}|d|	d d d f   |d d d f  }| |}|
d| }|| }t|dkr||| }|d| }|| }| |}|d|d d d f   |d d d f  }| |}||d|  }|j	t
jkr|dd}||fS )	Nemb)r#   r$   ra   r   r   r=       r+   )r   r   r   r   	unsqueezer   r   r   r   r;   r@   float16clip)r^   r#   r$   r   ra   r   norm_hidden_statesgate_msa	shift_mlp	scale_mlpgate_mlpnorm_encoder_hidden_states
c_gate_msac_shift_mlpc_scale_mlp
c_gate_mlpattention_outputsattn_outputcontext_attn_outputip_attn_output	ff_outputcontext_ff_outputr+   r+   r,   r     sD   


(

(
zBriaTransformerBlock.forward)r   r   NN)r\   rw   rx   r?   strrH   r_   r@   ry   tupledictr   r   r   r+   r+   r   r,   r   m  s8    r   c                       s   e Zd Zddedededef fddZ		dd	ejd
ejdejdeejejf dB de	e
ef dB deejejf fddZ  ZS )BriaSingleTransformerBlock      @r0   r   r   	mlp_ratioc              
      sx   t    t|| | _t|| _t|| j| _tj	dd| _
t|| j || _t }t||||d|ddd| _d S )Ntanh)approximateTr   )r~   r   rh   r   r   r   r   r   )r   r_   r?   mlp_hidden_dimr   normr   r   proj_mlpGELUact_mlpproj_outrW   r   r   )r^   r0   r   r   r  r   r   r+   r,   r_     s"   

z#BriaSingleTransformerBlock.__init__Nr#   r$   r   ra   r   rb   c                 C   s   |j d }tj||gdd}|}| j||d\}}	| | |}
|p$i }| jd||d|}tj||
gdd}|	d}	|	| | }|| }|j	tj
krU|dd}|d d d |f |d d |d f }}||fS )	Nr=   r/   r   )r#   ra   r   r   r   r+   )rr   r@   rJ   r  r  r  r   r   r  r;   r   r   )r^   r#   r$   r   ra   r   text_seq_lenresidualr   gatemlp_hidden_statesr  r+   r+   r,   r     s(   

*z"BriaSingleTransformerBlock.forward)r  r  )r\   rw   rx   r?   rH   r_   r@   ry   r  r  r  r   r   r   r+   r+   r   r,   r    s"     r  c                       s   e Zd ZdZdZeddddddd	d
dg dddfdedededededededededee f fddZ	e
d	
	
	
	
	
	
	
		
	
d&dejdejdejdejdejd ejd!ejdeeef d
B d"ed#eej eB fd$d%Z  ZS )'BriaTransformer2DModela  
    The Transformer model introduced in Flux. Based on FluxPipeline with several changes:
    - no pooled embeddings
    - We use zero padding for prompts
    - No guidance embedding since this is not a distilled version
    Reference: https://blackforestlabs.ai/announcing-black-forest-labs/

    Parameters:
        patch_size (`int`): Patch size to turn the input data into small patches.
        in_channels (`int`, *optional*, defaults to 16): The number of channels in the input.
        num_layers (`int`, *optional*, defaults to 18): The number of layers of MMDiT blocks to use.
        num_single_layers (`int`, *optional*, defaults to 18): The number of layers of single DiT blocks to use.
        attention_head_dim (`int`, *optional*, defaults to 64): The number of channels in each head.
        num_attention_heads (`int`, *optional*, defaults to 18): The number of heads to use for multi-head attention.
        joint_attention_dim (`int`, *optional*): The number of `encoder_hidden_states` dimensions to use.
        pooled_projection_dim (`int`): Number of dimensions to use when projecting the `pooled_projections`.
        guidance_embeds (`bool`, defaults to False): Whether to use guidance embeddings.
    Tr=   r{      &         i   NF)   8   r$  r   
patch_sizer   
num_layersnum_single_layersr   r   joint_attention_dimpooled_projection_dimguidance_embedsaxes_dims_ropec                    s   t    | _ jj jj  _t||
d _t	 j|d _
|	r)t	 jd _t jj j _tj jj j _t fddt jjD  _t fddt jjD  _t j jddd	 _tj j||  j d
d _d _d S )N)r:   r   )r   r   )r   c                    $   g | ]}t  j jj jjd qS )r0   r   r   )r   r   configr   r   r   r   r]   r+   r,   r   .      z3BriaTransformer2DModel.__init__.<locals>.<listcomp>c                    r,  r-  )r  r   r.  r   r   r/  r]   r+   r,   r   9  r0  Fr   r   Tr   )r   r_   out_channelsr.  r   r   r   r   	pos_embedr   
time_embedguidance_embedr   r   r(  context_embedderr@   r   
x_embedderr   r   r&  transformer_blocksr'  single_transformer_blocksr   norm_outr  gradient_checkpointing)r^   r%  r   r&  r'  r   r   r(  r)  r*  r+  
rope_thetar   r   r]   r,   r_     s,   





zBriaTransformer2DModel.__init__r   r#   r$   pooled_projectionsr   img_idstxt_idsguidancereturn_dictrb   c              	   C   s  |  |}||j}|dur||j}nd}| j||jd}|r,|| j||jd7 }| |}t|jdkr<|d }t|jdkrG|d }tj	||fdd}| 
|}t| jD ]>\}}t rr| jrr| ||||||\}}n
|||||d\}}|
durt| jt|
 }tt|}||
||   }qZt| jD ]V\}}t r| jr| ||||||\}}n
|||||d\}}|durt| jt| }tt|}|dd|jd ddf |||   |dd|jd ddf< q| ||}| |}|	s|fS t|d	S )
a_  
        The [`BriaTransformer2DModel`] forward method.

        Args:
            hidden_states (`torch.FloatTensor` of shape `(batch size, channel, height, width)`):
                Input `hidden_states`.
            encoder_hidden_states (`torch.FloatTensor` of shape `(batch size, sequence_len, embed_dims)`):
                Conditional embeddings (embeddings computed from the input conditions such as prompts) to use.
            pooled_projections (`torch.FloatTensor` of shape `(batch_size, projection_dim)`): Embeddings projected
                from the embeddings of input conditions.
            timestep ( `torch.LongTensor`):
                Used to indicate denoising step.
            block_controlnet_hidden_states: (`list` of `torch.Tensor`):
                A list of tensors that if specified are added to the residuals of transformer blocks.
            attention_kwargs (`dict`, *optional*):
                A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under
                `self.processor` in
                [diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
            return_dict (`bool`, *optional*, defaults to `True`):
                Whether or not to return a [`~models.transformer_2d.Transformer2DModelOutput`] instead of a plain
                tuple.

        Returns:
            If `return_dict` is True, an [`~models.transformer_2d.Transformer2DModelOutput`] is returned, otherwise a
            `tuple` where the first element is the sample tensor.
        Nr   r   r   r/   )r#   r$   r   ra   r=   .)sample)r6  rp   r;   r3  r4  r5  r   rr   r@   rJ   r2  	enumerater7  is_grad_enabledr:  _gradient_checkpointing_funcr?   rB   ceilr8  r9  r  r   )r^   r#   r$   r<  r   r=  r>  r?  r   r@  controlnet_block_samplescontrolnet_single_block_samplesr   r   ra   index_blockblockinterval_controloutputr+   r+   r,   r   H  s   
)










zBriaTransformer2DModel.forward)
NNNNNNNTNN)r\   rw   rx   __doc__ _supports_gradient_checkpointingr   r?   r   r   r_   r   r@   ry   
LongTensorr  r  r   r  r   r   r   r+   r+   r   r,   r    s    	
7	
r  r   )r   r   )>r   typingr   numpyrB   r@   torch.nnr   torch.nn.functional
functionalrY   configuration_utilsr   r   loadersr   r   utilsr   r	   utils.torch_utilsr
   	attentionr   r   attention_dispatchr   cache_utilsr   
embeddingsr   r   r   modeling_outputsr   modeling_utilsr   normalizationr   r   r   
get_loggerr\   r   r-   r5   r7   r   r?   rC   rH   rV   rW   Moduler   r   r   r   r   r   r  r  r+   r+   r+   r,   <module>   sZ    

	
BCOT7