o
    GiD(                 7   @   s  d dl mZ d dlZd dlZd dlm  mZ d dlmZ ddl	m
Z
mZ ddlmZ ddlmZ dd	lmZmZmZ dd
lmZ ddlmZmZmZmZmZmZmZmZ ddlm Z  ddl!m"Z" e#e$Z%																	dqde&de'de'de'de'de(de)de&de'de'dB de'dB de'dB de'dB d e(d!e(d"e(d#e(d$e&d%e&d&e(d'e)d(e&dB d)e'dB d*e&dB d+e)f2d,d-Z*														drd.e&de'de'de)de&de'd/e)de'de'dB de'dB d e(d!e(d0e(d#e(d$e&d%e&d&e(d(e&dB d)e'dB d+e)f(d1d2Z+																	dsd3e&de'de'de'd4e'de'd5e(de)de&d6e'dB de'de'dB de'dB de'dB d e(d!e(d"e(d#e(d$e&d%e&d&e(d'e)d(e&dB d)e'dB d7e&dB d+e)d8ej,f6d9d:Z-G d;d< d<ej,Z.G d=d> d>ej,Z/G d?d@ d@ej,Z0G dAdB dBej,Z1G dCdD dDej,Z2G dEdF dFej,Z3G dGdH dHej,Z4G dIdJ dJej,Z5G dKdL dLej,Z6G dMdN dNej,Z7G dOdP dPej,Z8G dQdR dRej,Z9G dSdT dTej,Z:G dUdV dVej,Z;G dWdX dXej,Z<G dYdZ dZej,Z=G d[d\ d\ej,Z>G d]d^ d^ej,Z?G d_d` d`ej,Z@G dadb dbej,ZAG dcdd ddej,ZBG dedf dfej,ZCG dgdh dhej,ZDG didj djej,ZEG dkdl dlej,ZFG dmdn dnej,ZGG dodp dpej,ZHdS )t    )AnyN)nn   )	deprecatelogging)apply_freeu   )get_activation)	AttentionAttnAddedKVProcessorAttnAddedKVProcessor2_0)AdaGroupNorm)Downsample2DFirDownsample2DFirUpsample2DKDownsample2DKUpsample2DResnetBlock2DResnetBlockCondNorm2D
Upsample2D)DualTransformer2DModel)Transformer2DModel   Fdefault      ?        down_block_type
num_layersin_channelsout_channelstemb_channelsadd_downsample
resnet_epsresnet_act_fntransformer_layers_per_blocknum_attention_headsresnet_groupscross_attention_dimdownsample_paddingdual_cross_attentionuse_linear_projectiononly_cross_attentionupcast_attentionresnet_time_scale_shiftattention_typeresnet_skip_time_actresnet_out_scale_factorcross_attention_normattention_head_dimdownsample_typedropoutc                 C   s  |d u rt d|	 d |	}| dr| dd  n| } | dkr/t|||||||||
||dS | dkrCt|||||||||
|||dS | d	krb|d
u rNd }n|pQd}t||||||||
||||dS | dkr|d u rntdtd6i d|d|d|d|d|d|d|d|d|d|
d|d|d|	d|d|d|d|d |d!|S | d"kr|d u rtd#td6i d|d|d|d|d|d|d|d|d|
d|d$|d |d%|d&|d|d'|S | d(krt	||||||||||d)
S | d*krt
||||||||||d+
S | d,kr%t||||||||
||d-
S | d.kr9t||||||||
|||d/S | d0krJt||||||||d1S | d2kret|||||||||||sad3d4S d
d4S t|  d5)7NztIt is recommended to provide `attention_head_dim` when calling `get_down_block`. Defaulting `attention_head_dim` to .UNetRes   DownBlock2D)r   r   r   r    r4   r!   r"   r#   r&   r(   r-   ResnetDownsampleBlock2D)r   r   r   r    r4   r!   r"   r#   r&   r-   skip_time_actoutput_scale_factorAttnDownBlock2DFconv)r   r   r   r    r4   r"   r#   r&   r(   r2   r-   r3   CrossAttnDownBlock2Dz>cross_attention_dim must be specified for CrossAttnDownBlock2Dr   r$   r   r   r    r4   r!   r"   r#   r&   r(   r'   r%   r)   r*   r+   r,   r-   r.   SimpleCrossAttnDownBlock2DzDcross_attention_dim must be specified for SimpleCrossAttnDownBlock2Dr2   r:   r;   r1   SkipDownBlock2D)
r   r   r   r    r4   r!   r"   r#   r(   r-   AttnSkipDownBlock2D)
r   r   r   r    r4   r!   r"   r#   r2   r-   DownEncoderBlock2D)
r   r   r   r4   r!   r"   r#   r&   r(   r-   AttnDownEncoderBlock2D)r   r   r   r4   r!   r"   r#   r&   r(   r2   r-   KDownBlock2D)r   r   r   r    r4   r!   r"   r#   KCrossAttnDownBlock2DT)r   r   r   r    r4   r!   r"   r#   r'   r2   add_self_attention does not exist. )loggerwarning
startswithr8   r9   r<   
ValueErrorr>   r?   r@   rA   rB   rC   rD   rE   )r   r   r   r   r    r!   r"   r#   r$   r%   r&   r'   r(   r)   r*   r+   r,   r-   r.   r/   r0   r1   r2   r3   r4   rH   rH   Y/home/ubuntu/.local/lib/python3.10/site-packages/diffusers/models/unets/unet_2d_blocks.pyget_down_block+   s  
	
	






rN   mid_block_typer;   mid_block_only_cross_attentionc                 C   s   | dkrt |||||||||	|||
|||dS | dkr,t|||||||	||||||dS | dkr>t|||d|||||dd
S | d u rDd S td	|  )
NUNetMidBlock2DCrossAttn)r$   r   r    r4   r"   r#   r;   r-   r'   r%   r&   r)   r*   r,   r.   UNetMidBlock2DSimpleCrossAttn)r   r    r4   r"   r#   r;   r'   r2   r&   r-   r:   r+   r1   UNetMidBlock2Dr   F)
r   r    r4   r   r"   r#   r;   r&   r-   add_attentionzunknown mid_block_type : )rQ   rR   rS   rL   )rO   r    r   r"   r#   r&   r;   r$   r%   r'   r)   r*   rP   r,   r-   r.   r/   r1   r2   r4   rH   rH   rM   get_mid_block   sd   rU   up_block_typeprev_output_channeladd_upsampleresolution_idxupsample_typereturnc                 C   s  |d u rt d| d |}| dr| dd  n| } | dkr0t||||||	||||||dS | dkrFt||||||	||||||||dS | d	kr|d u rRtd
td6i d|d|
d|d|d|d|d|	d|d|d|d|d|d|d|d|d|d|d|d|d|S | dkr|d u rtd td6i d|d|d|d|d|d|	d|d|d|d|d|d|d!|d|d"|d#|d|d$|S | d%kr|d&u rd }n|pd'}t||||||	|||||||d(S | d)krt	||||||	|||||d*S | d+kr#t
||||||	||||||d,S | d-kr7t||||	|||||||d.S | d/krLt||||	||||||||d0S | d1kr^t|||||	||||d2	S | d3krrt|||||	||||||d4S t|  d5)7NzrIt is recommended to provide `attention_head_dim` when calling `get_up_block`. Defaulting `attention_head_dim` to r5   r6   r7   	UpBlock2D)r   r   r   rW   r    rY   r4   rX   r"   r#   r&   r-   ResnetUpsampleBlock2D)r   r   r   rW   r    rY   r4   rX   r"   r#   r&   r-   r:   r;   CrossAttnUpBlock2Dz<cross_attention_dim must be specified for CrossAttnUpBlock2Dr   r$   r   r   rW   r    rY   r4   rX   r"   r#   r&   r'   r%   r)   r*   r+   r,   r-   r.   SimpleCrossAttnUpBlock2DzBcross_attention_dim must be specified for SimpleCrossAttnUpBlock2Dr2   r:   r;   r1   AttnUpBlock2DFr=   )r   r   r   rW   r    rY   r4   r"   r#   r&   r2   r-   rZ   SkipUpBlock2D)r   r   r   rW   r    rY   r4   rX   r"   r#   r-   AttnSkipUpBlock2D)r   r   r   rW   r    rY   r4   rX   r"   r#   r2   r-   UpDecoderBlock2D)r   r   r   rY   r4   rX   r"   r#   r&   r-   r    AttnUpDecoderBlock2D)r   r   r   rY   r4   rX   r"   r#   r&   r2   r-   r    
KUpBlock2D)	r   r   r   r    rY   r4   rX   r"   r#   KCrossAttnUpBlock2D)r   r   r   r    rY   r4   rX   r"   r#   r'   r2   rG   rH   )rI   rJ   rK   r\   r]   rL   r^   r_   r`   ra   rb   rc   rd   re   rf   )rV   r   r   r   rW   r    rX   r"   r#   rY   r$   r%   r&   r'   r)   r*   r+   r,   r-   r.   r/   r0   r1   r2   rZ   r4   rH   rH   rM   get_up_blockG  s  
	
	






rg   c                       sD   e Zd ZdZdededef fddZdejdejfd	d
Z	  Z
S )AutoencoderTinyBlocka*  
    Tiny Autoencoder block used in [`AutoencoderTiny`]. It is a mini residual module consisting of plain conv + ReLU
    blocks.

    Args:
        in_channels (`int`): The number of input channels.
        out_channels (`int`): The number of output channels.
        act_fn (`str`):
            ` The activation function to use. Supported values are `"swish"`, `"mish"`, `"gelu"`, and `"relu"`.

    Returns:
        `torch.Tensor`: A tensor with the same shape as the input tensor, but with the number of channels equal to
        `out_channels`.
    r   r   act_fnc                    s   t    t|}ttj||ddd|tj||ddd|tj||ddd| _||kr5tj||dddnt | _t	 | _
d S )Nr   r   )kernel_sizepaddingF)rj   bias)super__init__r	   r   
SequentialConv2dr=   IdentityskipReLUfuse)selfr   r   ri   	__class__rH   rM   rn   8  s   
	zAutoencoderTinyBlock.__init__xr[   c                 C   s   |  | || | S N)rt   r=   rr   )ru   rx   rH   rH   rM   forwardI  s   zAutoencoderTinyBlock.forward)__name__
__module____qualname____doc__intstrrn   torchTensorrz   __classcell__rH   rH   rv   rM   rh   (  s    rh   c                       s   e Zd ZdZ													
ddedededededededededB dedededef fddZd de	j
de	j
dB de	j
fddZ  ZS )!rS   a7  
    A 2D UNet mid-block [`UNetMidBlock2D`] with multiple residual blocks and optional attention blocks.

    Args:
        in_channels (`int`): The number of input channels.
        temb_channels (`int`): The number of temporal embedding channels.
        dropout (`float`, *optional*, defaults to 0.0): The dropout rate.
        num_layers (`int`, *optional*, defaults to 1): The number of residual blocks.
        resnet_eps (`float`, *optional*, 1e-6 ): The epsilon value for the resnet blocks.
        resnet_time_scale_shift (`str`, *optional*, defaults to `default`):
            The type of normalization to apply to the time embeddings. This can help to improve the performance of the
            model on tasks with long-range temporal dependencies.
        resnet_act_fn (`str`, *optional*, defaults to `swish`): The activation function for the resnet blocks.
        resnet_groups (`int`, *optional*, defaults to 32):
            The number of groups to use in the group normalization layers of the resnet blocks.
        attn_groups (`int | None`, *optional*, defaults to None): The number of groups for the attention blocks.
        resnet_pre_norm (`bool`, *optional*, defaults to `True`):
            Whether to use pre-normalization for the resnet blocks.
        add_attention (`bool`, *optional*, defaults to `True`): Whether to add attention blocks.
        attention_head_dim (`int`, *optional*, defaults to 1):
            Dimension of a single attention head. The number of attention heads is determined based on this value and
            the number of input channels.
        output_scale_factor (`float`, *optional*, defaults to 1.0): The output scale factor.

    Returns:
        `torch.Tensor`: The output of the last residual block, which is a tensor of shape `(batch_size, in_channels,
        height, width)`.

    r   r   ư>r   swish    NTr   r   r    r4   r   r"   r-   r#   r&   attn_groupsresnet_pre_normrT   r2   r;   c                    sj  t    |d ur|nt|d d}|| _|	d u r!|dkr|nd }	|dkr4t||||||d||d	g}nt||||||||||
d
g}g }|d u rTtd| d |}t|D ]K}| jrx|	t
||| ||||	|dkrn|nd d	d	d	d	d
 n|	d  |dkr|	t||||||d||d	 qX|	t||||||||||
d
 qXt|| _t|| _d| _d S )N   r   r   spatial	r   r   r    epsgroupsr4   time_embedding_normnon_linearityr;   
r   r   r    r   r   r4   r   r   r;   pre_normiIt is not recommend to pass `attention_head_dim=None`. Defaulting `attention_head_dim` to `in_channels`: r5   T
headsdim_headrescale_output_factorr   norm_num_groupsspatial_norm_dimresidual_connectionrl   upcast_softmax_from_deprecated_attn_blockF)rm   rn   minrT   r   r   rI   rJ   rangeappendr
   r   
ModuleList
attentionsresnetsgradient_checkpointing)ru   r   r    r4   r   r"   r-   r#   r&   r   r   rT   r2   r;   r   r   _rv   rH   rM   rn   l  s   



zUNetMidBlock2D.__init__hidden_statestembr[   c                 C   s   | j d ||}t| j| j dd  D ],\}}t r0| jr0|d ur(|||d}| |||}q|d ur:|||d}|||}q|S )Nr   r   r   )r   zipr   r   is_grad_enabledr   _gradient_checkpointing_func)ru   r   r   attnresnetrH   rH   rM   rz     s   zUNetMidBlock2D.forward)r   r   r   r   r   r   NTTr   r   ry   )r{   r|   r}   r~   r   floatr   boolrn   r   r   rz   r   rH   rH   rv   rM   rS   M  sR    "	
*trS   c                '       s   e Zd Z														
				d*dedededB dededeee B dedededededB dedededededededef& fdd Z					d+d!e	j
d"e	j
dB d#e	j
dB d$e	j
dB d%eeef dB d&e	j
dB d'e	j
fd(d)Z  ZS ),rQ   Nr   r   r   r   r   r   Tr      Fr   r    r   r4   r   r$   r"   r-   r#   r&   resnet_groups_outr   r%   r;   r'   r)   r*   r,   r.   c                    s"  t    |p|}|| _|| _d| _|| _|
d ur|
nt|d d}
t|tr,|g| }|p/|
}t	|||||
||||	||dg}g }t
|D ]9}|s_|t||| ||| |||||d	 n|t||| |d||
d |t	||||||||	||d
 qFt|| _t|| _d	| _d S )
NTr   r   r   r   r    r   r   
groups_outr4   r   r   r;   r   )r   r   r'   r   r*   r,   r.   r   r   r   r'   r   r   F)rm   rn   r   r   has_cross_attentionr%   r   
isinstancer   r   r   r   r   r   r   r   r   r   r   )ru   r   r    r   r4   r   r$   r"   r-   r#   r&   r   r   r%   r;   r'   r)   r*   r,   r.   r   r   irv   rH   rM   rn     s   




z UNetMidBlock2DCrossAttn.__init__r   r   encoder_hidden_statesattention_maskcross_attention_kwargsencoder_attention_maskr[   c           	   	   C   s   |d ur| dd d urtd | jd ||}t| j| jdd  D ]0\}}t rC| jrC||||||ddd }| 	|||}q$||||||ddd }|||}q$|S )NscaleSPassing `scale` to `cross_attention_kwargs` is deprecated. `scale` will be ignored.r   r   Fr   r   r   r   return_dict)
getrI   rJ   r   r   r   r   r   r   r   )	ru   r   r   r   r   r   r   r   r   rH   rH   rM   rz   V  s:   	
zUNetMidBlock2DCrossAttn.forward)Nr   r   r   r   r   r   r   NTr   r   r   FFFr   NNNNNr{   r|   r}   r   r   tupler   r   rn   r   r   dictr   rz   r   rH   rH   rv   rM   rQ     s    
	
irQ   c                        s   e Zd Z												
	
	d&dedededededededededededededededB f fddZ					d'dej	dej	dB dej	dB d ej	dB d!e
eef dB d"ej	dB d#ej	fd$d%Z  ZS )(rR   r   r   r   r   r   r   Tr   r   FNr   r    r4   r   r"   r-   r#   r&   r   r2   r;   r'   r:   r+   r1   c                    s   t    d| _|
| _|d ur|nt|d d}|| j | _t||||||||||	|dg}g }t|D ]3}tt	dr>t
 nt }|t||| j| j||dd|||d |t||||||||||	|d q4t|| _t|| _d S )NTr   r   r   r   r    r   r   r4   r   r   r;   r   r:   scaled_dot_product_attention	query_dimr'   r   r   added_kv_proj_dimr   rl   r   r+   r1   	processor)rm   rn   r   r2   r   	num_headsr   r   hasattrFr   r   r   r
   r   r   r   r   )ru   r   r    r4   r   r"   r-   r#   r&   r   r2   r;   r'   r:   r+   r1   r   r   r   r   rv   rH   rM   rn   ~  sn   
z&UNetMidBlock2DSimpleCrossAttn.__init__r   r   r   r   r   r   r[   c           
      C   s   |d ur|ni }| dd d urtd |d u r"|d u rd n|}n|}| jd ||}t| j| jdd  D ]\}}	||f||d|}|	||}q7|S )Nr   r   r   r   r   r   )r   rI   rJ   r   r   r   )
ru   r   r   r   r   r   r   maskr   r   rH   rH   rM   rz     s$   	
z%UNetMidBlock2DSimpleCrossAttn.forward)r   r   r   r   r   r   Tr   r   r   FFNr   )r{   r|   r}   r   r   r   r   rn   r   r   r   r   rz   r   rH   rH   rv   rM   rR   }  s    	
WrR   c                       s   e Zd Z												d#d
edededededededededededededef fddZ			d$dej	dej	dB dedB de
eef dB deej	eej	d f f f
d!d"Z  ZS )%r<   r   r   r   r   r   r   Tr   r=   r   r   r    r4   r   r"   r-   r#   r&   r   r2   r;   r(   r3   c                    s  t    g }g }|| _|d u rtd| d |}t|D ].}|dkr'|n|}|t|||||	|||||
d
 |t||| ||||	ddddd
 qt	
|| _t	
|| _|dkrmt	
t|d||dd	g| _n|d
krt	
t|||||	|||||
ddg| _nd | _d| _d S )Nr   r5   r   r   T	r   r   r   r   r   r   rl   r   r   r=   opuse_convr   rk   namer   )r   r   r    r   r   r4   r   r   r;   r   downF)rm   rn   r3   rI   rJ   r   r   r   r
   r   r   r   r   r   downsamplersr   )ru   r   r   r    r4   r   r"   r-   r#   r&   r   r2   r;   r(   r3   r   r   r   rv   rH   rM   rn     s   



zAttnDownBlock2D.__init__Nr   r   upsample_sizer   r[   .c           	      C   s   |d ur|ni }| dd d urtd d}t| j| jD ]2\}}t r>| jr>| 	|||}||fi |}||f }q|||}||fi |}||f }q| j
d urq| j
D ]}| jdkrg|||d}qY||}qY||f7 }||fS )Nr   r   rH   r   r   )r   rI   rJ   r   r   r   r   r   r   r   r   r3   )	ru   r   r   r   r   output_statesr   r   downsamplerrH   rH   rM   rz   X  s&   






zAttnDownBlock2D.forward)r   r   r   r   r   r   Tr   r   r   r=   )NNNr{   r|   r}   r   r   r   r   rn   r   r   r   r   r   rz   r   rH   rH   rv   rM   r<     sn    	
`r<   c                +       s  e Zd Z															
	
	
	
	d.dedededededeee B dededededededededededededededef* fd d!Z	"	"	"	"	"	"d/d#e	j
d$e	j
d"B d%e	j
d"B d&e	j
d"B d'eeef d"B d(e	j
d"B d)e	j
d"B d*ee	j
ee	j
d+f f fd,d-Z  ZS )0r>   r   r   r   r   r   r   Tr   r   Fr   r   r    r4   r   r$   r"   r-   r#   r&   r   r%   r'   r;   r(   r!   r)   r*   r+   r,   r.   c                    s
  t    g }g }d| _|| _t|tr|g| }t|D ]B}|dkr%|n|}|t|||||
|||	||d
 |sP|t	||| ||| ||
||||d
 q|t
||| |d||
d qt|| _t|| _|r}tt|d||ddg| _nd | _d	| _d S )
NTr   r   r   r   r'   r   r*   r+   r,   r.   r   r   r   r   F)rm   rn   r   r%   r   r   r   r   r   r   r   r   r   r   r   r   r   r   )ru   r   r   r    r4   r   r$   r"   r-   r#   r&   r   r%   r'   r;   r(   r!   r)   r*   r+   r,   r.   r   r   r   rv   rH   rM   rn   |  sv   





zCrossAttnDownBlock2D.__init__Nr   r   r   r   r   r   additional_residualsr[   .c              	   C   s   |d ur| dd d urtd d}tt| j| j}	t|	D ]G\}
\}}t	 rA| j
rA| |||}||||||ddd }n|||}||||||ddd }|
t|	d krb|d urb|| }||f }q | jd ur|| jD ]}||}qp||f }||fS )Nr   r   rH   Fr   r   r   )r   rI   rJ   listr   r   r   	enumerater   r   r   r   lenr   )ru   r   r   r   r   r   r   r   r   blocksr   r   r   r   rH   rH   rM   rz     sJ   


	




zCrossAttnDownBlock2D.forward)r   r   r   r   r   r   r   Tr   r   r   r   TFFFFr   NNNNNNr   rH   rH   rv   rM   r>   {  s    
	
^	r>   c                       s   e Zd Z										dd	ed
edededededededededededef fddZ	d dej	dej	dB de
ej	e
ej	df f fddZ  ZS )!r8   r   r   r   r   r   r   Tr   r   r   r    r4   r   r"   r-   r#   r&   r   r;   r!   r(   c                    s   t    g }t|D ]}|dkr|n|}|t|||||	|||||
d
 qt|| _|r>tt|d||ddg| _	nd | _	d| _
d S )Nr   r   Tr   r   F)rm   rn   r   r   r   r   r   r   r   r   r   )ru   r   r   r    r4   r   r"   r-   r#   r&   r   r;   r!   r(   r   r   rv   rH   rM   rn     s8   


zDownBlock2D.__init__Nr   r   r[   .c           	      O   s   t |dks|dd d urd}tdd| d}| jD ]}t r,| jr,| |||}n|||}||f }q| jd urK| jD ]}||}q?||f }||fS Nr   r   The `scale` argument is deprecated and will be ignored. Please remove it, as passing it will raise an error in the future. `scale` should directly be passed while calling the underlying pipeline component i.e., via `cross_attention_kwargs`.1.0.0rH   	r   r   r   r   r   r   r   r   r   	ru   r   r   argskwargsdeprecation_messager   r   r   rH   rH   rM   rz   B  s   





zDownBlock2D.forward
r   r   r   r   r   r   Tr   Tr   ry   r{   r|   r}   r   r   r   r   rn   r   r   r   rz   r   rH   rH   rv   rM   r8     s\    	
4r8   c                       sz   e Zd Z										dd	ed
ededededededededededef fddZdej	dej	fddZ
  ZS )rB   r   r   r   r   r   r   Tr   r   r   r4   r   r"   r-   r#   r&   r   r;   r!   r(   c                    s   t    g }t|D ]0}|dkr|n|}|dkr*|t||d |||d||
d	 q|t||d ||||||
|	d
 qt|| _|rTtt	|d||ddg| _
d S d | _
d S )Nr   r   r   r   Tr   r   )rm   rn   r   r   r   r   r   r   r   r   r   )ru   r   r   r4   r   r"   r-   r#   r&   r   r;   r!   r(   r   r   rv   rH   rM   rn   ]  sR   



zDownEncoderBlock2D.__init__r   r[   c                 O   sf   t |dks|dd d urd}tdd| | jD ]}||d d}q| jd ur1| jD ]}||}q*|S Nr   r   r   r   r   )r   r   r   r   r   )ru   r   r   r   r   r   r   rH   rH   rM   rz     s   



zDownEncoderBlock2D.forwardr   r{   r|   r}   r   r   r   r   rn   r   r   rz   r   rH   rH   rv   rM   rB   \  sJ    	
?rB   c                       s   e Zd Z											dd	ed
edededededededededededef fddZdej	dej	fddZ
  ZS )rC   r   r   r   r   r   r   Tr   r   r   r4   r   r"   r-   r#   r&   r   r2   r;   r!   r(   c                    s  t    g }g }|
d u rtd| d |}
t|D ]C}|dkr$|n|}|dkr;|t||d |||d||d	 n|t||d |||||||	d
 |t|||
 |
|||ddddd
 qt	
|| _t	
|| _|r~t	
t|d||d	d
g| _d S d | _d S )Nr   r5   r   r   r   r   Tr   r   r   )rm   rn   rI   rJ   r   r   r   r   r
   r   r   r   r   r   r   )ru   r   r   r4   r   r"   r-   r#   r&   r   r2   r;   r!   r(   r   r   r   rv   rH   rM   rn     s|   




zAttnDownEncoderBlock2D.__init__r   r[   c                 O   sz   t |dks|dd d urd}tdd| t| j| jD ]\}}||d d}||}q| jd ur;| jD ]}||}q4|S r   )r   r   r   r   r   r   r   )ru   r   r   r   r   r   r   r   rH   rH   rM   rz     s   



zAttnDownEncoderBlock2D.forward)r   r   r   r   r   r   Tr   r   Tr   r   rH   rH   rv   rM   rC     sP    	
VrC   c                       s   e Zd Zdddddddeddf	ded	ed
edededededededededef fddZ			dde
jde
jdB de
jdB dee
jee
jdf e
jf fddZ  ZS )rA   r   r   r   r   r   T       @r   r   r    r4   r   r"   r-   r#   r   r2   r;   r!   c                    s6  t    tg | _tg | _|
d u r td| d |}
t|D ];}|dkr,|n|}| j	t
||||t|d dt|d d|||||	d | j	t|||
 |
||dddddd
 q$|rt
||||t|d d|||||	ddd	d
| _tt||dg| _tjd|ddd| _d S d | _d | _d | _d S )Nr   r5   r   r   r   r   Tr   firr   r   r    r   r   r4   r   r   r;   r   use_in_shortcutr   kernelr   r   r   r   rj   stride)rm   rn   r   r   r   r   rI   rJ   r   r   r   r   r
   resnet_downr   r   rp   	skip_conv)ru   r   r   r    r4   r   r"   r-   r#   r   r2   r;   r!   r   rv   rH   rM   rn     sx   


zAttnSkipDownBlock2D.__init__Nr   r   skip_sampler[   .c                 O   s   t |dks|dd d urd}tdd| d}t| j| jD ]\}}	|||}|	|}||f7 }q| jd urS| ||}| jD ]}
|
|}q@| || }||f7 }|||fS r   )	r   r   r   r   r   r   r   r   r   )ru   r   r   r   r   r   r   r   r   r   r   rH   rH   rM   rz   c  s   





zAttnSkipDownBlock2D.forwardNNr{   r|   r}   npsqrtr   r   r   r   rn   r   r   r   rz   r   rH   rH   rv   rM   rA     s\    	
SrA   c                       s   e Zd Zddddddedddf	ded	ed
edededededededededef fddZ			dde
jde
jdB de
jdB dee
jee
jdf e
jf fddZ  ZS )r@   r   r   r   r   r   Tr   r   r   r    r4   r   r"   r-   r#   r   r;   r!   r(   c                    s   t    tg | _t|D ]'}|dkr|n|}| jt||||t|d dt|d d||||
|	d q|rgt||||t|d d||||
|	dddd| _	tt
||dg| _tjd	|d
d
d| _d S d | _	d | _d | _d S )Nr   r   r   r   Tr   r   r   r   r   r   )rm   rn   r   r   r   r   r   r   r   r   r   r   rp   r   )ru   r   r   r    r4   r   r"   r-   r#   r   r;   r!   r(   r   rv   rH   rM   rn     sP   

zSkipDownBlock2D.__init__Nr   r   r   r[   .c           
      O   s   t |dks|dd d urd}tdd| d}| jD ]}|||}||f7 }q| jd urI| ||}| jD ]}	|	|}q6| || }||f7 }|||fS r   )r   r   r   r   r   r   r   )
ru   r   r   r   r   r   r   r   r   r   rH   rH   rM   rz     s   






zSkipDownBlock2D.forwardr   r   rH   rH   rv   rM   r@     s\    	
>r@   c                       s   e Zd Z											d d
ededededededededededededef fddZ	d!dej	dej	dB de
ej	e
ej	df f fddZ  ZS )"r9   r   r   r   r   r   r   Tr   Fr   r   r    r4   r   r"   r-   r#   r&   r   r;   r!   r:   c                    s   t    g }t|D ]}|dkr|n|}|t|||||	|||||
|d qt|| _|rFtt|||||	|||||
|ddg| _nd | _d| _	d S )Nr   r   Tr   r   r    r   r   r4   r   r   r;   r   r:   r   F)
rm   rn   r   r   r   r   r   r   r   r   )ru   r   r   r    r4   r   r"   r-   r#   r&   r   r;   r!   r:   r   r   rv   rH   rM   rn     sP   

z ResnetDownsampleBlock2D.__init__Nr   r   r[   .c           	      O   s   t |dks|dd d urd}tdd| d}| jD ]}t r,| jr,| |||}n|||}||f }q| jd urL| jD ]}|||}q?||f }||fS r   r   r   rH   rH   rM   rz     s   




zResnetDownsampleBlock2D.forward)
r   r   r   r   r   r   Tr   TFry   r   rH   rH   rv   rM   r9     s\    	
@r9   c                $       s   e Zd Z													
	
	d)dedededededededededededededededededB f" fddZ					d*dej	d ej	dB d!ej	dB d"ej	dB d#e
eef dB d$ej	dB d%eej	eej	d&f f fd'd(Z  ZS )+r?   r   r   r   r   r   r   Tr   r   FNr   r   r    r4   r   r"   r-   r#   r&   r   r2   r'   r;   r!   r:   r+   r1   c                    s   t    d| _g }g }|| _|| j | _t|D ]:}|dkr!|n|}|t|||||	|||||
|d tt	dr=t
 nt }|t||| j|||	dd|||d qt|| _t|| _|rxtt|||||	|||||
|ddg| _nd | _d| _d S )NTr   r   r   r   r   F)rm   rn   r   r2   r   r   r   r   r   r   r   r   r
   r   r   r   r   r   r   )ru   r   r   r    r4   r   r"   r-   r#   r&   r   r2   r'   r;   r!   r:   r+   r1   r   r   r   r   rv   rH   rM   rn   7  s|   

z#SimpleCrossAttnDownBlock2D.__init__r   r   r   r   r   r   r[   .c                 C   s   |d ur|ni }| dd d urtd d}|d u r$|d u r!d n|}n|}t| j| jD ]3\}	}
t rK| jrK| 	|	||}|
|f||d|}n|	||}|
|f||d|}||f }q-| j
d urv| j
D ]}|||}qi||f }||fS )Nr   r   rH   r   r   rI   rJ   r   r   r   r   r   r   r   r   )ru   r   r   r   r   r   r   r   r   r   r   r   rH   rH   rM   rz     sB   	




z"SimpleCrossAttnDownBlock2D.forward)r   r   r   r   r   r   Tr   r   r   TFFNr   r   rH   rH   rv   rM   r?   6  s    	
ar?   c                       s   e Zd Z						ddeded	ed
edededededef fddZ	ddej	dej	dB de
ej	e
ej	df f fddZ  ZS )rD   r   r   h㈵>gelur   Fr   r   r    r4   r   r"   r#   resnet_group_sizer!   c
                    s   t    g }
t|D ]#}|dkr|n|}|| }|| }|
t||||||||ddd
 qt|
| _|	r@tt g| _	nd | _	d| _
d S )Nr   	ada_groupF
r   r   r4   r    r   r   r   r   r   conv_shortcut_bias)rm   rn   r   r   r   r   r   r   r   r   r   )ru   r   r   r    r4   r   r"   r#   r   r!   r   r   r   r   rv   rH   rM   rn     s2   

zKDownBlock2D.__init__Nr   r   r[   .c           	      O   s   t |dks|dd d urd}tdd| d}| jD ]}t r,| jr,| |||}n|||}||f7 }q| jd urF| jD ]}||}q?||fS r   r   r   rH   rH   rM   rz     s   




zKDownBlock2D.forward)r   r   r   r   r   Fry   r   rH   rH   rv   rM   rD     sD    	
.rD   c                       s   e Zd Z								d"d	ed
ededededededededededef fddZ					d#dej	dej	dB dej	dB dej	dB de
eef dB dej	dB deej	eej	df f fd d!Z  ZS )$rE   r   r   r   T@   Fr   r   r   r   r    r'   r4   r   r   r!   r2   rF   r"   r#   c                    s   t    g }g }d| _t|D ]5}|dkr|n|}|| }|| }|t||||||||ddd
 |t|||	 |	||d|
d|d	 qt|| _	t|| _
|r]tt g| _nd | _d| _d S )NTr   r  Fr  
layer_norm)r'   r    attention_biasrF   r1   
group_size)rm   rn   r   r   r   r   KAttentionBlockr   r   r   r   r   r   r   )ru   r   r   r    r'   r4   r   r   r!   r2   rF   r"   r#   r   r   r   r   r   rv   rH   rM   rn     sR   

zKCrossAttnDownBlock2D.__init__Nr   r   r   r   r   r   r[   .c              	   C   s   |d ur|ni }| dd d urtd d}t| j| jD ];\}}	t r;| jr;| 	|||}|	||||||d}n|||}|	||||||d}| j
d u rT|d7 }q||f7 }q| j
d uri| j
D ]}
|
|}qb||fS )Nr   r   rH   r   embr   r   r   ry   r   )ru   r   r   r   r   r   r   r   r   r   r   rH   rH   rM   rz   T  sF   	

	
	



zKCrossAttnDownBlock2D.forward)r   r   r   Tr  Fr   r   r   )r{   r|   r}   r   r   r   r   rn   r   r   r   r   r   rz   r   rH   rH   rv   rM   rE     sl    	
CrE   c                       s   e Zd Z												
d$dededededededededededededededef fddZ		d%dej	de
ej	df dej	dB d edB d!ej	f
d"d#Z  ZS )&r`   Nr   r   r   r   r   r   Tr   r=   r   rW   r   r    rY   r4   r   r"   r-   r#   r&   r   r2   r;   rZ   c                    s6  t    g }g }|| _|d u rtd| d |}t|D ]:}||d kr)|n|}|dkr1|n|}|t|| ||||||	|
||d
 |t||| ||||ddddd
 qt	
|| _t	
|| _|dkrwt	
t|d|d	g| _n|d
krt	
t|||||||	|
||ddg| _nd | _d| _|| _d S )Nr   r5   r   r   r   Tr   r=   r   r   r   )r   r   r    r   r   r4   r   r   r;   r   upF)rm   rn   rZ   rI   rJ   r   r   r   r
   r   r   r   r   r   
upsamplersr   rY   )ru   r   rW   r   r    rY   r4   r   r"   r-   r#   r&   r   r2   r;   rZ   r   r   r   res_skip_channelsresnet_in_channelsrv   rH   rM   rn     s   


zAttnUpBlock2D.__init__r   res_hidden_states_tuple.r   r   r[   c                 O   s   t |dks|dd d urd}tdd| t| j| jD ]3\}}	|d }
|d d }tj||
gdd}t rG| j	rG| 
|||}|	|}q|||}|	|}q| jd url| jD ]}| jdkrg|||d	}qY||}qY|S )
Nr   r   r   r   r   dimr   r   )r   r   r   r   r   r   r   catr   r   r   r  rZ   )ru   r   r  r   r   r   r   r   r   r   res_hidden_states	upsamplerrH   rH   rM   rz     s$   	






zAttnUpBlock2D.forward)Nr   r   r   r   r   r   Tr   r   r=   r   r   rH   rH   rv   rM   r`     sp    	
`r`   c                -       s  e Zd Z													
						d0dedededededB dededeee B dedededededededededededed ed!ef, fd"d#Z						d1d$e	j
d%ee	j
d&f d'e	j
dB d(e	j
dB d)eeef dB d*edB d+e	j
dB d,e	j
dB d-e	j
fd.d/Z  ZS )2r^   Nr   r   r   r   r   r   Tr   r   Fr   r   rW   r    rY   r4   r   r$   r"   r-   r#   r&   r   r%   r'   r;   rX   r)   r*   r+   r,   r.   c                    s$  t    g }g }d| _|| _t|tr|g| }t|D ]N}||d kr'|n|}|dkr/|n|}|t|| |||	|||
|||d
 |s\|t	||| ||| ||||||d
 q|t
||| |d||d qt|| _t|| _|rtt|d|dg| _nd | _d| _|| _d S )	NTr   r   r   r   r   r  F)rm   rn   r   r%   r   r   r   r   r   r   r   r   r   r   r   r   r  r   rY   )ru   r   r   rW   r    rY   r4   r   r$   r"   r-   r#   r&   r   r%   r'   r;   rX   r)   r*   r+   r,   r.   r   r   r   r  r  rv   rH   rM   rn   	  sp   




zCrossAttnUpBlock2D.__init__r   r  .r   r   r   r   r   r   r[   c	              
   C   s4  |d ur| dd d urtd t| dd o(t| dd o(t| dd o(t| dd }	t| j| jD ]W\}
}|d }|d d }|	rRt| j||| j	| j
| j| jd\}}tj||gd	d
}t rv| jrv| |
||}||||||ddd }q0|
||}||||||ddd }q0| jd ur| jD ]}|||}q|S )Nr   r   s1s2b1b2r  r  r  r  r  r   r  Fr   r   )r   rI   rJ   getattrr   r   r   r   rY   r  r  r  r  r   r  r   r   r   r  )ru   r   r  r   r   r   r   r   r   is_freeu_enabledr   r   r  r  rH   rH   rM   rz   e	  sf   






	
	
zCrossAttnUpBlock2D.forward)Nr   r   r   r   r   r   r   Tr   r   r   TFFFFr   r   r   rH   rH   rv   rM   r^   
	  s    
	
^	
r^   c                       s   e Zd Z											d"d
edededededB dededededededededef fddZ		d#dej	de
ej	df dej	dB dedB dej	f
d d!Z  ZS )$r\   Nr   r   r   r   r   r   Tr   r   rW   r   r    rY   r4   r   r"   r-   r#   r&   r   r;   rX   c                    s   t    g }t|D ]'}||d kr|n|}|dkr|n|}|t|| ||||||	|
||d
 qt|| _|rHtt|d|dg| _	nd | _	d| _
|| _d S )Nr   r   r   Tr  F)rm   rn   r   r   r   r   r   r   r   r  r   rY   )ru   r   rW   r   r    rY   r4   r   r"   r-   r#   r&   r   r;   rX   r   r   r  r  rv   rH   rM   rn   	  s2   

zUpBlock2D.__init__r   r  .r   r   r[   c              
   O   s  t |dks|dd d urd}tdd| t| dd o-t| dd o-t| dd o-t| dd }| jD ]=}	|d	 }
|d d	 }|rQt| j||
| j| j| j	| j
d
\}}
tj||
gdd}t ri| jri| |	||}q1|	||}q1| jd ur| jD ]}|||}qw|S )Nr   r   r   r   r  r  r  r  r  r  r   r  )r   r   r   r  r   r   rY   r  r  r  r  r   r  r   r   r   r  )ru   r   r  r   r   r   r   r   r  r   r  r  rH   rH   rM   rz   	  s>   	







zUpBlock2D.forward)
Nr   r   r   r   r   r   Tr   Tr   r   rH   rH   rv   rM   r\   	  sj    	
5r\   c                       s   e Zd Z												dd
edededB dededededededededededB f fddZddej	dej	dB dej	fddZ
  ZS ) rc   Nr   r   r   r   r   r   Tr   r   r   rY   r4   r   r"   r-   r#   r&   r   r;   rX   r    c                    s   t    g }t|D ]0}|dkr|n|}|dkr*|t|||||	|d||d	 q|t|||||	|||||
d
 qt|| _|rQtt	|d|dg| _
nd | _
|| _d S )Nr   r   r   r   Tr  )rm   rn   r   r   r   r   r   r   r   r   r  rY   )ru   r   r   rY   r4   r   r"   r-   r#   r&   r   r;   rX   r    r   r   input_channelsrv   rH   rM   rn   
  sJ   

zUpDecoderBlock2D.__init__r   r   r[   c                 C   s:   | j D ]}|||d}q| jd ur| jD ]}||}q|S Nr   )r   r  )ru   r   r   r   r  rH   rH   rM   rz   M
  s   



zUpDecoderBlock2D.forward)Nr   r   r   r   r   r   Tr   TNry   r   rH   rH   rv   rM   rc   
  sP    	
*=rc   c                       s   e Zd Z													dd
edededB dedededededededededededB f fddZd dej	dej	dB dej	fddZ
  ZS )!rd   Nr   r   r   r   r   r   Tr   r   r   rY   r4   r   r"   r-   r#   r&   r   r2   r;   rX   r    c                    s   t    g }g }|d u rtd| d |}t|D ]P}|dkr$|n|}|dkr;|t|||||	|d||d	 n|t|||||	|||||
d
 |t||| ||||dkr\|	nd |dkrc|nd ddddd qt	
|| _t	
|| _|rt	
t|d|d	g| _nd | _|| _d S )
NjIt is not recommend to pass `attention_head_dim=None`. Defaulting `attention_head_dim` to `out_channels`: r5   r   r   r   r   Tr   r  )rm   rn   rI   rJ   r   r   r   r   r
   r   r   r   r   r   r  rY   )ru   r   r   rY   r4   r   r"   r-   r#   r&   r   r2   r;   rX   r    r   r   r   r  rv   rH   rM   rn   Y
  sv   


zAttnUpDecoderBlock2D.__init__r   r   r[   c                 C   sR   t | j| jD ]\}}|||d}|||d}q| jd ur'| jD ]}||}q |S r  )r   r   r   r  )ru   r   r   r   r   r  rH   rH   rM   rz   
  s   


zAttnUpDecoderBlock2D.forward)Nr   r   r   r   r   r   Tr   r   TNry   r   rH   rH   rv   rM   rd   X
  sV    	
*Vrd   c                       s   e Zd Zddddddddeddf
d	ed
ededededB dededededededededef fddZ			d de
jdee
jdf de
jdB dee
je
jf fddZ  ZS )!rb   Nr   r   r   r   r   Tr   r   rW   r   r    rY   r4   r   r"   r-   r#   r   r2   r;   rX   c                    s  t    tg | _tg | _t|D ]5}||d kr|n|}|dkr'|n|}| jt|| |||t	||d  dt	|d d||	|
||d q|d u rZt
d| d |}| jt||| |||dddddd	
 t||d
| _|rt||||t	|d dt	|d d||	|
||dddd| _tj|ddddd| _tjjt	|d d||dd| _t | _nd | _d | _d | _d | _|| _d S )Nr   r   r   r   r   r   r5   Tr   r   r   r   r   r    r   r   r   r4   r   r   r;   r   r   r  r   r   r   r   r   rj   r   rk   
num_groupsnum_channelsr   affine)rm   rn   r   r   r   r   r   r   r   r   rI   rJ   r
   r   r  	resnet_uprp   r   r   	GroupNorm	skip_normSiLUactrY   )ru   r   rW   r   r    rY   r4   r   r"   r-   r#   r   r2   r;   rX   r   r  r  rv   rH   rM   rn   
  s   


zAttnSkipUpBlock2D.__init__r   r  .r   r[   c                 O   s   t |dks|dd d urd}tdd| | jD ]}|d }	|d d }tj||	gdd}|||}q| jd |}|d urE| |}nd}| jd ure| 	|}
| 
|
}
| |
}
||
 }| ||}||fS Nr   r   r   r   r  r   r  )r   r   r   r   r   r  r   r  r(  r*  r,  r   ru   r   r  r   r   r   r   r   r   r  skip_sample_statesrH   rH   rM   rz     s&   	




zAttnSkipUpBlock2D.forwardr   r   rH   rH   rv   rM   rb   
  sf    	
arb   c                       s   e Zd Zdddddddedddf
d	ed
ededededB dededededededededef fddZ			d de
jdee
jdf de
jdB dee
je
jf fddZ  ZS )!ra   Nr   r   r   r   r   Tr   r   rW   r   r    rY   r4   r   r"   r-   r#   r   r;   rX   upsample_paddingc                    s:  t    tg | _t|D ]5}||d kr|n|}|dkr!|n|}| jt|| |||t|| d dt|d d||	|
||d qt	||d| _
|rt||||t|d dt|d d||	|
||dddd	| _tj|d
dddd| _tjjt|d d||dd| _t | _nd | _d | _d | _d | _|| _d S )Nr   r   r   r   r   r   Tr   r!  r   r"  r   r#  r$  )rm   rn   r   r   r   r   r   r   r   r   r  r(  rp   r   r   r)  r*  r+  r,  rY   )ru   r   rW   r   r    rY   r4   r   r"   r-   r#   r   r;   rX   r0  r   r  r  rv   rH   rM   rn   B  s`   

zSkipUpBlock2D.__init__r   r  .r   r[   c                 O   s   t |dks|dd d urd}tdd| | jD ]}|d }	|d d }tj||	gdd}|||}q|d ur>| |}nd}| jd ur^| |}
| 	|
}
| 
|
}
||
 }| ||}||fS r-  )r   r   r   r   r   r  r  r(  r*  r,  r   r.  rH   rH   rM   rz     s$   	




zSkipUpBlock2D.forwardr   r   rH   rH   rv   rM   ra   A  sf    	
Kra   c                       s   e Zd Z												
d$dedededededB dedededededededededef fddZ		d%dej	de
ej	df dej	dB d edB d!ej	f
d"d#Z  ZS )&r]   Nr   r   r   r   r   r   Tr   Fr   rW   r   r    rY   r4   r   r"   r-   r#   r&   r   r;   rX   r:   c                    s   t    g }t|D ](}||d kr|n|}|dkr|n|}|t|| ||||||	|
|||d qt|| _|rRtt|||||||	|
|||ddg| _nd | _d| _	|| _
d S )Nr   r   r   Tr   r   r    r   r   r4   r   r   r;   r   r:   r  F)rm   rn   r   r   r   r   r   r   r  r   rY   )ru   r   rW   r   r    rY   r4   r   r"   r-   r#   r&   r   r;   rX   r:   r   r   r  r  rv   rH   rM   rn     sT   

zResnetUpsampleBlock2D.__init__r   r  .r   r   r[   c                 O   s   t |dks|dd d urd}tdd| | jD ])}|d }	|d d }tj||	gdd}t r=| jr=| |||}q|||}q| j	d urS| j	D ]}
|
||}qK|S r-  )
r   r   r   r   r   r  r   r   r   r  )ru   r   r  r   r   r   r   r   r   r  r  rH   rH   rM   rz     s   	


zResnetUpsampleBlock2D.forward)Nr   r   r   r   r   r   Tr   TFr   r   rH   rH   rv   rM   r]     sp    	
Hr]   c                (       s  e Zd Z												
				d-dedededededB dededededededededededededededB f& fdd Z						d.d!ej	d"e
ej	d#f d$ej	dB d%ej	dB d&edB d'ej	dB d(eeef dB d)ej	dB d*ej	fd+d,Z  ZS )/r_   Nr   r   r   r   r   r   Tr   r   Fr   r   rW   r    rY   r4   r   r"   r-   r#   r&   r   r2   r'   r;   rX   r:   r+   r1   c                    s   t    g }g }d| _|| _|| j | _t|D ]G}||d kr#|n|}|dkr+|n|}|t|| ||||||	|
|||d tt	drIt
 nt }|t||| j| j||dd|||d qt|| _t|| _|rtt|||||||	|
|||ddg| _nd | _d| _|| _d S )	NTr   r   r   r   r   r1  F)rm   rn   r   r2   r   r   r   r   r   r   r   r   r
   r   r   r   r   r  r   rY   )ru   r   r   rW   r    rY   r4   r   r"   r-   r#   r&   r   r2   r'   r;   rX   r:   r+   r1   r   r   r   r  r  r   rv   rH   rM   rn     s   

z!SimpleCrossAttnUpBlock2D.__init__r   r  .r   r   r   r   r   r   r[   c	                 C   s   |d ur|ni }| dd d urtd |d u r"|d u rd n|}	n|}	t| j| jD ]A\}
}|d }|d d }tj||gdd}t r\| j	r\| 
|
||}||f||	d|}q+|
||}||f||	d|}q+| jd ur}| jD ]}|||}qu|S )Nr   r   r  r   r  r   )r   rI   rJ   r   r   r   r   r  r   r   r   r  )ru   r   r  r   r   r   r   r   r   r   r   r   r  r  rH   rH   rM   rz   v  sB   



z SimpleCrossAttnUpBlock2D.forward)Nr   r   r   r   r   r   Tr   r   r   TFFNr   r{   r|   r}   r   r   r   r   rn   r   r   r   r   r   rz   r   rH   rH   rv   rM   r_     s    	
f	
r_   c                       s   e Zd Z						ddeded	ed
ededededededB def fddZ		ddej	de
ej	df dej	dB dedB dej	f
ddZ  ZS )re   r      r   r   r   Tr   r   r    rY   r4   r   r"   r#   r   NrX   c                    s   t    g }d| }|}|d }t|D ]+}|dkr|n|}||	 }||	 }|t|||d kr3|n|||||||ddd
 qt|| _|
rRtt g| _	nd | _	d| _
|| _d S )Nr   r   r   r  F)
r   r   r    r   r   r   r4   r   r   r  )rm   rn   r   r   r   r   r   r   r   r  r   rY   )ru   r   r   r    rY   r4   r   r"   r#   r   rX   r   k_in_channelsk_out_channelsr   r   r   rv   rH   rM   rn     s:   

zKUpBlock2D.__init__r   r  .r   r   r[   c           
      O   s   t |dks|dd d urd}tdd| |d }|d ur'tj||gdd}| jD ]}t r;| jr;| |||}q*|||}q*| j	d urP| j	D ]}	|	|}qI|S r-  )
r   r   r   r   r  r   r   r   r   r  )
ru   r   r  r   r   r   r   r   r   r  rH   rH   rM   rz     s   	



zKUpBlock2D.forward)r   r3  r   r   r   Tr   r   rH   rH   rv   rM   re     sR    	
5re   c                       s   e Zd Z										d&d
ededededededededededededef fddZ						d'dej	de
ej	df dej	dB dej	dB deeef dB d edB d!ej	dB d"ej	dB d#ej	fd$d%Z  ZS )(rf   r   r   r   r   r   r      TFr   r   r    rY   r4   r   r"   r#   r   r2   r'   rX   r,   c                    s^  t    g }g }||  ko|kn  }||k}|rdnd}d| _|
| _|r)|nd| }|}|d }t|D ]U}|dkr?|n|}||	 }||	 }|rT||d krT|}nd }|t|||||||||ddd |t||d krs|n|||d kr~||
 n||
 |
||d|d|d		 q7t	|| _
t	|| _|rt	t g| _nd | _d| _|| _d S )
NTFr   r   r   r  )r   r   conv_2d_out_channelsr    r   r   r   r4   r   r   r  r  )r'   r    r  rF   r1   r,   )rm   rn   r   r2   r   r   r   r  r   r   r   r   r   r  r   rY   )ru   r   r   r    rY   r4   r   r"   r#   r   r2   r'   rX   r,   r   r   is_first_blockis_middle_blockrF   r4  r5  r   r   r   r7  rv   rH   rM   rn     sn   

zKCrossAttnUpBlock2D.__init__Nr   r  .r   r   r   r   r   r   r[   c	              	   C   s   |d }|d urt j||gdd}t| j| jD ],\}	}
t  r5| jr5| |	||}|
||||||d}q|	||}|
||||||d}q| jd urT| jD ]}||}qM|S )Nr  r   r  r	  )	r   r  r   r   r   r   r   r   r  )ru   r   r  r   r   r   r   r   r   r   r   r  rH   rH   rM   rz   V  s>   
	
	

zKCrossAttnUpBlock2D.forward)	r   r   r   r   r   r   r6  TFr   r2  rH   rH   rv   rM   rf      s|    	
Y	
rf   c                       s   e Zd ZdZ								d#deded	ed
ededB dedededededB def fddZde	j
dedede	j
fddZde	j
dedede	j
fddZ					d$de	j
de	j
dB de	j
dB de	j
dB deeef dB d e	j
dB de	j
fd!d"Z  ZS )%r  aN  
    A basic Transformer block.

    Parameters:
        dim (`int`): The number of channels in the input and output.
        num_attention_heads (`int`): The number of heads to use for multi-head attention.
        attention_head_dim (`int`): The number of channels in each head.
        dropout (`float`, *optional*, defaults to 0.0): The dropout probability to use.
        cross_attention_dim (`int`, *optional*): The size of the encoder_hidden_states vector for cross attention.
        attention_bias (`bool`, *optional*, defaults to `False`):
            Configure if the attention layers should contain a bias parameter.
        upcast_attention (`bool`, *optional*, defaults to `False`):
            Set to `True` to upcast the attention computation to `float32`.
        temb_channels (`int`, *optional*, defaults to 768):
            The number of channels in the token embedding.
        add_self_attention (`bool`, *optional*, defaults to `False`):
            Set to `True` to add self-attention to the block.
        cross_attention_norm (`str`, *optional*, defaults to `None`):
            The type of normalization to use for the cross attention. Can be `None`, `layer_norm`, or `group_norm`.
        group_size (`int`, *optional*, defaults to 32):
            The number of groups to separate the channels into for group normalization.
    r   NFr6  r   r  r%   r2   r4   r'   r  r,   r    rF   r1   r  c              
      sz   t    |	| _|	r"t||td|| | _t|||||d d d| _t||td|| | _t||||||||
d| _	d S )Nr   )r   r   r   r4   rl   r'   r1   )r   r'   r   r   r4   rl   r,   r1   )
rm   rn   rF   r   maxnorm1r
   attn1norm2attn2)ru   r  r%   r2   r4   r'   r  r,   r    rF   r1   r  rv   rH   rM   rn     s0   
zKAttentionBlock.__init__r   heightweightr[   c                 C   s$   | dddd|jd || dS )Nr   r   r   r   r  permutereshapeshaperu   r   r?  r@  rH   rH   rM   _to_3d  s   $zKAttentionBlock._to_3dc                 C   s    | ddd|jd d||S )Nr   r   r   r  rA  rE  rH   rH   rM   _to_4d  s    zKAttentionBlock._to_4dr   r
  r   r   r   c                 C   s   |d ur|ni }| dd d urtd | jrE| ||}|jdd  \}}	| |||	}| j|fd |d|}
| |
||	}
|
| }| 	||}|jdd  \}}	| |||	}| j
|f||d u rf|n|d|}
| |
||	}
|
| }|S )Nr   r   r   r   )r   rI   rJ   rF   r;  rD  rF  r<  rG  r=  r>  )ru   r   r   r
  r   r   r   norm_hidden_statesr?  r@  attn_outputrH   rH   rM   rz     s>   
zKAttentionBlock.forward)r   NFFr6  FNr   r   )r{   r|   r}   r~   r   r   r   r   rn   r   r   rF  rG  r   r   rz   r   rH   rH   rv   rM   r    sn    	
+	
r  )r   NNNNFFFFr   r   Fr   NNNr   )r   r   NNFFFFr   r   FNr   r   )Nr   NNNFFFFr   r   Fr   NNNr   )Itypingr   numpyr   r   torch.nn.functionalr   
functionalr   utilsr   r   utils.torch_utilsr   activationsr	   attention_processorr
   r   r   normalizationr   r   r   r   r   r   r   r   r   r    transformers.dual_transformer_2dr   transformers.transformer_2dr   
get_loggerr{   rI   r   r   r   r   rN   rU   Modulerg   rh   rS   rQ   rR   r<   r>   r8   rB   rC   rA   r@   r9   r?   rD   rE   r`   r^   r\   rc   rd   rb   ra   r]   r_   re   rf   r  rH   rH   rH   rM   <module>   s  (

	

 Y	

U	

 b% # }  NOgpZZ Fv  !eIc nd P 