o
    pi`@                 7   @   s  d dl mZmZmZmZmZ d dlZd dlZd dl	m
  mZ d dlm
Z
 ddlmZmZmZ ddlmZ ddlmZ dd	lmZmZmZ dd
lmZ ddlmZmZmZmZm Z m!Z!m"Z"m#Z# ddl$m%Z% ddl&m'Z' e(e)Z*																	dqde+de,de,de,de,de-de.de+de,dee, dee, dee, dee, d e-d!e-d"e-d#e-d$e+d%e+d&e-d'e.d(ee+ d)ee, d*ee+ d+e.f2d,d-Z/														drd.e+de,de,de.de+de,d/e.de,dee, dee, d e-d!e-d0e-d#e-d$e+d%e+d&e-d(ee+ d)ee, d+e.f(d1d2Z0																	dsd3e+de,de,de,d4e,de,d5e-de.de+d6ee, de,dee, dee, dee, d e-d!e-d"e-d#e-d$e+d%e+d&e-d'e.d(ee+ d)ee, d7ee+ d+e.d8e
j1f6d9d:Z2G d;d< d<e
j1Z3G d=d> d>e
j1Z4G d?d@ d@e
j1Z5G dAdB dBe
j1Z6G dCdD dDe
j1Z7G dEdF dFe
j1Z8G dGdH dHe
j1Z9G dIdJ dJe
j1Z:G dKdL dLe
j1Z;G dMdN dNe
j1Z<G dOdP dPe
j1Z=G dQdR dRe
j1Z>G dSdT dTe
j1Z?G dUdV dVe
j1Z@G dWdX dXe
j1ZAG dYdZ dZe
j1ZBG d[d\ d\e
j1ZCG d]d^ d^e
j1ZDG d_d` d`e
j1ZEG dadb dbe
j1ZFG dcdd dde
j1ZGG dedf dfe
j1ZHG dgdh dhe
j1ZIG didj dje
j1ZJG dkdl dle
j1ZKG dmdn dne
j1ZLG dodp dpe
j1ZMdS )t    )AnyDictOptionalTupleUnionN)nn   )	deprecateis_torch_versionlogging)apply_freeu   )get_activation)	AttentionAttnAddedKVProcessorAttnAddedKVProcessor2_0)AdaGroupNorm)Downsample2DFirDownsample2DFirUpsample2DKDownsample2DKUpsample2DResnetBlock2DResnetBlockCondNorm2D
Upsample2D)DualTransformer2DModel)Transformer2DModel   Fdefault      ?        down_block_type
num_layersin_channelsout_channelstemb_channelsadd_downsample
resnet_epsresnet_act_fntransformer_layers_per_blocknum_attention_headsresnet_groupscross_attention_dimdownsample_paddingdual_cross_attentionuse_linear_projectiononly_cross_attentionupcast_attentionresnet_time_scale_shiftattention_typeresnet_skip_time_actresnet_out_scale_factorcross_attention_normattention_head_dimdownsample_typedropoutc                 C   s  |d u rt d|	 d |	}| dr| dd  n| } | dkr/t|||||||||
||dS | dkrCt|||||||||
|||dS | d	krb|d
u rNd }n|pQd}t||||||||
||||dS | dkr|d u rntdtd6i d|d|d|d|d|d|d|d|d|d|
d|d|d|	d|d|d|d|d |d!|S | d"kr|d u rtd#td6i d|d|d|d|d|d|d|d|d|
d|d$|d |d%|d&|d|d'|S | d(krt	||||||||||d)
S | d*krt
||||||||||d+
S | d,kr%t||||||||
||d-
S | d.kr9t||||||||
|||d/S | d0krJt||||||||d1S | d2kret|||||||||||sad3d4S d
d4S t|  d5)7NztIt is recommended to provide `attention_head_dim` when calling `get_down_block`. Defaulting `attention_head_dim` to .UNetRes   DownBlock2D)r"   r#   r$   r%   r9   r&   r'   r(   r+   r-   r2   ResnetDownsampleBlock2D)r"   r#   r$   r%   r9   r&   r'   r(   r+   r2   skip_time_actoutput_scale_factorAttnDownBlock2DFconv)r"   r#   r$   r%   r9   r'   r(   r+   r-   r7   r2   r8   CrossAttnDownBlock2Dz>cross_attention_dim must be specified for CrossAttnDownBlock2Dr"   r)   r#   r$   r%   r9   r&   r'   r(   r+   r-   r,   r*   r.   r/   r0   r1   r2   r3   SimpleCrossAttnDownBlock2DzDcross_attention_dim must be specified for SimpleCrossAttnDownBlock2Dr7   r?   r@   r6   SkipDownBlock2D)
r"   r#   r$   r%   r9   r&   r'   r(   r-   r2   AttnSkipDownBlock2D)
r"   r#   r$   r%   r9   r&   r'   r(   r7   r2   DownEncoderBlock2D)
r"   r#   r$   r9   r&   r'   r(   r+   r-   r2   AttnDownEncoderBlock2D)r"   r#   r$   r9   r&   r'   r(   r+   r-   r7   r2   KDownBlock2D)r"   r#   r$   r%   r9   r&   r'   r(   KCrossAttnDownBlock2DT)r"   r#   r$   r%   r9   r&   r'   r(   r,   r7   add_self_attention does not exist. )loggerwarning
startswithr=   r>   rA   
ValueErrorrC   rD   rE   rF   rG   rH   rI   rJ   )r!   r"   r#   r$   r%   r&   r'   r(   r)   r*   r+   r,   r-   r.   r/   r0   r1   r2   r3   r4   r5   r6   r7   r8   r9   rM   rM   c/home/ubuntu/SoloSpeech/.venv/lib/python3.10/site-packages/diffusers/models/unets/unet_2d_blocks.pyget_down_block+   s  
	
	






rS   mid_block_typer@   mid_block_only_cross_attentionc                 C   s   | dkrt |||||||||	|||
|||dS | dkr,t|||||||	||||||dS | dkr>t|||d|||||dd
S | d u rDd S td	|  )
NUNetMidBlock2DCrossAttn)r)   r#   r%   r9   r'   r(   r@   r2   r,   r*   r+   r.   r/   r1   r3   UNetMidBlock2DSimpleCrossAttn)r#   r%   r9   r'   r(   r@   r,   r7   r+   r2   r?   r0   r6   UNetMidBlock2Dr   F)
r#   r%   r9   r"   r'   r(   r@   r+   r2   add_attentionzunknown mid_block_type : )rV   rW   rX   rQ   )rT   r%   r#   r'   r(   r+   r@   r)   r*   r,   r.   r/   rU   r1   r2   r3   r4   r6   r7   r9   rM   rM   rR   get_mid_block   sd   rZ   up_block_typeprev_output_channeladd_upsampleresolution_idxupsample_typereturnc                 C   s  |d u rt d| d |}| dr| dd  n| } | dkr0t||||||	||||||dS | dkrFt||||||	||||||||dS | d	kr|d u rRtd
td6i d|d|
d|d|d|d|d|	d|d|d|d|d|d|d|d|d|d|d|d|d|S | dkr|d u rtd td6i d|d|d|d|d|d|	d|d|d|d|d|d|d!|d|d"|d#|d|d$|S | d%kr|d&u rd }n|pd'}t||||||	|||||||d(S | d)krt	||||||	|||||d*S | d+kr#t
||||||	||||||d,S | d-kr7t||||	|||||||d.S | d/krLt||||	||||||||d0S | d1kr^t|||||	||||d2	S | d3krrt|||||	||||||d4S t|  d5)7NzrIt is recommended to provide `attention_head_dim` when calling `get_up_block`. Defaulting `attention_head_dim` to r:   r;   r<   	UpBlock2D)r"   r#   r$   r\   r%   r^   r9   r]   r'   r(   r+   r2   ResnetUpsampleBlock2D)r"   r#   r$   r\   r%   r^   r9   r]   r'   r(   r+   r2   r?   r@   CrossAttnUpBlock2Dz<cross_attention_dim must be specified for CrossAttnUpBlock2Dr"   r)   r#   r$   r\   r%   r^   r9   r]   r'   r(   r+   r,   r*   r.   r/   r0   r1   r2   r3   SimpleCrossAttnUpBlock2DzBcross_attention_dim must be specified for SimpleCrossAttnUpBlock2Dr7   r?   r@   r6   AttnUpBlock2DFrB   )r"   r#   r$   r\   r%   r^   r9   r'   r(   r+   r7   r2   r_   SkipUpBlock2D)r"   r#   r$   r\   r%   r^   r9   r]   r'   r(   r2   AttnSkipUpBlock2D)r"   r#   r$   r\   r%   r^   r9   r]   r'   r(   r7   r2   UpDecoderBlock2D)r"   r#   r$   r^   r9   r]   r'   r(   r+   r2   r%   AttnUpDecoderBlock2D)r"   r#   r$   r^   r9   r]   r'   r(   r+   r7   r2   r%   
KUpBlock2D)	r"   r#   r$   r%   r^   r9   r]   r'   r(   KCrossAttnUpBlock2D)r"   r#   r$   r%   r^   r9   r]   r'   r(   r,   r7   rL   rM   )rN   rO   rP   ra   rb   rQ   rc   rd   re   rf   rg   rh   ri   rj   rk   )r[   r"   r#   r$   r\   r%   r]   r'   r(   r^   r)   r*   r+   r,   r.   r/   r0   r1   r2   r3   r4   r5   r6   r7   r_   r9   rM   rM   rR   get_up_blockG  s  
	
	






rl   c                       sD   e Zd ZdZdededef fddZdejdejfd	d
Z	  Z
S )AutoencoderTinyBlocka*  
    Tiny Autoencoder block used in [`AutoencoderTiny`]. It is a mini residual module consisting of plain conv + ReLU
    blocks.

    Args:
        in_channels (`int`): The number of input channels.
        out_channels (`int`): The number of output channels.
        act_fn (`str`):
            ` The activation function to use. Supported values are `"swish"`, `"mish"`, `"gelu"`, and `"relu"`.

    Returns:
        `torch.Tensor`: A tensor with the same shape as the input tensor, but with the number of channels equal to
        `out_channels`.
    r#   r$   act_fnc                    s   t    t|}ttj||ddd|tj||ddd|tj||ddd| _||kr5tj||dddnt | _t	 | _
d S )Nr   r   )kernel_sizepaddingF)ro   bias)super__init__r   r   
SequentialConv2drB   IdentityskipReLUfuse)selfr#   r$   rn   	__class__rM   rR   rs   8  s   
	zAutoencoderTinyBlock.__init__xr`   c                 C   s   |  | || | S N)ry   rB   rw   )rz   r}   rM   rM   rR   forwardI  s   zAutoencoderTinyBlock.forward)__name__
__module____qualname____doc__intstrrs   torchTensorr   __classcell__rM   rM   r{   rR   rm   (  s    rm   c                       s   e Zd ZdZ													
ddededededededededee dedededef fddZ	d de
jdee
j de
jfddZ  ZS )!rX   a:  
    A 2D UNet mid-block [`UNetMidBlock2D`] with multiple residual blocks and optional attention blocks.

    Args:
        in_channels (`int`): The number of input channels.
        temb_channels (`int`): The number of temporal embedding channels.
        dropout (`float`, *optional*, defaults to 0.0): The dropout rate.
        num_layers (`int`, *optional*, defaults to 1): The number of residual blocks.
        resnet_eps (`float`, *optional*, 1e-6 ): The epsilon value for the resnet blocks.
        resnet_time_scale_shift (`str`, *optional*, defaults to `default`):
            The type of normalization to apply to the time embeddings. This can help to improve the performance of the
            model on tasks with long-range temporal dependencies.
        resnet_act_fn (`str`, *optional*, defaults to `swish`): The activation function for the resnet blocks.
        resnet_groups (`int`, *optional*, defaults to 32):
            The number of groups to use in the group normalization layers of the resnet blocks.
        attn_groups (`Optional[int]`, *optional*, defaults to None): The number of groups for the attention blocks.
        resnet_pre_norm (`bool`, *optional*, defaults to `True`):
            Whether to use pre-normalization for the resnet blocks.
        add_attention (`bool`, *optional*, defaults to `True`): Whether to add attention blocks.
        attention_head_dim (`int`, *optional*, defaults to 1):
            Dimension of a single attention head. The number of attention heads is determined based on this value and
            the number of input channels.
        output_scale_factor (`float`, *optional*, defaults to 1.0): The output scale factor.

    Returns:
        `torch.Tensor`: The output of the last residual block, which is a tensor of shape `(batch_size, in_channels,
        height, width)`.

    r    r   ư>r   swish    NTr   r#   r%   r9   r"   r'   r2   r(   r+   attn_groupsresnet_pre_normrY   r7   r@   c                    sd  t    |d ur|nt|d d}|| _|	d u r!|dkr|nd }	|dkr4t||||||d||d	g}nt||||||||||
d
g}g }|d u rTtd| d |}t|D ]K}| jrx|	t
||| ||||	|dkrn|nd d	d	d	d	d
 n|	d  |dkr|	t||||||d||d	 qX|	t||||||||||
d
 qXt|| _t|| _d S )N   r   r   spatial	r#   r$   r%   epsgroupsr9   time_embedding_normnon_linearityr@   
r#   r$   r%   r   r   r9   r   r   r@   pre_normiIt is not recommend to pass `attention_head_dim=None`. Defaulting `attention_head_dim` to `in_channels`: r:   T
headsdim_headrescale_output_factorr   norm_num_groupsspatial_norm_dimresidual_connectionrq   upcast_softmax_from_deprecated_attn_block)rr   rs   minrY   r   r   rN   rO   rangeappendr   r   
ModuleList
attentionsresnets)rz   r#   r%   r9   r"   r'   r2   r(   r+   r   r   rY   r7   r@   r   r   _r{   rM   rR   rs   l  s   


zUNetMidBlock2D.__init__hidden_statestembr`   c                 C   sR   | j d ||}t| j| j dd  D ]\}}|d ur!|||d}|||}q|S )Nr   r   r   )r   zipr   )rz   r   r   attnresnetrM   rM   rR   r     s   zUNetMidBlock2D.forward)r    r   r   r   r   r   NTTr   r   r~   )r   r   r   r   r   floatr   r   boolrs   r   r   r   r   rM   rM   r{   rR   rX   M  sR    "	
*rrX   c                '       s   e Zd Z														
				d*dededee dededeeee f dededededee de	dededede	de	de	def& fdd Z
					d+d!ejd"eej d#eej d$eej d%eeeef  d&eej d'ejfd(d)Z  ZS ),rV   Nr    r   r   r   r   r   Tr      Fr#   r%   r$   r9   r"   r)   r'   r2   r(   r+   resnet_groups_outr   r*   r@   r,   r.   r/   r1   r3   c                    s"  t    |p|}|| _|| _d| _|| _|
d ur|
nt|d d}
t|tr,|g| }|p/|
}t	|||||
||||	||dg}g }t
|D ]9}|s_|t||| ||| |||||d	 n|t||| |d||
d |t	||||||||	||d
 qFt|| _t|| _d	| _d S )
NTr   r   r#   r$   r%   r   r   
groups_outr9   r   r   r@   r   )r#   r"   r,   r   r/   r1   r3   r   r#   r"   r,   r   r   F)rr   rs   r#   r$   has_cross_attentionr*   r   
isinstancer   r   r   r   r   r   r   r   r   r   gradient_checkpointing)rz   r#   r%   r$   r9   r"   r)   r'   r2   r(   r+   r   r   r*   r@   r,   r.   r/   r1   r3   r   r   ir{   rM   rR   rs     s   




z UNetMidBlock2DCrossAttn.__init__r   r   encoder_hidden_statesattention_maskcross_attention_kwargsencoder_attention_maskr`   c              	   C   s   |d ur| dd d urtd | jd ||}t| j| jdd  D ]G\}}| jrZ| jrZddd}	tddr<d	d
ini }
||||||d
dd }t	j
jj|	|||fi |
}q$||||||d
dd }|||}q$|S )NscaleSPassing `scale` to `cross_attention_kwargs` is deprecated. `scale` will be ignored.r   r   c                        fdd}|S )Nc                        d ur | diS  |  S Nreturn_dictrM   inputsmoduler   rM   rR   custom_forwarda     zVUNetMidBlock2DCrossAttn.forward.<locals>.create_custom_forward.<locals>.custom_forwardrM   r   r   r   rM   r   rR   create_custom_forward`     z>UNetMidBlock2DCrossAttn.forward.<locals>.create_custom_forward>=1.11.0use_reentrantFr   r   r   r   r   r~   )getrN   rO   r   r   r   trainingr   r
   r   utils
checkpoint)rz   r   r   r   r   r   r   r   r   r   ckpt_kwargsrM   rM   rR   r   O  sJ   	

	zUNetMidBlock2DCrossAttn.forward)Nr    r   r   r   r   r   r   NTr   r   r   FFFr   NNNNNr   r   r   r   r   r   r   r   r   r   rs   r   r   r   r   r   r   rM   rM   r{   rR   rV     s    	
irV   c                        s   e Zd Z												
	
	d&dededededededededededededededee f fddZ					d'de	j
dee	j
 dee	j
 d ee	j
 d!eeeef  d"ee	j
 d#e	j
fd$d%Z  ZS )(rW   r    r   r   r   r   r   Tr   r   FNr#   r%   r9   r"   r'   r2   r(   r+   r   r7   r@   r,   r?   r0   r6   c                    s   t    d| _|
| _|d ur|nt|d d}|| j | _t||||||||||	|dg}g }t|D ]3}tt	dr>t
 nt }|t||| j| j||dd|||d |t||||||||||	|d q4t|| _t|| _d S )NTr   r   r#   r$   r%   r   r   r9   r   r   r@   r   r?   scaled_dot_product_attention	query_dimr,   r   r   added_kv_proj_dimr   rq   r   r0   r6   	processor)rr   rs   r   r7   r   	num_headsr   r   hasattrFr   r   r   r   r   r   r   r   )rz   r#   r%   r9   r"   r'   r2   r(   r+   r   r7   r@   r,   r?   r0   r6   r   r   r   r   r{   rM   rR   rs     sn   
z&UNetMidBlock2DSimpleCrossAttn.__init__r   r   r   r   r   r   r`   c           
      C   s   |d ur|ni }| dd d urtd |d u r"|d u rd n|}n|}| jd ||}t| j| jdd  D ]\}}	||f||d|}|	||}q7|S )Nr   r   r   r   r   r   )r   rN   rO   r   r   r   )
rz   r   r   r   r   r   r   maskr   r   rM   rM   rR   r     s$   	
z%UNetMidBlock2DSimpleCrossAttn.forward)r    r   r   r   r   r   Tr   r   r   FFNr   )r   r   r   r   r   r   r   r   rs   r   r   r   r   r   r   rM   rM   r{   rR   rW     s    	
WrW   c                       s   e Zd Z												d#d
edededededededededededededef fddZ			d$dej	de
ej	 de
e de
eeef  deej	eej	d f f f
d!d"Z  ZS )%rA   r    r   r   r   r   r   Tr   rB   r#   r$   r%   r9   r"   r'   r2   r(   r+   r   r7   r@   r-   r8   c                    s  t    g }g }|| _|d u rtd| d |}t|D ].}|dkr'|n|}|t|||||	|||||
d
 |t||| ||||	ddddd
 qt	
|| _t	
|| _|dkrnt	
t|d||dd	g| _d S |d
krt	
t|||||	|||||
ddg| _d S d | _d S )Nr   r:   r   r   T	r   r   r   r   r   r   rq   r   r   rB   opuse_convr$   rp   namer   )r#   r$   r%   r   r   r9   r   r   r@   r   down)rr   rs   r8   rN   rO   r   r   r   r   r   r   r   r   r   downsamplers)rz   r#   r$   r%   r9   r"   r'   r2   r(   r+   r   r7   r@   r-   r8   r   r   r   r{   rM   rR   rs     s   





zAttnDownBlock2D.__init__Nr   r   upsample_sizer   r`   .c           	      C   s   |d ur|ni }| dd d urtd d}t| j| jD ]\}}|||}||fi |}||f }q| jd urU| jD ]}| jdkrK|||d}q=||}q=||f7 }||fS )Nr   r   rM   r   r   )r   rN   rO   r   r   r   r   r8   )	rz   r   r   r   r   output_statesr   r   downsamplerrM   rM   rR   r   _  s   






zAttnDownBlock2D.forward)r    r   r   r   r   r   Tr   r   r   rB   )NNN)r   r   r   r   r   r   r   rs   r   r   r   r   r   r   r   r   rM   rM   r{   rR   rA     sn    	
^rA   c                +       s  e Zd Z															
	
	
	
	d.dedededededeeee f dededededededededededededededef* fd d!Z		"	"	"	"	"	"d/d#e
jd$ee
j d%ee
j d&ee
j d'eeeef  d(ee
j d)ee
j d*ee
jee
jd+f f fd,d-Z  ZS )0rC   r    r   r   r   r   r   Tr   r   Fr#   r$   r%   r9   r"   r)   r'   r2   r(   r+   r   r*   r,   r@   r-   r&   r.   r/   r0   r1   r3   c                    s
  t    g }g }d| _|| _t|tr|g| }t|D ]B}|dkr%|n|}|t|||||
|||	||d
 |sP|t	||| ||| ||
||||d
 q|t
||| |d||
d qt|| _t|| _|r}tt|d||ddg| _nd | _d	| _d S )
NTr   r   r#   r"   r,   r   r/   r0   r1   r3   r   r   r   r   F)rr   rs   r   r*   r   r   r   r   r   r   r   r   r   r   r   r   r   r   )rz   r#   r$   r%   r9   r"   r)   r'   r2   r(   r+   r   r*   r,   r@   r-   r&   r.   r/   r0   r1   r3   r   r   r   r{   rM   rR   rs   ~  sv   





zCrossAttnDownBlock2D.__init__Nr   r   r   r   r   r   additional_residualsr`   .c              	   C   s.  |d ur| dd d urtd d}tt| j| j}	t|	D ]^\}
\}}| jrX| j	rXddd}t
ddr:dd	ini }tjjj||||fi |}||||||d	d
d }n|||}||||||d	d
d }|
t|	d kry|d ury|| }||f }q | jd ur| jD ]}||}q||f }||fS )Nr   r   rM   c                    r   )Nc                     r   r   rM   r   r   rM   rR   r     r   zSCrossAttnDownBlock2D.forward.<locals>.create_custom_forward.<locals>.custom_forwardrM   r   rM   r   rR   r     r   z;CrossAttnDownBlock2D.forward.<locals>.create_custom_forwardr   r   r   Fr   r   r   r~   )r   rN   rO   listr   r   r   	enumerater   r   r
   r   r   r   lenr   )rz   r   r   r   r   r   r   r   r   blocksr   r   r   r   r   r   rM   rM   rR   r     sZ   


	
	




zCrossAttnDownBlock2D.forward)r    r   r   r   r   r   r   Tr   r   r   r   TFFFFr   NNNNNN)r   r   r   r   r   r   r   r   r   rs   r   r   r   r   r   r   r   rM   rM   r{   rR   rC   }  s    	
^	rC   c                       s   e Zd Z										dd	ed
edededededededededededef fddZ	d dej	de
ej	 deej	eej	df f fddZ  ZS )!r=   r    r   r   r   r   r   Tr   r#   r$   r%   r9   r"   r'   r2   r(   r+   r   r@   r&   r-   c                    s   t    g }t|D ]}|dkr|n|}|t|||||	|||||
d
 qt|| _|r>tt|d||ddg| _	nd | _	d| _
d S )Nr   r   Tr   r   F)rr   rs   r   r   r   r   r   r   r   r   r   )rz   r#   r$   r%   r9   r"   r'   r2   r(   r+   r   r@   r&   r-   r   r   r{   rM   rR   rs   !  s8   


zDownBlock2D.__init__Nr   r   r`   .c           
      O   s   t |dks|dd d urd}tdd| d}| jD ]5}| jrF| jrFdd }tdd	r:tjj	j	||||d
d}ntjj		||||}n|||}||f }q| j
d ure| j
D ]}	|	|}qY||f }||fS )Nr   r   The `scale` argument is deprecated and will be ignored. Please remove it, as passing it will raise an error in the future. `scale` should directly be passed while calling the underlying pipeline component i.e., via `cross_attention_kwargs`.1.0.0rM   c                        fdd}|S )Nc                         |  S r~   rM   r   r   rM   rR   r   a     zJDownBlock2D.forward.<locals>.create_custom_forward.<locals>.custom_forwardrM   r   r   rM   r   rR   r   `     z2DownBlock2D.forward.<locals>.create_custom_forwardr   r   Fr   r   r   r	   r   r   r   r
   r   r   r   r   
rz   r   r   argskwargsdeprecation_messager   r   r   r   rM   rM   rR   r   T  s*   







zDownBlock2D.forward
r    r   r   r   r   r   Tr   Tr   r~   r   r   r   r   r   r   r   rs   r   r   r   r   r   r   rM   rM   r{   rR   r=      s\    	
4r=   c                       sz   e Zd Z										dd	ed
ededededededededededef fddZdej	dej	fddZ
  ZS )rG   r    r   r   r   r   r   Tr   r#   r$   r9   r"   r'   r2   r(   r+   r   r@   r&   r-   c                    s   t    g }t|D ]0}|dkr|n|}|dkr*|t||d |||d||
d	 q|t||d ||||||
|	d
 qt|| _|rTtt	|d||ddg| _
d S d | _
d S )Nr   r   r   r   Tr   r   )rr   rs   r   r   r   r   r   r   r   r   r   )rz   r#   r$   r9   r"   r'   r2   r(   r+   r   r@   r&   r-   r   r   r{   rM   rR   rs   }  sR   



zDownEncoderBlock2D.__init__r   r`   c                 O   sf   t |dks|dd d urd}tdd| | jD ]}||d d}q| jd ur1| jD ]}||}q*|S Nr   r   r   r   r   )r   r   r	   r   r   )rz   r   r   r   r   r   r   rM   rM   rR   r     s   



zDownEncoderBlock2D.forwardr  r   r   r   r   r   r   r   rs   r   r   r   r   rM   rM   r{   rR   rG   |  sJ    	
?rG   c                       s   e Zd Z											dd	ed
edededededededededededef fddZdej	dej	fddZ
  ZS )rH   r    r   r   r   r   r   Tr   r#   r$   r9   r"   r'   r2   r(   r+   r   r7   r@   r&   r-   c                    s  t    g }g }|
d u rtd| d |}
t|D ]C}|dkr$|n|}|dkr;|t||d |||d||d	 n|t||d |||||||	d
 |t|||
 |
|||ddddd
 qt	
|| _t	
|| _|r~t	
t|d||d	d
g| _d S d | _d S )Nr   r:   r   r   r   r   Tr   r   r   )rr   rs   rN   rO   r   r   r   r   r   r   r   r   r   r   r   )rz   r#   r$   r9   r"   r'   r2   r(   r+   r   r7   r@   r&   r-   r   r   r   r{   rM   rR   rs     s|   




zAttnDownEncoderBlock2D.__init__r   r`   c                 O   sz   t |dks|dd d urd}tdd| t| j| jD ]\}}||d d}||}q| jd ur;| jD ]}||}q4|S r  )r   r   r	   r   r   r   r   )rz   r   r   r   r   r   r   r   rM   rM   rR   r   "  s   



zAttnDownEncoderBlock2D.forward)r    r   r   r   r   r   Tr   r   Tr   r  rM   rM   r{   rR   rH     sP    	
VrH   c                       s   e Zd Zdddddddeddf	ded	ed
edededededededededef fddZ			dde
jdee
j dee
j dee
jee
jdf e
jf fddZ  ZS )rF   r    r   r   r   r   T       @r#   r$   r%   r9   r"   r'   r2   r(   r   r7   r@   r&   c                    s6  t    tg | _tg | _|
d u r td| d |}
t|D ];}|dkr,|n|}| j	t
||||t|d dt|d d|||||	d | j	t|||
 |
||dddddd
 q$|rt
||||t|d d|||||	ddd	d
| _tt||dg| _tjd|ddd| _d S d | _d | _d | _d S )Nr   r:   r   r   r   r   Tr   firr#   r$   r%   r   r   r9   r   r   r@   r   use_in_shortcutr   kernelr$   r   r   r   ro   stride)rr   rs   r   r   r   r   rN   rO   r   r   r   r   r   resnet_downr   r   ru   	skip_conv)rz   r#   r$   r%   r9   r"   r'   r2   r(   r   r7   r@   r&   r   r{   rM   rR   rs   3  sx   


zAttnSkipDownBlock2D.__init__Nr   r   skip_sampler`   .c                 O   s   t |dks|dd d urd}tdd| d}t| j| jD ]\}}	|||}|	|}||f7 }q| jd urS| ||}| jD ]}
|
|}q@| || }||f7 }|||fS Nr   r   r   r   rM   )	r   r   r	   r   r   r   r   r  r  )rz   r   r   r  r   r   r   r   r   r   r   rM   rM   rR   r     s   





zAttnSkipDownBlock2D.forwardNNr   r   r   npsqrtr   r   r   r   rs   r   r   r   r   r   r   rM   rM   r{   rR   rF   2  s\    	
SrF   c                       s   e Zd Zddddddedddf	ded	ed
edededededededededef fddZ			dde
jdee
j dee
j dee
jee
jdf e
jf fddZ  ZS )rE   r    r   r   r   r   Tr  r#   r$   r%   r9   r"   r'   r2   r(   r   r@   r&   r-   c                    s   t    tg | _t|D ]'}|dkr|n|}| jt||||t|d dt|d d||||
|	d q|rgt||||t|d d||||
|	dddd| _	tt
||dg| _tjd	|d
d
d| _d S d | _	d | _d | _d S )Nr   r   r   r   Tr  r  r
  r   r  r  )rr   rs   r   r   r   r   r   r   r   r  r   r   ru   r  )rz   r#   r$   r%   r9   r"   r'   r2   r(   r   r@   r&   r-   r   r{   rM   rR   rs     sP   

zSkipDownBlock2D.__init__Nr   r   r  r`   .c           
      O   s   t |dks|dd d urd}tdd| d}| jD ]}|||}||f7 }q| jd urI| ||}| jD ]}	|	|}q6| || }||f7 }|||fS r  )r   r   r	   r   r   r  r  )
rz   r   r   r  r   r   r   r   r   r   rM   rM   rR   r     s   






zSkipDownBlock2D.forwardr  r  rM   rM   r{   rR   rE     s\    	
>rE   c                       s   e Zd Z											d d
ededededededededededededef fddZ	d!dej	de
ej	 deej	eej	df f fddZ  ZS )"r>   r    r   r   r   r   r   Tr   Fr#   r$   r%   r9   r"   r'   r2   r(   r+   r   r@   r&   r?   c                    s   t    g }t|D ]}|dkr|n|}|t|||||	|||||
|d qt|| _|rFtt|||||	|||||
|ddg| _nd | _d| _	d S )Nr   r   Tr#   r$   r%   r   r   r9   r   r   r@   r   r?   r   F)
rr   rs   r   r   r   r   r   r   r   r   )rz   r#   r$   r%   r9   r"   r'   r2   r(   r+   r   r@   r&   r?   r   r   r{   rM   rR   rs     sP   

z ResnetDownsampleBlock2D.__init__Nr   r   r`   .c           
      O   s   t |dks|dd d urd}tdd| d}| jD ]5}| jrF| jrFdd }tdd	r:tjj	j	||||d
d}ntjj		||||}n|||}||f }q| j
d urf| j
D ]}	|	||}qY||f }||fS )Nr   r   r   r   rM   c                    r   )Nc                     r   r~   rM   r   r   rM   rR   r   I  r   zVResnetDownsampleBlock2D.forward.<locals>.create_custom_forward.<locals>.custom_forwardrM   r   rM   r   rR   r   H  r   z>ResnetDownsampleBlock2D.forward.<locals>.create_custom_forwardr   r   Fr   r   r   rM   rM   rR   r   <  s*   






zResnetDownsampleBlock2D.forward)
r    r   r   r   r   r   Tr   TFr~   r  rM   rM   r{   rR   r>     s\    	
@r>   c                $       s   e Zd Z													
	
	d)dededededededededededededededededee f" fddZ					d*de	j
d ee	j
 d!ee	j
 d"ee	j
 d#eeeef  d$ee	j
 d%ee	j
ee	j
d&f f fd'd(Z  ZS )+rD   r    r   r   r   r   r   Tr   r   FNr#   r$   r%   r9   r"   r'   r2   r(   r+   r   r7   r,   r@   r&   r?   r0   r6   c                    s   t    d| _g }g }|| _|| j | _t|D ]:}|dkr!|n|}|t|||||	|||||
|d tt	dr=t
 nt }|t||| j|||	dd|||d qt|| _t|| _|rxtt|||||	|||||
|ddg| _nd | _d| _d S )NTr   r   r   r   r  F)rr   rs   r   r7   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   )rz   r#   r$   r%   r9   r"   r'   r2   r(   r+   r   r7   r,   r@   r&   r?   r0   r6   r   r   r   r   r{   rM   rR   rs   e  s|   

z#SimpleCrossAttnDownBlock2D.__init__r   r   r   r   r   r   r`   .c                 C   s  |d ur|ni }| dd d urtd d}|d u r$|d u r!d n|}n|}t| j| jD ];\}	}
| jrS| jrSddd}tj	j

||	||}|
|f||d|}n|	||}|
|f||d|}||f }q-| jd ur~| jD ]}|||}qq||f }||fS )Nr   r   rM   c                    r   )Nc                     r   r   rM   r   r   rM   rR   r     r   zYSimpleCrossAttnDownBlock2D.forward.<locals>.create_custom_forward.<locals>.custom_forwardrM   r   rM   r   rR   r     r   zASimpleCrossAttnDownBlock2D.forward.<locals>.create_custom_forwardr   r~   )r   rN   rO   r   r   r   r   r   r   r   r   r   )rz   r   r   r   r   r   r   r   r   r   r   r   r   rM   rM   rR   r     sD   	

	



z"SimpleCrossAttnDownBlock2D.forward)r    r   r   r   r   r   Tr   r   r   TFFNr   )r   r   r   r   r   r   r   r   rs   r   r   r   r   r   r   r   rM   rM   r{   rR   rD   d  s    	
arD   c                       s   e Zd Z						ddeded	ed
edededededef fddZ	ddej	de
ej	 deej	eej	df f fddZ  ZS )rI   r    r   h㈵>gelur   Fr#   r$   r%   r9   r"   r'   r(   resnet_group_sizer&   c
                    s   t    g }
t|D ]#}|dkr|n|}|| }|| }|
t||||||||ddd
 qt|
| _|	r@tt g| _	nd | _	d| _
d S )Nr   	ada_groupF
r#   r$   r9   r%   r   r   r   r   r   conv_shortcut_bias)rr   rs   r   r   r   r   r   r   r   r   r   )rz   r#   r$   r%   r9   r"   r'   r(   r  r&   r   r   r   r   r{   rM   rR   rs     s2   

zKDownBlock2D.__init__Nr   r   r`   .c           
      O   s   t |dks|dd d urd}tdd| d}| jD ]5}| jrF| jrFdd }tdd	r:tjj	j	||||d
d}ntjj		||||}n|||}||f7 }q| j
d ur`| j
D ]}	|	|}qY||fS )Nr   r   r   r   rM   c                    r   )Nc                     r   r~   rM   r   r   rM   rR   r   @  r   zKKDownBlock2D.forward.<locals>.create_custom_forward.<locals>.custom_forwardrM   r   rM   r   rR   r   ?  r   z3KDownBlock2D.forward.<locals>.create_custom_forwardr   r   Fr   r   r   rM   rM   rR   r   3  s(   






zKDownBlock2D.forward)r    r   r  r  r   Fr~   r  rM   rM   r{   rR   rI     sD    	
.rI   c                       s   e Zd Z								d"d	ed
ededededededededededef fddZ					d#dej	de
ej	 de
ej	 de
ej	 de
eeef  de
ej	 deej	eej	df f fd d!Z  ZS )$rJ   r    r   r   T@   Fr  r  r#   r$   r%   r,   r9   r"   r  r&   r7   rK   r'   r(   c                    s   t    g }g }d| _t|D ]5}|dkr|n|}|| }|| }|t||||||||ddd
 |t|||	 |	||d|
d|d	 qt|| _	t|| _
|r]tt g| _nd | _d| _d S )NTr   r  Fr  
layer_norm)r,   r%   attention_biasrK   r6   
group_size)rr   rs   r   r   r   r   KAttentionBlockr   r   r   r   r   r   r   )rz   r#   r$   r%   r,   r9   r"   r  r&   r7   rK   r'   r(   r   r   r   r   r   r{   rM   rR   rs   Z  sR   

zKCrossAttnDownBlock2D.__init__Nr   r   r   r   r   r   r`   .c              	   C   s  |d ur|ni }| dd d urtd d}t| j| jD ]R\}}	| jrR| jrRddd}
tddr6dd	ini }t	j
jj|
|||fi |}|	||||||d
}n|||}|	||||||d
}| jd u rk|d7 }q||f7 }q| jd ur| jD ]}||}qy||fS )Nr   r   rM   c                    r   )Nc                     r   r   rM   r   r   rM   rR   r     r   zTKCrossAttnDownBlock2D.forward.<locals>.create_custom_forward.<locals>.custom_forwardrM   r   rM   r   rR   r     r   z<KCrossAttnDownBlock2D.forward.<locals>.create_custom_forwardr   r   r   Fr   embr   r   r   r~   )r   rN   rO   r   r   r   r   r   r
   r   r   r   r   )rz   r   r   r   r   r   r   r   r   r   r   r   r   rM   rM   rR   r     sN   	

	
	
	



zKCrossAttnDownBlock2D.forward)r    r   r   Tr  Fr  r  r   )r   r   r   r   r   r   r   rs   r   r   r   r   r   r   r   r   rM   rM   r{   rR   rJ   Y  sl    	
CrJ   c                       s   e Zd Z												
d$dededededededededededededededef fddZ		d%dej	de
ej	df deej	 d ee d!ej	f
d"d#Z  ZS )&re   Nr    r   r   r   r   r   Tr   rB   r#   r\   r$   r%   r^   r9   r"   r'   r2   r(   r+   r   r7   r@   r_   c                    s0  t    g }g }|| _|d u rtd| d |}t|D ]:}||d kr)|n|}|dkr1|n|}|t|| ||||||	|
||d
 |t||| ||||ddddd
 qt	
|| _t	
|| _|dkrwt	
t|d|d	g| _n|d
krt	
t|||||||	|
||ddg| _nd | _|| _d S )Nr   r:   r   r   r   Tr   rB   r   r$   r   )r#   r$   r%   r   r   r9   r   r   r@   r   up)rr   rs   r_   rN   rO   r   r   r   r   r   r   r   r   r   
upsamplersr^   )rz   r#   r\   r$   r%   r^   r9   r"   r'   r2   r(   r+   r   r7   r@   r_   r   r   r   res_skip_channelsresnet_in_channelsr{   rM   rR   rs     s~   


zAttnUpBlock2D.__init__r   res_hidden_states_tuple.r   r   r`   c                 O   s   t |dks|dd d urd}tdd| t| j| jD ] \}}	|d }
|d d }tj||
gdd}|||}|	|}q| jd urY| jD ]}| j	dkrT|||d	}qF||}qF|S )
Nr   r   r   r   r   dimr   r   )
r   r   r	   r   r   r   r   catr&  r_   )rz   r   r)  r   r   r   r   r   r   r   res_hidden_states	upsamplerrM   rM   rR   r   7	  s   	





zAttnUpBlock2D.forward)Nr    r   r   r   r   r   Tr   r   rB   r  )r   r   r   r   r   r   r   rs   r   r   r   r   r   r   rM   rM   r{   rR   re     sp    	
_re   c                -       s  e Zd Z													
						d0dededededee dededeeee f dedededede	dededede	de	de	de	d e	d!ef, fd"d#Z
						d1d$ejd%eejd&f d'eej d(eej d)eeeef  d*ee d+eej d,eej d-ejfd.d/Z  ZS )2rc   Nr    r   r   r   r   r   Tr   r   Fr#   r$   r\   r%   r^   r9   r"   r)   r'   r2   r(   r+   r   r*   r,   r@   r]   r.   r/   r0   r1   r3   c                    s$  t    g }g }d| _|| _t|tr|g| }t|D ]N}||d kr'|n|}|dkr/|n|}|t|| |||	|||
|||d
 |s\|t	||| ||| ||||||d
 q|t
||| |d||d qt|| _t|| _|rtt|d|dg| _nd | _d| _|| _d S )	NTr   r   r   r   r   r$  F)rr   rs   r   r*   r   r   r   r   r   r   r   r   r   r   r   r   r&  r   r^   )rz   r#   r$   r\   r%   r^   r9   r"   r)   r'   r2   r(   r+   r   r*   r,   r@   r]   r.   r/   r0   r1   r3   r   r   r   r'  r(  r{   rM   rR   rs   X	  sp   




zCrossAttnUpBlock2D.__init__r   r)  .r   r   r   r   r   r   r`   c	              
   C   sb  |d ur| dd d urtd t| dd o(t| dd o(t| dd o(t| dd }	t| j| jD ]n\}
}|d }|d d }|	rRt| j||| j	| j
| j| jd\}}tj||gd	d
}| jr| jrddd}tddroddini }tjjj||
||fi |}||||||ddd }q0|
||}||||||ddd }q0| jd ur| jD ]}|||}q|S )Nr   r   s1s2b1b2r*  r0  r1  r2  r3  r   r+  c                    r   )Nc                     r   r   rM   r   r   rM   rR   r   	  r   zQCrossAttnUpBlock2D.forward.<locals>.create_custom_forward.<locals>.custom_forwardrM   r   rM   r   rR   r   	  r   z9CrossAttnUpBlock2D.forward.<locals>.create_custom_forwardr   r   r   Fr   r   r~   )r   rN   rO   getattrr   r   r   r   r^   r0  r1  r2  r3  r   r-  r   r   r
   r   r   r&  )rz   r   r)  r   r   r   r   r   r   is_freeu_enabledr   r   r.  r   r   r/  rM   rM   rR   r   	  sv   






	
	
	
zCrossAttnUpBlock2D.forward)Nr    r   r   r   r   r   r   Tr   r   r   TFFFFr   r   r   rM   rM   r{   rR   rc   W	  s    	
^	
rc   c                       s   e Zd Z											d"d
ededededee dededededededededef fddZ		d#de	j
dee	j
df dee	j
 dee de	j
f
d d!Z  ZS )$ra   Nr    r   r   r   r   r   Tr   r#   r\   r$   r%   r^   r9   r"   r'   r2   r(   r+   r   r@   r]   c                    s   t    g }t|D ]'}||d kr|n|}|dkr|n|}|t|| ||||||	|
||d
 qt|| _|rHtt|d|dg| _	nd | _	d| _
|| _d S )Nr   r   r   Tr$  F)rr   rs   r   r   r   r   r   r   r   r&  r   r^   )rz   r#   r\   r$   r%   r^   r9   r"   r'   r2   r(   r+   r   r@   r]   r   r   r'  r(  r{   rM   rR   rs   
  s2   

zUpBlock2D.__init__r   r)  .r   r   r`   c              
   O   s6  t |dks|dd d urd}tdd| t| dd o-t| dd o-t| dd o-t| dd }| jD ]W}	|d	 }
|d d	 }|rQt| j||
| j| j| j	| j
d
\}}
tj||
gdd}| jr| jrdd }tddrwtjjj||	||dd}q1tjj||	||}q1|	||}q1| jd ur| jD ]}|||}q|S )Nr   r   r   r   r0  r1  r2  r3  r*  r4  r   r+  c                    r   )Nc                     r   r~   rM   r   r   rM   rR   r   c
  r   zHUpBlock2D.forward.<locals>.create_custom_forward.<locals>.custom_forwardrM   r   rM   r   rR   r   b
  r   z0UpBlock2D.forward.<locals>.create_custom_forwardr   r   Fr   )r   r   r	   r5  r   r   r^   r0  r1  r2  r3  r   r-  r   r   r
   r   r   r&  )rz   r   r)  r   r   r   r   r   r6  r   r.  r   r/  rM   rM   rR   r   9
  sL   	









zUpBlock2D.forward)
Nr    r   r   r   r   r   Tr   Tr  r   r   r   r   r   r   r   r   rs   r   r   r   r   r   rM   rM   r{   rR   ra   
  sj    	
5ra   c                       s   e Zd Z												dd
ededee dedededededededededee f fddZdde	j
dee	j
 de	j
fddZ  ZS ) rh   Nr    r   r   r   r   r   Tr   r#   r$   r^   r9   r"   r'   r2   r(   r+   r   r@   r]   r%   c                    s   t    g }t|D ]0}|dkr|n|}|dkr*|t|||||	|d||d	 q|t|||||	|||||
d
 qt|| _|rQtt	|d|dg| _
nd | _
|| _d S )Nr   r   r   r   Tr$  )rr   rs   r   r   r   r   r   r   r   r   r&  r^   )rz   r#   r$   r^   r9   r"   r'   r2   r(   r+   r   r@   r]   r%   r   r   input_channelsr{   rM   rR   rs   {
  sJ   

zUpDecoderBlock2D.__init__r   r   r`   c                 C   s:   | j D ]}|||d}q| jd ur| jD ]}||}q|S Nr   )r   r&  )rz   r   r   r   r/  rM   rM   rR   r   
  s   



zUpDecoderBlock2D.forward)Nr    r   r   r   r   r   Tr   TNr~   r   r   r   r   r   r   r   r   rs   r   r   r   r   rM   rM   r{   rR   rh   z
  sP    	
*=rh   c                       s   e Zd Z													dd
ededee dededededededededededee f fddZd de	j
dee	j
 de	j
fddZ  ZS )!ri   Nr    r   r   r   r   r   Tr   r#   r$   r^   r9   r"   r'   r2   r(   r+   r   r7   r@   r]   r%   c                    s   t    g }g }|d u rtd| d |}t|D ]P}|dkr$|n|}|dkr;|t|||||	|d||d	 n|t|||||	|||||
d
 |t||| ||||dkr\|	nd |dkrc|nd ddddd qt	
|| _t	
|| _|rt	
t|d|d	g| _nd | _|| _d S )
NjIt is not recommend to pass `attention_head_dim=None`. Defaulting `attention_head_dim` to `out_channels`: r:   r   r   r   r   Tr   r$  )rr   rs   rN   rO   r   r   r   r   r   r   r   r   r   r   r&  r^   )rz   r#   r$   r^   r9   r"   r'   r2   r(   r+   r   r7   r@   r]   r%   r   r   r   r8  r{   rM   rR   rs   
  sv   


zAttnUpDecoderBlock2D.__init__r   r   r`   c                 C   sR   t | j| jD ]\}}|||d}|||d}q| jd ur'| jD ]}||}q |S r9  )r   r   r   r&  )rz   r   r   r   r   r/  rM   rM   rR   r     s   


zAttnUpDecoderBlock2D.forward)Nr    r   r   r   r   r   Tr   r   TNr~   r:  rM   rM   r{   rR   ri   
  sV    	
*Vri   c                       s   e Zd Zddddddddeddf
d	ed
edededee dededededede	dedede	f fddZ
		d dejdeejdf deej deejejf fddZ  ZS )!rg   Nr    r   r   r   r   Tr  r#   r\   r$   r%   r^   r9   r"   r'   r2   r(   r   r7   r@   r]   c                    s  t    tg | _tg | _t|D ]5}||d kr|n|}|dkr'|n|}| jt|| |||t	||d  dt	|d d||	|
||d q|d u rZt
d| d |}| jt||| |||dddddd	
 t||d
| _|rt||||t	|d dt	|d d||	|
||dddd| _tj|ddddd| _tjjt	|d d||dd| _t | _nd | _d | _d | _d | _|| _d S )Nr   r   r   r   r   r;  r:   Tr   r
  r  r#   r$   r%   r   r   r   r9   r   r   r@   r   r  r%  r	  r   r   r   r  ro   r  rp   
num_groupsnum_channelsr   affine)rr   rs   r   r   r   r   r   r   r   r   rN   rO   r   r   r/  	resnet_upru   r  r   	GroupNorm	skip_normSiLUactr^   )rz   r#   r\   r$   r%   r^   r9   r"   r'   r2   r(   r   r7   r@   r]   r   r'  r(  r{   rM   rR   rs   '  s   


zAttnSkipUpBlock2D.__init__r   r)  .r   r`   c                 O   s   t |dks|dd d urd}tdd| | jD ]}|d }	|d d }tj||	gdd}|||}q| jd |}|d urE| |}nd}| jd ure| 	|}
| 
|
}
| |
}
||
 }| ||}||fS Nr   r   r   r   r*  r   r+  )r   r   r	   r   r   r-  r   r/  rC  rE  rG  r  rz   r   r)  r   r  r   r   r   r   r.  skip_sample_statesrM   rM   rR   r     s&   	




zAttnSkipUpBlock2D.forwardr  r   r   r   r  r  r   r   r   r   r   rs   r   r   r   r   r   rM   rM   r{   rR   rg   &  sf    	
arg   c                       s   e Zd Zdddddddedddf
d	ed
edededee dededededede	dede	def fddZ
		d dejdeejdf deej deejejf fddZ  ZS )!rf   Nr    r   r   r   r   Tr  r#   r\   r$   r%   r^   r9   r"   r'   r2   r(   r   r@   r]   upsample_paddingc                    s:  t    tg | _t|D ]5}||d kr|n|}|dkr!|n|}| jt|| |||t|| d dt|d d||	|
||d qt	||d| _
|rt||||t|d dt|d d||	|
||dddd	| _tj|d
dddd| _tjjt|d d||dd| _t | _nd | _d | _d | _d | _|| _d S )Nr   r   r   r   r   r
  Tr  r<  r   r=  r  r>  r?  )rr   rs   r   r   r   r   r   r   r   r   r/  rC  ru   r  r   rD  rE  rF  rG  r^   )rz   r#   r\   r$   r%   r^   r9   r"   r'   r2   r(   r   r@   r]   rL  r   r'  r(  r{   rM   rR   rs     s`   

zSkipUpBlock2D.__init__r   r)  .r   r`   c                 O   s   t |dks|dd d urd}tdd| | jD ]}|d }	|d d }tj||	gdd}|||}q|d ur>| |}nd}| jd ur^| |}
| 	|
}
| 
|
}
||
 }| ||}||fS rH  )r   r   r	   r   r   r-  r/  rC  rE  rG  r  rI  rM   rM   rR   r     s$   	




zSkipUpBlock2D.forwardr  rK  rM   rM   r{   rR   rf     sf    	
Krf   c                       s   e Zd Z												
d$dededededee dedededededededededef fddZ		d%de	j
dee	j
df dee	j
 d ee d!e	j
f
d"d#Z  ZS )&rb   Nr    r   r   r   r   r   Tr   Fr#   r\   r$   r%   r^   r9   r"   r'   r2   r(   r+   r   r@   r]   r?   c                    s   t    g }t|D ](}||d kr|n|}|dkr|n|}|t|| ||||||	|
|||d qt|| _|rRtt|||||||	|
|||ddg| _nd | _d| _	|| _
d S )Nr   r   r   Tr#   r$   r%   r   r   r9   r   r   r@   r   r?   r%  F)rr   rs   r   r   r   r   r   r   r&  r   r^   )rz   r#   r\   r$   r%   r^   r9   r"   r'   r2   r(   r+   r   r@   r]   r?   r   r   r'  r(  r{   rM   rR   rs     sT   

zResnetUpsampleBlock2D.__init__r   r)  .r   r   r`   c                 O   s   t |dks|dd d urd}tdd| | jD ]C}|d }	|d d }tj||	gdd}| jrW| jrWdd	 }
td
drKtj	j
j
|
|||dd}qtj	j

|
|||}q|||}q| jd urm| jD ]}|||}qe|S )Nr   r   r   r   r*  r   r+  c                    r   )Nc                     r   r~   rM   r   r   rM   rR   r   u  r   zTResnetUpsampleBlock2D.forward.<locals>.create_custom_forward.<locals>.custom_forwardrM   r   rM   r   rR   r   t  r   z<ResnetUpsampleBlock2D.forward.<locals>.create_custom_forwardr   r   Fr   )r   r   r	   r   r   r-  r   r   r
   r   r   r&  )rz   r   r)  r   r   r   r   r   r   r.  r   r/  rM   rM   rR   r   _  s*   	




zResnetUpsampleBlock2D.forward)Nr    r   r   r   r   r   Tr   TFr  r7  rM   rM   r{   rR   rb     sp    	
Hrb   c                (       s  e Zd Z												
				d-dededededee dedededededededededededededee f& fdd Z						d.d!e	j
d"ee	j
d#f d$ee	j
 d%ee	j
 d&ee d'ee	j
 d(eeeef  d)ee	j
 d*e	j
fd+d,Z  ZS )/rd   Nr    r   r   r   r   r   Tr   r   Fr#   r$   r\   r%   r^   r9   r"   r'   r2   r(   r+   r   r7   r,   r@   r]   r?   r0   r6   c                    s   t    g }g }d| _|| _|| j | _t|D ]G}||d kr#|n|}|dkr+|n|}|t|| ||||||	|
|||d tt	drIt
 nt }|t||| j| j||dd|||d qt|| _t|| _|rtt|||||||	|
|||ddg| _nd | _d| _|| _d S )	NTr   r   r   r   r   rM  F)rr   rs   r   r7   r   r   r   r   r   r   r   r   r   r   r   r   r   r&  r   r^   )rz   r#   r$   r\   r%   r^   r9   r"   r'   r2   r(   r+   r   r7   r,   r@   r]   r?   r0   r6   r   r   r   r'  r(  r   r{   rM   rR   rs     s   

z!SimpleCrossAttnUpBlock2D.__init__r   r)  .r   r   r   r   r   r   r`   c	                 C   s  |d ur|ni }| dd d urtd |d u r"|d u rd n|}	n|}	t| j| jD ]I\}
}|d }|d d }tj||gdd}| jrd| j	rdd	dd}tj
j||
||}||f||	d|}q+|
||}||f||	d|}q+| jd ur| jD ]}|||}q}|S )
Nr   r   r*  r   r+  c                    r   )Nc                     r   r   rM   r   r   rM   rR   r     r   zWSimpleCrossAttnUpBlock2D.forward.<locals>.create_custom_forward.<locals>.custom_forwardrM   r   rM   r   rR   r     r   z?SimpleCrossAttnUpBlock2D.forward.<locals>.create_custom_forwardr   r~   )r   rN   rO   r   r   r   r   r-  r   r   r   r   r&  )rz   r   r)  r   r   r   r   r   r   r   r   r   r.  r   r/  rM   rM   rR   r     sD   

	


z SimpleCrossAttnUpBlock2D.forward)Nr    r   r   r   r   r   Tr   r   r   TFFNr   )r   r   r   r   r   r   r   r   rs   r   r   r   r   r   r   r   rM   rM   r{   rR   rd     s    	
f	
rd   c                       s   e Zd Z						ddeded	ed
edededededee def fddZ		dde	j
dee	j
df dee	j
 dee de	j
f
ddZ  ZS )rj   r       r  r  r   Tr#   r$   r%   r^   r9   r"   r'   r(   r  r]   c                    s   t    g }d| }|}|d }t|D ]+}|dkr|n|}||	 }||	 }|t|||d kr3|n|||||||ddd
 qt|| _|
rRtt g| _	nd | _	d| _
|| _d S )Nr   r   r   r  F)
r#   r$   r%   r   r   r   r9   r   r   r  )rr   rs   r   r   r   r   r   r   r   r&  r   r^   )rz   r#   r$   r%   r^   r9   r"   r'   r(   r  r]   r   k_in_channelsk_out_channelsr   r   r   r{   rM   rR   rs   4  s:   

zKUpBlock2D.__init__Nr   r)  .r   r   r`   c                 O   s   t |dks|dd d urd}tdd| |d }|d ur'tj||gdd}| jD ]0}| jrU| jrUdd	 }	td
drItj	j
j
|	|||dd}q*tj	j

|	|||}q*|||}q*| jd urj| jD ]}
|
|}qc|S )Nr   r   r   r   r*  r   r+  c                    r   )Nc                     r   r~   rM   r   r   rM   rR   r   z  r   zIKUpBlock2D.forward.<locals>.create_custom_forward.<locals>.custom_forwardrM   r   rM   r   rR   r   y  r   z1KUpBlock2D.forward.<locals>.create_custom_forwardr   r   Fr   )r   r   r	   r   r-  r   r   r   r
   r   r   r&  )rz   r   r)  r   r   r   r   r   r   r   r/  rM   rM   rR   r   e  s*   	





zKUpBlock2D.forward)r    rN  r  r  r   Tr  )r   r   r   r   r   r   r   r   rs   r   r   r   r   r   rM   rM   r{   rR   rj   3  sR    	
5rj   c                       s   e Zd Z										d&d
ededededededededededededef fddZ						d'dej	de
ej	df deej	 deej	 deeeef  d ee d!eej	 d"eej	 d#ej	fd$d%Z  ZS )(rk   r    r   r  r  r   r      TFr#   r$   r%   r^   r9   r"   r'   r(   r  r7   r,   r]   r1   c                    s^  t    g }g }||  ko|kn  }||k}|rdnd}d| _|
| _|r)|nd| }|}|d }t|D ]U}|dkr?|n|}||	 }||	 }|rT||d krT|}nd }|t|||||||||ddd |t||d krs|n|||d kr~||
 n||
 |
||d|d|d		 q7t	|| _
t	|| _|rt	t g| _nd | _d| _|| _d S )
NTFr   r   r   r  )r#   r$   conv_2d_out_channelsr%   r   r   r   r9   r   r   r  r  )r,   r%   r  rK   r6   r1   )rr   rs   r   r7   r   r   r   r!  r   r   r   r   r   r&  r   r^   )rz   r#   r$   r%   r^   r9   r"   r'   r(   r  r7   r,   r]   r1   r   r   is_first_blockis_middle_blockrK   rO  rP  r   r   r   rR  r{   rM   rR   rs     sn   

zKCrossAttnUpBlock2D.__init__Nr   r)  .r   r   r   r   r   r   r`   c	              	   C   s   |d }|d urt j||gdd}t| j| jD ]C\}	}
| jrL| jrLddd}tddr0dd	ini }t jj	j	||	||fi |}|
||||||d
}q|	||}|
||||||d
}q| j
d urk| j
D ]}||}qd|S )Nr*  r   r+  c                    r   )Nc                     r   r   rM   r   r   rM   rR   r     r   zRKCrossAttnUpBlock2D.forward.<locals>.create_custom_forward.<locals>.custom_forwardrM   r   rM   r   rR   r     r   z:KCrossAttnUpBlock2D.forward.<locals>.create_custom_forwardr   r   r   Fr"  r~   )r   r-  r   r   r   r   r   r
   r   r   r&  )rz   r   r)  r   r   r   r   r   r   r   r   r   r   r/  rM   rM   rR   r     sF   
	
	
	

zKCrossAttnUpBlock2D.forward)	r    r   r  r  r   r   rQ  TFr   )r   r   r   r   r   r   r   rs   r   r   r   r   r   r   r   r   rM   rM   r{   rR   rk     s|    	
Y	
rk   c                       s   e Zd ZdZ								d#deded	ed
edee dededededee def fddZ	de
jdedede
jfddZde
jdedede
jfddZ					d$de
jdee
j dee
j dee
j deeeef  d ee
j de
jfd!d"Z  ZS )%r!  aN  
    A basic Transformer block.

    Parameters:
        dim (`int`): The number of channels in the input and output.
        num_attention_heads (`int`): The number of heads to use for multi-head attention.
        attention_head_dim (`int`): The number of channels in each head.
        dropout (`float`, *optional*, defaults to 0.0): The dropout probability to use.
        cross_attention_dim (`int`, *optional*): The size of the encoder_hidden_states vector for cross attention.
        attention_bias (`bool`, *optional*, defaults to `False`):
            Configure if the attention layers should contain a bias parameter.
        upcast_attention (`bool`, *optional*, defaults to `False`):
            Set to `True` to upcast the attention computation to `float32`.
        temb_channels (`int`, *optional*, defaults to 768):
            The number of channels in the token embedding.
        add_self_attention (`bool`, *optional*, defaults to `False`):
            Set to `True` to add self-attention to the block.
        cross_attention_norm (`str`, *optional*, defaults to `None`):
            The type of normalization to use for the cross attention. Can be `None`, `layer_norm`, or `group_norm`.
        group_size (`int`, *optional*, defaults to 32):
            The number of groups to separate the channels into for group normalization.
    r    NFrQ  r   r,  r*   r7   r9   r,   r  r1   r%   rK   r6   r   c              
      sz   t    |	| _|	r"t||td|| | _t|||||d d d| _t||td|| | _t||||||||
d| _	d S )Nr   )r   r   r   r9   rq   r,   r6   )r   r,   r   r   r9   rq   r1   r6   )
rr   rs   rK   r   maxnorm1r   attn1norm2attn2)rz   r,  r*   r7   r9   r,   r  r1   r%   rK   r6   r   r{   rM   rR   rs   <  s0   
zKAttentionBlock.__init__r   heightweightr`   c                 C   s$   | dddd|jd || dS )Nr   r   r   r   r*  permutereshapeshaperz   r   rZ  r[  rM   rM   rR   _to_3dg  s   $zKAttentionBlock._to_3dc                 C   s    | ddd|jd d||S )Nr   r   r   r*  r\  r`  rM   rM   rR   _to_4dj  s    zKAttentionBlock._to_4dr   r#  r   r   r   c                 C   s   |d ur|ni }| dd d urtd | jrE| ||}|jdd  \}}	| |||	}| j|fd |d|}
| |
||	}
|
| }| 	||}|jdd  \}}	| |||	}| j
|f||d u rf|n|d|}
| |
||	}
|
| }|S )Nr   r   r   r   )r   rN   rO   rK   rV  r_  ra  rW  rb  rX  rY  )rz   r   r   r#  r   r   r   norm_hidden_statesrZ  r[  attn_outputrM   rM   rR   r   m  s>   
zKAttentionBlock.forward)r    NFFrQ  FNr   r   )r   r   r   r   r   r   r   r   r   rs   r   r   ra  rb  r   r   r   r   rM   rM   r{   rR   r!  $  sn    	
+	
r!  )r   NNNNFFFFr   r   Fr   NNNr    )r   r   NNFFFFr   r   FNr   r    )Nr   NNNFFFFr   r   Fr   NNNr    )Ntypingr   r   r   r   r   numpyr  r   torch.nn.functionalr   
functionalr   r   r	   r
   r   utils.torch_utilsr   activationsr   attention_processorr   r   r   normalizationr   r   r   r   r   r   r   r   r   r    transformers.dual_transformer_2dr   transformers.transformer_2dr   
get_loggerr   rN   r   r   r   r   rS   rZ   Modulerl   rm   rX   rV   rW   rA   rC   r=   rG   rH   rF   rE   r>   rD   rI   rJ   re   rc   ra   rh   ri   rg   rf   rb   rd   rj   rk   r!  rM   rM   rM   rR   <module>   s  (

	

 Y	

U	

 b%  }z $\OgpZh "T | 1sIc nr (^ 