o
    Gi                     @  s,  d dl mZ d dlmZ d dlZd dlmZ ddlmZmZ ddl	m
Z
 dd	lmZ dd
lmZmZmZmZmZ ddlmZ ddlmZmZ ddlmZmZmZmZmZ eeZ G dd deZG dd deZG dd deZG dd deZG dd deZ													d_d`d=d>Z!														dadbdEdFZ"G dGdH dHej#Z$G dIdJ dJej#Z%G dKdL dLej#Z&G dMdN dNej#Z'G dOdP dPej#Z(G dQdR dRej#Z)G dSdT dTej#Z*G dUdV dVej#Z+G dWdX dXej#Z,G dYdZ dZej#Z-G d[d\ d\ej#Z.G d]d^ d^ej#Z/dS )c    )annotations)AnyN)nn   )	deprecatelogging)apply_freeu   )	Attention)Downsample2DResnetBlock2DSpatioTemporalResBlockTemporalConvLayer
Upsample2D)Transformer2DModel)TransformerSpatioTemporalModelTransformerTemporalModel   )CrossAttnDownBlockMotionCrossAttnUpBlockMotionDownBlockMotionUNetMidBlockCrossAttnMotionUpBlockMotionc                         e Zd Z fddZ  ZS )r   c                   &   d}t dd| t j|i | d S )NzImporting `DownBlockMotion` from `diffusers.models.unets.unet_3d_blocks` is deprecated and this will be removed in a future version. Please use `from diffusers.models.unets.unet_motion_model import DownBlockMotion` instead.r   1.0.0r   super__init__selfargskwargsdeprecation_message	__class__ Y/home/ubuntu/.local/lib/python3.10/site-packages/diffusers/models/unets/unet_3d_blocks.pyr   2      zDownBlockMotion.__init____name__
__module____qualname__r   __classcell__r&   r&   r$   r'   r   1       r   c                      r   )r   c                   r   )NzImporting `CrossAttnDownBlockMotion` from `diffusers.models.unets.unet_3d_blocks` is deprecated and this will be removed in a future version. Please use `from diffusers.models.unets.unet_motion_model import CrossAttnDownBlockMotion` instead.r   r   r   r   r$   r&   r'   r   9   r(   z!CrossAttnDownBlockMotion.__init__r)   r&   r&   r$   r'   r   8   r.   r   c                      r   )r   c                   r   )NzImporting `UpBlockMotion` from `diffusers.models.unets.unet_3d_blocks` is deprecated and this will be removed in a future version. Please use `from diffusers.models.unets.unet_motion_model import UpBlockMotion` instead.r   r   r   r   r$   r&   r'   r   @   r(   zUpBlockMotion.__init__r)   r&   r&   r$   r'   r   ?   r.   r   c                      r   )r   c                   r   )NzImporting `CrossAttnUpBlockMotion` from `diffusers.models.unets.unet_3d_blocks` is deprecated and this will be removed in a future version. Please use `from diffusers.models.unets.unet_motion_model import CrossAttnUpBlockMotion` instead.r   r   r   r   r$   r&   r'   r   G   r(   zCrossAttnUpBlockMotion.__init__r)   r&   r&   r$   r'   r   F   r.   r   c                      r   )r   c                   r   )NzImporting `UNetMidBlockCrossAttnMotion` from `diffusers.models.unets.unet_3d_blocks` is deprecated and this will be removed in a future version. Please use `from diffusers.models.unets.unet_motion_model import UNetMidBlockCrossAttnMotion` instead.r   r   r   r   r$   r&   r'   r   N   r(   z$UNetMidBlockCrossAttnMotion.__init__r)   r&   r&   r$   r'   r   M   r.   r   FTdefault               down_block_typestr
num_layersintin_channelsout_channelstemb_channelsadd_downsamplebool
resnet_epsfloatresnet_act_fnnum_attention_headsresnet_groups
int | Nonecross_attention_dimdownsample_paddingdual_cross_attentionuse_linear_projectiononly_cross_attentionupcast_attentionresnet_time_scale_shifttemporal_num_attention_headstemporal_max_seq_lengthtransformer_layers_per_blockint | tuple[int]%temporal_transformer_layers_per_blockdropoutreturng'DownBlock3D' | 'CrossAttnDownBlock3D' | 'DownBlockSpatioTemporal' | 'CrossAttnDownBlockSpatioTemporal'c                 C  s  | dkrt ||||||||	|||dS | dkrW|
d u rtdtdi d|d|d|d|d	|d
|d|d|	d|d|
d|d|d|d|d|d|d|S | dkrdt|||||dS | dkr||
d u rptdt|||||||
|dS t|  d)NDownBlock3D)r5   r7   r8   r9   r:   r<   r>   r@   rC   rH   rN   CrossAttnDownBlock3Dz>cross_attention_dim must be specified for CrossAttnDownBlock3Dr5   r7   r8   r9   r:   r<   r>   r@   rC   rB   r?   rD   rE   rF   rG   rH   rN   DownBlockSpatioTemporal)r5   r7   r8   r9   r:    CrossAttnDownBlockSpatioTemporalzJcross_attention_dim must be specified for CrossAttnDownBlockSpatioTemporal)r7   r8   r9   r5   rK   r:   rB   r?    does not exist.r&   )rQ   
ValueErrorrR   rS   rT   )r3   r5   r7   r8   r9   r:   r<   r>   r?   r@   rB   rC   rD   rE   rF   rG   rH   rI   rJ   rK   rM   rN   r&   r&   r'   get_down_blockT   s   	
rW   up_block_typeprev_output_channeladd_upsampleresolution_idxtemporal_cross_attention_dim_'UpBlock3D' | 'CrossAttnUpBlock3D' | 'UpBlockSpatioTemporal' | 'CrossAttnUpBlockSpatioTemporal'c                 C  s  | dkrt |||||||||||
|dS | dkr[|d u r tdtdi d|d|d|d|d	|d
|d|d|d|d|d|	d|d|d|d|d|d|
d|S | dkrjt||||||
|dS | dkr|d u rvtdt|||||||||	|
d
S t|  d)N	UpBlock3D)r5   r7   r8   rY   r9   rZ   r<   r>   r@   rH   r[   rN   CrossAttnUpBlock3Dz<cross_attention_dim must be specified for CrossAttnUpBlock3Dr5   r7   r8   rY   r9   rZ   r<   r>   r@   rB   r?   rD   rE   rF   rG   rH   r[   rN   UpBlockSpatioTemporal)r5   r7   r8   rY   r9   r[   rZ   CrossAttnUpBlockSpatioTemporalzHcross_attention_dim must be specified for CrossAttnUpBlockSpatioTemporal)
r7   r8   rY   r9   r5   rK   rZ   rB   r?   r[   rU   r&   )r^   rV   r_   r`   ra   )rX   r5   r7   r8   rY   r9   rZ   r<   r>   r?   r[   r@   rB   rD   rE   rF   rG   rH   rI   r\   rJ   rK   rM   rN   r&   r&   r'   get_up_block   s   	
	rb   c                      sP   e Zd Z												
		
d-d. fddZ	 	 	 		 d/d0d+d,Z  ZS )1UNetMidBlock3DCrossAttnr2   r   ư>r/   swishr1   T      ?   Fr7   r6   r9   rN   r=   r5   r<   rH   r4   r>   r@   resnet_pre_normr;   r?   output_scale_factorrB   rD   rE   rG   c                   s"  t    d| _|
| _|d ur|nt|d d}t||||||||||	d
g}t||d|dg}g }g }t|D ]>}|t	||
 |
|d||||d |t
||
 |
|d||d	 |t||||||||||	d
 |t||d|d q8t|| _t|| _t|| _t|| _d S )
NT   r1   
r7   r8   r9   epsgroupsrN   time_embedding_normnon_linearityri   pre_norm皙?rN   norm_num_groupsr   )r7   r5   rB   rs   rE   rG   r7   r5   rB   rs   )r   r   has_cross_attentionr?   minr   r   rangeappendr   r   r   
ModuleListresnets
temp_convs
attentionstemp_attentions)r    r7   r9   rN   r5   r<   rH   r>   r@   rh   r?   ri   rB   rD   rE   rG   rz   r{   r|   r}   _r$   r&   r'   r     s   

	z UNetMidBlock3DCrossAttn.__init__Nhidden_statestorch.Tensortembtorch.Tensor | Noneencoder_hidden_statesattention_mask
num_framescross_attention_kwargsdict[str, Any] | NonerO   c                 C  s   | j d ||}| jd ||d}t| j| j| j dd  | jdd  D ]%\}}}	}
||||ddd }||||ddd }|	||}|
||d}q$|S )Nr   r   r   Fr   r   return_dictr   r   r   )rz   r{   zipr|   r}   )r    r   r   r   r   r   r   attn	temp_attnresnet	temp_convr&   r&   r'   forwardp  s0   	 
zUNetMidBlock3DCrossAttn.forward)r2   r   rd   r/   re   r1   Tr   rf   rg   FTF)r7   r6   r9   r6   rN   r=   r5   r6   r<   r=   rH   r4   r>   r4   r@   r6   rh   r;   r?   r6   ri   r=   rB   r6   rD   r;   rE   r;   rG   r;   NNNr   N)r   r   r   r   r   r   r   r   r   r6   r   r   rO   r   r*   r+   r,   r   r   r-   r&   r&   r$   r'   rc   
  s*    hrc   c                      sV   e Zd Z														
	
	
	
d2d3 fd"d#Z	$	$	$		$d4d5d0d1Z  ZS )6rR   r2   r   rd   r/   re   r1   Trg   rf   Fr7   r6   r8   r9   rN   r=   r5   r<   rH   r4   r>   r@   rh   r;   r?   rB   ri   rC   r:   rD   rE   rF   rG   c                   s   t    g }g }g }g }d| _|| _t|D ]G}|dkr|n|}|t|||||	|||||
d
 |t||d|	d |t|| ||d||	|||d	 |t	|| ||d||	d qt
|| _t
|| _t
|| _t
|| _|rt
t|d||d	d
g| _nd | _d| _d S )NTr   rk   rq   rr   r   r7   r5   rB   rs   rE   rF   rG   rt   opuse_convr8   paddingnameF)r   r   ru   r?   rw   rx   r   r   r   r   r   ry   rz   r{   r|   r}   r   downsamplersgradient_checkpointing)r    r7   r8   r9   rN   r5   r<   rH   r>   r@   rh   r?   rB   ri   rC   r:   rD   rE   rF   rG   rz   r|   r}   r{   ir$   r&   r'   r     s   


zCrossAttnDownBlock3D.__init__Nr   r   r   r   r   r   r   r   dict[str, Any]rO   'torch.Tensor | tuple[torch.Tensor, ...]c                 C  s   d}t | j| j| j| jD ]*\}}	}
}|||}|	||d}|
|||ddd }||||ddd }||f7 }q| jd urL| jD ]}||}q@||f7 }||fS )Nr&   r   Fr   r   r   )r   rz   r{   r|   r}   r   )r    r   r   r   r   r   r   output_statesr   r   r   r   downsamplerr&   r&   r'   r     s8   





zCrossAttnDownBlock3D.forward)r2   r   rd   r/   re   r1   Tr   rg   rf   r   TFFFF)&r7   r6   r8   r6   r9   r6   rN   r=   r5   r6   r<   r=   rH   r4   r>   r4   r@   r6   rh   r;   r?   r6   rB   r6   ri   r=   rC   r6   r:   r;   rD   r;   rE   r;   rF   r;   rG   r;   r   )r   r   r   r   r   r   r   r   r   r6   r   r   rO   r   r   r&   r&   r$   r'   rR     s0    grR   c                      sD   e Zd Z										d&d' fddZ		d(d)d$d%Z  ZS )*rQ   r2   r   rd   r/   re   r1   Trf   r7   r6   r8   r9   rN   r=   r5   r<   rH   r4   r>   r@   rh   r;   ri   r:   rC   c                   s   t    g }g }t|D ]&}|dkr|n|}|t|||||	|||||
d
 |t||d|	d qt|| _t|| _	|rQtt
|d||ddg| _nd | _d| _d S )	Nr   rk   rq   rr   Tr   r   F)r   r   rw   rx   r   r   r   ry   rz   r{   r   r   r   )r    r7   r8   r9   rN   r5   r<   rH   r>   r@   rh   ri   r:   rC   rz   r{   r   r$   r&   r'   r     sT   
	
zDownBlock3D.__init__Nr   r   r   r   r   rO   r   c                 C  sl   d}t | j| jD ]\}}|||}|||d}||f7 }q	| jd ur2| jD ]}||}q&||f7 }||fS )Nr&   r   )r   rz   r{   r   )r    r   r   r   r   r   r   r   r&   r&   r'   r   `  s   




zDownBlock3D.forward)
r2   r   rd   r/   re   r1   Trf   Tr   )r7   r6   r8   r6   r9   r6   rN   r=   r5   r6   r<   r=   rH   r4   r>   r4   r@   r6   rh   r;   ri   r=   r:   r;   rC   r6   )Nr   )r   r   r   r   r   r6   rO   r   r   r&   r&   r$   r'   rQ     s    DrQ   c                      sX   e Zd Z													
	
	
	
	d6d7 fd%d&Z						d8d9d4d5Z  ZS ):r_   r2   r   rd   r/   re   r1   Trg   rf   FNr7   r6   r8   rY   r9   rN   r=   r5   r<   rH   r4   r>   r@   rh   r;   r?   rB   ri   rZ   rD   rE   rF   rG   r[   rA   c                   s:  t    g }g }g }g }d| _|| _t|D ]S}||d kr!|n|}|dkr)|n|}|t|| ||||
|||	||d
 |t||d|
d |t|| ||d||
|||d	 |t	|| ||d||
d qt
|| _t
|| _t
|| _t
|| _|rt
t|d|d	g| _nd | _d
| _|| _d S )NTr   r   rk   rq   rr   r   rt   r   r8   F)r   r   ru   r?   rw   rx   r   r   r   r   r   ry   rz   r{   r|   r}   r   
upsamplersr   r[   )r    r7   r8   rY   r9   rN   r5   r<   rH   r>   r@   rh   r?   rB   ri   rZ   rD   rE   rF   rG   r[   rz   r{   r|   r}   r   res_skip_channelsresnet_in_channelsr$   r&   r'   r   x  s   


zCrossAttnUpBlock3D.__init__r   r   res_hidden_states_tupletuple[torch.Tensor, ...]r   r   r   upsample_sizer   r   r   r   rO   c	              
   C  s  t | dd ot | dd ot | dd ot | dd }	t| j| j| j| jD ]L\}
}}}|d }|d d }|	rGt| j||| j| j	| j
| jd\}}tj||gdd}|
||}|||d	}||||d
dd }||||d
dd }q#| jd ur| jD ]}|||}qx|S )Ns1s2b1b2r   r   r   r   r   dimr   Fr   r   r   )getattrr   rz   r{   r|   r}   r   r[   r   r   r   r   torchcatr   )r    r   r   r   r   r   r   r   r   is_freeu_enabledr   r   r   r   res_hidden_states	upsamplerr&   r&   r'   r     sZ   







zCrossAttnUpBlock3D.forward)r2   r   rd   r/   re   r1   Tr   rg   rf   TFFFFN)(r7   r6   r8   r6   rY   r6   r9   r6   rN   r=   r5   r6   r<   r=   rH   r4   r>   r4   r@   r6   rh   r;   r?   r6   rB   r6   ri   r=   rZ   r;   rD   r;   rE   r;   rF   r;   rG   r;   r[   rA   )NNNNr   N)r   r   r   r   r   r   r   r   r   rA   r   r   r   r6   r   r   rO   r   r   r&   r&   r$   r'   r_   w  s2    br_   c                      sF   e Zd Z											d*d+ fddZ					d,d-d(d)Z  ZS ).r^   r2   r   rd   r/   re   r1   Trf   Nr7   r6   rY   r8   r9   rN   r=   r5   r<   rH   r4   r>   r@   rh   r;   ri   rZ   r[   rA   c                   s   t    g }g }t|D ]2}||d kr|n|}|dkr|n|}|t|| ||||
|||	||d
 |t||d|
d qt|| _t|| _	|r[tt
|d|dg| _nd | _d| _|| _d S )	Nr   r   rk   rq   rr   Tr   F)r   r   rw   rx   r   r   r   ry   rz   r{   r   r   r   r[   )r    r7   rY   r8   r9   rN   r5   r<   rH   r>   r@   rh   ri   rZ   r[   rz   r{   r   r   r   r$   r&   r'   r     sF   
	
zUpBlock3D.__init__r   r   r   r   r   r   r   r   rO   c              
   C  s   t | dd ot | dd ot | dd ot | dd }t| j| jD ]6\}}|d }	|d d }|rAt| j||	| j| j| j| j	d\}}	t
j||	gdd}|||}|||d	}q| jd urf| jD ]}
|
||}q^|S )
Nr   r   r   r   r   r   r   r   r   )r   r   rz   r{   r   r[   r   r   r   r   r   r   r   )r    r   r   r   r   r   r   r   r   r   r   r&   r&   r'   r   P  s6   	







zUpBlock3D.forward)
r2   r   rd   r/   re   r1   Trf   TN)r7   r6   rY   r6   r8   r6   r9   r6   rN   r=   r5   r6   r<   r=   rH   r4   r>   r4   r@   r6   rh   r;   ri   r=   rZ   r;   r[   rA   )NNr   )r   r   r   r   r   r   r   rA   r   r6   rO   r   r   r&   r&   r$   r'   r^     s     ?r^   c                      s0   e Zd Z			dd fddZdddZ  ZS )MidBlockTemporalDecoder   r   Fr7   r6   r8   attention_head_dimr5   rG   r;   c           
        s   t    g }g }t|D ]}|dkr|n|}	|t|	|d dddddd q|t||| |d|dddd	 t|| _t|| _	d S )
Nr   rd   h㈵>r2   learnedTr7   r8   r9   rl   temporal_epsmerge_factormerge_strategyswitch_spatial_to_temporal_mixr1   )	query_dimheadsdim_headrl   rG   rs   biasresidual_connection)
r   r   rw   rx   r   r
   r   ry   r|   rz   )
r    r7   r8   r   r5   rG   rz   r|   r   input_channelsr$   r&   r'   r   |  s>   
z MidBlockTemporalDecoder.__init__r   r   image_only_indicatorc                 C  sJ   | j d ||d}t| j dd  | jD ]\}}||}|||d}q|S )Nr   r   r   )rz   r   r|   )r    r   r   r   r   r&   r&   r'   r     s   zMidBlockTemporalDecoder.forward)r   r   F)
r7   r6   r8   r6   r   r6   r5   r6   rG   r;   )r   r   r   r   r   r&   r&   r$   r'   r   {  s    +r   c                      s.   e Zd Z		dd fd	d
ZdddZ  ZS )UpBlockTemporalDecoderr   Tr7   r6   r8   r5   rZ   r;   c                   s   t    g }t|D ]}|dkr|n|}|t||d dddddd qt|| _|r;tt|d|dg| _	d S d | _	d S )	Nr   rd   r   r2   r   Tr   r   )
r   r   rw   rx   r   r   ry   rz   r   r   )r    r7   r8   r5   rZ   rz   r   r   r$   r&   r'   r     s(   

zUpBlockTemporalDecoder.__init__r   r   r   rO   c                 C  s:   | j D ]}|||d}q| jd ur| jD ]}||}q|S )Nr   )rz   r   )r    r   r   r   r   r&   r&   r'   r     s   



zUpBlockTemporalDecoder.forwardr   T)r7   r6   r8   r6   r5   r6   rZ   r;   )r   r   r   r   rO   r   r   r&   r&   r$   r'   r     s
    r   c                      s:   e Zd Z				dd fddZ			ddddZ  ZS )UNetMidBlockSpatioTemporalr   rg   r7   r6   r9   r5   rK   rL   r?   rB   c           
   
     s   t    d| _|| _t|tr|g| }t|||ddg}g }t|D ]}	|t	||| |||	 |d |t|||dd q$t
|| _t
|| _d| _d S )NTr   r7   r8   r9   rl   r7   r5   rB   F)r   r   ru   r?   
isinstancer6   r   rw   rx   r   r   ry   r|   rz   r   )
r    r7   r9   r5   rK   r?   rB   rz   r|   r   r$   r&   r'   r     sD   
	


	
z#UNetMidBlockSpatioTemporal.__init__Nr   r   r   r   r   r   rO   c                 C  s   | j d |||d}t| j| j dd  D ]/\}}t r3| jr3||||ddd }| ||||}q||||ddd }||||d}q|S )Nr   r   r   Fr   r   r   )rz   r   r|   r   is_grad_enabledr   _gradient_checkpointing_func)r    r   r   r   r   r   r   r&   r&   r'   r   #  s4   z"UNetMidBlockSpatioTemporal.forward)r   r   r   rg   )r7   r6   r9   r6   r5   r6   rK   rL   r?   r6   rB   r6   NNN)
r   r   r   r   r   r   r   r   rO   r   r   r&   r&   r$   r'   r     s    9r   c                      s4   e Zd Z		dd fd
dZ		ddddZ  ZS )rS   r   Tr7   r6   r8   r9   r5   r:   r;   c              	     s~   t    g }t|D ]}|dkr|n|}|t|||dd qt|| _|r7tt|d|ddg| _	nd | _	d| _
d S )Nr   r   r   Tr   )r   r8   r   F)r   r   rw   rx   r   r   ry   rz   r   r   r   )r    r7   r8   r9   r5   r:   rz   r   r$   r&   r'   r   F  s2   
	
z DownBlockSpatioTemporal.__init__Nr   r   r   r   r   rO   -tuple[torch.Tensor, tuple[torch.Tensor, ...]]c                 C  sx   d}| j D ]}t r| jr| ||||}n||||d}||f }q| jd ur8| jD ]}||}q,||f }||fS )Nr&   r   )rz   r   r   r   r   r   )r    r   r   r   r   r   r   r&   r&   r'   r   n  s   




zDownBlockSpatioTemporal.forwardr   )
r7   r6   r8   r6   r9   r6   r5   r6   r:   r;   )NN)r   r   r   r   r   r   rO   r   r   r&   r&   r$   r'   rS   E  s    +rS   c                      s<   e Zd Z					dd fddZ			ddddZ  ZS ) rT   r   rg   Tr7   r6   r8   r9   r5   rK   rL   r?   rB   r:   r;   c	              
     s   t    g }	g }
d| _|| _t|tr|g| }t|D ]%}|dkr%|n|}|	t|||dd |
t	||| ||| |d qt
|
| _t
|	| _|r`t
t|d|dddg| _nd | _d	| _d S )
NTr   rd   r   r   r   r   r   F)r   r   ru   r?   r   r6   rw   rx   r   r   r   ry   r|   rz   r   r   r   )r    r7   r8   r9   r5   rK   r?   rB   r:   rz   r|   r   r$   r&   r'   r     sR   




z)CrossAttnDownBlockSpatioTemporal.__init__Nr   r   r   r   r   r   rO   r   c           
      C  s   d}t t| j| j}|D ]4\}}t r+| jr+| ||||}||||ddd }n||||d}||||ddd }||f }q| jd urV| jD ]}	|	|}qJ||f }||fS )Nr&   Fr   r   r   )	listr   rz   r|   r   r   r   r   r   )
r    r   r   r   r   r   blocksr   r   r   r&   r&   r'   r     s8   



z(CrossAttnDownBlockSpatioTemporal.forward)r   r   r   rg   T)r7   r6   r8   r6   r9   r6   r5   r6   rK   rL   r?   r6   rB   r6   r:   r;   r   )
r   r   r   r   r   r   r   r   rO   r   r   r&   r&   r$   r'   rT     s    ?rT   c                      s:   e Zd Z				dd fddZ			d d!ddZ  ZS )"r`   Nr   rd   Tr7   r6   rY   r8   r9   r[   rA   r5   r<   r=   rZ   r;   c	              	     s   t    g }	t|D ]!}
|
|d kr|n|}|
dkr|n|}|	t|| |||d qt|	| _|rBtt|d|dg| _	nd | _	d| _
|| _d S )Nr   r   r   Tr   F)r   r   rw   rx   r   r   ry   rz   r   r   r   r[   )r    r7   rY   r8   r9   r[   r5   r<   rZ   rz   r   r   r   r$   r&   r'   r     s&   
	
zUpBlockSpatioTemporal.__init__r   r   r   r   r   r   r   r   rO   c           	      C  s   | j D ],}|d }|d d }tj||gdd}t r(| jr(| ||||}q||||d}q| jd ur@| jD ]}|||}q8|S )Nr   r   r   r   )rz   r   r   r   r   r   r   )	r    r   r   r   r   r   r   r   r   r&   r&   r'   r     s   


zUpBlockSpatioTemporal.forward)Nr   rd   T)r7   r6   rY   r6   r8   r6   r9   r6   r[   rA   r5   r6   r<   r=   rZ   r;   r   )r   r   r   r   r   r   r   r   r   rA   rO   r   r   r&   r&   r$   r'   r`     s    )r`   c                      sB   e Zd Z							d$d% fddZ				d&d'd"d#Z  ZS )(ra   Nr   rd   rg   Tr7   r6   r8   rY   r9   r[   rA   r5   rK   rL   r<   r=   r?   rB   rZ   r;   c              
     s   t    g }g }d| _|	| _t|tr|g| }t|D ]1}||d kr'|n|}|dkr/|n|}|t|| |||d |t	|	||	 ||| |
d qt
|| _t
|| _|rjt
t|d|dg| _nd | _d| _|| _d S )NTr   r   r   r   r   F)r   r   ru   r?   r   r6   rw   rx   r   r   r   ry   r|   rz   r   r   r   r[   )r    r7   r8   rY   r9   r[   r5   rK   r<   r?   rB   rZ   rz   r|   r   r   r   r$   r&   r'   r   -  sD   




z'CrossAttnUpBlockSpatioTemporal.__init__r   r   r   r   r   r   r   r   r   rO   c                 C  s   t | j| jD ]B\}}|d }	|d d }tj||	gdd}t r8| jr8| ||||}||||ddd }q||||d}||||ddd }q| jd urZ| jD ]}
|
||}qR|S )Nr   r   r   Fr   r   r   )	r   rz   r|   r   r   r   r   r   r   )r    r   r   r   r   r   r   r   r   r   r   r&   r&   r'   r   f  s6   	

z&CrossAttnUpBlockSpatioTemporal.forward)Nr   r   rd   r   rg   T)r7   r6   r8   r6   rY   r6   r9   r6   r[   rA   r5   r6   rK   rL   r<   r=   r?   r6   rB   r6   rZ   r;   )NNNN)r   r   r   r   r   r   r   r   r   r   r   rA   rO   r   r   r&   r&   r$   r'   ra   ,  s    =ra   )NNNFTFFr/   r0   r1   r   r   r2   ).r3   r4   r5   r6   r7   r6   r8   r6   r9   r6   r:   r;   r<   r=   r>   r4   r?   r6   r@   rA   rB   rA   rC   rA   rD   r;   rE   r;   rF   r;   rG   r;   rH   r4   rI   r6   rJ   r6   rK   rL   rM   rL   rN   r=   rO   rP   )NNNFTFFr/   r0   Nr1   r   r   r2   )2rX   r4   r5   r6   r7   r6   r8   r6   rY   r6   r9   r6   rZ   r;   r<   r=   r>   r4   r?   r6   r[   rA   r@   rA   rB   rA   rD   r;   rE   r;   rF   r;   rG   r;   rH   r4   rI   r6   r\   rA   rJ   r6   rK   rL   rM   rL   rN   r=   rO   r]   )0
__future__r   typingr   r   r   utilsr   r   utils.torch_utilsr   	attentionr
   r   r   r   r   r   r   transformers.transformer_2dr   !transformers.transformer_temporalr   r   unet_motion_modelr   r   r   r   r   
get_loggerr*   loggerrW   rb   Modulerc   rR   rQ   r_   r^   r   r   r   rS   rT   r`   ra   r&   r&   r&   r'   <module>   sz   
	b_  Y g?2YAeA