o
    pi                  4   @   sH  d dl mZmZmZmZmZ d dlZd dlmZ ddlm	Z	m
Z
mZ ddlmZ ddlmZ dd	lmZmZmZmZmZ dd
lmZ ddlmZmZ ddlmZmZmZmZm Z  e!e"Z#G dd deZG dd deZG dd de Z G dd deZG dd deZ													dXde$de%d e%d!e%d"e%d#e&d$e'd%e$d&e%d'ee% d(ee% d)ee% d*e&d+e&d,e&d-e&d.e$d/e%d0e%d1ee%ee% f d2ee%ee% f d3e'd4ed5 f.d6d7Z(														dYd8e$de%d e%d!e%d9e%d"e%d:e&d$e'd%e$d&e%d;ee% d'ee% d(ee% d*e&d+e&d,e&d-e&d.e$d/e%d<ee% d0e%d1ee%ee% f d2ee%ee% f d3e'd4ed= f2d>d?Z)G d@dA dAej*Z+G dBdC dCej*Z,G dDdE dEej*Z-G dFdG dGej*Z.G dHdI dIej*Z/G dJdK dKej*Z0G dLdM dMej*Z1G dNdO dOej*Z2G dPdQ dQej*Z3G dRdS dSej*Z4G dTdU dUej*Z5G dVdW dWej*Z6dS )Z    )AnyDictOptionalTupleUnionN)nn   )	deprecateis_torch_versionlogging)apply_freeu   )	Attention)Downsample2DResnetBlock2DSpatioTemporalResBlockTemporalConvLayer
Upsample2D)Transformer2DModel)TransformerSpatioTemporalModelTransformerTemporalModel   )CrossAttnDownBlockMotionCrossAttnUpBlockMotionDownBlockMotionUNetMidBlockCrossAttnMotionUpBlockMotionc                          e Zd Z fddZ  ZS )r   c                    &   d}t dd| t j|i | d S )NzImporting `DownBlockMotion` from `diffusers.models.unets.unet_3d_blocks` is deprecated and this will be removed in a future version. Please use `from diffusers.models.unets.unet_motion_model import DownBlockMotion` instead.r   1.0.0r	   super__init__selfargskwargsdeprecation_message	__class__ c/home/ubuntu/SoloSpeech/.venv/lib/python3.10/site-packages/diffusers/models/unets/unet_3d_blocks.pyr"   0      zDownBlockMotion.__init____name__
__module____qualname__r"   __classcell__r*   r*   r(   r+   r   /       r   c                       r   )r   c                    r   )NzImporting `CrossAttnDownBlockMotion` from `diffusers.models.unets.unet_3d_blocks` is deprecated and this will be removed in a future version. Please use `from diffusers.models.unets.unet_motion_model import CrossAttnDownBlockMotion` instead.r   r   r    r#   r(   r*   r+   r"   7   r,   z!CrossAttnDownBlockMotion.__init__r-   r*   r*   r(   r+   r   6   r2   r   c                       r   )r   c                    r   )NzImporting `UpBlockMotion` from `diffusers.models.unets.unet_3d_blocks` is deprecated and this will be removed in a future version. Please use `from diffusers.models.unets.unet_motion_model import UpBlockMotion` instead.r   r   r    r#   r(   r*   r+   r"   >   r,   zUpBlockMotion.__init__r-   r*   r*   r(   r+   r   =   r2   r   c                       r   )r   c                    r   )NzImporting `CrossAttnUpBlockMotion` from `diffusers.models.unets.unet_3d_blocks` is deprecated and this will be removed in a future version. Please use `from diffusers.models.unets.unet_motion_model import CrossAttnUpBlockMotion` instead.r   r   r    r#   r(   r*   r+   r"   E   r,   zCrossAttnUpBlockMotion.__init__r-   r*   r*   r(   r+   r   D   r2   r   c                       r   )r   c                    r   )NzImporting `UNetMidBlockCrossAttnMotion` from `diffusers.models.unets.unet_3d_blocks` is deprecated and this will be removed in a future version. Please use `from diffusers.models.unets.unet_motion_model import UNetMidBlockCrossAttnMotion` instead.r   r   r    r#   r(   r*   r+   r"   L   r,   z$UNetMidBlockCrossAttnMotion.__init__r-   r*   r*   r(   r+   r   K   r2   r   FTdefault               down_block_type
num_layersin_channelsout_channelstemb_channelsadd_downsample
resnet_epsresnet_act_fnnum_attention_headsresnet_groupscross_attention_dimdownsample_paddingdual_cross_attentionuse_linear_projectiononly_cross_attentionupcast_attentionresnet_time_scale_shifttemporal_num_attention_headstemporal_max_seq_lengthtransformer_layers_per_block%temporal_transformer_layers_per_blockdropoutreturn)DownBlock3DCrossAttnDownBlock3DDownBlockSpatioTemporal CrossAttnDownBlockSpatioTemporalc                 C   s  | dkrt ||||||||	|||dS | dkrW|
d u rtdtdi d|d|d|d|d	|d
|d|d|	d|d|
d|d|d|d|d|d|d|S | dkrdt|||||dS | dkr||
d u rptdt|||||||
|dS t|  d)NrN   )r8   r9   r:   r;   r<   r=   r>   r@   rB   rG   rL   rO   z>cross_attention_dim must be specified for CrossAttnDownBlock3Dr8   r9   r:   r;   r<   r=   r>   r@   rB   rA   r?   rC   rD   rE   rF   rG   rL   rP   )r8   r9   r:   r;   r<   rQ   zJcross_attention_dim must be specified for CrossAttnDownBlockSpatioTemporal)r9   r:   r;   r8   rJ   r<   rA   r?    does not exist.r*   )rN   
ValueErrorrO   rP   rQ   )r7   r8   r9   r:   r;   r<   r=   r>   r?   r@   rA   rB   rC   rD   rE   rF   rG   rH   rI   rJ   rK   rL   r*   r*   r+   get_down_blockR   s   	
rT   up_block_typeprev_output_channeladd_upsampleresolution_idxtemporal_cross_attention_dim)	UpBlock3DCrossAttnUpBlock3DUpBlockSpatioTemporalCrossAttnUpBlockSpatioTemporalc                 C   s  | dkrt |||||||||||
|dS | dkr[|d u r tdtdi d|d|d|d|d	|d
|d|d|d|d|d|	d|d|d|d|d|d|
d|S | dkrjt||||||
|dS | dkr|d u rvtdt|||||||||	|
d
S t|  d)NrZ   )r8   r9   r:   rV   r;   rW   r=   r>   r@   rG   rX   rL   r[   z<cross_attention_dim must be specified for CrossAttnUpBlock3Dr8   r9   r:   rV   r;   rW   r=   r>   r@   rA   r?   rC   rD   rE   rF   rG   rX   rL   r\   )r8   r9   r:   rV   r;   rX   rW   r]   zHcross_attention_dim must be specified for CrossAttnUpBlockSpatioTemporal)
r9   r:   rV   r;   r8   rJ   rW   rA   r?   rX   rR   r*   )rZ   rS   r[   r\   r]   )rU   r8   r9   r:   rV   r;   rW   r=   r>   r?   rX   r@   rA   rC   rD   rE   rF   rG   rH   rY   rI   rJ   rK   rL   r*   r*   r+   get_up_block   s   	
	r^   c                       s   e Zd Z												
		
d&dededededededededededededededef fddZ					d'dej	de
ej	 de
ej	 d e
ej	 d!ed"e
eeef  d#ej	fd$d%Z  ZS )(UNetMidBlock3DCrossAttnr6   r   ư>r3   swishr5   T      ?   Fr9   r;   rL   r8   r=   rG   r>   r@   resnet_pre_normr?   output_scale_factorrA   rC   rD   rF   c                    s"  t    d| _|
| _|d ur|nt|d d}t||||||||||	d
g}t||d|dg}g }g }t|D ]>}|t	||
 |
|d||||d |t
||
 |
|d||d	 |t||||||||||	d
 |t||d|d q8t|| _t|| _t|| _t|| _d S )
NT   r5   
r9   r:   r;   epsgroupsrL   time_embedding_normnon_linearityre   pre_norm皙?rL   norm_num_groupsr   )r9   r8   rA   ro   rD   rF   r9   r8   rA   ro   )r!   r"   has_cross_attentionr?   minr   r   rangeappendr   r   r   
ModuleListresnets
temp_convs
attentionstemp_attentions)r$   r9   r;   rL   r8   r=   rG   r>   r@   rd   r?   re   rA   rC   rD   rF   rv   rw   rx   ry   _r(   r*   r+   r"     s   

	z UNetMidBlock3DCrossAttn.__init__Nhidden_statestembencoder_hidden_statesattention_mask
num_framescross_attention_kwargsrM   c                 C   s   | j d ||}| jd ||d}t| j| j| j dd  | jdd  D ]%\}}}	}
||||ddd }||||ddd }|	||}|
||d}q$|S )Nr   r   r   Fr}   r   return_dictr   r   r   )rv   rw   ziprx   ry   )r$   r{   r|   r}   r~   r   r   attn	temp_attnresnet	temp_convr*   r*   r+   forwardx  s0   	 
zUNetMidBlock3DCrossAttn.forward)r6   r   r`   r3   ra   r5   Tr   rb   rc   FTFNNNr   N)r.   r/   r0   intfloatstrboolr"   torchTensorr   r   r   r   r1   r*   r*   r(   r+   r_     s    	
hr_   c                '       s   e Zd Z														
	
	
	
d+dededededededededededededededededededef& fddZ	 	 	 		 d,d!ej	d"e
ej	 d#e
ej	 d$e
ej	 d%ed&eeef d'eej	eej	d(f f fd)d*Z  ZS )-rO   r6   r   r`   r3   ra   r5   Trc   rb   Fr9   r:   r;   rL   r8   r=   rG   r>   r@   rd   r?   rA   re   rB   r<   rC   rD   rE   rF   c                    s   t    g }g }g }g }d| _|| _t|D ]G}|dkr|n|}|t|||||	|||||
d
 |t||d|	d |t|| ||d||	|||d	 |t	|| ||d||	d qt
|| _t
|| _t
|| _t
|| _|rt
t|d||d	d
g| _nd | _d| _d S )NTr   rg   rm   rn   r   r9   r8   rA   ro   rD   rE   rF   rp   opuse_convr:   paddingnameF)r!   r"   rq   r?   rs   rt   r   r   r   r   r   ru   rv   rw   rx   ry   r   downsamplersgradient_checkpointing)r$   r9   r:   r;   rL   r8   r=   rG   r>   r@   rd   r?   rA   re   rB   r<   rC   rD   rE   rF   rv   rx   ry   rw   ir(   r*   r+   r"     s   


zCrossAttnDownBlock3D.__init__Nr{   r|   r}   r~   r   r   rM   .c                 C   s   d}t | j| j| j| jD ]*\}}	}
}|||}|	||d}|
|||ddd }||||ddd }||f7 }q| jd urL| jD ]}||}q@||f7 }||fS )Nr*   r   Fr   r   r   )r   rv   rw   rx   ry   r   )r$   r{   r|   r}   r~   r   r   output_statesr   r   r   r   downsamplerr*   r*   r+   r     s8   





zCrossAttnDownBlock3D.forward)r6   r   r`   r3   ra   r5   Tr   rc   rb   r   TFFFFr   )r.   r/   r0   r   r   r   r   r"   r   r   r   r   r   r   r   r   r1   r*   r*   r(   r+   rO     s    	
g
rO   c                       s   e Zd Z										d d	ed
edededededededededededef fddZ		d!dej	de
ej	 dedeej	eej	df f fddZ  ZS )"rN   r6   r   r`   r3   ra   r5   Trb   r9   r:   r;   rL   r8   r=   rG   r>   r@   rd   re   r<   rB   c                    s   t    g }g }t|D ]&}|dkr|n|}|t|||||	|||||
d
 |t||d|	d qt|| _t|| _	|rQtt
|d||ddg| _nd | _d| _d S )	Nr   rg   rm   rn   Tr   r   F)r!   r"   rs   rt   r   r   r   ru   rv   rw   r   r   r   )r$   r9   r:   r;   rL   r8   r=   rG   r>   r@   rd   re   r<   rB   rv   rw   r   r(   r*   r+   r"   '  sT   
	
zDownBlock3D.__init__Nr{   r|   r   rM   .c                 C   sl   d}t | j| jD ]\}}|||}|||d}||f7 }q	| jd ur2| jD ]}||}q&||f7 }||fS )Nr*   r   )r   rv   rw   r   )r$   r{   r|   r   r   r   r   r   r*   r*   r+   r   h  s   




zDownBlock3D.forward)
r6   r   r`   r3   ra   r5   Trb   Tr   )Nr   )r.   r/   r0   r   r   r   r   r"   r   r   r   r   r   r   r1   r*   r*   r(   r+   rN   &  sb    	
DrN   c                *       s   e Zd Z													
	
	
	
	d.dedededededededededededededededededededee f( fd d!Z						d/d"e	j
d#ee	j
d$f d%ee	j
 d&ee	j
 d'ee d(ee	j
 d)ed*eeef d+e	j
fd,d-Z  ZS )0r[   r6   r   r`   r3   ra   r5   Trc   rb   FNr9   r:   rV   r;   rL   r8   r=   rG   r>   r@   rd   r?   rA   re   rW   rC   rD   rE   rF   rX   c                    s:  t    g }g }g }g }d| _|| _t|D ]S}||d kr!|n|}|dkr)|n|}|t|| ||||
|||	||d
 |t||d|
d |t|| ||d||
|||d	 |t	|| ||d||
d qt
|| _t
|| _t
|| _t
|| _|rt
t|d|d	g| _nd | _d
| _|| _d S )NTr   r   rg   rm   rn   r   rp   r   r:   F)r!   r"   rq   r?   rs   rt   r   r   r   r   r   ru   rv   rw   rx   ry   r   
upsamplersr   rX   )r$   r9   r:   rV   r;   rL   r8   r=   rG   r>   r@   rd   r?   rA   re   rW   rC   rD   rE   rF   rX   rv   rw   rx   ry   r   res_skip_channelsresnet_in_channelsr(   r*   r+   r"     s   


zCrossAttnUpBlock3D.__init__r{   res_hidden_states_tuple.r|   r}   upsample_sizer~   r   r   rM   c	              
   C   s  t | dd ot | dd ot | dd ot | dd }	t| j| j| j| jD ]L\}
}}}|d }|d d }|	rGt| j||| j| j	| j
| jd\}}tj||gdd}|
||}|||d	}||||d
dd }||||d
dd }q#| jd ur| jD ]}|||}qx|S )Ns1s2b1b2r   r   r   r   r   dimr   Fr   r   r   )getattrr   rv   rw   rx   ry   r   rX   r   r   r   r   r   catr   )r$   r{   r   r|   r}   r   r~   r   r   is_freeu_enabledr   r   r   r   res_hidden_states	upsamplerr*   r*   r+   r     sZ   







zCrossAttnUpBlock3D.forward)r6   r   r`   r3   ra   r5   Tr   rc   rb   TFFFFN)NNNNr   N)r.   r/   r0   r   r   r   r   r   r"   r   r   r   r   r   r   r1   r*   r*   r(   r+   r[     s    	
b
	
r[   c                       s   e Zd Z											d#d
edededededededededededededee f fddZ					d$de	j
dee	j
df dee	j
 dee ded e	j
fd!d"Z  ZS )%rZ   r6   r   r`   r3   ra   r5   Trb   Nr9   rV   r:   r;   rL   r8   r=   rG   r>   r@   rd   re   rW   rX   c                    s   t    g }g }t|D ]2}||d kr|n|}|dkr|n|}|t|| ||||
|||	||d
 |t||d|
d qt|| _t|| _	|r[tt
|d|dg| _nd | _d| _|| _d S )	Nr   r   rg   rm   rn   Tr   F)r!   r"   rs   rt   r   r   r   ru   rv   rw   r   r   r   rX   )r$   r9   rV   r:   r;   rL   r8   r=   rG   r>   r@   rd   re   rW   rX   rv   rw   r   r   r   r(   r*   r+   r"     sF   
	
zUpBlock3D.__init__r{   r   .r|   r   r   rM   c              
   C   s   t | dd ot | dd ot | dd ot | dd }t| j| jD ]6\}}|d }	|d d }|rAt| j||	| j| j| j| j	d\}}	t
j||	gdd}|||}|||d	}q| jd urf| jD ]}
|
||}q^|S )
Nr   r   r   r   r   r   r   r   r   )r   r   rv   rw   r   rX   r   r   r   r   r   r   r   )r$   r{   r   r|   r   r   r   r   r   r   r   r*   r*   r+   r   X  s6   	







zUpBlock3D.forward)
r6   r   r`   r3   ra   r5   Trb   TN)NNr   )r.   r/   r0   r   r   r   r   r   r"   r   r   r   r   r1   r*   r*   r(   r+   rZ     sp    	
?rZ   c                       sP   e Zd Z			ddededededef
 fd	d
ZdejdejfddZ  Z	S )MidBlockTemporalDecoder   r   Fr9   r:   attention_head_dimr8   rF   c           
         s   t    g }g }t|D ]}|dkr|n|}	|t|	|d dddddd q|t||| |d|dddd	 t|| _t|| _	d S )
Nr   r`   h㈵>r6   learnedTr9   r:   r;   rh   temporal_epsmerge_factormerge_strategyswitch_spatial_to_temporal_mixr5   )	query_dimheadsdim_headrh   rF   ro   biasresidual_connection)
r!   r"   rs   rt   r   r   r   ru   rx   rv   )
r$   r9   r:   r   r8   rF   rv   rx   r   input_channelsr(   r*   r+   r"     s>   
z MidBlockTemporalDecoder.__init__r{   image_only_indicatorc                 C   sJ   | j d ||d}t| j dd  | jD ]\}}||}|||d}q|S )Nr   r   r   )rv   r   rx   )r$   r{   r   r   r   r*   r*   r+   r     s   zMidBlockTemporalDecoder.forward)r   r   F
r.   r/   r0   r   r   r"   r   r   r   r1   r*   r*   r(   r+   r     s(    +r   c                	       sP   e Zd Z		ddedededef fddZd	ejd
ejdejfddZ  Z	S )UpBlockTemporalDecoderr   Tr9   r:   r8   rW   c                    s   t    g }t|D ]}|dkr|n|}|t||d dddddd qt|| _|r;tt|d|dg| _	d S d | _	d S )	Nr   r`   r   r6   r   Tr   r   )
r!   r"   rs   rt   r   r   ru   rv   r   r   )r$   r9   r:   r8   rW   rv   r   r   r(   r*   r+   r"     s(   

zUpBlockTemporalDecoder.__init__r{   r   rM   c                 C   s:   | j D ]}|||d}q| jd ur| jD ]}||}q|S )Nr   )rv   r   )r$   r{   r   r   r   r*   r*   r+   r     s   



zUpBlockTemporalDecoder.forwardr   Tr   r*   r*   r(   r+   r     s&    r   c                       s   e Zd Z				ddedededeeee f dedef fd	d
Z			ddejde	ej de	ej de	ej dejf
ddZ
  ZS )UNetMidBlockSpatioTemporalr   rc   r9   r;   r8   rJ   r?   rA   c           
   
      s   t    d| _|| _t|tr|g| }t|||ddg}g }t|D ]}	|t	||| |||	 |d |t|||dd q$t
|| _t
|| _d| _d S )NTr   r9   r:   r;   rh   r9   r8   rA   F)r!   r"   rq   r?   
isinstancer   r   rs   rt   r   r   ru   rx   rv   r   )
r$   r9   r;   r8   rJ   r?   rA   rv   rx   r   r(   r*   r+   r"     sD   
	


	
z#UNetMidBlockSpatioTemporal.__init__Nr{   r|   r}   r   rM   c           	      C   s   | j d |||d}t| j| j dd  D ]F\}}| jrJ| jrJddd}tddr-dd	ini }||||d	d
d }tjjj|||||fi |}q||||d	d
d }||||d}q|S )Nr   r   r   c                        fdd}|S )Nc                        d ur | diS  |  S Nr   r*   inputsmoduler   r*   r+   custom_forward<     zYUNetMidBlockSpatioTemporal.forward.<locals>.create_custom_forward.<locals>.custom_forwardr*   r   r   r   r*   r   r+   create_custom_forward;     zAUNetMidBlockSpatioTemporal.forward.<locals>.create_custom_forward>=1.11.0use_reentrantFr}   r   r   N)	rv   r   rx   trainingr   r
   r   utils
checkpoint)	r$   r{   r|   r}   r   r   r   r   ckpt_kwargsr*   r*   r+   r   +  sN   
	z"UNetMidBlockSpatioTemporal.forward)r   r   r   rc   NNN)r.   r/   r0   r   r   r   r"   r   r   r   r   r1   r*   r*   r(   r+   r     s@    9r   c                       sz   e Zd Z		ddededededef
 fdd	Z	
	
ddejdeej deej de	eje	ejdf f fddZ
  ZS )rP   r   Tr9   r:   r;   r8   r<   c              	      s~   t    g }t|D ]}|dkr|n|}|t|||dd qt|| _|r7tt|d|ddg| _	nd | _	d| _
d S )Nr   r   r   Tr   )r   r:   r   F)r!   r"   rs   rt   r   r   ru   rv   r   r   r   )r$   r9   r:   r;   r8   r<   rv   r   r(   r*   r+   r"   c  s2   
	
z DownBlockSpatioTemporal.__init__Nr{   r|   r   rM   .c                 C   s   d}| j D ]9}| jr2| jr2dd }tddr%tjjj|||||dd}ntjj|||||}n||||d}||f }q| jd urS| jD ]}||}qG||f }||fS )	Nr*   c                        fdd}|S )Nc                         |  S r   r*   r   r   r*   r+   r        zVDownBlockSpatioTemporal.forward.<locals>.create_custom_forward.<locals>.custom_forwardr*   r   r   r*   r   r+   r        z>DownBlockSpatioTemporal.forward.<locals>.create_custom_forwardr   r   Fr   r   )rv   r   r   r
   r   r   r   r   )r$   r{   r|   r   r   r   r   r   r*   r*   r+   r     s:   





zDownBlockSpatioTemporal.forwardr   NN)r.   r/   r0   r   r   r"   r   r   r   r   r   r1   r*   r*   r(   r+   rP   b  s2    +rP   c                       s   e Zd Z					ddededededeeee f d	ed
edef fddZ			ddej	de
ej	 de
ej	 de
ej	 deej	eej	df f f
ddZ  ZS )rQ   r   rc   Tr9   r:   r;   r8   rJ   r?   rA   r<   c	              
      s   t    g }	g }
d| _|| _t|tr|g| }t|D ]%}|dkr%|n|}|	t|||dd |
t	||| ||| |d qt
|
| _t
|	| _|r`t
t|d|dddg| _nd | _d	| _d S )
NTr   r`   r   r   r   r   r   F)r!   r"   rq   r?   r   r   rs   rt   r   r   r   ru   rx   rv   r   r   r   )r$   r9   r:   r;   r8   rJ   r?   rA   r<   rv   rx   r   r(   r*   r+   r"     sR   




z)CrossAttnDownBlockSpatioTemporal.__init__Nr{   r|   r}   r   rM   .c                 C   s   d}t t| j| j}|D ]K\}}| jrB| jrBddd}	tddr%ddini }
tjj	j	|	||||fi |
}||||ddd	 }n||||d
}||||ddd	 }||f }q| j
d urm| j
D ]}||}qa||f }||fS )Nr*   c                    r   )Nc                     r   r   r*   r   r   r*   r+   r     r   z_CrossAttnDownBlockSpatioTemporal.forward.<locals>.create_custom_forward.<locals>.custom_forwardr*   r   r*   r   r+   r     r   zGCrossAttnDownBlockSpatioTemporal.forward.<locals>.create_custom_forwardr   r   r   Fr   r   r   r   )listr   rv   rx   r   r   r
   r   r   r   r   )r$   r{   r|   r}   r   r   blocksr   r   r   r   r   r*   r*   r+   r     sR   
	



z(CrossAttnDownBlockSpatioTemporal.forward)r   r   r   rc   Tr   )r.   r/   r0   r   r   r   r   r"   r   r   r   r   r1   r*   r*   r(   r+   rQ     sJ    	?rQ   c                       s   e Zd Z				ddedededed	ee d
ededef fddZ		ddej	de
ej	df deej	 deej	 dej	f
ddZ  ZS )r\   Nr   r`   Tr9   rV   r:   r;   rX   r8   r=   rW   c	              	      s   t    g }	t|D ]!}
|
|d kr|n|}|
dkr|n|}|	t|| |||d qt|	| _|rBtt|d|dg| _	nd | _	d| _
|| _d S )Nr   r   r   Tr   F)r!   r"   rs   rt   r   r   ru   rv   r   r   r   rX   )r$   r9   rV   r:   r;   rX   r8   r=   rW   rv   r   r   r   r(   r*   r+   r"   7  s&   
	
zUpBlockSpatioTemporal.__init__r{   r   .r|   r   rM   c           	      C   s   | j D ]G}|d }|d d }tj||gdd}| jrC| jrCdd }tddr6tjjj|||||dd	}qtjj|||||}q||||d
}q| jd urZ| jD ]}||}qS|S )Nr   r   r   c                    r   )Nc                     r   r   r*   r   r   r*   r+   r   m  r   zTUpBlockSpatioTemporal.forward.<locals>.create_custom_forward.<locals>.custom_forwardr*   r   r*   r   r+   r   l  r   z<UpBlockSpatioTemporal.forward.<locals>.create_custom_forwardr   r   Fr   r   )	rv   r   r   r   r   r
   r   r   r   )	r$   r{   r   r|   r   r   r   r   r   r*   r*   r+   r   \  s:   




zUpBlockSpatioTemporal.forward)Nr   r`   Tr   )r.   r/   r0   r   r   r   r   r"   r   r   r   r   r1   r*   r*   r(   r+   r\   6  sF    	)r\   c                       s   e Zd Z							ddededed	ed
ee dedeeee f dedededef fddZ				dde
jdee
jdf dee
j dee
j dee
j de
jfddZ  ZS )r]   Nr   r`   rc   Tr9   r:   rV   r;   rX   r8   rJ   r=   r?   rA   rW   c              
      s   t    g }g }d| _|	| _t|tr|g| }t|D ]1}||d kr'|n|}|dkr/|n|}|t|| |||d |t	|	||	 ||| |
d qt
|| _t
|| _|rjt
t|d|dg| _nd | _d| _|| _d S )NTr   r   r   r   r   F)r!   r"   rq   r?   r   r   rs   rt   r   r   r   ru   rx   rv   r   r   r   rX   )r$   r9   r:   rV   r;   rX   r8   rJ   r=   r?   rA   rW   rv   rx   r   r   r   r(   r*   r+   r"     sD   




z'CrossAttnUpBlockSpatioTemporal.__init__r{   r   .r|   r}   r   rM   c                 C   s   t | j| jD ]Y\}}|d }|d d }tj||gdd}| jrO| jrOddd}	tddr2dd	ini }
tjj	j	|	||||fi |
}||||d	d
d }q||||d}||||d	d
d }q| j
d urp| j
D ]}||}qi|S )Nr   r   r   c                    r   )Nc                     r   r   r*   r   r   r*   r+   r     r   z]CrossAttnUpBlockSpatioTemporal.forward.<locals>.create_custom_forward.<locals>.custom_forwardr*   r   r*   r   r+   r     r   zECrossAttnUpBlockSpatioTemporal.forward.<locals>.create_custom_forwardr   r   r   Fr   r   r   r   )r   rv   rx   r   r   r   r   r
   r   r   r   )r$   r{   r   r|   r}   r   r   r   r   r   r   r   r*   r*   r+   r     sP   
	


z&CrossAttnUpBlockSpatioTemporal.forward)Nr   r   r`   r   rc   Tr   )r.   r/   r0   r   r   r   r   r   r   r"   r   r   r   r1   r*   r*   r(   r+   r]     s^    	
=r]   )NNNFTFFr3   r4   r5   r   r   r6   )NNNFTFFr3   r4   Nr5   r   r   r6   )7typingr   r   r   r   r   r   r   r   r	   r
   r   utils.torch_utilsr   	attentionr   r   r   r   r   r   r   transformers.transformer_2dr   !transformers.transformer_temporalr   r   unet_motion_modelr   r   r   r   r   
get_loggerr.   loggerr   r   r   r   rT   r^   Moduler_   rO   rN   r[   rZ   r   r   r   rP   rQ   r\   r]   r*   r*   r*   r+   <module>   s@  
		

g	

d  Y g?2nZzY