o
    pih                     @   s  d dl Z d dlmZmZmZ d dlZd dlm  mZ	 d dlmZ ddl
mZ ddlmZmZmZmZ G dd dejZG d	d
 d
ejZG dd dejZG dd dejZG dd dejZG dd dejZg dg dg ddZG dd dejZG dd dejZG dd dejZG dd dejZG dd  d ejZG d!d" d"ejZG d#d$ d$ejZG d%d& d&ejZ G d'd( d(ejZ!G d)d* d*ejZ"G d+d, d,ejZ#eeeee f Z$eeeef Z%eeef Z&eee"e!e#f Z'd-e(d.e)d/e)d0e)d1e)d2e*d3e$fd4d5Z+d6e(d.e)d/e)d0e)d1e)d7e*d3e'fd8d9Z,d:e(d.e)d/e)d;e)d0e)d<e)d2e*d3e%fd=d>Z-d?e(d@e)d<e)d0e)dAe(dBe)d3ee& fdCdDZ.dS )E    N)OptionalTupleUnion)nn   )get_activation)Downsample1DResidualTemporalBlock1D
Upsample1Drearrange_dimsc                       s   e Zd Z										dded	ee d
ededededee dee dededef fddZdde	j
dee	j
 de	j
fddZ  ZS )DownResnetBlock1DN   F    default      ?Tin_channelsout_channels
num_layersconv_shortcuttemb_channelsgroups
groups_outnon_linearitytime_embedding_normoutput_scale_factoradd_downsamplec                    s   t    || _|d u r|n|}|| _|| _|	| _|| _|
| _|d u r%|}t|||dg}t	|D ]}|
t|||d q1t|| _|d u rLd | _nt|| _d | _|r`t|ddd| _d S d S )N	embed_dimTr   )use_convpadding)super__init__r   r   use_conv_shortcutr   r   r   r	   rangeappendr   
ModuleListresnetsnonlinearityr   
downsampler   )selfr   r   r   r   r   r   r   r   r   r   r   r&   _	__class__ c/home/ubuntu/SoloSpeech/.venv/lib/python3.10/site-packages/diffusers/models/unets/unet_1d_blocks.pyr!      s*   

zDownResnetBlock1D.__init__hidden_statestembreturnc                 C   sl   d}| j d ||}| j dd  D ]}|||}q||f7 }| jd ur(| |}| jd ur2| |}||fS )Nr-   r   r   )r&   r'   r(   )r)   r/   r0   output_statesresnetr-   r-   r.   forwardE   s   




zDownResnetBlock1D.forward)
Nr   Fr   r   NNr   r   TN)__name__
__module____qualname__intr   boolstrfloatr!   torchTensorr4   __classcell__r-   r-   r+   r.   r      sF    	
*+r   c                       s   e Zd Z									ddedee d	ed
ededee dee dededef fddZ		dde	j
deee	j
df  dee	j
 de	j
fddZ  ZS )UpResnetBlock1DNr   r   r   r   Tr   r   r   r   r   r   r   r   r   add_upsamplec                    s   t    || _|d u r|n|}|| _|| _|
| _|	| _|d u r"|}td| ||dg}t|D ]}|	t|||d q0t
|| _|d u rKd | _nt|| _d | _|
r^t|dd| _d S d S )Nr   r   T)use_conv_transpose)r    r!   r   r   r   rA   r   r	   r#   r$   r   r%   r&   r'   r   upsampler
   )r)   r   r   r   r   r   r   r   r   r   rA   r&   r*   r+   r-   r.   r!   X   s(   

zUpResnetBlock1D.__init__r/   res_hidden_states_tuple.r0   r1   c                 C   s|   |d ur|d }t j||fdd}| jd ||}| jdd  D ]}|||}q | jd ur2| |}| jd ur<| |}|S )Nr   dimr   )r=   catr&   r'   rC   r)   r/   rD   r0   res_hidden_statesr3   r-   r-   r.   r4      s   



zUpResnetBlock1D.forward)	Nr   r   r   NNr   r   T)NN)r6   r7   r8   r9   r   r;   r<   r:   r!   r=   r>   r   r4   r?   r-   r-   r+   r.   r@   W   sT    	
,r@   c                       sL   e Zd Zdededef fddZddejdeej d	ejfd
dZ  Z	S )ValueFunctionMidBlock1Dr   r   r   c                    sp   t    || _|| _|| _t||d |d| _t|d dd| _t|d |d |d| _	t|d dd| _
d S )Nr   r   Tr      )r    r!   r   r   r   r	   res1r   down1res2down2)r)   r   r   r   r+   r-   r.   r!      s   
z ValueFunctionMidBlock1D.__init__Nxr0   r1   c                 C   s0   |  ||}| |}| ||}| |}|S r5   )rN   rO   rP   rQ   )r)   rR   r0   r-   r-   r.   r4      s
   

zValueFunctionMidBlock1D.forwardr5   )
r6   r7   r8   r9   r!   r=   r>   r   r4   r?   r-   r-   r+   r.   rK      s    *rK   c                       sd   e Zd Z				ddededededed	ed
ee f fddZdej	dej	dej	fddZ
  ZS )MidResTemporalBlock1Dr   FNr   r   r   r   r   rA   r   c           
         s   t    || _|| _|| _t|||dg}t|D ]}	|t|||d qt	|| _
|d u r5d | _nt|| _d | _|rFt|dd| _d | _|rRt|dd| _| jr\| jr^tdd S d S )Nr   TrL   z$Block cannot downsample and upsample)r    r!   r   r   r   r	   r#   r$   r   r%   r&   r'   r   rC   r
   r(   r   
ValueError)
r)   r   r   r   r   r   rA   r   r&   r*   r+   r-   r.   r!      s(   


zMidResTemporalBlock1D.__init__r/   r0   r1   c                 C   sT   | j d ||}| j dd  D ]}|||}q| jr| |}| jr(| || _|S )Nr   r   )r&   rC   r(   r)   r/   r0   r3   r-   r-   r.   r4      s   
zMidResTemporalBlock1D.forward)r   FFN)r6   r7   r8   r9   r:   r   r;   r!   r=   r>   r4   r?   r-   r-   r+   r.   rS      s*    $'rS   c                       sP   e Zd Zdedededef fddZddejd	eej d
ejfddZ	  Z
S )OutConv1DBlocknum_groups_outr   r   act_fnc                    sJ   t    tj||ddd| _t||| _t|| _t||d| _	d S )N   r   r   r   )
r    r!   r   Conv1dfinal_conv1d_1	GroupNormfinal_conv1d_gnr   final_conv1d_actfinal_conv1d_2)r)   rW   r   r   rX   r+   r-   r.   r!      s
   

zOutConv1DBlock.__init__Nr/   r0   r1   c                 C   s<   |  |}t|}| |}t|}| |}| |}|S r5   )r\   r   r^   r_   r`   )r)   r/   r0   r-   r-   r.   r4      s   



zOutConv1DBlock.forwardr5   )r6   r7   r8   r9   r;   r!   r=   r>   r   r4   r?   r-   r-   r+   r.   rV      s    *rV   c                       sH   e Zd Zddededef fddZdejdejd	ejfd
dZ  Z	S )OutValueFunctionBlockmishfc_dimr   rX   c                    s@   t    tt|| |d t|t|d dg| _d S )Nr   r   )r    r!   r   r%   Linearr   final_block)r)   rc   r   rX   r+   r-   r.   r!      s   

zOutValueFunctionBlock.__init__r/   r0   r1   c                 C   s<   | |jd d}tj||fdd}| jD ]}||}q|S )Nr   rE   rF   )viewshaper=   rH   re   )r)   r/   r0   layerr-   r-   r.   r4      s
   

zOutValueFunctionBlock.forward)rb   )
r6   r7   r8   r9   r;   r!   r=   r>   r4   r?   r-   r-   r+   r.   ra      s    $
ra   )      ?      ?rj   ri   )                  ?     ?rn   rm   rl   rk   )    8n?   Վ?   @Dh   $   <X?   ?rt   rs   rr   rq   rp   ro   )linearcubiclanczos3c                       s>   e Zd Zddedef fddZdejdejfd	d
Z  ZS )Downsample1dru   reflectkernelpad_modec                    sB   t    || _tt| }|jd d d | _| d| d S )Nr   r   r   rz   	r    r!   r{   r=   tensor_kernelsrg   padregister_bufferr)   rz   r{   	kernel_1dr+   r-   r.   r!     s
   
zDownsample1d.__init__r/   r1   c                 C   s   t || jfd | j}||jd |jd | jjd g}tj|jd |jd}| j	|d d d f 
|jd d}||||f< t j||ddS )Nr   r   r   devicerE   )stride)Fr   r{   	new_zerosrg   rz   r=   aranger   toexpandconv1d)r)   r/   weightindicesrz   r-   r-   r.   r4   #  s   $&zDownsample1d.forwardru   ry   )	r6   r7   r8   r;   r!   r=   r>   r4   r?   r-   r-   r+   r.   rx     s    rx   c                       sJ   e Zd Zddedef fddZddejd	eej d
ejfddZ  Z	S )
Upsample1dru   ry   rz   r{   c                    sF   t    || _tt| d }|jd d d | _| d| d S )Nr   r   r   rz   r|   r   r+   r-   r.   r!   -  s
   
zUpsample1d.__init__Nr/   r0   r1   c                 C   s   t || jd d fd | j}||jd |jd | jjd g}tj|jd |jd}| j	|d d d f 
|jd d}||||f< t j||d| jd d dS )Nr   r   r   r   rE   )r   r   )r   r   r{   r   rg   rz   r=   r   r   r   r   conv_transpose1d)r)   r/   r0   r   r   rz   r-   r-   r.   r4   4  s    $&zUpsample1d.forwardr   r5   )
r6   r7   r8   r;   r!   r=   r>   r   r4   r?   r-   r-   r+   r.   r   ,  s    *r   c                       sX   e Zd Zddededef fddZdejd	ejfd
dZdejd	ejfddZ	  Z
S )SelfAttention1dr           r   n_headdropout_ratec                    s   t    || _tjd|d| _|| _t| j| j| _t| j| j| _	t| j| j| _
tj| j| jdd| _tj|dd| _d S )Nr   )num_channelsTbias)inplace)r    r!   channelsr   r]   
group_norm	num_headsrd   querykeyvalue	proj_attnDropoutdropout)r)   r   r   r   r+   r-   r.   r!   >  s   
zSelfAttention1d.__init__
projectionr1   c                 C   s4   |  d d | jdf }||dddd}|S )NrE   r   r   r      )sizer   rf   permute)r)   r   new_projection_shapenew_projectionr-   r-   r.   transpose_for_scoresL  s   z$SelfAttention1d.transpose_for_scoresr/   c                 C   s  |}|j \}}}| |}|dd}| |}| |}| |}| |}	| |}
| |}dtt|
j d  }t	
|	| |
dd| }t	j|dd}t	
||}|dddd }| d d | jf }||}| |}|dd}| |}|| }|S )Nr   r   rE   rF   r   r   )rg   r   	transposer   r   r   r   mathsqrtr=   matmulsoftmaxr   
contiguousr   r   rf   r   r   )r)   r/   residualbatchchannel_dimseq
query_projkey_proj
value_projquery_states
key_statesvalue_statesscaleattention_scoresattention_probsnew_hidden_states_shapeoutputr-   r-   r.   r4   R  s,   









zSelfAttention1d.forward)r   r   )r6   r7   r8   r9   r<   r!   r=   r>   r   r4   r?   r-   r-   r+   r.   r   =  s    r   c                	       sF   e Zd Zddedededef fddZdejd	ejfd
dZ  Z	S )ResConvBlockFr   mid_channelsr   is_lastc                    s   t    || _||k| _| jrtj||ddd| _tj||ddd| _td|| _	t
 | _tj||ddd| _| jsKtd|| _t
 | _d S d S )Nr   Fr   rY   r   rZ   )r    r!   r   has_conv_skipr   r[   	conv_skipconv_1r]   group_norm_1GELUgelu_1conv_2group_norm_2gelu_2)r)   r   r   r   r   r+   r-   r.   r!   x  s   


zResConvBlock.__init__r/   r1   c                 C   sb   | j r| |n|}| |}| |}| |}| |}| js+| |}| |}|| }|S r5   )	r   r   r   r   r   r   r   r   r   )r)   r/   r   r   r-   r-   r.   r4     s   





zResConvBlock.forward)F)
r6   r7   r8   r9   r:   r!   r=   r>   r4   r?   r-   r-   r+   r.   r   w  s     r   c                       R   e Zd Zddededee f fddZddejdeej d	ejfd
dZ  Z	S )UNetMidBlock1DNr   r   r   c              	      s   t    |d u r|n|}td| _t|||t|||t|||t|||t|||t|||g}t||d t||d t||d t||d t||d t||d g}tdd| _t	|| _
t	|| _d S )Nrv   r   rz   )r    r!   rx   downr   r   r   upr   r%   
attentionsr&   )r)   r   r   r   r&   r   r+   r-   r.   r!     s(   







	zUNetMidBlock1D.__init__r/   r0   r1   c                 C   s@   |  |}t| j| jD ]\}}||}||}q| |}|S r5   )r   zipr   r&   r   )r)   r/   r0   attnr3   r-   r-   r.   r4     s   


zUNetMidBlock1D.forwardr5   
r6   r7   r8   r9   r   r!   r=   r>   r4   r?   r-   r-   r+   r.   r     s     *r   c                       r   )AttnDownBlock1DNr   r   r   c                    s   t    |d u r|n|}td| _t|||t|||t|||g}t||d t||d t||d g}t|| _t|| _	d S )Nrv   r   )
r    r!   rx   r   r   r   r   r%   r   r&   )r)   r   r   r   r&   r   r+   r-   r.   r!     s   




zAttnDownBlock1D.__init__r/   r0   r1   c                 C   s<   |  |}t| j| jD ]\}}||}||}q||ffS r5   )r   r   r&   r   )r)   r/   r0   r3   r   r-   r-   r.   r4     s
   


zAttnDownBlock1D.forwardr5   r   r-   r-   r+   r.   r     s     *r   c                       r   )DownBlock1DNr   r   r   c                    sV   t    |d u r|n|}td| _t|||t|||t|||g}t|| _d S )Nrv   )r    r!   rx   r   r   r   r%   r&   r)   r   r   r   r&   r+   r-   r.   r!     s   




zDownBlock1D.__init__r/   r0   r1   c                 C   s(   |  |}| jD ]}||}q||ffS r5   )r   r&   rU   r-   r-   r.   r4     s   



zDownBlock1D.forwardr5   r   r-   r-   r+   r.   r     s     *r   c                       r   )DownBlock1DNoSkipNr   r   r   c                    sL   t    |d u r|n|}t|||t|||t|||g}t|| _d S r5   r    r!   r   r   r%   r&   r   r+   r-   r.   r!     s   



zDownBlock1DNoSkip.__init__r/   r0   r1   c                 C   s0   t j||gdd}| jD ]}||}q||ffS )Nr   rF   r=   rH   r&   rU   r-   r-   r.   r4     s   


zDownBlock1DNoSkip.forwardr5   r   r-   r-   r+   r.   r     s     *r   c                	       b   e Zd Zddededee f fddZ	ddejdeejd	f d
eej dejfddZ	  Z
S )AttnUpBlock1DNr   r   r   c                    s   t    |d u r|n|}td| ||t|||t|||g}t||d t||d t||d g}t|| _t|| _tdd| _	d S )Nr   r   rv   r   )
r    r!   r   r   r   r%   r   r&   r   r   )r)   r   r   r   r&   r   r+   r-   r.   r!     s   


zAttnUpBlock1D.__init__r/   rD   .r0   r1   c                 C   sP   |d }t j||gdd}t| j| jD ]\}}||}||}q| |}|S NrE   r   rF   )r=   rH   r   r&   r   r   )r)   r/   rD   r0   rJ   r3   r   r-   r-   r.   r4     s   

zAttnUpBlock1D.forwardr5   r6   r7   r8   r9   r   r!   r=   r>   r   r4   r?   r-   r-   r+   r.   r     s     r   c                	       r   )	UpBlock1DNr   r   r   c                    s\   t    |d u r|n|}td| ||t|||t|||g}t|| _tdd| _d S )Nr   rv   r   )r    r!   r   r   r%   r&   r   r   r)   r   r   r   r&   r+   r-   r.   r!   2  s   


zUpBlock1D.__init__r/   rD   .r0   r1   c                 C   s<   |d }t j||gdd}| jD ]}||}q| |}|S r   )r=   rH   r&   r   rI   r-   r-   r.   r4   ?  s   


zUpBlock1D.forwardr5   r   r-   r-   r+   r.   r   1  s     r   c                	       r   )UpBlock1DNoSkipNr   r   r   c                    sT   t    |d u r|n|}td| ||t|||t|||ddg}t|| _d S )Nr   T)r   r   r   r+   r-   r.   r!   Q  s   

zUpBlock1DNoSkip.__init__r/   rD   .r0   r1   c                 C   s2   |d }t j||gdd}| jD ]}||}q|S r   r   rI   r-   r-   r.   r4   ]  s
   

zUpBlock1DNoSkip.forwardr5   r   r-   r-   r+   r.   r   P  s     r   down_block_typer   r   r   r   r   r1   c                 C   sd   | dkrt |||||dS | dkrt||dS | dkr!t||dS | dkr+t||dS t|  d)Nr   )r   r   r   r   r   r   )r   r   r   r    does not exist.)r   r   r   r   rT   )r   r   r   r   r   r   r-   r-   r.   get_down_blockr  s   r   up_block_typerA   c                 C   sd   | dkrt |||||dS | dkrt||dS | dkr!t||dS | dkr+t||dS t|  d)Nr@   )r   r   r   r   rA   r   )r   r   r   r   r   )r@   r   r   r   rT   )r   r   r   r   r   rA   r-   r-   r.   get_up_block  s   r   mid_block_typer   r   c                 C   sT   | dkrt |||||dS | dkrt|||dS | dkr#t|||dS t|  d)NrS   )r   r   r   r   r   rK   )r   r   r   r   )r   r   r   r   )rS   rK   r   rT   )r   r   r   r   r   r   r   r-   r-   r.   get_mid_block  s   	r   out_block_typerW   rX   rc   c                 C   s.   | dkrt ||||S | dkrt|||S d S )NrV   ValueFunction)rV   ra   )r   rW   r   r   rX   rc   r-   r-   r.   get_out_block  s
   r   )/r   typingr   r   r   r=   torch.nn.functionalr   
functionalr   activationsr   r3   r   r	   r
   r   Moduler   r@   rK   rS   rV   ra   r~   rx   r   r   r   r   r   r   r   r   r   r   DownBlockTypeMidBlockTypeOutBlockTypeUpBlockTyper;   r9   r:   r   r   r   r   r-   r-   r-   r.   <module>   s   >A5:"(&


