o
    Si                     @   s   d dl mZmZ d dlZd dlmZ d dlmZmZ G dd dejZ	G dd dejZ
G d	d
 d
ejZddejdedejfddZdejdejfddZdejdejfddZdS )    )OptionalTupleN)nn)weight_normremove_weight_normc                
       s\   e Zd ZdZ	ddedededee f fddZdd	ej	d
eej	 dej	fddZ
  ZS )ConvNeXtBlocka  ConvNeXt Block adapted from https://github.com/facebookresearch/ConvNeXt to 1D audio signal.

    Args:
        dim (int): Number of input channels.
        intermediate_dim (int): Dimensionality of the intermediate layer.
        layer_scale_init_value (float, optional): Initial value for the layer scale. None means no scaling.
            Defaults to None.
        adanorm_num_embeddings (int, optional): Number of embeddings for AdaLayerNorm.
            None means non-conditional LayerNorm. Defaults to None.
    Ndimintermediate_dimlayer_scale_init_valueadanorm_num_embeddingsc                    s   t    tj||dd|d| _|d u| _|r t||dd| _ntj|dd| _t	||| _
t | _t	||| _|dkrNtj|t| dd| _d S d | _d S )	N      )kernel_sizepaddinggroupsư>epsr   Trequires_grad)super__init__r   Conv1ddwconvadanormAdaLayerNormnorm	LayerNormLinearpwconv1GELUactpwconv2	Parametertorchonesgamma)selfr   r	   r
   r   	__class__ A/home/ubuntu/.local/lib/python3.10/site-packages/vocos/modules.pyr      s   


zConvNeXtBlock.__init__xcond_embedding_idreturnc                 C   s   |}|  |}|dd}| jr|d usJ | ||}n| |}| |}| |}| |}| jd ur;| j| }|dd}|| }|S )N      )r   	transposer   r   r   r!   r"   r&   )r'   r,   r-   residualr*   r*   r+   forward+   s   






zConvNeXtBlock.forwardN)__name__
__module____qualname____doc__intfloatr   r   r$   Tensorr3   __classcell__r*   r*   r(   r+   r      s    *r   c                       sL   e Zd ZdZddededef fddZdejd	ejd
ejfddZ	  Z
S )r   z
    Adaptive Layer Normalization module with learnable embeddings per `num_embeddings` classes

    Args:
        num_embeddings (int): Number of embeddings.
        embedding_dim (int): Dimension of the embeddings.
    r   num_embeddingsembedding_dimr   c                    s^   t    || _|| _tj||d| _tj||d| _tjj	
| jj tjj	| jj d S )N)r=   r>   )r   r   r   r   r   	Embeddingscaleshiftr$   initones_weightzeros_)r'   r=   r>   r   r(   r*   r+   r   H   s   
zAdaLayerNorm.__init__r,   r-   r.   c                 C   s<   |  |}| |}tjj|| jf| jd}|| | }|S )Nr   )r@   rA   r   
functional
layer_normr   r   )r'   r,   r-   r@   rA   r*   r*   r+   r3   Q   s
   

zAdaLayerNorm.forward)r   )r5   r6   r7   r8   r9   r:   r   r$   r;   r3   r<   r*   r*   r(   r+   r   ?   s    $	r   c                       s   e Zd ZdZ				ddededeeeef d	ed
ee f
 fddZde	j
de	j
fddZdd ZeddededefddZ  ZS )	ResBlock1a  
    ResBlock adapted from HiFi-GAN V1 (https://github.com/jik876/hifi-gan) with dilated 1D convolutions,
    but without upsampling layers.

    Args:
        dim (int): Number of input channels.
        kernel_size (int, optional): Size of the convolutional kernel. Defaults to 3.
        dilation (tuple[int], optional): Dilation factors for the dilated convolutions.
            Defaults to (1, 3, 5).
        lrelu_slope (float, optional): Negative slope of the LeakyReLU activation function.
            Defaults to 0.1.
        layer_scale_init_value (float, optional): Initial value for the layer scale. None means no scaling.
            Defaults to None.
    r   r/   r      皙?Nr   r   dilationlrelu_sloper
   c                    s|  t    || _tttj|||d|d | ||d dttj|||d|d | ||d dttj|||d|d | ||d dg| _tttj|||dd| |ddttj|||dd| |ddttj|||dd| |ddg| _	t
|d urtj|t|d ddnd |d urtj|t|d ddnd |d urtj|t|d ddnd g| _d S )Nr/   r   )rL   r   r0   Tr   )r   r   rM   r   
ModuleListr   r   get_paddingconvs1convs2ParameterListr#   r$   r%   r&   )r'   r   r   rL   rM   r
   r(   r*   r+   r   i   sj   


#   
zResBlock1.__init__r,   r.   c                 C   sr   t | j| j| jD ]-\}}}tjjj|| jd}||}tjjj|| jd}||}|d ur2|| }|| }q	|S )N)negative_slope)	ziprP   rQ   r&   r$   r   rF   
leaky_relurM   )r'   r,   c1c2r&   xtr*   r*   r+   r3      s   
zResBlock1.forwardc                 C   s,   | j D ]}t| q| jD ]}t| qd S r4   )rP   r   rQ   )r'   lr*   r*   r+   r      s
   



zResBlock1.remove_weight_normr/   c                 C   s   t | | | d S )Nr0   )r9   )r   rL   r*   r*   r+   rO      s   zResBlock1.get_padding)r   rI   rK   N)r/   )r5   r6   r7   r8   r9   r   r:   r   r   r$   r;   r3   r   staticmethodrO   r<   r*   r*   r(   r+   rH   Y   s*    C"rH   Hz>r,   clip_valr.   c                 C   s   t t j| |dS )aU  
    Computes the element-wise logarithm of the input tensor with clipping to avoid near-zero values.

    Args:
        x (Tensor): Input tensor.
        clip_val (float, optional): Minimum value to clip the input tensor. Defaults to 1e-7.

    Returns:
        Tensor: Element-wise logarithm of the input tensor with clipping applied.
    )min)r$   logclip)r,   r\   r*   r*   r+   safe_log   s   r`   c                 C   s   t | t |   S r4   )r$   signlog1pabsr,   r*   r*   r+   symlog   s   re   c                 C   s   t | t |  d  S )Nr/   )r$   ra   exprc   rd   r*   r*   r+   symexp   s   rg   )r[   )typingr   r   r$   r   torch.nn.utilsr   r   Moduler   r   rH   r;   r:   r`   re   rg   r*   r*   r*   r+   <module>   s    7i