o
    i                     @   s*   d dl Z d dl mZ G dd dejZdS )    N)nnc                	       sL   e Zd ZdZ	ddedededef fddZd	ejd
ejfddZ	  Z
S )ConvNeXtBlockam  ConvNeXt Block adapted from https://github.com/facebookresearch/ConvNeXt to 1D audio signal.

    Args:
        dim (int): Number of input channels.
        intermediate_dim (int): Dimensionality of the intermediate layer.
        layer_scale_init_value (float, optional): Initial value for the layer scale. None means no scaling.
            Defaults to None.
    	   dimintermediate_dimlayer_scale_init_valuedw_kernel_sizec                    s   t    tj||||d |d| _tj|dd| _t||| _t	 | _
t||| _|dkr@tj|t| dd| _d S d | _d S )N   )kernel_sizepaddinggroupsgư>)epsr   T)requires_grad)super__init__r   Conv1ddwconv	LayerNormnormLinearpwconv1GELUactpwconv2	Parametertorchonesgamma)selfr   r   r   r   	__class__ I/home/ubuntu/.local/lib/python3.10/site-packages/soprano/vocos/modules.pyr      s   

zConvNeXtBlock.__init__xreturnc                 C   sn   |}|  |}|dd}| |}| |}| |}| |}| jd ur+| j| }|dd}|| }|S )N   r	   )r   	transposer   r   r   r   r   )r   r#   residualr!   r!   r"   forward"   s   






zConvNeXtBlock.forward)r   )__name__
__module____qualname____doc__intfloatr   r   Tensorr(   __classcell__r!   r!   r   r"   r      s    r   )r   r   Moduler   r!   r!   r!   r"   <module>   s    