o
    iJ#                     @   st   d Z ddlZddlmZ ddlZddlmZ G dd dejjZ	G dd dejjZ
G d	d
 d
ZG dd dZdS )zSinc convolutions.    N)Union)check_argument_typesc                       s6   e Zd ZdZ fddZdejdejfddZ  ZS )LogCompressionzLLog Compression Activation.

    Activation function `log(abs(x) + 1)`.
    c                    s   t    dS )zInitialize.N)super__init__)self	__class__ L/home/ubuntu/.local/lib/python3.10/site-packages/espnet2/layers/sinc_conv.pyr      s   zLogCompression.__init__xreturnc                 C   s   t t |d S )zXForward.

        Applies the Log Compression function elementwise on tensor x.
           )torchlogabs)r   r   r
   r
   r   forward   s   zLogCompression.forward)	__name__
__module____qualname____doc__r   r   Tensorr   __classcell__r
   r
   r   r   r      s    r   c                       s   e Zd ZdZ						d%deded	ed
edededededeeef f fddZe	de
jde
jfddZe	de
jde
jfddZe	de
jde
jfddZdd ZdefddZde
jde
jfd d!Zd"edefd#d$Z  ZS )&SincConva*  Sinc Convolution.

    This module performs a convolution using Sinc filters in time domain as kernel.
    Sinc filters function as band passes in spectral domain.
    The filtering is done as a convolution in time domain, and no transformation
    to spectral domain is necessary.

    This implementation of the Sinc convolution is heavily inspired
    by Ravanelli et al. https://github.com/mravanelli/SincNet,
    and adapted for the ESpnet toolkit.
    Combine Sinc convolutions with a log compression activation function, as in:
    https://arxiv.org/abs/2010.07597

    Notes:
    Currently, the same filters are applied to all input channels.
    The windowing function is applied on the kernel to obtained a smoother filter,
    and not on the input values, which is different to traditional ASR.
    r   r   hammingmel>  in_channelsout_channelskernel_sizestridepaddingdilationwindow_func
scale_typefsc
                    s  t  sJ t   | j| jd}
||
vr tdt|
  |
| | _t	t
d}||vr9tdt|  || | _|| _|| _|| _|| _|| _|| _t|	| _| jd dkr`tdd| _| jd }dtj td	|| | _| td	||| _|   dS )
a  Initialize Sinc convolutions.

        Args:
            in_channels: Number of input channels.
            out_channels: Number of output channels.
            kernel_size: Sinc filter kernel size (needs to be an odd number).
            stride: See torch.nn.functional.conv1d.
            padding: See torch.nn.functional.conv1d.
            dilation: See torch.nn.functional.conv1d.
            window_func: Window function on the filter, one of ["hamming", "none"].
            fs (str, int, float): Sample rate of the input data
        )noner   z!Window function has to be one of )r   barkzScale has to be one of    r   z"SincConv: Kernel size must be odd.Nr   )r   r   r   none_windowhamming_windowNotImplementedErrorlistkeysr#   MelScale	BarkScalescaler   r   r   r!   r"   r    floatr%   
ValueErrorfmathpir   linspace_x_windowinit_filters)r   r   r   r   r    r!   r"   r#   r$   r%   window_funcsscale_choicesNr   r
   r   r   3   s@   





zSincConv.__init__r   r   c                 C   s   | d }t || S )zSinc function.gư>)r   sin)r   x2r
   r
   r   sincp   s   zSincConv.sincc                 C   s
   t | S )z!Identity-like windowing function.)r   	ones_liker   r
   r
   r   r)   v   s   
zSincConv.none_windowc                 C   s<   d|  d d }| d} ddtdtj |  |   S )zHamming Windowing function.r(   r   r   gHzG?gq=
ףp?       @)sizeflipr   cosr4   r5   )r   Lr
   r
   r   r*   {   s   
 zSincConv.hamming_windowc                 C   s6   | j | j| j}t|| j}tjj|dd| _dS )z*Initialize filters with filterbank values.T)requires_gradN)	r0   bankr   r%   r   divnn	Parameterr3   )r   r3   r
   r
   r   r9      s   zSincConv.init_filtersdevicec           
      C   s2  t | jdddf }t | jdddf t | jdddf | jdddf   }| j|| _| j|| _t |dd| jdd}t |dd| jdd}t |t | d| j  }|| j }|	d}d| d|  
d}t j|||gdd}	|	|	dd|	d}	|	| _dS )zmCalculate coefficients.

        This function (re-)calculates the filter convolutions coefficients.
        Nr   r         ?r(   dim)r   r   r3   r7   tor8   matmulviewr=   rD   	unsqueezecatrC   sinc_filters)
r   rL   f_minsf_maxsf_mins_xf_maxs_xkernelkernel_leftkernel_centerfiltersr
   r
   r   _create_filters   s   B


zSincConv._create_filtersxsc                 C   s4   |  |j tjjj|| j| j| j| j	| j
d}|S )zSinc convolution forward function.

        Args:
            xs: Batch in form of torch.Tensor (B, C_in, D_in).

        Returns:
            xs: Batch in form of torch.Tensor (B, C_out, D_out).
        )r!   r    r"   groups)r_   rL   r   rJ   
functionalconv1drV   r!   r    r"   r   )r   r`   r
   r
   r   r      s   	zSincConv.forwardidimc                 C   s4   |d| j   | j| jd   d }|| j d }|S )z*Obtain the output dimension of the filter.r(   r   )r!   r"   r   r    )r   rd   D_outr
   r
   r   get_odim   s   "zSincConv.get_odim)r   r   r   r   r   r   )r   r   r   r   intstrr   r1   r   staticmethodr   r   r?   r)   r*   r9   r_   r   rf   r   r
   r
   r   r   r      sJ    	

=r   c                   @   D   e Zd ZdZedd Zedd Zedede	de
jfd	d
ZdS )r.   zMel frequency scale.c                 C   s   dt t | dd  S )zConvert Hz to mel.     @     @      ?)r   r   rI   )r3   r
   r
   r   convert      zMelScale.convertc                 C   s   dt t | dd  S )zConvert mel to Hz.rl   rk   rm   )r   exprI   rA   r
   r
   r   invert   ro   zMelScale.invertchannelsr%   r   c                 C   sv   t  sJ td}t|d }t| || ||d }| |}|dd |dd }}tj||gddS )u  Obtain initialization values for the mel scale.

        Args:
            channels: Number of channels.
            fs: Sample rate.

        Returns:
            torch.Tensor: Filter start frequencíes.
            torch.Tensor: Filter stop frequencies.
        g      >@rN   r(   Nr   rO   )r   r   tensorr6   rn   rq   stack)clsrr   r%   min_frequencymax_frequencyfrequenciesf1f2r
   r
   r   rH      s   


zMelScale.bankNr   r   r   r   ri   rn   rq   classmethodrg   r1   r   r   rH   r
   r
   r
   r   r.      s    

r.   c                   @   rj   )r/   zBark frequency scale.

    Has wider bandwidths at lower frequencies, see:
    Critical bandwidth: BARK
    Zwicker and Terhardt, 1980
    c                 C   s8   t | d}t |dd }t |d d}|d d S )zConvert Hz to Bark.     @@rB   ffffff?rm   gGz?     R@      9@r   rI   pow)r3   br
   r
   r   rn      s   zBarkScale.convertc                 C   s@   t | d d}t |d}t |d d}t |d}|d S )zConvert Bark to Hz.r   r   gt\;0?rm   r   rN   r~   r   )r   r3   r
   r
   r   rq      s
   zBarkScale.invertrr   r%   r   c                 C   s   t  sJ td}t|d }t| || ||}| |}|t| |d }|t| |d }tj||gddS )u
  Obtain initialization values for the Bark scale.

        Args:
            channels: Number of channels.
            fs: Sample rate.

        Returns:
            torch.Tensor: Filter start frequencíes.
            torch.Tensor: Filter stop frequencíes.
        g     Q@g?r(   r   rO   )r   r   rt   r6   rn   rq   rI   ru   )rv   rr   r%   min_center_frequencymax_center_frequencycenter_frequenciesrz   r{   r
   r
   r   rH      s   


zBarkScale.bankNr|   r
   r
   r
   r   r/      s    

r/   )r   r4   typingr   r   	typeguardr   rJ   Moduler   r   r.   r/   r
   r
   r
   r   <module>   s    %