"""
Low level signal processing utilities

Authors
 * Peter Plantinga 2020
 * Francois Grondin 2020
 * William Aris 2020
 * Samuele Cornell 2020
 * Sarthak Yadav 2022
    N)versionavglinearc                 C   sl  t | jdkr| d} |dv sJ |dv sJ |dkrN|du r,tjt| ddd}nrtjt| ddd	}t |jd
krIt|tjrI|d}|| }nP|dkr|du rdt	tj| d ddd}n:tjt
| dddd	}t |jd
krt|tjr|d}t	|| }n|dkrtjt| dddd }nt|dkr|S |dkrtjdt| ddS t)aQ  Compute amplitude of a batch of waveforms.

    Arguments
    ---------
    waveforms : tensor
        The waveforms used for computing amplitude.
        Shape should be `[time]` or `[batch, time]` or
        `[batch, time, channels]`.
    lengths : tensor
        The lengths of the waveforms excluding the padding.
        Shape should be a single dimension, `[batch]`.
    amp_type : str
        Whether to compute the "avg" average, "rms" root-mean-square, or
        "peak" amplitude. Choose between ["avg", "rms", "peak"].
    scale : str
        Whether to compute amplitude in "dB" or "linear" scale.
        Choose between ["linear", "dB"].

    Returns
    -------
    The average amplitude of the waveforms.

    Example
    -------
    >>> signal = torch.sin(torch.arange(16000.0)).unsqueeze(0)
    >>> compute_amplitude(signal, signal.size(1))
    tensor([[0.6366]])
    """
    if len(waveforms.shape) == 1:
        waveforms = waveforms.unsqueeze(0)

    assert amp_type in ["avg", "rms", "peak"]
    assert scale in ["linear", "dB"]

    if amp_type == "avg":
        if lengths is None:
            out = torch.mean(torch.abs(waveforms), dim=1, keepdim=True)
        else:
            wav_sum = torch.sum(input=torch.abs(waveforms), dim=1, keepdim=True)
            if len(wav_sum.shape) == 3 and isinstance(lengths, torch.Tensor):
                # Add a trailing dim so lengths broadcast over channels
                lengths = lengths.unsqueeze(-1)
            out = wav_sum / lengths
    elif amp_type == "rms":
        if lengths is None:
            out = torch.sqrt(torch.mean(waveforms.pow(2), dim=1, keepdim=True))
        else:
            wav_sum = torch.sum(input=waveforms.pow(2), dim=1, keepdim=True)
            if len(wav_sum.shape) == 3 and isinstance(lengths, torch.Tensor):
                lengths = lengths.unsqueeze(-1)
            out = torch.sqrt(wav_sum / lengths)
    elif amp_type == "peak":
        out = torch.max(torch.abs(waveforms), dim=1, keepdim=True)[0]
    else:
        raise NotImplementedError

    if scale == "linear":
        return out
    elif scale == "dB":
        # Clamp zeros to avoid -inf after the log
        return torch.clamp(20 * torch.log10(out), min=-80)
    else:
        raise NotImplementedError
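
# Illustrative sketch (not part of the original module): RMS level in dB for a
# hypothetical batch of waveforms; the shapes and lengths below are made up.
#
#     wav = torch.randn(4, 16000)
#     lengths = torch.full((4, 1), 16000.0)
#     level_db = compute_amplitude(wav, lengths, amp_type="rms", scale="dB")
#
# With scale="dB" the result is clamped from below, so silent inputs do not
# produce -inf values.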


def normalize(waveforms, lengths=None, amp_type="avg", eps=1e-14):
    """This function normalizes a signal to unitary average or peak amplitude.

    Arguments
    ---------
    waveforms : tensor
        The waveforms to normalize.
        Shape should be `[batch, time]` or `[batch, time, channels]`.
    lengths : tensor
        The lengths of the waveforms excluding the padding.
        Shape should be a single dimension, `[batch]`.
    amp_type : str
        Whether one wants to normalize with respect to "avg" or "peak"
        amplitude. Choose between ["avg", "peak"]. Note: for "avg" clipping
        is not prevented and can occur.
    eps : float
        A small number to add to the denominator to prevent NaN.

    Returns
    -------
    waveforms : tensor
        Normalized level waveform.
    """
    assert amp_type in ["avg", "peak"]

    batch_added = False
    if len(waveforms.shape) == 1:
        batch_added = True
        waveforms = waveforms.unsqueeze(0)

    den = compute_amplitude(waveforms, lengths, amp_type) + eps
    if batch_added:
        waveforms = waveforms.squeeze(0)
    return waveforms / den
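
# Illustrative sketch (not part of the original module): peak-normalizing a
# hypothetical [batch, time] tensor so the maximum absolute sample is ~1.
#
#     wav = torch.randn(2, 16000)
#     wav_norm = normalize(wav, amp_type="peak")
#
# With amp_type="avg" the average amplitude becomes 1, which can push samples
# outside [-1, 1] (clipping is not prevented, as noted in the docstring).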


def mean_std_norm(waveforms, dims=1, eps=1e-6):
    """This function normalizes the mean and std of the input
        waveform (along the specified axis).

    Arguments
    ---------
    waveforms : tensor
        The waveforms to normalize.
        Shape should be `[batch, time]` or `[batch, time, channels]`.
    dims : int or tuple
        The dimension(s) on which mean and std are computed
    eps : float
        A small number to add to the denominator to prevent NaN.

    Returns
    -------
    waveforms : tensor
        Normalized level waveform.
    T)r   )r   std)r    dimsr+   r   r0   r&   r&   r'   mean_std_normz   s   r2   c                 C   s   |dv sJ |dv sJ d}t | jdkrd}| d} t| ||} |dkr+||  }n|dkr6t||  }ntd	|rA|d}|S )
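
# Illustrative sketch (not part of the original module): zero-mean,
# unit-variance normalization along the time axis of a hypothetical tensor.
#
#     wav = torch.randn(2, 16000) * 3.0 + 1.0
#     wav_norm = mean_std_norm(wav, dims=1)
#     # wav_norm.mean(dim=1) is ~0 and wav_norm.std(dim=1) is ~1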


def rescale(waveforms, lengths, target_lvl, amp_type="avg", scale="linear"):
    """This function performs signal rescaling to a target level.

    Arguments
    ---------
    waveforms : tensor
        The waveforms to normalize.
        Shape should be `[batch, time]` or `[batch, time, channels]`.
    lengths : tensor
        The lengths of the waveforms excluding the padding.
        Shape should be a single dimension, `[batch]`.
    target_lvl : float
        Target lvl in dB or linear scale.
    amp_type : str
        Whether one wants to rescale with respect to "avg" or "peak" amplitude.
        Choose between ["avg", "peak"].
    scale : str
        whether target_lvl belongs to linear or dB scale.
        Choose between ["linear", "dB"].

    Returns
    -------
    waveforms : tensor
        Rescaled waveforms.
    """
    assert amp_type in ["peak", "avg"]
    assert scale in ["linear", "dB"]

    batch_added = False
    if len(waveforms.shape) == 1:
        batch_added = True
        waveforms = waveforms.unsqueeze(0)

    waveforms = normalize(waveforms, lengths, amp_type)

    if scale == "linear":
        out = target_lvl * waveforms
    elif scale == "dB":
        out = dB_to_amplitude(target_lvl) * waveforms
    else:
        raise NotImplementedError("Invalid scale, choose between dB and linear")

    if batch_added:
        out = out.squeeze(0)

    return out
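
# Illustrative sketch (not part of the original module): rescaling a made-up
# signal to a target level. With scale="linear" the target is a gain; with
# scale="dB" it is first converted via dB_to_amplitude().
#
#     wav = torch.randn(1, 16000)
#     wav_scaled = rescale(wav, wav.size(1), target_lvl=0.5, amp_type="peak")
#     # the peak amplitude of wav_scaled is now ~0.5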


def convolve1d(
    waveform,
    kernel,
    padding=0,
    pad_type="constant",
    stride=1,
    groups=1,
    use_fft=False,
    rotation_index=0,
):
    """Use torch.nn.functional to perform 1d padding and conv.

    Arguments
    ---------
    waveform : tensor
        The tensor to perform operations on.
    kernel : tensor
        The filter to apply during convolution.
    padding : int or tuple
        The padding (pad_left, pad_right) to apply.
        If an integer is passed instead, this is passed
        to the conv1d function and pad_type is ignored.
    pad_type : str
        The type of padding to use. Passed directly to
        `torch.nn.functional.pad`, see PyTorch documentation
        for available options.
    stride : int
        The number of units to move each time convolution is applied.
        Passed to conv1d. Has no effect if `use_fft` is True.
    groups : int
        This option is passed to `conv1d` to split the input into groups for
        convolution. Input channels should be divisible by the number of groups.
    use_fft : bool
        When `use_fft` is passed `True`, then compute the convolution in the
        spectral domain using complex multiply. This is more efficient on CPU
        when the size of the kernel is large (e.g. reverberation). WARNING:
        Without padding, circular convolution occurs. This makes little
        difference in the case of reverberation, but may make more difference
        with different kernels.
    rotation_index : int
        This option only applies if `use_fft` is true. If so, the kernel is
        rolled by this amount before convolution to shift the output location.

    Returns
    -------
    The convolved waveform.

    Example
    -------
    >>> from speechbrain.dataio.dataio import read_audio
    >>> signal = read_audio('tests/samples/single-mic/example1.wav')
    >>> signal = signal.unsqueeze(0).unsqueeze(2)
    >>> kernel = torch.rand(1, 10, 1)
    >>> signal = convolve1d(signal, kernel, padding=(9, 0))
    """
    if len(waveform.shape) != 3:
        raise ValueError("Convolve1D expects a 3-dimensional tensor")

    # Move time dimension last, which pad and fft and conv expect.
    waveform = waveform.transpose(2, 1)
    kernel = kernel.transpose(2, 1)

    # Padding can be a tuple (left_pad, right_pad) or an int
    if isinstance(padding, tuple):
        waveform = torch.nn.functional.pad(
            input=waveform, pad=padding, mode=pad_type
        )

    # This approach uses FFT, which is more efficient if the kernel is large
    if use_fft:
        # Pad kernel to same length as signal, ensuring correct alignment
        zero_length = waveform.size(-1) - kernel.size(-1)

        # Handle case where signal is shorter
        if zero_length < 0:
            kernel = kernel[..., :zero_length]
            zero_length = 0

        # Perform rotation to ensure alignment
        zeros = torch.zeros(
            kernel.size(0), kernel.size(1), zero_length, device=kernel.device
        )
        after_index = kernel[..., rotation_index:]
        before_index = kernel[..., :rotation_index]
        kernel = torch.cat((after_index, zeros, before_index), dim=-1)

        # Multiply in frequency domain to convolve in time domain
        if version.parse(torch.__version__) > version.parse("1.6.0"):
            import torch.fft as fft

            result = fft.rfft(waveform) * fft.rfft(kernel)
            convolved = fft.irfft(result, n=waveform.size(-1))
        else:
            f_signal = torch.rfft(waveform, 1)
            f_kernel = torch.rfft(kernel, 1)
            sig_real, sig_imag = f_signal.unbind(-1)
            ker_real, ker_imag = f_kernel.unbind(-1)
            f_result = torch.stack(
                [
                    sig_real * ker_real - sig_imag * ker_imag,
                    sig_real * ker_imag + sig_imag * ker_real,
                ],
                dim=-1,
            )
            convolved = torch.irfft(
                f_result, 1, signal_sizes=[waveform.size(-1)]
            )

    # Use the implementation given by torch, which should be efficient on GPU
    else:
        convolved = torch.nn.functional.conv1d(
            input=waveform,
            weight=kernel,
            stride=stride,
            groups=groups,
            padding=padding if not isinstance(padding, tuple) else 0,
        )

    # Return time dimension to the second dimension.
    return convolved.transpose(2, 1)
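
# Illustrative sketch (not part of the original module): FFT-based convolution
# with a long kernel, as used for reverberation. Shapes follow the docstring
# example ([batch, time, channels]); the kernel below is arbitrary.
#
#     wav = torch.randn(1, 16000, 1)
#     rir = torch.rand(1, 2000, 1)
#     wet = convolve1d(wav, rir, use_fft=True, rotation_index=0)
#
# Without explicit padding the FFT path performs circular convolution, which is
# usually acceptable for reverberation kernels (see the warning above).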


def reverberate(waveforms, rir_waveform, rescale_amp="avg"):
    """
    General function to contaminate a given signal with reverberation given a
    Room Impulse Response (RIR).
    It performs convolution between RIR and signal, but without changing
    the original amplitude of the signal.

    Arguments
    ---------
    waveforms : tensor
        The waveforms to which reverberation is applied.
        Shape should be `[batch, time]` or `[batch, time, channels]`.
    rir_waveform : tensor
        RIR tensor, shape should be [time, channels].
    rescale_amp : str
        Whether the reverberated signal is rescaled (None means no rescaling),
        and if so whether it matches the original signal's "peak" amplitude
        or "avg" average amplitude.
        Choose between [None, "avg", "peak"].

    Returns
    -------
    waveforms: tensor
        Reverberated signal.
    """
    orig_shape = waveforms.shape

    if len(waveforms.shape) > 3 or len(rir_waveform.shape) > 3:
        raise NotImplementedError

    # If inputs are mono tensors, reshape to [batch, time, channels]
    if len(waveforms.shape) == 1:
        waveforms = waveforms.unsqueeze(0).unsqueeze(-1)
    elif len(waveforms.shape) == 2:
        waveforms = waveforms.unsqueeze(-1)

    if len(rir_waveform.shape) == 1:  # convolve1d expects a 3d tensor
        rir_waveform = rir_waveform.unsqueeze(0).unsqueeze(-1)
    elif len(rir_waveform.shape) == 2:
        rir_waveform = rir_waveform.unsqueeze(-1)

    # Compute the amplitude of the clean signal
    orig_amplitude = compute_amplitude(
        waveforms, waveforms.size(1), rescale_amp
    )

    # Compute index of the direct signal, so we can preserve alignment
    value_max, direct_index = rir_waveform.abs().max(axis=1, keepdim=True)

    # Use FFT to compute convolution, because of long reverberation filter
    waveforms = convolve1d(
        waveform=waveforms,
        kernel=rir_waveform,
        use_fft=True,
        rotation_index=direct_index,
    )

    # Rescale to the original amplitude of the clean waveform
    waveforms = rescale(
        waveforms, waveforms.size(1), orig_amplitude, rescale_amp
    )

    if len(orig_shape) == 1:
        waveforms = waveforms.squeeze(0).squeeze(-1)
    if len(orig_shape) == 2:
        waveforms = waveforms.squeeze(-1)

    return waveforms
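
# Illustrative sketch (not part of the original module): adding reverberation
# from a hypothetical RIR to a batch of clean signals while preserving level.
#
#     clean = torch.randn(4, 16000)
#     rir = torch.rand(8000)          # [time] RIR; [time, channels] also works
#     wet = reverberate(clean, rir, rescale_amp="avg")
#
# The output keeps the shape and (average) amplitude of `clean`.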


def dB_to_amplitude(SNR):
    """Returns the amplitude ratio, converted from decibels.

    Arguments
    ---------
    SNR : float
        The ratio in decibels to convert.

    Returns
    -------
    The amplitude ratio

    Example
    -------
    >>> round(dB_to_amplitude(SNR=10), 3)
    3.162
    >>> dB_to_amplitude(SNR=0)
    1.0
    """
    return 10 ** (SNR / 20)


def notch_filter(notch_freq, filter_width=101, notch_width=0.05):
    """Returns a notch filter constructed from a high-pass and low-pass filter.

    (from https://tomroelandts.com/articles/
    how-to-create-simple-band-pass-and-band-reject-filters)

    Arguments
    ---------
    notch_freq : float
        frequency to put notch as a fraction of the
        sampling rate / 2. The range of possible inputs is 0 to 1.
    filter_width : int
        Filter width in samples. Longer filters have
        smaller transition bands, but are more inefficient.
    notch_width : float
        Width of the notch, as a fraction of the sampling_rate / 2.

    Returns
    -------
    The computed filter

    Example
    -------
    >>> from speechbrain.dataio.dataio import read_audio
    >>> signal = read_audio('tests/samples/single-mic/example1.wav')
    >>> signal = signal.unsqueeze(0).unsqueeze(2)
    >>> kernel = notch_filter(0.25)
    >>> notched_signal = convolve1d(signal, kernel)
    """
    # Check inputs
    assert 0 < notch_freq <= 1
    assert filter_width % 2 != 0
    pad = filter_width // 2
    inputs = torch.arange(filter_width) - pad

    # Avoid frequencies that are too low
    notch_freq += notch_width

    # Define sinc function, avoiding division by zero
    def sinc(x):
        """Computes the sinc function."""

        def _sinc(x):
            return torch.sin(x) / x

        # The zero is at the middle index
        return torch.cat([_sinc(x[:pad]), torch.ones(1), _sinc(x[pad + 1 :])])

    # Compute a low-pass filter with cutoff frequency notch_freq.
    hlpf = sinc(3 * (notch_freq - notch_width) * inputs)
    hlpf *= torch.blackman_window(filter_width)
    hlpf /= torch.sum(hlpf)

    # Compute a high-pass filter with cutoff frequency notch_freq.
    hhpf = sinc(3 * (notch_freq + notch_width) * inputs)
    hhpf *= torch.blackman_window(filter_width)
    hhpf /= -torch.sum(hhpf)
    hhpf[pad] += 1

    # Adding filters creates notch filter
    return (hlpf + hhpf).view(1, -1, 1)


def overlap_and_add(signal, frame_step):
    """Taken from https://github.com/kaituoxu/Conv-TasNet/blob/master/src/utils.py

    Reconstructs a signal from a framed representation.
    Adds potentially overlapping frames of a signal with shape
    `[..., frames, frame_length]`, offsetting subsequent frames by `frame_step`.
    The resulting tensor has shape `[..., output_size]` where
        output_size = (frames - 1) * frame_step + frame_length

    Arguments
    ---------
    signal: A [..., frames, frame_length] torch.Tensor.
        All dimensions may be unknown, and rank must be at least 2.
    frame_step: int
        An integer denoting overlap offsets. Must be less than or equal to frame_length.

    Returns
    -------
    A Tensor with shape [..., output_size] containing the overlap-added frames of signal's inner-most two dimensions.
        output_size = (frames - 1) * frame_step + frame_length
    Based on https://github.com/tensorflow/tensorflow/blob/r1.12/tensorflow/contrib/signal/python/ops/reconstruction_ops.py

    Example
    -------
    >>> signal = torch.randn(5, 20)
    >>> overlapped = overlap_and_add(signal, 20)
    >>> overlapped.shape
    torch.Size([100])
    """
    outer_dimensions = signal.size()[:-2]
    frames, frame_length = signal.size()[-2:]

    # gcd = Greatest Common Divisor
    subframe_length = math.gcd(frame_length, frame_step)
    subframe_step = frame_step // subframe_length
    subframes_per_frame = frame_length // subframe_length
    output_size = frame_step * (frames - 1) + frame_length
    output_subframes = output_size // subframe_length

    subframe_signal = signal.view(*outer_dimensions, -1, subframe_length)

    frame = torch.arange(0, output_subframes).unfold(
        0, subframes_per_frame, subframe_step
    )
    frame = frame.clone().detach().to(signal.device.type)
    frame = frame.contiguous().view(-1)

    result = signal.new_zeros(
        *outer_dimensions, output_subframes, subframe_length
    )
    result.index_add_(-2, frame, subframe_signal)
    result = result.view(*outer_dimensions, -1)
    return result


def resynthesize(enhanced_mag, noisy_inputs, stft, istft, normalize_wavs=True):
    """Function for resynthesizing waveforms from enhanced mags.

    Arguments
    ---------
    enhanced_mag : torch.Tensor
        Predicted spectral magnitude, should be three dimensional.
    noisy_inputs : torch.Tensor
        The noisy waveforms before any processing, to extract phase.
    stft : torch.nn.Module
        Module for computing the STFT for extracting phase.
    istft : torch.nn.Module
        Module for computing the iSTFT for resynthesis.
    normalize_wavs : bool
        Whether to normalize the output wavs before returning them.

    Returns
    -------
    enhanced_wav : torch.Tensor
        The resynthesized waveforms of the enhanced magnitudes with noisy phase.
    """
    # Extract noisy phase from inputs
    noisy_feats = stft(noisy_inputs)
    noisy_phase = torch.atan2(noisy_feats[:, :, :, 1], noisy_feats[:, :, :, 0])

    # Combine with enhanced magnitude
    complex_predictions = torch.mul(
        torch.unsqueeze(enhanced_mag, -1),
        torch.cat(
            (
                torch.unsqueeze(torch.cos(noisy_phase), -1),
                torch.unsqueeze(torch.sin(noisy_phase), -1),
            ),
            -1,
        ),
    )
    pred_wavs = istft(complex_predictions, sig_length=noisy_inputs.shape[1])

    # Normalize. Since we're using peak amplitudes, ignore lengths
    if normalize_wavs:
        pred_wavs = normalize(pred_wavs, amp_type="peak")

    return pred_wavs
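
# Illustrative sketch (not part of the original module): `stft` and `istft` are
# expected to be callable modules such as the STFT/ISTFT classes from
# speechbrain.processing.features (assumed here; they are not defined in this
# file), and `mag` is a hypothetical enhanced magnitude.
#
#     noisy = torch.randn(1, 16000)
#     enhanced_wav = resynthesize(mag, noisy, stft, istft)
#
# The enhanced magnitude is combined with the noisy phase, inverted back to a
# waveform, and optionally normalized before being returned.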


def gabor_impulse_response(t, center, fwhm):
    """
    Function for generating gabor impulse responses
    as used by GaborConv1d proposed in

    Neil Zeghidour, Olivier Teboul, Félix de Chaumont Quitry & Marco Tagliasacchi, "LEAF: A LEARNABLE FRONTEND
    FOR AUDIO CLASSIFICATION", in Proc of ICLR 2021 (https://arxiv.org/abs/2101.08596)
    """
    denominator = 1.0 / (torch.sqrt(torch.tensor(2.0) * math.pi) * fwhm)
    gaussian = torch.exp(
        torch.tensordot(1.0 / (2.0 * fwhm**2), -(t**2), dims=0)
    )
    center_frequency_complex = center.type(torch.complex64)
    t_complex = t.type(torch.complex64)
    sinusoid = torch.exp(
        torch.complex(torch.tensor(0.0), torch.tensor(1.0))
        * torch.tensordot(center_frequency_complex, t_complex, dims=0)
    )
    denominator = denominator.type(torch.complex64).unsqueeze(1)
    gaussian = gaussian.type(torch.complex64)
    return denominator * sinusoid * gaussian
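
# Illustrative sketch (not part of the original module): a bank of 40 Gabor
# kernels of width 401 samples, as a LEAF-style frontend would use. The centers
# and bandwidths below are arbitrary values chosen only for illustration.
#
#     t = torch.arange(-200.0, 201.0)
#     center = torch.linspace(0.1, 3.0, 40)      # radian frequencies
#     fwhm = torch.full((40,), 50.0)
#     kernels = gabor_impulse_response(t, center, fwhm)   # complex64, [40, 401]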
i}	|d	d	d	d	df | |d	d	d	d	df t |  |	d	d	d	d	df< |d	d	d	d	df t | |d	d	d	d	df |  |	d	d	d	d	df< |	S )aM  
    Function for generating gabor impulse responses, but without using complex64 dtype
    as used by GaborConv1d proposed in

    Neil Zeghidour, Olivier Teboul, Félix de Chaumont Quitry & Marco Tagliasacchi, "LEAF: A LEARNABLE FRONTEND
    FOR AUDIO CLASSIFICATION", in Proc of ICLR 2021 (https://arxiv.org/abs/2101.08596)
    """
    denominator = 1.0 / (torch.sqrt(torch.tensor(2.0) * math.pi) * fwhm)
    gaussian = torch.exp(
        torch.tensordot(1.0 / (2.0 * fwhm**2), -(t**2), dims=0)
    )

    # Phase term center x t; complex values are stored as a trailing
    # [real, imag] dimension instead of using the complex64 dtype.
    temp = torch.tensordot(center, t, dims=0)
    temp2 = torch.zeros(*temp.shape, 2, device=temp.device)
    temp2[:, :, 1] = temp  # 1j * temp has a zero real part

    # exp(1j * x) = cos(x) + 1j * sin(x)
    sinusoid = torch.zeros_like(temp2, device=temp2.device)
    sinusoid[:, :, 0] = torch.cos(temp2[:, :, 1])
    sinusoid[:, :, 1] = torch.sin(temp2[:, :, 1])

    # Multiply by the (purely real) normalization term
    denominator_sinusoid = torch.zeros(*temp.shape, 2, device=temp.device)
    denominator_sinusoid[:, :, 0] = denominator.view(-1, 1) * sinusoid[:, :, 0]
    denominator_sinusoid[:, :, 1] = denominator.view(-1, 1) * sinusoid[:, :, 1]

    # Multiply by the (purely real) gaussian envelope
    output = torch.zeros(*temp.shape, 2, device=temp.device)
    output[:, :, 0] = denominator_sinusoid[:, :, 0] * gaussian
    output[:, :, 1] = denominator_sinusoid[:, :, 1] * gaussian
    return output