o
    zi4                     @   s>   d dl Z d dlZd dlZd dlZddlmZ G dd dZdS )    N   )utilc                   @   s  e Zd ZdZdZdZdd Z	d,dededefdd	Z		d,dededefd
dZ
defddZ	d-dejejejef defddZ	d-dejejejef defddZ	d.dejejejef dejejejef defddZ	d.dejejejef dejejejef defddZ	d.dejejejef defd d!Zd"ejejejef fd#d$Zd%ejejejef fd&d'Zd/d)efd*d+ZdS )0DSPMixinNc                 C   sd   | j | _| j| _t|| j }t|| j }|| dkr$|| }|| }| || | j| _||fS )Nr   )	
batch_size_original_batch_sizenum_channels_original_num_channelsintsample_ratezero_padsignal_length_padded_signal_length)selfwindow_durationhop_durationwindow_length
hop_lengthfactor r   G/home/ubuntu/.local/lib/python3.10/site-packages/audiotools/core/dsp.py _preprocess_signal_for_windowing   s   z)DSPMixin._preprocess_signal_for_windowingTr   r   
preprocessc           
      c   s    |r|  ||\}}| jdd| j| _t| jD ]&}d}|| }	 || }|d7 }|| }	|	| jkr5n| |d||	f V  q#qdS )aF  Generator which yields windows of specified duration from signal with a specified
        hop length.

        Parameters
        ----------
        window_duration : float
            Duration of every window in seconds.
        hop_duration : float
            Hop between windows in seconds.
        preprocess : bool, optional
            Whether to preprocess the signal, so that the first sample is in
            the middle of the first window, by default True

        Yields
        ------
        AudioSignal
            Each window is returned as an AudioSignal.
        r   r   T.N)r   
audio_datareshaper   ranger   )
r   r   r   r   r   r   bi	start_idxend_idxr   r   r   windows   s&   
zDSPMixin.windowsc                 C   sb   |r
|  ||\}}tjjj| jddd| jd|fd|fd}|ddddd|}|| _| S )a  Reshapes signal into windows of specified duration from signal with a specified
        hop length. Window are placed along the batch dimension. Use with
        :py:func:`audiotools.core.dsp.DSPMixin.overlap_and_add` to reconstruct the
        original signal.

        Parameters
        ----------
        window_duration : float
            Duration of every window in seconds.
        hop_duration : float
            Hop between windows in seconds.
        preprocess : bool, optional
            Whether to preprocess the signal, so that the first sample is in
            the middle of the first window, by default True

        Returns
        -------
        AudioSignal
            AudioSignal unfolded with shape ``(nb * nch * num_windows, 1, window_length)``
        r   r   )kernel_sizestrider      )	r   torchnn
functionalunfoldr   r   r   permute)r   r   r   r   r   r   unfoldedr   r   r   collect_windowsF   s   zDSPMixin.collect_windowsc           	      C   s   t || j }| j}| j| j}}| j|| d|ddd}tj	j
j|d| jfd|fd|fd}tj||jd}tj	j
j|d| jfd|fd|fd}|| }|||d}|| _| || | S )a  Function which takes a list of windows and overlap adds them into a
        signal the same length as ``audio_signal``.

        Parameters
        ----------
        hop_duration : float
            How much to shift for each window
            (overlap is window_duration - hop_duration) in seconds.

        Returns
        -------
        AudioSignal
            overlap-and-added signal.
        r   r   r#   r   )output_sizer!   r"   device)r	   r
   r   r   r   r   r   r(   r$   r%   r&   foldr   	ones_liker-   trim)	r   r   r   r   nbnchr)   foldednormr   r   r   overlap_and_addn   s,   zDSPMixin.overlap_and_add3   cutoffszerosc                 C   t   t |d| j}|| j }t| j}t|D ]\}}tj	|
 |d| j}|| j| ||< q|| _d| _| S )a9  Low-passes the signal in-place. Each item in the batch
        can have a different low-pass cutoff, if the input
        to this signal is an array or tensor. If a float, all
        items are given the same low-pass filter.

        Parameters
        ----------
        cutoffs : typing.Union[torch.Tensor, np.ndarray, float]
            Cutoff in Hz of low-pass filter.
        zeros : int, optional
            Number of taps to use in low-pass filter, by default 51

        Returns
        -------
        AudioSignal
            Low-passed AudioSignal.
        r#   r8   N)r   ensure_tensorr   r
   r$   
empty_liker   	enumeratejuliusLowPassFiltercputor-   	stft_data)r   r7   r8   filteredr   cutoff	lp_filterr   r   r   low_pass      
zDSPMixin.low_passc                 C   r9   )a?  High-passes the signal in-place. Each item in the batch
        can have a different high-pass cutoff, if the input
        to this signal is an array or tensor. If a float, all
        items are given the same high-pass filter.

        Parameters
        ----------
        cutoffs : typing.Union[torch.Tensor, np.ndarray, float]
            Cutoff in Hz of high-pass filter.
        zeros : int, optional
            Number of taps to use in high-pass filter, by default 51

        Returns
        -------
        AudioSignal
            High-passed AudioSignal.
        r#   r:   N)r   r;   r   r
   r$   r<   r   r=   r>   HighPassFilterr@   rA   r-   rB   )r   r7   r8   rC   r   rD   	hp_filterr   r   r   	high_pass   rG   zDSPMixin.high_pass        fmin_hzfmax_hzvalc           	      C   s   | j | j}}tj||jd}tj||jd}t||k s J |jd }tjd| j	d || j
d}|dddddf | jdd|jd }||k||k @ }|| j
}|||}|||}|td	|  | _| S )
a  Masks frequencies between ``fmin_hz`` and ``fmax_hz``, and fills them
        with the value specified by ``val``. Useful for implementing SpecAug.
        The min and max can be different for every item in the batch.

        Parameters
        ----------
        fmin_hz : typing.Union[torch.Tensor, np.ndarray, float]
            Lower end of band to mask out.
        fmax_hz : typing.Union[torch.Tensor, np.ndarray, float]
            Upper end of band to mask out.
        val : float, optional
            Value to fill in, by default 0.0

        Returns
        -------
        AudioSignal
            Signal with ``stft_data`` manipulated. Apply ``.istft()`` to get the
            masked audio data.
        ndimr   r#   r,   Nr   r                 ?)	magnitudephaser   r;   rP   r$   allshapelinspacer
   r-   repeatr   rA   masked_fillexprB   )	r   rL   rM   rN   magrT   nbinsbins_hzmaskr   r   r   mask_frequencies   s   
zDSPMixin.mask_frequenciestmin_stmax_sc           	      C   s   | j | j}}tj||jd}tj||jd}t||k s J |jd }tjd| j	|| j
d}|dddddf | jd|jd d}||k||k @ }|||}|||}|td|  | _| S )	a  Masks timesteps between ``tmin_s`` and ``tmax_s``, and fills them
        with the value specified by ``val``. Useful for implementing SpecAug.
        The min and max can be different for every item in the batch.

        Parameters
        ----------
        tmin_s : typing.Union[torch.Tensor, np.ndarray, float]
            Lower end of timesteps to mask out.
        tmax_s : typing.Union[torch.Tensor, np.ndarray, float]
            Upper end of timesteps to mask out.
        val : float, optional
            Value to fill in, by default 0.0

        Returns
        -------
        AudioSignal
            Signal with ``stft_data`` manipulated. Apply ``.istft()`` to get the
            masked audio data.
        rO   r   r   r,   Nr   rQ   rR   )rS   rT   r   r;   rP   r$   rU   rV   rW   signal_durationr-   rX   r   rY   rZ   rB   )	r   r`   ra   rN   r[   rT   ntbins_tr^   r   r   r   mask_timesteps  s   
zDSPMixin.mask_timesteps	db_cutoffc                 C   s<   | j }|  }tj||jd}||k }|||}|| _ | S )a,  Mask away magnitudes below a specified threshold, which
        can be different for every item in the batch.

        Parameters
        ----------
        db_cutoff : typing.Union[torch.Tensor, np.ndarray, float]
            Decibel value for which things below it will be masked away.
        val : float, optional
            Value to fill in for masked portions, by default 0.0

        Returns
        -------
        AudioSignal
            Signal with ``stft_data`` manipulated. Apply ``.istft()`` to get the
            masked audio data.
        rO   )rS   log_magnituder   r;   rP   rY   )r   rf   rN   r[   log_magr^   r   r   r   mask_low_magnitudes3  s   zDSPMixin.mask_low_magnitudesshiftc                 C   s"   t j|| jjd}| j| | _| S )a`  Shifts the phase by a constant value.

        Parameters
        ----------
        shift : typing.Union[torch.Tensor, np.ndarray, float]
            What to shift the phase by.

        Returns
        -------
        AudioSignal
            Signal with ``stft_data`` manipulated. Apply ``.istft()`` to get the
            masked audio data.
        rO   )r   r;   rT   rP   )r   rj   r   r   r   shift_phaseP  s   zDSPMixin.shift_phasescalec                 C   s.   t j|| jjd}| j|t| j  | _| S )a  Corrupts the phase randomly by some scaled value.

        Parameters
        ----------
        scale : typing.Union[torch.Tensor, np.ndarray, float]
            Standard deviation of noise to add to the phase.

        Returns
        -------
        AudioSignal
            Signal with ``stft_data`` manipulated. Apply ``.istft()`` to get the
            masked audio data.
        rO   )r   r;   rT   rP   r$   
randn_like)r   rl   r   r   r   corrupt_phaseb  s   zDSPMixin.corrupt_phase333333?coefc                 C   s^   t d| dgddd| j}| jdd| j}t jj	j
||dd}|j| jj | _| S )a:  Applies pre-emphasis to audio signal.

        Parameters
        ----------
        coef : float, optional
            How much pre-emphasis to apply, lower values do less. 0 does nothing.
            by default 0.85

        Returns
        -------
        AudioSignal
            Pre-emphasized signal.
        r   r   r   )padding)r$   tensorviewrA   r-   r   r   r   r%   r&   conv1drV   )r   rp   kernelxr   r   r   preemphasist  s
   $zDSPMixin.preemphasis)T)r6   )rK   )ro   )__name__
__module____qualname__r   r   r   r   floatboolr    r*   r5   typingUnionr$   Tensornpndarrayr	   rF   rJ   r_   re   ri   rk   rn   rw   r   r   r   r   r   
   sv    
(
(,
!
$
1
.
r   )r}   r>   numpyr   r$    r   r   r   r   r   r   <module>   s    