o
    ziQ                     @   sT   d dl Z d dlZd dlZd dlZd dlZddlmZ G dd dZG dd dZ	dS )    N   )utilc                   @   s  e Zd Zedd Z	 ddddddid	d
dddddddddidZ	 		dJdeje	j
ejef deje	j
ejf fddZdKdefddZ			dLdeje	j
ejef deje	j
ejf defddZdMd!efd"d#ZdNd%eje	j
ejef fd&d'Zd%eje	j
ejef fd(d)Zd*d+ Zd,d- ZdKd.ed/efd0d1ZdKd2ed/efd3d4Z					dOd5eded6ed7ed8ef
d9d:Zd;efd<d=Zd%eje	j
ejf fd>d?Zd@eje	j
ejef fdAdBZdCeje	j
ejef fdDdEZdCeje	j
ejef fdFdGZdHdI Z dS )PEffectMixin
      wavULAW   )formatencodingbits_per_sampler
   gsmmp3i)r
   compressionvorbisoggamr-nb)z8-bitzGSM-FRMP3VorbisOggzAmr-nbNsnrother_eqc                 C   sv   t || j}td| j|j }|d| || j |dur'||}| 	 | }|
|}| j|j | _| S )ae  Mixes noise with signal at specified
        signal-to-noise ratio. Optionally, the
        other signal can be equalized in-place.


        Parameters
        ----------
        other : AudioSignal
            AudioSignal object to mix with.
        snr : typing.Union[torch.Tensor, np.ndarray, float], optional
            Signal to noise ratio, by default 10
        other_eq : typing.Union[torch.Tensor, np.ndarray], optional
            EQ curve to apply to other signal, if any, by default None

        Returns
        -------
        AudioSignal
            In-place modification of AudioSignal.
        r   N)r   ensure_tensortodevicemaxsignal_lengthzero_padtruncate_samples	equalizerloudness	normalize
audio_data)selfotherr   r   pad_lentgt_loudness r(   K/home/ubuntu/.local/lib/python3.10/site-packages/audiotools/core/effects.pymix   s   

zEffectMixin.mixTstart_at_maxc                 C   sD  ddl m} | j|j }|dkr|d| n|| j |rM|j jdd}t	|j}t
|jD ]}t|j| ||   d||< q3|||j}t	|j}d|d< | j}	tj||	}
tj|j|	}tj| j|	}|| }tj||	}||
 }tj||	}| jdddd }d|d	 }|| }|| _| S )
a  Convolves self with other.
        This function uses FFTs to do the convolution.

        Parameters
        ----------
        other : AudioSignal
            Signal to convolve with.
        start_at_max : bool, optional
            Whether to start at the max value of other signal, to
            avoid inducing delays, by default True

        Returns
        -------
        AudioSignal
            Convolved signal, in-place.
        r   )AudioSignalr   r   )axis).r   Tdimkeepdimsgh㈵>) r,   r   r   r   r#   absargmaxtorch
zeros_likerange
batch_sizerollitemsample_ratefftrfftirfftr   clamp)r$   r%   r+   r,   r&   idxirsideltalength	delta_fft	other_fftself_fftconvolved_fftconvolved_audiodelta_convolved_fftdelta_audio	delta_maxscaler(   r(   r)   convolveB   s4   $zEffectMixin.convolveFdrrir_equse_original_phasec           	      C   s   |dur	| |}|dur||}| j jdddj}| j}| | |r:|   | j	t
d|  | _|   | j jdddj}|d|d }| | } | S )a  Applies an impulse response to the signal. If ` is`ir_eq``
        is specified, the impulse response is equalized before
        it is applied, using the given curve.

        Parameters
        ----------
        ir : AudioSignal
            Impulse response to convolve with.
        drr : typing.Union[torch.Tensor, np.ndarray, float], optional
            Direct-to-reverberant ratio that impulse response will be
            altered to, if specified, by default None
        ir_eq : typing.Union[torch.Tensor, np.ndarray], optional
            Equalization that will be applied to impulse response
            if specified, by default None
        use_original_phase : bool, optional
            Whether to use the original phase, instead of the convolved
            phase, by default False

        Returns
        -------
        AudioSignal
            Signal with impulse response applied to it
        Nr   Tr.   y              ?g:0yE>)r    	alter_drrr#   r2   r   valuesphaserM   stft	magnituder4   exp	stft_dataistftr>   )	r$   irrN   rO   rP   max_spkrS   max_transformedscale_factorr(   r(   r)   apply_ir}   s   


zEffectMixin.apply_ir      ?r   c                 C   sJ   | j  jdddd }t|}||||k  |||k< | j | | _ | S )a   Ensures that ``abs(audio_data) <= max``.

        Parameters
        ----------
        max : float, optional
            Max absolute value of signal, by default 1.0

        Returns
        -------
        AudioSignal
            Signal with values scaled between -max and max.
        r   Tr.   r   )r#   r2   r   r4   	ones_like)r$   r   peak	peak_gainr(   r(   r)   ensure_max_of_audio   s
   
zEffectMixin.ensure_max_of_audio      8dbc                 C   sP   t || j}|  }|| }t|| j }| j|ddddf  | _| S )a  Normalizes the signal's volume to the specified db, in LUFS.
        This is GPU-compatible, making for very fast loudness normalization.

        Parameters
        ----------
        db : typing.Union[torch.Tensor, np.ndarray, float], optional
            Loudness to normalize to, by default -24.0

        Returns
        -------
        AudioSignal
            Normalized audio signal.
        N)	r   r   r   r   r!   r4   rV   GAIN_FACTORr#   )r$   rd   ref_dbgainr(   r(   r)   r"      s   zEffectMixin.normalizec                 C   sD   t j|dd| j}t|| j }| j|ddddf  | _| S )a  Change volume of signal by some amount, in dB.

        Parameters
        ----------
        db : typing.Union[torch.Tensor, np.ndarray, float]
            Amount to change volume by.

        Returns
        -------
        AudioSignal
            Signal at new volume.
        r   ndimN)r   r   r   r   r4   rV   re   r#   )r$   rd   rg   r(   r(   r)   volume_change   s   zEffectMixin.volume_changec                 C   s   | j d| j}|S Nr   )r#   reshaper   r$   waveformr(   r(   r)   _to_2d   s   zEffectMixin._to_2dc                 C   s   | | j| jdS rk   )rl   r7   num_channelsrm   r(   r(   r)   _to_3d   s   zEffectMixin._to_3dn_semitonesquickc                 C   sz   | j }dt|d gdt| jgg}|r|d dd |   }tjj|| j|dd\}}|| _| 	|| _
| |S )	a  Pitch shift the signal. All items in the batch
        get the same pitch shift.

        Parameters
        ----------
        n_semitones : int
            How many semitones to shift the signal by.
        quick : bool, optional
            Using quick pitch shifting, by default True

        Returns
        -------
        AudioSignal
            Pitch shifted audio signal.
        pitchd   rater   r   -qTchannels_firstr   strr:   insertro   cpu
torchaudiosox_effectsapply_effects_tensorrq   r#   r   )r$   rr   rs   r   effectsrn   r:   r(   r(   r)   pitch_shift   s   


zEffectMixin.pitch_shiftfactorc                 C   sv   | j }dt|gdt| jgg}|r|d dd |   }tjj|| j|dd\}}|| _| 	|| _
| |S )a  Time stretch the audio signal.

        Parameters
        ----------
        factor : float
            Factor by which to stretch the AudioSignal. Typically
            between 0.8 and 1.2.
        quick : bool, optional
            Whether to use quick time stretching, by default True

        Returns
        -------
        AudioSignal
            Time-stretched AudioSignal.
        temporv   r   r   rw   Trx   rz   )r$   r   rs   r   r   rn   r:   r(   r(   r)   time_stretch  s   



zEffectMixin.time_stretchpresetr   r   r   c                    s   dt jv }|r	S ||||d |dur.|jv rj|  ntd| dtj    d dv rNtj fdd	t	j
d
 D d
d}nt jjjfi  }|}|_S )a  Applies an audio codec to the signal.

        Parameters
        ----------
        preset : str, optional
            One of the keys in ``self.CODEC_PRESETS``, by default None
        format : str, optional
            Format for audio codec, by default "wav"
        encoding : str, optional
            Encoding to use, by default None
        bits_per_sample : int, optional
            How many bits per sample, by default None
        compression : int, optional
            Compression amount of codec, by default None

        Returns
        -------
        AudioSignal
            AudioSignal with codec applied.

        Raises
        ------
        ValueError
            If preset is not in ``self.CODEC_PRESETS``, an error
            is thrown.
        z0.7)r
   r   r   r   NzUnknown preset: z. Known presets: r
   )r   r   r   r   c                    s4   g | ]}t jj| d d d f jfi  qS N)r~   
functionalapply_codecr:   ).0rA   kwargsr$   rn   r(   r)   
<listcomp>q  s    z+EffectMixin.apply_codec.<locals>.<listcomp>r   r/   )r~   __version__CODEC_PRESETS
ValueErrorlistkeysro   r4   catr6   shaper   r   r:   rq   r#   )r$   r   r
   r   r   r   torchaudio_version_070	augmentedr(   r   r)   r   7  s@   
"


zEffectMixin.apply_codecn_bandsc                 C   s4   t | j| | j}|| j}|ddddS )a  Breaks signal into mel bands.

        Parameters
        ----------
        n_bands : int
            Number of mel bands to use.

        Returns
        -------
        torch.Tensor
            Mel-filtered bands, with last axis being the band index.
        r         r   )julius
SplitBandsr:   floatr   r   r#   permute)r$   r   
filterbankfilteredr(   r(   r)   mel_filterbank  s   
zEffectMixin.mel_filterbankc                 C   s   t |}|jd }| |}|jdkr(|jd dkr'|jd |jd ks'J n|d}d| | j }||ddddddf  }|	d}|| _
| S )a  Applies a mel-spaced equalizer to the audio signal.

        Parameters
        ----------
        db : typing.Union[torch.Tensor, np.ndarray]
            EQ curve to apply.

        Returns
        -------
        AudioSignal
            AudioSignal with equalization applied.
        r   r   r   r   r   N)r   r   r   r   ri   	unsqueezer   r   r   sumr#   )r$   rd   r   fbankweightseq_audio_datar(   r(   r)   r      s   





zEffectMixin.equalizerclip_percentilec                 C   s   t j|dd}tj| j|d dd}tj| jd|d  dd}| jjd }|ddd|ddf }|ddd|ddf }| j||| _| S )a  Clips the signal at a given percentile. The higher it is,
        the lower the threshold for clipping.

        Parameters
        ----------
        clip_percentile : typing.Union[torch.Tensor, np.ndarray, float]
            Values are between 0.0 to 1.0. Typical values are 0.1 or below.

        Returns
        -------
        AudioSignal
            Audio signal with clipped audio data.
        r   rh   r   r   r   N)r   r   r4   quantiler#   r   r>   )r$   r   
min_thresh
max_threshncr(   r(   r)   clip_distortion  s   zEffectMixin.clip_distortionquantization_channelsc                 C   sb   t j|dd}| j}|d d }|| }| }|| }d| d }| j|  }| j| | _| S )aU  Applies quantization to the input waveform.

        Parameters
        ----------
        quantization_channels : typing.Union[torch.Tensor, np.ndarray, int]
            Number of evenly spaced quantization channels to quantize
            to.

        Returns
        -------
        AudioSignal
            Quantized AudioSignal.
        r   rh   r   r   )r   r   r#   floordetach)r$   r   xresidualr(   r(   r)   quantization  s   zEffectMixin.quantizationc                 C   s   |d }t j|dd}| j}t|t|t|  t| }|d d | d tj}|| d d }t|t	t|t| d  | }| j| 
 }| j| | _| S )a\  Applies mu-law quantization to the input waveform.

        Parameters
        ----------
        quantization_channels : typing.Union[torch.Tensor, np.ndarray, int]
            Number of mu-law spaced quantization channels to quantize
            to.

        Returns
        -------
        AudioSignal
            Quantized AudioSignal.
        r^   r   rh   r   r   g      ?)r   r   r#   r4   signlog1pr2   r   int64rV   r   )r$   r   mur   r   r(   r(   r)   mulaw_quantization  s   (,zEffectMixin.mulaw_quantizationc                 C   s
   |  |S r   )rM   )r$   r%   r(   r(   r)   
__matmul__  s   
zEffectMixin.__matmul__)r   N)T)NNF)r^   )rc   )Nr   NNN)!__name__
__module____qualname__nplogre   r   typingUnionr4   Tensorndarrayr   r*   boolrM   r]   rb   r"   rj   ro   rq   intr   r   r{   r   r   r    r   r   r   r   r(   r(   r(   r)   r      s    

'>
8  "
K


!r   c                   @   sJ   e Zd ZdZdd Zdd Zedd Zdej	e
jejef fd	d
ZdS )ImpulseResponseMixina  These functions are generally only used with AudioSignals that are derived
    from impulse responses, not other sources like music or speech. These methods
    are used to replicate the data augmentation described in [1].

    1.  Bryan, Nicholas J. "Impulse response data augmentation and deep
        neural networks for blind room acoustic parameter estimation."
        ICASSP 2020-2020 IEEE International Conference on Acoustics,
        Speech and Signal Processing (ICASSP). IEEE, 2020.
    c           
      C   s  t j| jddd}t| jd }t j| jjd | jdddddf }|| j	dd}||| k||| k }t j
| j| jd}| j| ||< | }t j
| j| jd}| j| ||< t j
| j| jd}t| j	D ]}||df  }	| d|	jd | j||d	|	f< qh|||fS )
zTDecomposes an impulse response into early and late
        field responses.
        r   T)r/   keepdimg{Gzd?)r   Nr   hann.)r4   r3   r#   r   r:   aranger   r   expandr7   r5   r6   nonzero
get_window)
r$   tdt0r?   	early_idxearly_responselate_idx
late_fieldwindow
window_idxr(   r(   r)   decompose_ir  s"   	&
z!ImpulseResponseMixin.decompose_irc                 C   sD   |   \}}}|d jdd}|d jdd}dt||  }|S )zMeasures the direct-to-reverberant ratio of the impulse
        response.

        Returns
        -------
        float
            Direct-to-reverberant ratio
        r   r   r   r   )r   r   r4   log10)r$   r   r   _numdenrN   r(   r(   r)   measure_drr@  s
   	z ImpulseResponseMixin.measure_drrc                 C   s   |d }d| d }| d }|d }|| j dd}dd|  | | j dd}	|| j ddtd|d |j dd  }
|	d d| |
   }t|	 | d|  |	 | d|  }|S )zSUsed to solve for the alpha value, which is used
        to alter the drr.
        r   r   r   r   r      )r   r4   powsqrtmaximum)r   r   wd
target_drrwd_sqwd_sq_1e_sql_sqabcexpralphar(   r(   r)   solve_alphaO  s   	 
z ImpulseResponseMixin.solve_alpharN   c                 C   s   t |d| j| j}|  \}}}| ||||}| jddd | jddd  }t	
||d }|| | d| |  | }|| _|   | S )a  Alters the direct-to-reverberant ratio of the impulse response.

        Parameters
        ----------
        drr : typing.Union[torch.Tensor, np.ndarray, float]
            Direct-to-reverberant ratio that impulse response will be
            altered to, if specified, by default None

        Returns
        -------
        AudioSignal
            Altered impulse response.
        r   r   r   r   ).Nr   )r   r   r7   r   r   r   r   r2   r   r4   r   r#   rb   )r$   rN   r   r   r   r   	min_alphaaug_ir_datar(   r(   r)   rQ   i  s   &

zImpulseResponseMixin.alter_drrN)r   r   r   __doc__r   r   staticmethodr   r   r   r4   r   r   r   r   rQ   r(   r(   r(   r)   r     s    
$
"r   )
r   r   numpyr   r4   r~   r1   r   r   r   r(   r(   r(   r)   <module>   s        
