o
    .wiZ7                     @   s  d dl mZ d dlmZmZ d dlmZ d dlZd dlmZ d dl	m
Z
 d dlmZ d dlmZmZ er6es9d	gZed
ddedededejdef
ddZed
ddedededejdef
ddZed
ddedededededejdeeeeef fddZd:dedee defddZd ed!edefd"d#Zd;d%ed&edefd'd(Zd)ed*ed+edefd,d-Z	.	/	0		1	1d<d2eded3edededee d4ed5edefd6d	Z	.	/	0	7	1	1d=ded3edededee d4ed5eddfd8d9ZdS )>    )	lru_cache)ceilpi)OptionalN)Tensor)pad)rank_zero_warn)_GAMMATONE_AVAILABLE_TORCHAUDIO_AVAILABLE,speech_reverberation_modulation_energy_ratiod   )maxsizelow_freqfs	n_filtersdevicereturnc           	      C   sJ   ddl m} d}d}d}|||| | | ||  d|  }tj||dS )Nr   )centre_freqsg<;k"@g333338@   r   )gammatone.filtersr   torchtensor)	r   r   r   r   r   ear_qmin_bwordererbs r   _/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/torchmetrics/functional/audio/srmr.py
_calc_erbs$   s   $r   	num_freqscutoffc                 C   s4   ddl m}m} || ||}|| |}tj||dS )Nr   )r   make_erb_filtersr   )r   r   r"   r   r   )r   r    r!   r   r   r"   cfsfcoefsr   r   r   _make_erb_filters/   s   
r%   min_cfmax_cfnqc              
      s   ||  d|d   }t j|t jd}| |d< td|D ]}||d  | ||< qdtdtdtfdd	 t j fd
ddt | | D dd}	dtdtdtdt	ttf fdd}
|j
|d}|	j
|d}	|
||\}}||	||fS )N      ?r   dtyper   w0r)   r   c                 S   sz   t | d } | | }t j|d| gt jd}t jd| | d  d| d  d d| | d  gt jd}t j||gddS )N   r   r+   r   dim)r   tanr   float64stack)r-   r)   b0bar   r   r   _make_modulation_filterC   s
   :zK_compute_modulation_filterbank_and_cutoffs.<locals>._make_modulation_filterc                    s   g | ]} |qS r   r   ).0r-   r7   r)   r   r   
<listcomp>J   s    z>_compute_modulation_filterbank_and_cutoffs.<locals>.<listcomp>r.   r/   r#   r   c                 S   sR   dt  |  | }t|d | }| || dt    }| || dt    }||fS )Nr.   )r   r   r1   )r#   r   r)   r-   r4   llrrr   r   r   _calc_cutoffsL   s
   zA_compute_modulation_filterbank_and_cutoffs.<locals>._calc_cutoffsr   )r   zerosr2   ranger   intr3   r   floattupleto)r&   r'   r(   r   r)   r   spacing_factorr#   kmfbr=   r;   r<   r   r9   r   *_compute_modulation_filterbank_and_cutoffs8   s   *"rG   xc                 C   s   |   rtd|d u r| jd }|d rt|d d }|dkr%tdtjj| |dd}tj|| j| jdd}|d	 dkrRd
 |d< ||d	 < d	|d
|d	 < nd
|d< d	|d
|d
 d	 < tjj	|| dd}|dd | jd f S )Nzx must be real.   r   zN must be positive.)r(   r0   F)r,   r   requires_gradr.   r   r/   .)

is_complex
ValueErrorshaper   r   fftr>   r,   r   ifft)rH   r(   x_ffthyr   r   r   _hilbertZ   s"   
rT   wavecoefsc                 C   s   ddl m} | j\}}| j|jd|d|} | d|jd d} |dddf }|dddf }|ddd	f }|ddd
f }|dddf }	|ddddf }
|| |
|dd}|||
|dd}|||
|dd}|||
|	dd}||ddd S )zTranslated from gammatone package.

    Args:
        wave: shape [B, time]
        coefs: shape [N, 10]

    Returns:
        Tensor: shape [B, N, time]

    r   lfilterr+   r   rI   N	   )r   r      )r   r.   rZ   )r      rZ   )r      rZ      T)batching)torchaudio.functional.filteringrX   rN   rC   r,   reshapeexpand)rU   rV   rX   	num_batchtimegainas1as2as3as4bsy1y2y3y4r   r   r   _erb_filterbanks   s   
rn         >@energydrangec                 C   sb   t j| dddjdddj}|jdddj}|d| d   }t | |k || } t | |k|| S )zNormalize energy to a dynamic range of 30 dB.

    Args:
        energy: shape [B, N_filters, 8, n_frames]
        drange: dynamic range in dB

    r   Tr0   keepdimr.   r[   g      $@)r   meanmaxvalueswhere)rp   rq   peak_energy
min_energyr   r   r   _normalize_energy   s
   rz   bw
avg_energycutoffsc                 C   s   |d | kr|d | krd}n+|d | kr|d | krd}n|d | kr-|d | kr-d}n|d | kr6d}nt dt|ddddf t|ddd|f  S )zCalculate srmr score.r\   rZ   r]         z7Something wrong with the cutoffs compared to bw values.N)rM   r   sum)r{   r|   r}   kstarr   r   r   _cal_srmr_score   s   4r      }   r\   Fpredsn_cochlear_filtersnormfastc           -   	   C   s  t rtstdddlm} ddlm}	 t|||||||d | j}
t	|
dkr.| 
ddn| 
d|
d } | j\}}t| sM| tjt| jj } |  jddd	j}t|dk|tjd
|j|jd}| | } d}d}|rtd d}g }|    }t|D ]}||| |dd||}|t| qtj|ddj| jd}nt|||| jd}ttt | |}|}t!|| }t!|| }|du r|rdnd}t"||d|d| jd\}}}}t#d|| |  }tj$|d tj| jddd }|	|%d&dd|jd d|dddddf |dddddf ddd}dtt!|| | | || f} t'|| ddd}!|!(d||}"|"dd|ddf | d j)dd}#|r^t*|#}#t+t,|||| jd}$tj-|#dd}%tj)|%
|ddd}&tj)|%dd}'|'d  |&
dd }(|(.d/d})t0|)d!k/ddkdddf }*|$|* }+g }t|D ]}t1|+| |%| |d"},||, qt|},t	|
dkr|,j
|
dd  S |,S )#a  Calculate `Speech-to-Reverberation Modulation Energy Ratio`_ (SRMR).

    SRMR is a non-intrusive metric for speech quality and intelligibility based on
    a modulation spectral representation of the speech signal.
    This code is translated from SRMRToolbox and `SRMRpy`_.

    Args:
        preds: shape ``(..., time)``
        fs: the sampling rate
        n_cochlear_filters: Number of filters in the acoustic filterbank
        low_freq: determines the frequency cutoff for the corresponding gammatone filterbank.
        min_cf: Center frequency in Hz of the first modulation filter.
        max_cf: Center frequency in Hz of the last modulation filter. If None is given,
            then 30 Hz will be used for `norm==False`, otherwise 128 Hz will be used.
        norm: Use modulation spectrum energy normalization
        fast: Use the faster version based on the gammatonegram.
            Note: this argument is inherited from `SRMRpy`_. As the translated code is based to pytorch,
            setting `fast=True` may slow down the speed for calculating this metric on GPU.

    .. hint::
        Usingsing this metrics requires you to have ``gammatone`` and ``torchaudio`` installed.
        Either install as ``pip install torchmetrics[audio]`` or ``pip install torchaudio``
        and ``pip install git+https://github.com/detly/gammatone``.

    .. attention::
        This implementation is experimental, and might not be consistent with the matlab
        implementation SRMRToolbox, especially the fast implementation.
        The slow versions, a) ``fast=False, norm=False, max_cf=128``, b) ``fast=False, norm=True, max_cf=30``,
        have a relatively small inconsistency.

    Returns:
        Scalar tensor with srmr value with shape ``(...)``

    Raises:
        ModuleNotFoundError:
            If ``gammatone`` or ``torchaudio`` package is not installed

    Example:
        >>> from torch import randn
        >>> from torchmetrics.functional.audio import speech_reverberation_modulation_energy_ratio
        >>> preds = randn(8000)
        >>> speech_reverberation_modulation_energy_ratio(preds, 8000)
        tensor([0.3191], dtype=torch.float64)

    a  speech_reverberation_modulation_energy_ratio requires you to have `gammatone` and `torchaudio>=0.10` installed. Either install as ``pip install torchmetrics[audio]`` or ``pip install torchaudio>=0.10`` and ``pip install git+https://github.com/detly/gammatone``r   )
fft_gtgramrW   r   r   r   r&   r'   r   r   r   rI   Trr   r*   )r,   r   gMb?gMb?z:`fast=True` may slow down the speed of SRMR metric on GPU.g      y@g{Gz?g{Gzd?r/   r   N      r   r.   )r(   r   r)   r   F)clampr^   constant)r   modevalue.r   Z   )r}   )2r
   r	   ModuleNotFoundErrorgammatone.fftweightr   r_   rX   _srmr_arg_validaterN   lenr`   r   is_floating_pointrC   r2   finfor,   ru   absrv   rw   r   r   r   detachcpunumpyr?   appendr3   r%   rT   rn   r   rG   r@   hamming_window	unsqueezera   r   unfoldr   rz   flipudr   rt   flipcumsumnonzeror   )-r   r   r   r   r&   r'   r   r   r   rX   rN   rb   rc   max_valsval_norm
w_length_sw_inc_smfstemppreds_npr5   gt_env_bgt_envr$   w_lengthw_inc_mfr}   
num_frameswmod_outpaddingmod_out_padmod_out_framerp   r   r|   total_energy	ac_energyac_percac_perc_cumsumk90perc_idxr{   scorer   r   r   r      s   7	(

 F"&$
$r   c                 C   s   t | tr	| dkstd|  t |tr|dks td| t |ttfr+|dks2td| t |ttfr=|dksDtd| |durZt |ttfrS|dksZtd| t |tsctdt |tsltd	dS )
a9  Validate the arguments for speech_reverberation_modulation_energy_ratio.

    Args:
        fs: the sampling rate
        n_cochlear_filters: Number of filters in the acoustic filterbank
        low_freq: determines the frequency cutoff for the corresponding gammatone filterbank.
        min_cf: Center frequency in Hz of the first modulation filter.
        max_cf: Center frequency in Hz of the last modulation filter. If None is given,
        norm: Use modulation spectrum energy normalization
        fast: Use the faster version based on the gammatonegram.

    r   z;Expected argument `fs` to be an int larger than 0, but got zKExpected argument `n_cochlear_filters` to be an int larger than 0, but got zBExpected argument `low_freq` to be a float larger than 0, but got z@Expected argument `min_cf` to be a float larger than 0, but got Nz@Expected argument `max_cf` to be a float larger than 0, but got z+Expected argument `norm` to be a bool valuez+Expected argument `fast` to be a bool value)
isinstancer@   rM   rA   boolr   r   r   r   r   E  s"   

r   )N)ro   )r   r   r\   NFF)r   r   r\   r   FF)	functoolsr   mathr   r   typingr   r   r   torch.nn.functionalr   torchmetrics.utilitiesr   torchmetrics.utilities.importsr	   r
   __doctest_skip__rA   r@   r   r   r%   rB   rG   rT   rn   rz   r   r   r   r   r   r   r   r   <module>   s   "
"!	
 