o
    .wi                     @   s   d dl mZ d dlmZmZmZ d dlmZmZ d dl	m
Z
 d dlmZ d dlmZmZ d dlmZmZ dd	giZes>d
gZG dd deZdS )    )Sequence)AnyOptionalUnion)Tensortensor)$perceptual_evaluation_speech_quality)Metric)_MATPLOTLIB_AVAILABLE_PESQ_AVAILABLE)_AX_TYPE_PLOT_OUT_TYPE!PerceptualEvaluationSpeechQualitypesq&PerceptualEvaluationSpeechQuality.plotc                       s   e Zd ZU dZeed< eed< dZeed< dZeed< dZ	eed< d	Z
eed
< dZeed< 	d dededededdf
 fddZdededdfddZdefddZd!deeee df dee defddZ  ZS )"r   a
  Calculate `Perceptual Evaluation of Speech Quality`_ (PESQ).

    It's a recognized industry standard for audio quality that takes into considerations characteristics such as:
    audio sharpness, call volume, background noise, clipping, audio interference etc. PESQ returns a score between
    -0.5 and 4.5 with the higher scores indicating a better quality.

    This metric is a wrapper for the `pesq package`_. Note that input will be moved to ``cpu`` to perform the metric
    calculation.

    As input to ``forward`` and ``update`` the metric accepts the following input

    - ``preds`` (:class:`~torch.Tensor`): float tensor with shape ``(...,time)``
    - ``target`` (:class:`~torch.Tensor`): float tensor with shape ``(...,time)``

    As output of `forward` and `compute` the metric returns the following output

    - ``pesq`` (:class:`~torch.Tensor`): float tensor of PESQ value reduced across the batch

    .. hint::
        Using this metrics requires you to have ``pesq`` install. Either install as ``pip install
        torchmetrics[audio]`` or ``pip install pesq``. ``pesq`` will compile with your currently
        installed version of numpy, meaning that if you upgrade numpy at some point in the future you will
        most likely have to reinstall ``pesq``.

    .. caution::
        The ``forward`` and ``compute`` methods in this class return a single (reduced) PESQ value
        for a batch. To obtain a PESQ value for each sample, you may use the functional counterpart in
        :func:`~torchmetrics.functional.audio.pesq.perceptual_evaluation_speech_quality`.

    Args:
        fs: sampling frequency, should be 16000 or 8000 (Hz)
        mode: ``'wb'`` (wide-band) or ``'nb'`` (narrow-band)
        keep_same_device: whether to move the pesq value to the device of preds
        n_processes: integer specifying the number of processes to run in parallel for the metric calculation.
            Only applies to batches of data and if ``multiprocessing`` package is installed.
        kwargs: Additional keyword arguments, see :ref:`Metric kwargs` for more info.

    Raises:
        ModuleNotFoundError:
            If ``pesq`` package is not installed
        ValueError:
            If ``fs`` is not either  ``8000`` or ``16000``
        ValueError:
            If ``mode`` is not either ``"wb"`` or ``"nb"``

    Example:
        >>> from torch import randn
        >>> from torchmetrics.audio import PerceptualEvaluationSpeechQuality
        >>> preds = randn(8000)
        >>> target = randn(8000)
        >>> pesq = PerceptualEvaluationSpeechQuality(8000, 'nb')
        >>> pesq(preds, target)
        tensor(2.2885)
        >>> wb_pesq = PerceptualEvaluationSpeechQuality(16000, 'wb')
        >>> wb_pesq(preds, target)
        tensor(1.6805)

    sum_pesqtotalFfull_state_updateis_differentiableThigher_is_betterg      plot_lower_boundg      @plot_upper_bound   fsmoden_processeskwargsreturnNc                    s   t  jdi | tstd|dvrtd| || _|dvr(td| || _t|ts;|dkr;td| || _	| j
dtd	d
d | j
dtdd
d d S )NzPerceptualEvaluationSpeechQuality metric requires that `pesq` is installed. Either install as `pip install torchmetrics[audio]` or `pip install pesq`.)i@  i>  z:Expected argument `fs` to either be 8000 or 16000 but got )wbnbz;Expected argument `mode` to either be 'wb' or 'nb' but got r   zCExpected argument `n_processes` to be an int larger than 0 but got r   g        sum)defaultdist_reduce_fxr    )super__init__r   ModuleNotFoundError
ValueErrorr   r   
isinstanceintr   	add_stater   )selfr   r   r   r   	__class__r#   T/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/torchmetrics/audio/pesq.pyr%   b   s    z*PerceptualEvaluationSpeechQuality.__init__predstargetc                 C   sJ   t ||| j| jd| j| jj}|  j| 7  _|  j|	 7  _dS )z*Update state with predictions and targets.FN)
r   r   r   r   tor   devicer    r   numel)r+   r/   r0   
pesq_batchr#   r#   r.   update|   s   
z(PerceptualEvaluationSpeechQuality.updatec                 C   s   | j | j S )zCompute metric.)r   r   )r+   r#   r#   r.   compute   s   z)PerceptualEvaluationSpeechQuality.computevalaxc                 C   s   |  ||S )ab  Plot a single or multiple values from the metric.

        Args:
            val: Either a single result from calling `metric.forward` or `metric.compute` or a list of these results.
                If no value is provided, will automatically call `metric.compute` and plot that result.
            ax: An matplotlib axis object. If provided will add plot to that axis

        Returns:
            Figure and Axes object

        Raises:
            ModuleNotFoundError:
                If `matplotlib` is not installed

        .. plot::
            :scale: 75

            >>> # Example plotting a single value
            >>> import torch
            >>> from torchmetrics.audio import PerceptualEvaluationSpeechQuality
            >>> metric = PerceptualEvaluationSpeechQuality(8000, 'nb')
            >>> metric.update(torch.rand(8000), torch.rand(8000))
            >>> fig_, ax_ = metric.plot()

        .. plot::
            :scale: 75

            >>> # Example plotting multiple values
            >>> import torch
            >>> from torchmetrics.audio import PerceptualEvaluationSpeechQuality
            >>> metric = PerceptualEvaluationSpeechQuality(8000, 'nb')
            >>> values = [ ]
            >>> for _ in range(10):
            ...     values.append(metric(torch.rand(8000), torch.rand(8000)))
            >>> fig_, ax_ = metric.plot(values)

        )_plot)r+   r7   r8   r#   r#   r.   plot   s   &r   )r   )NN)__name__
__module____qualname____doc__r   __annotations__r   boolr   r   r   floatr   r)   strr   r%   r5   r6   r   r   r   r   r   r:   __classcell__r#   r#   r,   r.   r      s0   
 ;	2N)collections.abcr   typingr   r   r   torchr   r   "torchmetrics.functional.audio.pesqr   torchmetrics.metricr	   torchmetrics.utilities.importsr
   r   torchmetrics.utilities.plotr   r   __doctest_requires____doctest_skip__r   r#   r#   r#   r.   <module>   s   
