o
    }oiM!                  	   @   s   d dl mZmZmZmZmZ d dlZd dlmZ d dl	m
Z
 d dlmZ d dlmZmZ d dlmZmZ d dlmZ d d	lmZmZ d d
lmZ dgZeeeeee
eeeg	ZG dd deZdS )    )CallableIterableListOptionalTupleN)Metric)!PerceptualEvaluationSpeechQuality)PermutationInvariantTraining)#ScaleInvariantSignalDistortionRatioSignalDistortionRatio)ScaleInvariantSignalNoiseRatioSignalNoiseRatio)!ShortTimeObjectiveIntelligibility)SquimMOSMetricSquimObjectiveMetric)loggingAudioMetricWrapperc                       sX  e Zd ZU dZdZeed< ejed< 	d#de	de
e de
e f fd	d
Zdejdejdeejejf fddZedejdejdejdeeejejf  fddZedeej dejfddZd$dejdejde
ej ddfddZdejfddZ	d$dejdejde
ej dejfddZd% fddZdefddZd edefd!d"Z  ZS )&r   a  A wrapper around an audio metric enabling selection of a specific channel
    and handling of examples in a batch with varying valid input length.

    Note:
        This class assumes that the underlying metric uses averaging to calculate the
        value over a batch. This assumption is only used by `forward` and does not
        impact other methods, such as `update` and `compute`.

    Args:
        metric: base metric that should be wrapped. It is assumed that calculation
                of the metric over a batch is done by averaging.
        channel: Optional, for selecting a channel from `preds` and `target` signals.
                 If None, all channels are used.
        metric_using_batch_averaging: Optional, used to denote that the base metric
                                      is using averaging to calculate the metric value
                                      for a batch.
    Ffull_state_updatenum_examplesNmetricchannelmetric_using_batch_averagingc                    s   t    t|tstd| |s&t|tvr&td| d| jj d|| _	|| _
| jdtddd td	|t| d S )
NzNExpected argument `metric` to be an instance of `torchmetrics.Metric` but got zMetric z is not in verified metrics. aJ   assumes reduction over batch is calculated using averaging. 
This should not affect the final results, but values for a single batch obtained using `forward` may be inaccurate if using `input_length`. 
To suppress this message, please confirm the used metric is using batch averaging and set "metric_using_batch_averaging = True"r   r   sum)defaultdist_reduce_fxzSetup metric %s, channel %s)super__init__
isinstancer   
ValueErrortype__VERIFIED_METRICS__	__class____name___metric_channel	add_statetorchtensorr   debugstr)selfr   r   r   r!    X/home/ubuntu/.local/lib/python3.10/site-packages/nemo/collections/audio/metrics/audio.pyr   A   s   

zAudioMetricWrapper.__init__predstargetreturnc                 C   s:   | j du r	||fS |dd| j df |dd| j df fS )aP  Select a single channel from input signals.

        Args:
            preds: tensor with shape (B, C, T)
            target: tensor with shape (B, C, T)

        Returns:
            Original tensors if self.channel is None, shape (B, C, T).
            A single channel from input tensors if self.channel is set, shape (B, T)
        N.)r$   )r*   r.   r/   r,   r,   r-   _select_channelT   s   
(z"AudioMetricWrapper._select_channelinput_lengthc                 c   sF    t |D ]\}}| |dd|f }||dd|f }||fV  qdS )a  Trim input tensors to input_length samples.

        Args:
            preds: tensor with shape (B, C, T)
            target: tensor with shape (B, C, T)

        Returns:
            An iterable with tuples of (preds, target) with
            the correct length.
        .N)	enumerate)r.   r/   r2   b_idxb_lenb_predsb_targetr,   r,   r-   _trim_inputsd   s   zAudioMetricWrapper._trim_inputsbatch_valuesc                 C   s   t | t|  S )a  Reduce metric values for each example in a batch to a single
        value for the whole batch.

        Args:
            batch_values: list of metric values for each example in a batch

        Returns:
            Average metric value over the batch.
        )r   len)r9   r,   r,   r-   _batch_reductiony   s   z#AudioMetricWrapper._batch_reductionc                 C   sn   | j ||d\}}|du r| jj||d n| j|||dD ]\}}| jj||d q|  j|d7  _dS )a  Update the underlying metric by taking into account channel selector and input length.

        Args:
            preds: tensor with predictions, shape (B, C, T)
            target: tensor with target signals, shape (B, C, T)
            input_length: Optional, input tensor with length (in samples) of each signal in the batch, shape (B,).
                          If not provided, it is assumed that all samples are valid.
        r.   r/   Nr.   r/   r2   r   )r1   r#   updater8   r   size)r*   r.   r/   r2   r6   r7   r,   r,   r-   r>      s   	zAudioMetricWrapper.updatec                 C   s
   | j  S )zCompute the underlying metric.)r#   computer*   r,   r,   r-   r@      s   
zAudioMetricWrapper.computec                 C   sd   | j ||d\}}|du r| j||dS g }| j|||dD ]\}}|| j||d q| |S )a:  Call underlying forward method to add the batch statistics to the accumulated metric state
        and return the result for the current batch.

        Args:
            preds: tensor with predictions, shape (B, C, T)
            target: tensor with target signals, shape (B, C, T)
            input_length: Optional, input tensor with length (in samples) of each signal in the batch, shape (B,).
                          If not provided, it is assumed that all samples are valid.

        Returns:
            Underlying metric averaged on the current batch.
        r<   Nr=   )r1   r#   r8   appendr;   )r*   r.   r/   r2   r9   r6   r7   r,   r,   r-   forward   s   
zAudioMetricWrapper.forwardc                    s   t    | j  dS )zReset the underlying metric.N)r   resetr#   rA   r+   r,   r-   rD      s   
zAudioMetricWrapper.resetc                 C   s*   dt | j d| j d}| jj| }|S )z+Return string representation of the object.z	(metric: z, channel: ))reprr#   r$   r!   r"   )r*   
_op_metricrepr_strr,   r,   r-   __repr__   s   zAudioMetricWrapper.__repr__r@   c                 C   s   |S )z3Overwrite to do nothing, as in CompositionalMetric.r,   )r*   r@   r,   r,   r-   _wrap_compute   s   z AudioMetricWrapper._wrap_compute)NN)N)r0   N)r"   
__module____qualname____doc__r   bool__annotations__r&   Tensorr   r   intr   r   r1   staticmethodr   r8   r   r;   r>   r@   rC   rD   r)   rI   r   rJ   __classcell__r,   r,   r+   r-   r   +   sP   
 
&&
)typingr   r   r   r   r   r&   torchmetricsr   torchmetrics.audio.pesqr   torchmetrics.audio.pitr	   torchmetrics.audio.sdrr
   r   torchmetrics.audio.snrr   r   torchmetrics.audio.stoir   $nemo.collections.audio.metrics.squimr   r   
nemo.utilsr   __all__r    r   r,   r,   r,   r-   <module>   s,   