o
    yi                     @   s\   d dl mZmZmZ d dlmZmZ d dlmZ d dl	m
Z
 d dlmZ G dd deZdS )	    )AnyCallableDict)Tensortensor)Literal)permutation_invariant_training)Metricc                	       s   e Zd ZU dZdZeed< dZeed< eed< eed< 	dd	e	d
e
d deddf fddZdededdfddZdefddZ  ZS )PermutationInvariantTraininga  Calculates `Permutation invariant training`_ (PIT) that can evaluate models for speaker independent multi-
    talker speech separation in a permutation invariant way.

    As input to ``forward`` and ``update`` the metric accepts the following input

    - ``preds`` (:class:`~torch.Tensor`): float tensor with shape ``(batch_size,num_speakers,...)``
    - ``target`` (:class:`~torch.Tensor`): float tensor with shape ``(batch_size,num_speakers,...)``

    As output of `forward` and `compute` the metric returns the following output

    - ``pesq`` (:class:`~torch.Tensor`): float scalar tensor with average PESQ value over samples

    Args:
        metric_func:
            a metric function accept a batch of target and estimate,
            i.e. ``metric_func(preds[:, i, ...], target[:, j, ...])``, and returns a batch of metric
            tensors ``(batch,)``
        eval_func:
            the function to find the best permutation, can be 'min' or 'max', i.e. the smaller the better
            or the larger the better.
        kwargs: Additional keyword arguments for either the ``metric_func`` or distributed communication,
            see :ref:`Metric kwargs` for more info.

    Example:
        >>> import torch
        >>> from torchmetrics import PermutationInvariantTraining
        >>> from torchmetrics.functional import scale_invariant_signal_noise_ratio
        >>> _ = torch.manual_seed(42)
        >>> preds = torch.randn(3, 2, 5) # [batch, spk, time]
        >>> target = torch.randn(3, 2, 5) # [batch, spk, time]
        >>> pit = PermutationInvariantTraining(scale_invariant_signal_noise_ratio, 'max')
        >>> pit(preds, target)
        tensor(-2.1065)
    Ffull_state_updateTis_differentiablesum_pit_metrictotalmaxmetric_func	eval_func)r   minkwargsreturnNc                    st   | dd| dd | dd d}t jdi | || _|| _|| _| jdtddd	 | jd
tddd	 d S )Ndist_sync_on_stepFprocess_groupdist_sync_fn)r   r   r   r   g        sum)defaultdist_reduce_fxr   r    )popsuper__init__r   r   r   	add_stater   )selfr   r   r   base_kwargs	__class__r   J/home/ubuntu/.local/lib/python3.10/site-packages/torchmetrics/audio/pit.pyr   @   s   


z%PermutationInvariantTraining.__init__predstargetc                 C   sH   t ||| j| jfi | jd }|  j| 7  _|  j| 7  _dS )z*Update state with predictions and targets.r   N)r   r   r   r   r   r   r   numel)r    r%   r&   
pit_metricr   r   r$   updateS   s    z#PermutationInvariantTraining.updatec                 C   s   | j | j S )zComputes metric.)r   r   )r    r   r   r$   computeZ   s   z$PermutationInvariantTraining.compute)r   )__name__
__module____qualname____doc__r   bool__annotations__r   r   r   r   r   r   r)   r*   __classcell__r   r   r"   r$   r
      s$   
 #r
   N)typingr   r   r   torchr   r   typing_extensionsr   !torchmetrics.functional.audio.pitr   torchmetrics.metricr	   r
   r   r   r   r$   <module>   s   