o
    yi<                     @   s
  U d dl mZ d dlmZmZmZmZ d dlmZ d dl	Z	d dl	m
Z
 d dlmZ d dlmZ i Zeed< d	e
d
ee	je	jf dee
e
f fddZd	e
d
ee	je	jf dee
e
f fddZ	dde
de
ded
ed dedee
e
f fddZde
de
de
fddZdS )    )permutations)AnyCallableTupleUnion)warnN)Tensor)Literal)_SCIPY_AVAILABLE_ps_dict
metric_mtx	eval_funcreturnc                    sp   ddl m |   }t fdd|D }|| j}t| d|dddddf 	ddg}||fS )	a  Solves the linear sum assignment problem using scipy, and returns the best metric values and the
    corresponding permutations.

    Args:
        metric_mtx: the metric matrix, shape [batch_size, spk_num, spk_num]
        eval_func: the function to reduce the metric values of different the permutations

    Returns:
        best_metric: shape ``[batch]``
        best_perm: shape ``[batch, spk]``
    r   )linear_sum_assignmentc                    s    g | ]}| t jkd  qS )   )torchmax).0pwmr   r    U/home/ubuntu/.local/lib/python3.10/site-packages/torchmetrics/functional/audio/pit.py
<listcomp>/   s     z<_find_best_perm_by_linear_sum_assignment.<locals>.<listcomp>   N)
scipy.optimizer   detachcpur   tensortodevicegathermean)r   r   mmtx	best_permbest_metricr   r   r   (_find_best_perm_by_linear_sum_assignment   s   *r'   c                 C   s   | j dd \}}t|t| j }|tvr*tjttt|| jdj	}|t|< nt| }|j d }|d 
|||}t| d|}|jdd}	||	dd\}
}| }|j	|ddf }|
|fS )a  Solves the linear sum assignment problem using exhaustive method, i.e. exhaustively calculates the metric
    values of all possible permutations, and returns the best metric values and the corresponding permutations.

    Args:
        metric_mtx: the metric matrix, shape ``[batch_size, spk_num, spk_num]``
        eval_func: the function to reduce the metric values of different the permutations

    Returns:
        best_metric: shape ``[batch]``
        best_perm: shape ``[batch, spk]``
    Nr   )r!   r   )N.r   )dim)shapestrr!   r   r   r   listr   rangeTexpandr"   r#   r   )r   r   
batch_sizespk_numkeypsperm_numbpsmetric_of_ps_detailsmetric_of_psr&   best_indexesr%   r   r   r   $_find_best_perm_by_exhaustive_method5   s   

r8   r   predstargetmetric_funcr   minkwargsc                 K   s  | j dd |j dd krtd|dvrtd| |jdk r/td|j  d| j  d|j dd \}}d	}t|D ]Y}t|D ]R}	|d	uri|| d	d	|	d
f |d	d	|d
f fi ||d	d	||	f< qD|| d	d	|	d
f |d	d	|d
f fi |}
tj|||f|
j|
jd}|
|d	d	||	f< qDq>|dkrtj	ntj
}|dk sts|dkrtstd| d t||\}}||fS t||\}}||fS )a  Calculates `Permutation invariant training`_ (PIT) that can evaluate models for speaker independent multi-
    talker speech separation in a permutation invariant way.

    Args:
        preds: float tensor with shape ``(batch_size,num_speakers,...)``
        target: float tensor with shape ``(batch_size,num_speakers,...)``
        metric_func: a metric function accept a batch of target and estimate,
            i.e. ``metric_func(preds[:, i, ...], target[:, j, ...])``, and returns a batch of metric
            tensors ``(batch,)``
        eval_func: the function to find the best permutation, can be ``'min'`` or ``'max'``,
            i.e. the smaller the better or the larger the better.
        kwargs: Additional args for metric_func

    Returns:
        Tuple of two float tensors. First tensor with shape ``(batch,)`` contains the best metric value for each sample
        and second tensor with shape ``(batch,)`` contains the best permutation.

    Example:
        >>> from torchmetrics.functional.audio import scale_invariant_signal_distortion_ratio
        >>> # [batch, spk, time]
        >>> preds = torch.tensor([[[-0.0579,  0.3560, -0.9604], [-0.1719,  0.3205,  0.2951]]])
        >>> target = torch.tensor([[[ 1.0958, -0.1648,  0.5228], [-0.4100,  1.1942, -0.5103]]])
        >>> best_metric, best_perm = permutation_invariant_training(
        ...     preds, target, scale_invariant_signal_distortion_ratio, 'max')
        >>> best_metric
        tensor([-5.1091])
        >>> best_perm
        tensor([[0, 1]])
        >>> pit_permutate(preds, best_perm)
        tensor([[[-0.0579,  0.3560, -0.9604],
                 [-0.1719,  0.3205,  0.2951]]])
    r   r   z_Predictions and targets are expected to have the same shape at the batch and speaker dimensionsr<   z-eval_func can only be "max" or "min" but got z/Inputs must be of shape [batch, spk, ...], got z and z insteadN.)dtyper!   r      zIn pit metric for speaker-num z8>3, we recommend installing scipy for better performance)r)   RuntimeError
ValueErrorndimr,   r   emptyr?   r!   r   r=   r
   r   r8   r'   )r9   r:   r;   r   r>   r/   r0   r   
target_idx	preds_idx	first_eleopr&   r%   r   r   r   permutation_invariant_training`   s<   #
 .rI   permc                 C   s   t dd t| |D }|S )a!  Permutate estimate according to perm.

    Args:
        preds: the estimates you want to permutate, shape [batch, spk, ...]
        perm: the permutation returned from permutation_invariant_training, shape [batch, spk]

    Returns:
        Tensor: the permutated version of estimate
    c                 S   s   g | ]\}}t |d |qS )r   )r   index_select)r   predpr   r   r   r      s    z!pit_permutate.<locals>.<listcomp>)r   stackzip)r9   rJ   preds_pmtedr   r   r   pit_permutate   s   
rQ   )r   )	itertoolsr   typingr   r   r   r   warningsr   r   r   typing_extensionsr	   torchmetrics.utilities.importsr
   r   dict__annotations__r=   r   r'   r8   rI   rQ   r   r   r   r   <module>   sJ   



,

G