o
    .wij&                     @   s  U d dl mZ d dlmZmZ d dlZd dlZd dlmZ d dl	m
Z
 d dlmZ d dlmZ i Zeed< d	ed
ejdefddZdededeeef fddZdededeeef fddZ		d"dededede
d de
d dedeeef fddZdededefd d!ZdS )#    )permutations)AnyCallableN)Tensor)Literal)rank_zero_warn)_SCIPY_AVAILABLE_ps_dictspk_numdevicereturnc                 C   sJ   t | t | }|tvrtjttt| |d}|t|< |S t| }|S )N)r   )strr	   torchtensorlistr   range)r
   r   keyps r   ^/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/torchmetrics/functional/audio/pit.py_gen_permutations   s   r   
metric_mtx	eval_funcc                    sv   ddl m |   }tt fdd|D }|| j	}t
| d|dddddf ddg}||fS )	a  Solves the linear sum assignment problem.

    This implementation uses scipy and input is therefore transferred to cpu during calculations.

    Args:
        metric_mtx: the metric matrix, shape [batch_size, spk_num, spk_num]
        eval_func: the function to reduce the metric values of different the permutations

    Returns:
        best_metric: shape ``[batch]``
        best_perm: shape ``[batch, spk]``

    r   )linear_sum_assignmentc                    s    g | ]}| t jkd  qS )   )r   max).0pwmr   r   r   r   
<listcomp>>   s     z<_find_best_perm_by_linear_sum_assignment.<locals>.<listcomp>   N)scipy.optimizer   detachcpur   r   nparraytor   gathermean)r   r   mmtx	best_permbest_metricr   r   r   (_find_best_perm_by_linear_sum_assignment*   s    *r.   c                 C   s   | j dd \}}t|| jd}|j d }|jd |||}t| d|}|jdd}||dd\}	}
|
 }
||
ddf }|	|fS )a  Solves the linear sum assignment problem using exhaustive method.

    This is done by exhaustively calculating the metric values of all possible permutations, and returns the best metric
    values and the corresponding permutations.

    Args:
        metric_mtx: the metric matrix, shape ``[batch_size, spk_num, spk_num]``
        eval_func: the function to reduce the metric values of different the permutations

    Returns:
        best_metric: shape ``[batch]``
        best_perm: shape ``[batch, spk]``

    Nr    r
   r   r   )N.r   dim)	shaper   r   Texpandr   r)   r*   r$   )r   r   
batch_sizer
   r   perm_numbpsmetric_of_ps_detailsmetric_of_psr-   best_indexesr,   r   r   r   $_find_best_perm_by_exhaustive_methodD   s   
r;   speaker-wiser   predstargetmetric_funcmoder<   permutation-wiser   minkwargsc                 K   sx  | j dd |j dd krtd|dvrtd| |dvr(td| |jdk r:td|j  d	| j  d
|dkrAtjntj}|j dd \}}|dkrt|| jd}	|	j d }
tj	| d|	
ddj
||
 g| j dd R  }|j|
dd}|||fi |}tj|
|t|	ddd}||dd\}}| }|	|ddf }||fS || ddddf |ddddf fi |}tj|||f|j|jd}||ddddf< t|D ]0}t|D ])}|dkr|dkrq|| dd|df |dd|df fi ||dd||f< qq|dk sts1|dkr&ts&td| d t||\}}||fS t||\}}||fS )aU  Calculate `Permutation invariant training`_ (PIT).

    This metric can evaluate models for speaker independent multi-talker speech separation in a permutation
    invariant way.

    Args:
        preds: float tensor with shape ``(batch_size,num_speakers,...)``
        target: float tensor with shape ``(batch_size,num_speakers,...)``
        metric_func: a metric function accept a batch of target and estimate.
            if `mode`==`'speaker-wise'`, then ``metric_func(preds[:, i, ...], target[:, j, ...])`` is called
            and expected to return a batch of metric tensors ``(batch,)``;

            if `mode`==`'permutation-wise'`, then ``metric_func(preds[:, p, ...], target[:, :, ...])`` is called,
            where `p` is one possible permutation, e.g. [0,1] or [1,0] for 2-speaker case, and expected to return
            a batch of metric tensors ``(batch,)``;

        mode: can be `'speaker-wise'` or `'permutation-wise'`.
        eval_func: the function to find the best permutation, can be ``'min'`` or ``'max'``,
            i.e. the smaller the better or the larger the better.
        kwargs: Additional args for metric_func

    Returns:
        Tuple of two float tensors. First tensor with shape ``(batch,)`` contains the best metric value for each sample
        and second tensor with shape ``(batch,)`` contains the best permutation.

    Example:
        >>> from torchmetrics.functional.audio import scale_invariant_signal_distortion_ratio
        >>> # [batch, spk, time]
        >>> preds = torch.tensor([[[-0.0579,  0.3560, -0.9604], [-0.1719,  0.3205,  0.2951]]])
        >>> target = torch.tensor([[[ 1.0958, -0.1648,  0.5228], [-0.4100,  1.1942, -0.5103]]])
        >>> best_metric, best_perm = permutation_invariant_training(
        ...     preds, target, scale_invariant_signal_distortion_ratio,
        ...     mode="speaker-wise", eval_func="max")
        >>> best_metric
        tensor([-5.1091])
        >>> best_perm
        tensor([[0, 1]])
        >>> pit_permutate(preds, best_perm)
        tensor([[[-0.0579,  0.3560, -0.9604],
                 [-0.1719,  0.3205,  0.2951]]])

    r   r    z_Predictions and targets are expected to have the same shape at the batch and speaker dimensionsrC   z-eval_func can only be "max" or "min" but got rA   z>mode can only be "speaker-wise" or "permutation-wise" but got z/Inputs must be of shape [batch, spk, ...], got z and z insteadr   rB   r/   r   r!   )r1   indexN)repeatsr1   r0   .)dtyper      zIn pit metric for speaker-num z8>3, we recommend installing scipy for better performance)r2   RuntimeError
ValueErrorndimr   r   rD   r   r   index_selectreshaperepeat_interleaver*   lenr$   emptyrH   r   r   r   r;   r.   )r=   r>   r?   r@   r   rE   eval_opr5   r
   permsr6   ppredsptargetr9   r-   r:   r,   	first_eler   
target_idx	preds_idxr   r   r   permutation_invariant_trainingk   sb   2

. 
rY   permc                 C   s   t dd t| |D S )a"  Permutate estimate according to perm.

    Args:
        preds: the estimates you want to permutate, shape [batch, spk, ...]
        perm: the permutation returned from permutation_invariant_training, shape [batch, spk]

    Returns:
        Tensor: the permutated version of estimate

    c                 S   s   g | ]\}}t |d |qS )r   )r   rM   )r   predpr   r   r   r      s    z!pit_permutate.<locals>.<listcomp>)r   stackzip)r=   rZ   r   r   r   pit_permutate   s   r_   )r<   r   )	itertoolsr   typingr   r   numpyr&   r   r   typing_extensionsr   torchmetrics.utilitiesr   torchmetrics.utilities.importsr   r	   dict__annotations__intr   r   tupler.   r;   rY   r_   r   r   r   r   <module>   sT   



+

m