o
    pi<                     @   s  d Z ddlmZmZmZmZmZ ddlmZ ddl	m
Z
 ddlZddlZddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZ ddlmZ d,ddZ	d-dededejdejdejf
ddZ			d.dededejdejdejdejfddZdeeeef  d eeeef  fd!d"Z 		#			$d/dejd%ed&eeef deeeef  d eeeef  d'efd(d)Z!d0d*d+Z"dS )1z
==============================================================
Hierarchical clustering (:mod:`pyannote.core.utils.hierarchy`)
==============================================================

    )TextCallableListTupleUnion)Counter)	signatureN   )to_condensed)
to_squared)l2_normalize)pdist)cdist)
squareform)
csr_matrix)connected_componentssingle	euclideanc                 K   s`   |dkrt | fd|i|S |dkr|dv rt| } d}t| |d}tjjj|f||d|S )zJSame as scipy.cluster.hierarchy.linkage with more metrics and methods
    poolmetricr   )centroidmedianwardr   )methodr   )r   r   r   scipycluster	hierarchylinkage)Xr   r   kwargsdistance r"   Q/home/ubuntu/.local/lib/python3.10/site-packages/pyannote/core/utils/hierarchy.pyr   4   s   r   uvSCreturnc                 K   s0   ||  ||   || ||   ||  ||   S )a  Compute average of newly merged cluster

    Parameters
    ----------
    u : int
        Cluster index.
    v : int
        Cluster index.
    C : (2 x n_observations - 1, dimension) np.ndarray
        Cluster average.
    S : (2 x n_observations - 1, ) np.ndarray
        Cluster size.

    Returns
    -------
    Cuv : (dimension, ) np.ndarray
        Average of newly formed cluster.
    r"   )r$   r%   r&   r'   r    r"   r"   r#   _average_pooling_funcK   s   0r)   r   dKc                 K   sJ   t t || |gd }t t j|| dd|f dd}|||  S )a  Compute centroid of newly merged cluster

    Parameters
    ----------
    u : int
        Cluster index.
    v : int
        Cluster index.
    X : (n_observations, dimension) np.ndarray
        Observations.
    d : (n_observations, n_obversations) np.ndarray
        Distance between observations.
    K : (n_observations, ) np.ndarray, optional
        Cluster assignment.

    Returns
    -------
    Cuv : (dimension, ) np.ndarray
        Centroid of newly formed cluster.
    r   N)axis)npwhereisinargminmean)r$   r%   r   r*   r+   r    u_or_vir"   r"   r#   _centroid_pooling_funcc   s   $r4   cannot_link	must_linkc           	      C   s   t dd | D } 	 t }|D ]6\}}| D ]/\}}tt||h||h}|s5d| d| d}t|t|dkrD|| vrD|| qq|rN| | n	 t| S q
)Nc                 s   s    | ]	}t t|V  qd S )N)tuplesorted).0uvr"   r"   r#   	<genexpr>   s    z(propagate_constraints.<locals>.<genexpr>TzMFound a conflict between 'must_link' and 'cannot_link' constraints for pair (z, z).   )	setlistr7   r8   symmetric_difference
ValueErrorlenappendupdate)	r5   r6   new_cannot_linkxyr$   r%   ijmsgr"   r"   r#   propagate_constraints   s.   

rI   averagebothr   pooling_funcmust_link_methodc                    s  
dkrt 
n
dkrt
nt
tr
 d}t|du r"g |du r(g }j\	}tdt	tj	d	 d tj
ddd	< t	d	 d |f  d	ddf< t		d d	ftjtd	 d d	 d  d  td	 d gt	t		d  d R  < d
t
jv rtd 	
fdd	}d}	r|dv rt|t \}
}tjtd	 d |
|< |dv r,|r,tj			ftjd}|D ]
\}
}d||
|f< qtt|ddd\}}t| D ])\}}|dk rqt||kd ^}
}|D ]}||
||	dd}
|	d7 }	qqt|		d D ].}	t}| tjkrOtdkd ^}
}}ntd	 d |\}
}||
||	}q3S )a|  'pool' linkage

    Parameters
    ----------
    X : np.ndarray
        (n_samples, dimension) obversations.
    metric : {"euclidean", "cosine", "angular"}, optional
        Distance metric. Defaults to "euclidean"
    pooling_func: callable, optional
        Defaults to "average".
    cannot_link : list of pairs, optional
        Pairs of indices of observations that cannot be linked. For instance,
        [(1, 2), (5, 6)] means that first and second observations cannot end up
        in the same cluster, as well as 5th and 6th obversations.
    must_link : list of pairs, optional
        Pairs of indices of observations that must be linked. For instance,
        [(1, 2), (5, 6)] means that first and second observations must end up
        in the same cluster, as well as 5th and 6th obversations.
    must_link_method : {"merge", "propagate", "both"}, optional
        Method used for taking "must link" constraints into account.
        * use "merge" to initialize clusters by merging "must link" observations
          before any other regular clustering iterations.
        * use "propagate" to infer additional "cannot link" constraints by
          applying the following propagation rule:
                if u and v cannot be linked and v and w must be linked,
                then u and w cannot be linked.
        * use "both" to apply both methods.
        Defaults to "both".
    rJ   r   za pooling is not supported. Choose between 'average' and 'centroid', or provide your own function.Nr   r<   r	   )dtype   r*   Fc              	      s  t d	 d | |}|r%| tjkr%| 	k r| n|}d| d}t|| |  kr/|n||df< |df |kr?| n||df< |rIdn| |df< |  |  |df< |  |  	| < 
| | d 	| < d| < d|< 	| | k< 	| |k< d		|  dk}t d	 d 	| t	| |  }t tj	| d	d	f  d		| d	d	f | d	d	f d
|< t d	 d | t| }t d	 d | t| d 	| }	t d	 d |t|}
t d	 d |t|d 	| }rtd	 d || tjk \}}tjt d	 d 	| |< td	 d |	|	 tjk \}}tjt d	 d 	| |< td	 d |
|
 tjk \}}tjt d	 d 	| |< td	 d || tjk \}}tjt d	 d 	| |< tj|< tj|	< tj|
< tj|< t d	 d 	| t	| | }tj|< 	| S )aW  Merge two clusters

        Parameters
        ----------
        u, v : int
            Indices of clusters to merge.
        iteration : int
            Current clustering iteration.
        constraint : bool
            Set to True to indicate that this merge is coming from a 'must_link'
            constraint. This will artificially set Z[iteration, 2] to 0.0.

        Returns
        -------
        uv : int
            Indices of resulting cluster.

        Raises
        ------
        "ValueError" in case of conflict between "must_link" and "cannot_link"
        constraints.

        r<   r	   zSFound a conflict between 'must_link' and 'cannot_link' constraints for observation .r   g           )r   r*   r+   r&   r'   Nr   )r
   r-   inftyr@   aranger   newaxisr   )r$   r%   	iteration
constraintkwrH   empty_uu__vv_rE   _r'   Dr+   r&   r   Zr5   r*   r   nrL   r"   r#   merge   sT     ($
$$$$$$



&
zpool.<locals>.merger   )	propagaterK   )rc   rK   T)directedreturn_labels)rV   )F)r)   r4   
isinstancer   r@   shaper   r-   rS   zerosint16rR   onesr
   r   r   
parametersr   rI   zipint8r   r   r   itemsr.   ranger0   )r   r   rL   r5   r6   rM   rH   	dimensionrc   rU   r$   r%   graphr^   K_initrW   countothersr"   r_   r#   r      sj   &


(4"]




r   c              	   C   sN  g }|dddf D ]>}t jjj||dd}g }t|D ]}| ||k }tj|ddd}	|t|	||d	d	 q|tt
|d  q
t|}t| }
|
d
 |d }}d
|d	 }}|| ||  }d
}|| ||  ||  }t|td
|
 ||  | t|d |d   }|t|df }t jjj||ddS )a+  Forms flat clusters using within-class sum of square elbow criterion

    Parameters
    ----------
    X : `np.ndarray`
        (n_samples, n_dimensions) feature vectors.
    Z : `np.ndarray`
        The hierarchical clustering encoded with the matrix returned by the
        `linkage` function.
    metric : `str`
        The distance metric to use. See `pdist` function for a list of valid
        distance metrics.

    Returns
    -------
    T : ndarray
        An array of length n. T[i] is the flat cluster number to which
        original observation i belongs.

    Reference
    ---------
    H. Delgado, X. Anguerra, C. Fredouille, J. Serrano. "Fast Single- and
    Cross-Show Speaker Diarization Using Binary Key Speaker Modeling".
    IEEE Transactions on Audio Speech and Language Processing
    Nr<   r!   )	criterionr   T)r,   keepdimsr   r	   )r   r   r   fclusterr-   uniquer1   rB   r   reshapehstackarrayrA   absrS   sqrtargmax)r   ra   r   wcss	thresholdy_tr`   rW   XkCkrb   x1y1x2y2abcr!   r"   r"   r#   fcluster_auto  s*   
8r   )r   r   )NN)NNN)r   rJ   NNrK   )r   )#__doc__typingr   r   r   r   r   collectionsr   inspectr   numpyr-   scipy.cluster.hierarchyr   r!   r
   r   r   r   r   scipy.spatial.distancer   scipy.sparser   scipy.sparse.csgraphr   r   intndarrayr)   r4   rI   r   r   r"   r"   r"   r#   <module>   s   


!


 m