o
    pim                     @   s(  d Z ddlmZ ddlZddlmZ ddlmZ ddl	m
Z
 ddlmZ ddlmZ dd	lmZ dd
lmZmZ ddlmZ ddlmZmZmZ ddlmZmZ ddlmZ ddlm Z  ddl!m"Z" G dd deZ#G dd de#Z$G dd de#Z%G dd de#Z&G dd de#Z'G dd deZ(dS )zClustering pipelines    )EnumN)	rearrange)	AudioFile)PLDA)oracle_segmentation)	permutate)cluster_vbx)SlidingWindowSlidingWindowFeature)Pipeline)CategoricalIntegerUniform)fclusterlinkage)linear_sum_assignment)cdist)KMeansc                       s  e Zd Z		d dedef fddZ			d!ded	edB d
edB dedB fddZ		d"dej	de
dB dedeej	ej	ej	f fddZdej	dej	fddZ	d#dej	dej	dej	dej	def
ddZ				d$dej	de
dB d	edB d
edB dedB dej	fddZ  ZS )%BaseClusteringcosineFmetricconstrained_assignmentc                    s   t    || _|| _d S )N)super__init__r   r   selfr   r   	__class__ W/home/ubuntu/.local/lib/python3.10/site-packages/pyannote/audio/pipelines/clustering.pyr   -   s   

zBaseClustering.__init__Nnum_embeddingsnum_clustersmin_clustersmax_clustersc                 C   sp   |p|pd}t dt||}|p|p|}t dt||}||kr-td|dd|dd||kr3|}|||fS )N   zQmin_clusters must be smaller than (or equal to) max_clusters (here: min_clusters=gz and max_clusters=z).)maxmin
ValueError)r   r    r!   r"   r#   r   r   r   set_num_clusters6   s   
zBaseClustering.set_num_clusters皙?
embeddingssegmentationsmin_active_ratioreturnc                 C   s~   |j j\}}}tj|j ddddk}tj|j | dd}||| k}tjt|dd }	t||	 \}
}||
|f |
|fS )a   Filter embeddings before clustering

        Embeddings that are removed:
        * NaN embeddings
        * embeddings speaking less than `min_active_ratio` times the chunk duration

        Parameters
        ----------
        embeddings : (num_chunks, num_speakers, dimension) array
            Sequence of embeddings.
        segmentations : (num_chunks, num_frames, num_speakers) array
            Binary segmentations.
        min_active_ratio : float, optional
            Minimum active ratio for a speaker to be considered active
            during clustering.

        Returns
        -------
        filtered_embeddings : (num_embeddings, dimension) array
        chunk_idx : (num_embeddings, ) array
        speaker_idx : (num_embeddings, ) array
           Taxiskeepdimsr$   r1   )datashapenpsumanyisnanwhere)r   r+   r,   r-   _
num_framessingle_active_masknum_clean_framesactivevalid	chunk_idxspeaker_idxr   r   r   filter_embeddingsM   s   z BaseClustering.filter_embeddingssoft_clustersc                 C   s~   t j|t |d}|j\}}}dt j||ft jd }t|D ]\}}t|dd\}}	t||	D ]
\}
}||||
f< q1q |S )N)nandtypeT)maximize)	r6   
nan_to_numnanminr5   onesint8	enumerater   zip)r   rD   
num_chunksnum_speakersr!   hard_clustersccostspeakersclustersskr   r   r   constrained_argmax   s   z!BaseClustering.constrained_argmaxtrain_chunk_idxtrain_speaker_idxtrain_clustersconstrainedc                    s   t  d }|j\}}}	|||f t  fddt|D }
ttt|d|
| jdd||d}d| }|r?| |}nt j	|dd	}|||
fS )
a  Assign embeddings to the closest centroid

        Cluster centroids are computed as the average of the train embeddings
        previously assigned to them.

        Parameters
        ----------
        embeddings : (num_chunks, num_speakers, dimension)-shaped array
            Complete set of embeddings.
        train_chunk_idx : (num_embeddings,)-shaped array
        train_speaker_idx : (num_embeddings,)-shaped array
            Indices of subset of embeddings used for "training".
        train_clusters : (num_embedding,)-shaped array
            Clusters of the above subset
        constrained : bool, optional
            Use constrained_argmax, instead of (default) argmax.

        Returns
        -------
        soft_clusters : (num_chunks, num_speakers, num_clusters)-shaped array
        hard_clusters : (num_chunks, num_speakers)-shaped array
        centroids : (num_clusters, dimension)-shaped array
            Clusters centroids
        r$   c                    "   g | ]}t j |k d dqS r   r3   r6   mean.0rX   r\   train_embeddingsr   r   
<listcomp>       z4BaseClustering.assign_embeddings.<locals>.<listcomp>c s d -> (c s) dr   (c s) k -> c s krS   rW   r/   r3   )
r6   r&   r5   vstackranger   r   r   rY   argmax)r   r+   rZ   r[   r\   r]   r!   rP   rQ   	dimension	centroidse2k_distancerD   rR   r   rd   r   assign_embeddings   s.   #

z BaseClustering.assign_embeddingsc                 K   s   | j ||d\}}}	|j\}
}| j|
|||d\}}}|dk rD|j\}}}tj||ftjd}t||df}tj|ddd}|||fS | j||||d	}| j	|||	|| j
d
\}}}|||fS )a  Apply clustering

        Parameters
        ----------
        embeddings : (num_chunks, num_speakers, dimension) array
            Sequence of embeddings.
        segmentations : (num_chunks, num_frames, num_speakers) array
            Binary segmentations.
        num_clusters : int, optional
            Number of clusters, when known. Default behavior is to use
            internal threshold hyper-parameter to decide on the number
            of clusters.
        min_clusters : int, optional
            Minimum number of clusters. Has no effect when `num_clusters` is provided.
        max_clusters : int, optional
            Maximum number of clusters. Has no effect when `num_clusters` is provided.

        Returns
        -------
        hard_clusters : (num_chunks, num_speakers) array
            Hard cluster assignment (hard_clusters[c, s] = k means that sth speaker
            of cth chunk is assigned to kth cluster)
        soft_clusters : (num_chunks, num_speakers, num_clusters) array
            Soft cluster assignment (the higher soft_clusters[c, s, k], the most likely
            the sth speaker of cth chunk belongs to kth cluster)
        centroids : (num_clusters, dimension) array
            Centroid vectors of each cluster
        r,   )r!   r"   r#   r/   rG   r$   r   Tr0   )r"   r#   r!   )r]   )rC   r5   r)   r6   zerosrM   rL   ra   clusterrr   r   )r   r+   r,   r!   r"   r#   kwargsre   rZ   r[   r    r;   rP   rQ   rR   rD   rp   r\   r   r   r   __call__   s>   &


zBaseClustering.__call__r   FNNN)Nr*   )FNNNN)__name__
__module____qualname__strboolr   intr)   r6   ndarrayr
   floattuplerC   rY   rr   rw   __classcell__r   r   r   r   r   ,   sz    

2
Kr   c                
       sn   e Zd ZU dZdZeed< 		ddedef fddZ						dd
e	j
ded	B ded	B ded	B fddZ  ZS )AgglomerativeClusteringa  Agglomerative clustering

    Parameters
    ----------
    metric : {"cosine", "euclidean", ...}, optional
        Distance metric to use. Defaults to "cosine".

    Hyper-parameters
    ----------------
    method : {"average", "centroid", "complete", "median", "single", "ward"}
        Linkage method.
    threshold : float in range [0.0, 2.0]
        Clustering threshold.
    min_cluster_size : int in range [1, 20]
        Minimum cluster size
    Fexpects_num_clustersr   r   r   c                    s:   t  j||d tdd| _tg d| _tdd| _d S )Nr   r   g        g       @)averagecentroidcompletemediansinglewardweightedr$      )r   r   r   	thresholdr   methodr   min_cluster_sizer   r   r   r   r   8  s   z AgglomerativeClustering.__init__Nr+   r"   r#   r!   c                    s  j \}}t| jtdtd| }|dkrtjdtjdS | jdkrS| j	dv rStj
ddd tjjd	d
d W d   n1 sEw   Y  t| j	dd}n	t| j	| jd}t|| jddd  tj d
d\}	}
|	|
|k }t|}||k r|}n||kr|}|dur%||kr%t|}t|d |dddf< |d }d}tt|dddf | j D ]?}||df }||k rqt||ddd  tj d
d\}	}
|	|
|k }t|}t|| t|| k r|}|}||kr nq||kr%t||ddd  tj d
d\}	}
|	|
|k }t|}td| d| d |dkr2d dd<  S |	|
|k  }t|dkrA S t fdd|D }t fdd|D }t||| jd}ttj|ddD ]\}}||   || k< qltj d
d\}  S )a@  

        Parameters
        ----------
        embeddings : (num_embeddings, dimension) array
            Embeddings
        min_clusters : int
            Minimum number of clusters
        max_clusters : int
            Maximum number of clusters
        num_clusters : int, optional
            Actual number of clusters. Default behavior is to estimate it based
            on values provided for `min_clusters`,  `max_clusters`, and `threshold`.

        Returns
        -------
        clusters : (num_embeddings, ) array
            0-indexed cluster indices.
        r$   g?)r$   rG   r   )r   r   r   ignoredivideinvalidTr0   N	euclideanr   r   distance	criterion)return_countsr/      zFound only z& clusters. Using a smaller value than z# for `min_cluster_size` might help.r   c                    r^   r_   r`   )rc   large_krV   r+   r   r   rf     rg   z3AgglomerativeClustering.cluster.<locals>.<listcomp>c                    r^   r_   r`   )rc   small_kr   r   r   rf     rg   ri   r3   return_inverse)r5   r'   r   r&   roundr6   rt   uint8r   r   errstatelinalgnormr   r   r   uniquelencopyarangeargsortabsprintrl   r   rN   argmin)r   r+   r"   r#   r!   r    r;   r   
dendrogramcluster_uniquecluster_countslarge_clustersnum_large_clusters_dendrogrambest_iterationbest_num_large_clusters	iterationnew_cluster_sizesmall_clusterslarge_centroidssmall_centroidscentroids_cdistr   r   r   r   r   ru   J  s   



&

zAgglomerativeClustering.clusterrx   ry   r{   r|   r}   __doc__r   r   __annotations__r~   r   r6   r   r   ru   r   r   r   r   r   r   $  s,   
 r   c                
       sh   e Zd ZU dZdZeed< 	ddef fddZ			dd	e	j
d
edB dedB dedB fddZ  ZS )KMeansClusteringzKMeans clustering

    Parameters
    ----------
    metric : {"cosine", "euclidean"}, optional
        Distance metric to use. Defaults to "cosine".

    Hyper-parameters
    ----------------
    None
    Tr   r   r   c                    s*   |dvrt d| dt j|d d S )N)r   r   zUnsupported metric: z". Must be 'cosine' or 'euclidean'.ri   )r(   r   r   )r   r   r   r   r   r     s
   
zKMeansClustering.__init__Nr+   r"   r#   r!   c                 C   s   |du rt d|j\}}||k rtj|tjdS | jdkr@tjddd |tjj|ddd	 }W d   n1 s;w   Y  t	|d
ddd
|S )aY  Perform KMeans clustering

        Parameters
        ----------
        embeddings : (num_embeddings, dimension) array
            Embeddings
        num_clusters : int, optional
            Expected number of clusters.

        Returns
        -------
        clusters : (num_embeddings, ) array
            0-indexed cluster indices.
        Nz `num_clusters` must be provided.rG   r   r   r   r   Tr0   r   *   F
n_clustersn_initrandom_statecopy_x)r(   r5   r6   r   int32r   r   r   r   r   fit_predict)r   r+   r"   r#   r!   r    r;   r   r   r   ru     s   

zKMeansClustering.cluster)r   ry   r   r   r   r   r   r     s&   
 r   c                       s~   e Zd ZU dZeed< 		ddededef fdd	Z	
	
	
	
dde	j
ded
B ded
B ded
B ded
B de	j
fddZ  ZS )VBxClusteringFr   r   Tpldar   r   c                    s>   t  j||d || _tdd| _tdd| _tdd| _d S )Nr   g      ?g?g{Gz?g      .@)r   r   r   r   r   FaFb)r   r   r   r   r   r   r   r   +  s   zVBxClustering.__init__Nr+   r,   r!   r"   r#   r.   c                    s  | j }| j||d\}}jd dk r9|j\}	}
}tj|	|
ftjd}t|	|
df}tjddd}|||fS tjj	ddd }t
|dd	d
}t|| jddd }tj|dd\}}| }t||| jj| j| jdd\}}|j\}	}
}|d d |dkf }|jd| |jdddj }|j\}}||k r|}n||kr|}|r||krd}t|dddd| t fddt|D }ttt|d|| jdd|	|
d}d| }|r| d }|||jddk< | |}ntj |dd}||	|
}|||fS )Nrs   r   r/   rG   r$   Tr0   r   r   r   r   r   r   r   )r   r   maxItersgHz>r   )r2   Fr   r   r   c                    r^   r_   r`   rb   kmeans_clustersre   r   r   rf     rg   z*VBxClustering.__call__.<locals>.<listcomp>rh   ri   rj   rk         ?r3   )!r   rC   r5   r6   rt   rM   rL   ra   r   r   r   r   r   r   r   r   phir   r   Treshaper7   r   r   rl   rm   r   r   r   r'   r4   rY   rn   )r   r+   r,   r!   r"   r#   rv   r   r;   rP   rQ   rR   rD   rp   train_embeddings_normedr   ahc_clustersfeaqspro   Wauto_num_clustersrq   constr   r   r   rw   <  s   



	"


zVBxClustering.__call__)r   Trz   )r{   r|   r}   r   r   r   r   r~   r   r6   r   r
   r   rw   r   r   r   r   r   r   &  s8   
 r   c                   @   sZ   e Zd ZU dZdZeed< 				ddejdB de	dB de
dB dedB d	ejf
d
dZdS )OracleClusteringzOracle clusteringTr   Nr+   r,   fileframesr.   c                    s\  |j j\}}}|j}	t||	|d}
|
|d< |
j j\}}}|j dddt||f }|
j dddt||f }
dtj||ftjd }t|||f}t	t
||
D ]-\}\}}t|tj |\}^}}t	|D ]\}}|du rrqi||||f< d||||f< qiqS|du r||dfS | j||d\}}|||f  t fdd	t|D }|||fS )
a  Apply oracle clustering

        Parameters
        ----------
        embeddings : (num_chunks, num_speakers, dimension) array, optional
            Sequence of embeddings. When provided, compute speaker centroids
            based on these embeddings.
        segmentations : (num_chunks, num_frames, num_speakers) array
            Binary segmentations.
        file : AudioFile
        frames : SlidingWindow

        Returns
        -------
        hard_clusters : (num_chunks, num_speakers) array
            Hard cluster assignment (hard_clusters[c, s] = k means that sth speaker
            of cth chunk is assigned to kth cluster)
        soft_clusters : (num_chunks, num_speakers, num_clusters) array
            Soft cluster assignment (the higher soft_clusters[c, s, k], the most likely
            the sth speaker of cth chunk belongs to kth cluster)
        centroids : (num_clusters, dimension), optional
            Clusters centroids if `embeddings` is provided, None otherwise.
        )r   oracle_segmentationsNrF   rG   r   rs   c                    r^   r_   r`   rb   rd   r   r   rf     rg   z-OracleClustering.__call__.<locals>.<listcomp>)r4   r5   sliding_windowr   r'   r6   rL   rM   rt   rN   rO   r   newaxisrC   rl   rm   )r   r+   r,   r   r   rv   rP   r<   rQ   windowr   r;   oracle_num_framesr!   rR   rD   rS   segmentationoraclepermutationjirZ   r[   rp   r   rd   r   rw     sL    

zOracleClustering.__call__rz   )r{   r|   r}   r   r   r   r   r6   r   r
   r   r	   rw   r   r   r   r   r     s$   
 r   c                   @   s   e Zd ZeZeZeZeZdS )
ClusteringN)r{   r|   r}   r   r   r   r   r   r   r   r   r     s
    r   ))r   enumr   numpyr6   einopsr   pyannote.audio.core.ior   pyannote.audio.core.pldar   pyannote.audio.pipelines.utilsr    pyannote.audio.utils.permutationr   pyannote.audio.utils.vbxr   pyannote.corer	   r
   pyannote.pipeliner   pyannote.pipeline.parameterr   r   r   scipy.cluster.hierarchyr   r   scipy.optimizer   scipy.spatial.distancer   sklearn.clusterr   r   r   r   r   r   r   r   r   r   r   <module>   s0    y @CzW