o
    9wiS                     @   s   d Z ddlZddlmZ ddlmZmZ ddlZddl	m
Z
 ddlmZmZ ddlmZ ddlmZmZmZ dd	lmZmZ dd
lmZ ddlmZ ddlmZ ddlmZ ddlm Z  G dd deZ!G dd de!Z"G dd de!Z#G dd deZ$dS )zClustering pipelines    N)Enum)OptionalTuple)	rearrange)SlidingWindowSlidingWindowFeature)Pipeline)CategoricalIntegerUniform)fclusterlinkage)linear_sum_assignment)cdist)	AudioFile)oracle_segmentation)	permutatec                       s  e Zd Z			d dededef fddZ						d!d
edee dee dee fddZ		d"de	j
dee dee	j
e	j
e	j
f fddZde	j
de	j
fddZ	d#de	j
de	j
de	j
de	j
def
ddZ								d$de	j
dee dee dee dee de	j
fddZ  ZS )%BaseClusteringcosine  Fmetricmax_num_embeddingsconstrained_assignmentc                    s    t    || _|| _|| _d S N)super__init__r   r   r   selfr   r   r   	__class__ `/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/pyannote/audio/pipelines/clustering.pyr   -   s   

zBaseClustering.__init__Nnum_embeddingsnum_clustersmin_clustersmax_clustersc                 C   sp   |p|pd}t dt||}|p|p|}t dt||}||kr-td|dd|dd||kr3|}|||fS )N   zQmin_clusters must be smaller than (or equal to) max_clusters (here: min_clusters=gz and max_clusters=z).)maxmin
ValueError)r   r"   r#   r$   r%   r    r    r!   set_num_clusters8   s   
zBaseClustering.set_num_clusters
embeddingssegmentationsreturnc           	      C   s   t j|jdddk}t jt |dd }t || \}}t|}|| jkrCtt	|}t
| t|d| j }|| }|| }|||f ||fS )a  Filter NaN embeddings and downsample embeddings

        Parameters
        ----------
        embeddings : (num_chunks, num_speakers, dimension) array
            Sequence of embeddings.
        segmentations : (num_chunks, num_frames, num_speakers) array
            Binary segmentations.

        Returns
        -------
        filtered_embeddings : (num_embeddings, dimension) array
        chunk_idx : (num_embeddings, ) array
        speaker_idx : (num_embeddings, ) array
        r&   axisr      N)npsumdataanyisnanwherelenr   listrangerandomshufflesorted)	r   r,   r-   activevalid	chunk_idxspeaker_idxr"   indicesr    r    r!   filter_embeddingsO   s   

z BaseClustering.filter_embeddingssoft_clustersc                 C   s~   t j|t |d}|j\}}}dt j||ft jd }t|D ]\}}t|dd\}}	t||	D ]
\}
}||||
f< q1q |S )N)nandtypeT)maximize)	r2   
nan_to_numnanminshapeonesint8	enumerater   zip)r   rD   
num_chunksnum_speakersr#   hard_clustersccostspeakersclustersskr    r    r!   constrained_argmaxw   s   z!BaseClustering.constrained_argmaxtrain_chunk_idxtrain_speaker_idxtrain_clustersconstrainedc                    s   t  d }|j\}}}	|||f t  fddt|D }
ttt|d|
| jdd||d}d| }|r?| |}nt j	|dd	}|||
fS )
a  Assign embeddings to the closest centroid

        Cluster centroids are computed as the average of the train embeddings
        previously assigned to them.

        Parameters
        ----------
        embeddings : (num_chunks, num_speakers, dimension)-shaped array
            Complete set of embeddings.
        train_chunk_idx : (num_embeddings,)-shaped array
        train_speaker_idx : (num_embeddings,)-shaped array
            Indices of subset of embeddings used for "training".
        train_clusters : (num_embedding,)-shaped array
            Clusters of the above subset
        constrained : bool, optional
            Use constrained_argmax, instead of (default) argmax.

        Returns
        -------
        soft_clusters : (num_chunks, num_speakers, num_clusters)-shaped array
        hard_clusters : (num_chunks, num_speakers)-shaped array
        centroids : (num_clusters, dimension)-shaped array
            Clusters centroids
        r&   c                    "   g | ]}t j |k d dqS r   r/   r2   mean.0rY   r]   train_embeddingsr    r!   
<listcomp>       z4BaseClustering.assign_embeddings.<locals>.<listcomp>zc s d -> (c s) dr   z(c s) k -> c s k)rT   rX   r1   r/   )
r2   r(   rL   vstackr:   r   r   r   rZ   argmax)r   r,   r[   r\   r]   r^   r#   rQ   rR   	dimension	centroidse2k_distancerD   rS   r    re   r!   assign_embeddings   s.   #

z BaseClustering.assign_embeddingsc                 K   s   | j ||d\}}}	|j\}
}| j|
|||d\}}}|dk rD|j\}}}tj||ftjd}t||df}tj|ddd}|||fS | j||||d	}| j	|||	|| j
d
\}}}|||fS )a  Apply clustering

        Parameters
        ----------
        embeddings : (num_chunks, num_speakers, dimension) array
            Sequence of embeddings.
        segmentations : (num_chunks, num_frames, num_speakers) array
            Binary segmentations.
        num_clusters : int, optional
            Number of clusters, when known. Default behavior is to use
            internal threshold hyper-parameter to decide on the number
            of clusters.
        min_clusters : int, optional
            Minimum number of clusters. Has no effect when `num_clusters` is provided.
        max_clusters : int, optional
            Maximum number of clusters. Has no effect when `num_clusters` is provided.

        Returns
        -------
        hard_clusters : (num_chunks, num_speakers) array
            Hard cluster assignment (hard_clusters[c, s] = k means that sth speaker
            of cth chunk is assigned to kth cluster)
        soft_clusters : (num_chunks, num_speakers, num_clusters) array
            Soft cluster assignment (the higher soft_clusters[c, s, k], the most likely
            the sth speaker of cth chunk belongs to kth cluster)
        centroids : (num_clusters, dimension) array
            Centroid vectors of each cluster
        r-   )r#   r$   r%   r1   rG   r&   r   Tr0   keepdims)r#   )r^   )rC   rL   r+   r2   zerosrN   rM   rb   clusterro   r   )r   r,   r-   r#   r$   r%   kwargsrf   r[   r\   r"   _rQ   rR   rS   rD   rm   r]   r    r    r!   __call__   s>   &


zBaseClustering.__call__)r   r   F)NNNr   )FNNNN)__name__
__module____qualname__strintboolr   r   r+   r2   ndarrayr   r   rC   rZ   ro   rw   __classcell__r    r    r   r!   r   ,   sz    

(
Kr   c                
       s\   e Zd ZdZdejdfdededef fddZ			dd
ej
dededee fddZ  ZS )AgglomerativeClusteringa  Agglomerative clustering

    Parameters
    ----------
    metric : {"cosine", "euclidean", ...}, optional
        Distance metric to use. Defaults to "cosine".

    Hyper-parameters
    ----------------
    method : {"average", "centroid", "complete", "median", "single", "ward"}
        Linkage method.
    threshold : float in range [0.0, 2.0]
        Clustering threshold.
    min_cluster_size : int in range [1, 20]
        Minimum cluster size
    r   Fr   r   r   c                    s<   t  j|||d tdd| _tg d| _tdd| _d S )N)r   r   r   g        g       @)averagecentroidcompletemediansinglewardweightedr&      )r   r   r   	thresholdr	   methodr
   min_cluster_sizer   r   r    r!   r   -  s   z AgglomerativeClustering.__init__Nr,   r$   r%   r#   c                    s  j \}}t| jtdtd| }|dkrtjdtjdS | jdkrS| j	dv rStj
ddd tjjd	d
d W d   n1 sEw   Y  t| j	dd}n	t| j	| jd}t|| jddd  tj d
d\}	}
|	|
|k }t|}||k r|}n||kr|}|dur%||kr%t|}t|d |dddf< |d }d}tt|dddf | j D ]?}||df }||k rqt||ddd  tj d
d\}	}
|	|
|k }t|}t|| t|| k r|}|}||kr nq||kr%t||ddd  tj d
d\}	}
|	|
|k }t|}td| d| d |dkr2d dd<  S |	|
|k  }t|dkrA S t fdd|D }t fdd|D }t||| jd}ttj|ddD ]\}}||   || k< qltj d
d\}  S )a@  

        Parameters
        ----------
        embeddings : (num_embeddings, dimension) array
            Embeddings
        min_clusters : int
            Minimum number of clusters
        max_clusters : int
            Maximum number of clusters
        num_clusters : int, optional
            Actual number of clusters. Default behavior is to estimate it based
            on values provided for `min_clusters`,  `max_clusters`, and `threshold`.

        Returns
        -------
        clusters : (num_embeddings, ) array
            0-indexed cluster indices.
        r&   g?)r&   rG   r   )r   r   r   ignore)divideinvalidTrq   N	euclidean)r   r   distance)	criterion)return_countsr1      zFound only z& clusters. Using a smaller value than z# for `min_cluster_size` might help.r   c                    r_   r`   ra   )rd   large_krW   r,   r    r!   rg     rh   z3AgglomerativeClustering.cluster.<locals>.<listcomp>c                    r_   r`   ra   )rd   small_kr   r    r!   rg     rh   ri   r/   )return_inverse)rL   r)   r   r(   roundr2   rs   uint8r   r   errstatelinalgnormr   r   r   uniquer8   copyarangeargsortabsprintrj   r   rO   argmin)r   r,   r$   r%   r#   r"   rv   r   
dendrogramcluster_uniquecluster_countslarge_clustersnum_large_clusters_dendrogrambest_iterationbest_num_large_clusters	iterationnew_cluster_sizesmall_clusterslarge_centroidssmall_centroidscentroids_cdistr   r   r    r   r!   rt   A  s   



&

zAgglomerativeClustering.clusterr   )ry   rz   r{   __doc__r2   infr|   r}   r~   r   r   r   rt   r   r    r    r   r!   r     s,    r   c                   @   sL   e Zd ZdZ				d
deej dee dee dee	 dejf
dd	Z
dS )OracleClusteringzOracle clusteringNr,   r-   fileframesr.   c                    s\  |j j\}}}|j}	t||	|d}
|
|d< |
j j\}}}|j dddt||f }|
j dddt||f }
dtj||ftjd }t|||f}t	t
||
D ]-\}\}}t|tj |\}^}}t	|D ]\}}|du rrqi||||f< d||||f< qiqS|du r||dfS | j||d\}}|||f  t fdd	t|D }|||fS )
a  Apply oracle clustering

        Parameters
        ----------
        embeddings : (num_chunks, num_speakers, dimension) array, optional
            Sequence of embeddings. When provided, compute speaker centroids
            based on these embeddings.
        segmentations : (num_chunks, num_frames, num_speakers) array
            Binary segmentations.
        file : AudioFile
        frames : SlidingWindow

        Returns
        -------
        hard_clusters : (num_chunks, num_speakers) array
            Hard cluster assignment (hard_clusters[c, s] = k means that sth speaker
            of cth chunk is assigned to kth cluster)
        soft_clusters : (num_chunks, num_speakers, num_clusters) array
            Soft cluster assignment (the higher soft_clusters[c, s, k], the most likely
            the sth speaker of cth chunk belongs to kth cluster)
        centroids : (num_clusters, dimension), optional
            Clusters centroids if `embeddings` is provided, None otherwise.
        )r   oracle_segmentationsNrF   rG   g      ?rp   c                    r_   r`   ra   rc   re   r    r!   rg   &  rh   z-OracleClustering.__call__.<locals>.<listcomp>)r4   rL   sliding_windowr   r)   r2   rM   rN   rs   rO   rP   r   newaxisrC   rj   r:   )r   r,   r-   r   r   ru   rQ   
num_framesrR   windowr   rv   oracle_num_framesr#   rS   rD   rT   segmentationoraclepermutationjir[   r\   rm   r    re   r!   rw     sL    

zOracleClustering.__call__rx   )ry   rz   r{   r   r   r2   r   r   r   r   rw   r    r    r    r!   r     s"    r   c                   @   s   e Zd ZeZeZdS )
ClusteringN)ry   rz   r{   r   r   r    r    r    r!   r   /  s    r   )%r   r;   enumr   typingr   r   numpyr2   einopsr   pyannote.corer   r   pyannote.pipeliner   pyannote.pipeline.parameterr	   r
   r   scipy.cluster.hierarchyr   r   scipy.optimizer   scipy.spatial.distancer   pyannote.audio.core.ior   pyannote.audio.pipelines.utilsr    pyannote.audio.utils.permutationr   r   r   r   r   r    r    r    r!   <module>   s*    p @U