o
    ib                     @   sl   d dl Z d dlZd dlZd dlZd dlmZ d dlmZ G dd dZ	G dd dZ
G dd	 d	ejjZdS )
    Nk_means)HDBSCANc                   @   sV   e Zd ZdZdddZddd	Zd
d Zdd Zdd ZdddZ	dd Z
dd ZdS )SpectralClusterzA spectral clustering mehtod using unnormalized Laplacian of affinity matrix.
    This implementation is adapted from https://github.com/speechbrain/speechbrain.
          I+?c                 C   s   || _ || _|| _d S N)min_num_spksmax_num_spkspval)selfr
   r   r    r   Z/home/ubuntu/.local/lib/python3.10/site-packages/funasr/models/campplus/cluster_backend.py__init__   s   
zSpectralCluster.__init__Nc           
      C   sL   |  |}| |}d||j  }| |}| ||\}}| ||}	|	S )Ng      ?)get_sim_mat	p_pruningTget_laplacianget_spec_embscluster_embs)
r   X
oracle_numsim_matprunned_sim_matsym_prund_sim_mat	laplacianemb
num_of_spklabelsr   r   r   __call__   s   


zSpectralCluster.__call__c                 C   s   t jj||}|S r	   )sklearnmetricspairwisecosine_similarity)r   r   Mr   r   r   r   /   s   zSpectralCluster.get_sim_matc                 C   s   |j d | j dk rd|j d  }n| j}td| |j d  }t|j d D ]}t||d d f }|d| }d|||f< q'|S )Nr      g      @r   )shaper   intrangenpargsort)r   Ar   n_elemsilow_indexesr   r   r   r   4   s   zSpectralCluster.p_pruningc                 C   s>   d|t |jd < t jt |dd}t |}|| }|S )Nr   r   axis)r*   diag_indicesr'   sumabsdiag)r   r%   DLr   r   r   r   E   s
   
zSpectralCluster.get_laplacianc                 C   sh   t j|\}}|d ur|}n| || jd | jd  }t|| j }|d d d |f }||fS Nr   )scipylinalgeighgetEigenGapsr
   r   r*   argmax)r   r7   k_oraclelambdaseig_vecsr   lambda_gap_listr   r   r   r   r   L   s   zSpectralCluster.get_spec_embsc                 C   s   t ||\}}}|S r	   r   )r   r   k_r   r   r   r   r   Z   s   zSpectralCluster.cluster_embsc                 C   sD   g }t t|d D ]}t||d  t||  }|| q
|S r8   )r)   lenfloatappend)r   eig_valseig_vals_gap_listr.   gapr   r   r   r<   ^   s
   zSpectralCluster.getEigenGaps)r   r   r   r	   )__name__
__module____qualname____doc__r   r    r   r   r   r   r   r<   r   r   r   r   r      s    


r   c                   @   s$   e Zd ZdZ	dddZdd	 Zd
S )UmapHdbscanz
    Reference:
    - Siqi Zheng, Hongbin Suo. Reformulating Speaker Diarization as Community Detection With
      Emphasis On Topological Structure. ICASSP2022
       <   
   cosinec                 C   s"   || _ || _|| _|| _|| _d S r	   )n_neighborsn_componentsmin_samplesmin_cluster_sizemetric)r   rS   rT   rU   rV   rW   r   r   r   r   m   s
   
zUmapHdbscan.__init__c                 C   sV   dd l m} |j| jdt| j|jd d | jd|}t	| j
| jdd|}|S )Nr   g           )rS   min_distrT   rW   T)rU   rV   allow_single_cluster)
umap.umap_umap_UMAPrS   minrT   r'   rW   fit_transformr   rU   rV   fit_predict)r   r   umapumap_Xr   r   r   r   r    v   s"   zUmapHdbscan.__call__N)rO   rP   rQ   rQ   rR   )rJ   rK   rL   rM   r   r    r   r   r   r   rN   f   s
    
	rN   c                       s2   e Zd ZdZd	 fdd	Zdd Zdd Z  ZS )
ClusterBackendzPerfom clustering for input embeddings and output the labels.
    Args:
        model_dir: A model dir.
        model_config: The model config.
    (\?c                    s(   t    d|i| _t | _t | _d S )N	merge_thr)superr   model_configr   spectral_clusterrN   umap_hdbscan_cluster)r   re   	__class__r   r   r      s   

zClusterBackend.__init__c                 K   s   d|v r|d nd }t |jdksJ d|jd dk r&tj|jd ddS |jd dk s1|d ur8| ||}n| |}|d u rPd	| jv rP| ||| jd	 }|S )
Nr   rX   z5modelscope error: the shape of input should be [N, C]r   rO   r(   )dtypei   re   )rD   r'   r*   zerosrh   ri   rg   merge_by_cos)r   r   paramsrB   r   r   r   r   forward   s   
zClusterBackend.forwardc                 C   s$  |dkr|dks
J 	 |  d }|dkr	 |S g }t|D ]}|||k d}|| qt|dks7J tj|dd}|tjj|ddd }t	||j
}	t|	d}	tt|	|	j}
|	|
 |k ri	 |S tt|D ]!}|| |
d kr|
d ||< qo|| |
d kr||  d8  < qoq)Nr   r   Tr0   )r1   keepdims)maxr)   meanrF   rD   r*   stackr:   normmatmulr   triuunravel_indexr=   r'   )r   r   embscos_thrspk_num
spk_centerr.   spk_embnorm_spk_centeraffinityspksr   r   r   rn      s4   zClusterBackend.merge_by_cos)rd   )rJ   rK   rL   rM   r   rp   rn   __classcell__r   r   rj   r   rc      s
    rc   )r9   torchr!   numpyr*   sklearn.cluster._kmeansr   sklearn.clusterr   r   rN   nnModulerc   r   r   r   r   <module>   s   V!