o
    ei0                     @   s  d Z ddlZddlZddlZddlZddlZddlmZ ddlm	Z	 ddlm
Z ddlmZ ejd zddlZddlmZ dd	lmZ dd
lmZ W n- eyy   dZed7 Zed7 Zed7 Zed7 Zed7 Zed7 Zed7 Zed7 Zeew dd Zdd Zdd Zdd Zdd Zdd Zd d! Zd"d# Z d$d% Z!d&d' Z"d(d) Z#d*d+ Z$d,d- Z%	.	/	/dAd0d1Z&	.			2dBd3d4Z'G d5d6 d6eZ(G d7d8 d8Z)d9d: Z*	<dCd=d>Z+dCd?d@Z,dS )Du  
This script contains basic functions used for speaker diarization.
This script has an optional dependency on open source scikit-learn (sklearn) library.
A few scikit-learn functions are modified in this script as per requirement.

Reference
---------
This code is written using the following:

- Von Luxburg, U. A tutorial on spectral clustering. Stat Comput 17, 395–416 (2007).
  https://doi.org/10.1007/s11222-007-9033-z

- https://github.com/scikit-learn/scikit-learn/blob/0fb307bf3/sklearn/cluster/_spectral.py

- https://github.com/tango4j/Auto-Tuning-Spectral-Clustering/blob/master/spectral_opt.py

Authors
 * Nauman Dawalatabad 2020
    N)sparse)connected_components)	laplacian)eigsh  )SpectralClustering)k_means)kneighbors_graphzFThe optional dependency scikit-learn (sklearn) is used in this module
zCannot import scikit-learn. 
z%Please follow the below instructions
z=============================
zUsing pip:
zpip install scikit-learn
z"================================ 
zUsing conda:
zconda install scikit-learnc                 C   sX   g }t | ddd}|D ]}|dd }|| qW d   |S 1 s%w   Y  |S )zReads and returns RTTM in list format.

    Arguments
    ---------
    rttm_file_path : str
        Path to the RTTM file to be read.

    Returns
    -------
    rttm : list
        List containing rows of RTTM file.
    rutf-8encodingN)openappend)rttm_file_pathrttmflineentry r   `/home/ubuntu/transcripts/venv/lib/python3.10/site-packages/speechbrain/processing/diarization.py	read_rttm4   s   
r   c                 C   s   t | }ttdd |}g }d}t|dddR}|D ]/}|d}	|	d }
|
|vrJ|
tt|| d	g}||
 d|}|	d
|  |d7 }qdtt|| d	g}d|}|	d
|  W d   dS 1 smw   Y  dS )zWrite the final DERs for individual recording.

    Arguments
    ---------
    ref_rttm : str
        Reference RTTM file.
    DER : array
        Array containing DER values of each recording.
    out_der_file : str
        File to write the DERs.
    c                 S   s
   |  dS )Nz	SPKR-INFO)
startswithxr   r   r   <lambda>V   s   
 z!write_ders_file.<locals>.<lambda>r   wr   r          %s
zOVERALL N)
r   listfilterr   splitstrroundr   joinwrite)ref_rttmDERout_der_filer   	spkr_inforec_id_listcountr   rowarec_idr
   line_strr   r   r   write_ders_fileI   s&   



"r3   c           
      C   s   | d g}g }| D ]}|d  |r|| q	|| }t|dddd}tj|ddtjd}|D ]}	||	 q1W d	   d	S 1 sDw   Y  d	S )
a  Prepares csv for a given recording ID.

    Arguments
    ---------
    full_diary_csv : csv
        Full csv containing all the recordings
    rec_id : str
        The recording ID for which csv has to be prepared
    out_csv_file : str
        Path of the output csv file.
    r   r    r   )modenewliner   ,")	delimiter	quotecharquotingN)r   r   r   csvwriterQUOTE_MINIMALwriterow)
full_diary_csvr1   out_csv_fileout_csv_headr   r/   out_csvcsv_file
csv_writerr
   r   r   r   prepare_subset_csvj   s   


"rF   c                 C   s   || krdS dS )a  Returns True if segments are overlapping.

    Arguments
    ---------
    end1 : float
        End time of the first segment.
    start2 : float
        Start time of the second segment.

    Returns
    -------
    overlapped : bool
        True of segments overlapped else False.

    Example
    -------
    >>> from speechbrain.processing import diarization as diar
    >>> diar.is_overlapped(5.5, 3.4)
    True
    >>> diar.is_overlapped(5.5, 6.4)
    False
    FTr   )end1start2r   r   r   is_overlapped   s   rI   c                 C   s   g }| d }d}t dt| D ]4}| | }t|d |d r<|d |d kr<|d |d< |t| d kr;d}|| q|| |}q|du rO|| d  |S )a  Merge adjacent sub-segs from the same speaker.

    Arguments
    ---------
    lol : list of list
        Each list contains [rec_id, sseg_start, sseg_end, spkr_id].

    Returns
    -------
    new_lol : list of list
        new_lol contains adjacent segments merged from the same speaker ID.

    Example
    -------
    >>> from speechbrain.processing import diarization as diar
    >>> lol=[['r1', 5.5, 7.0, 's1'],
    ... ['r1', 6.5, 9.0, 's1'],
    ... ['r1', 8.0, 11.0, 's1'],
    ... ['r1', 11.5, 13.0, 's2'],
    ... ['r1', 14.0, 15.0, 's2'],
    ... ['r1', 14.5, 15.0, 's1']]
    >>> diar.merge_ssegs_same_speaker(lol)
    [['r1', 5.5, 11.0, 's1'], ['r1', 11.5, 13.0, 's2'], ['r1', 14.0, 15.0, 's2'], ['r1', 14.5, 15.0, 's1']]
    r   Fr   r       Tr   rangelenrI   r   )lolnew_lolssegflagi	next_ssegr   r   r   merge_ssegs_same_speaker   s    "

rT   c                 C   s   g }| d }t dt| D ]^}| | }t|d |d rR|d |d  }|d |d  |d< |d |d  |d< t|dkrD|| n|d |krO|| |}qt|dkr^|| n|d |kri|| |}q|| |S )a	  Distributes the overlapped speech equally among the adjacent segments
    with different speakers.

    Arguments
    ---------
    lol : list of list
        It has each list structure as [rec_id, sseg_start, sseg_end, spkr_id].

    Returns
    -------
    new_lol : list of list
        It contains the overlapped part equally divided among the adjacent
        segments with different speaker IDs.

    Example
    -------
    >>> from speechbrain.processing import diarization as diar
    >>> lol = [['r1', 5.5, 9.0, 's1'],
    ... ['r1', 8.0, 11.0, 's2'],
    ... ['r1', 11.5, 13.0, 's2'],
    ... ['r1', 12.0, 15.0, 's1']]
    >>> diar.distribute_overlap(lol)
    [['r1', 5.5, 8.5, 's1'], ['r1', 8.5, 11.0, 's2'], ['r1', 11.5, 12.5, 's2'], ['r1', 12.5, 15.0, 's1']]
    r   r   r    g       @r   rK   )rN   rO   rP   rR   rS   overlapr   r   r   distribute_overlap   s(   


rV   c           	      C   s   g }| d d }| D ]'}d|dt t|d dt t|d |d  ddd|d ddg
}|| q
t|d	d
d}|D ]}d|}|d|  q<W d   dS 1 sVw   Y  dS )a  Writes the segment list in RTTM format (A standard NIST format).

    Arguments
    ---------
    segs_list : list of list
        Each list contains [rec_id, sseg_start, sseg_end, spkr_id].
    out_rttm_file : str
        Path of the output RTTM file.
    r   SPEAKER0r      r    z<NA>rJ   r   r   r   r   r!   N)r%   r&   r   r   r'   r(   )		segs_listout_rttm_filer   r1   segnew_rowr   r/   r2   r   r   r   
write_rttm%  s*   

"r^   c           
      C   s   | j d }t| r|  } tj|td}tj|td}d||< t|D ]B}| }tj	|||d || kr; |S t
|d }|d |D ]}t| rY| |   }	n| | }	tj	||	|d qIq$|S )a?  Find the largest graph connected components that contains one
    given node.

    Arguments
    ---------
    graph : array-like, shape: (n_samples, n_samples)
        Adjacency matrix of the graph, non-zero weight means an edge
        between the nodes.
    node_id : int
        The index of the query node of the graph.

    Returns
    -------
    connected_components_matrix : array-like
        shape - (n_samples,).
        An array of bool value indicating the indexes of the nodes belonging
        to the largest connected components of the given query node.
    r   )dtypeT)outF)shaper   issparsetocsrnpzerosboolrL   sum
logical_orwherefilltoarrayravel)
graphnode_idn_nodeconnected_nodesnodes_to_explore_last_num_componentindicesrR   	neighborsr   r   r   _graph_connected_componentJ  s*   

	

rv   c                 C   s6   t | rt| \}}|dkS t| d | jd kS )ar  Return whether the graph is connected (True) or Not (False)

    Arguments
    ---------
    graph : array-like or sparse matrix, shape: (n_samples, n_samples)
        Adjacency matrix of the graph, non-zero weight means an edge between the nodes.

    Returns
    -------
    is_connected : bool
        True means the graph is fully connected and False means not.
    r   r   )r   
isspmatrixr   rv   rg   ra   )rm   n_connected_componentsrr   r   r   r   _graph_is_connectedt  s   
ry   c                 C   s   | j d }t| s|r|| jdd|d < | S |  } |r)| j| jk}|| j|< t	| j| j j
}|dkr=|  } | S |  } | S )aC  
    Set the diagonal of the laplacian matrix and convert it to a sparse
    format well suited for eigenvalue decomposition.

    Arguments
    ---------
    laplacian : array or sparse matrix
        The graph laplacian.
    value : float
        The value of the diagonal.
    norm_laplacian : bool
        Whether the value of the diagonal should be changed or not.

    Returns
    -------
    laplacian : array or sparse matrix
        An array of matrix in a form that is well suited to fast eigenvalue
        decomposition, depending on the bandwidth of the matrix.
    r   Nr      )ra   r   rw   flattocoor/   coldatard   uniquesizetodiarc   )r   valuenorm_laplaciann_nodesdiag_idxn_diagsr   r   r   	_set_diag  s   


r   c                 C   sJ   t jt | dd}t | t| jd |f }| |ddt jf 9 } | S )a  Modify the sign of vectors for reproducibility. Flips the sign of
    elements of all the vectors (rows of u) such that the absolute
    maximum element of each vector is positive.

    Arguments
    ---------
    u : ndarray
        Array with vectors as its rows.

    Returns
    -------
    u_flipped : ndarray
        Array with the sign flipped vectors as its rows. The same shape as `u`.
    r   axisr   N)rd   argmaxabssignrL   ra   newaxis)umax_abs_rowssignsr   r   r   _deterministic_vector_sign_flip  s   r   c                 C   sR   | du s	| t ju rt jjjS t| tjrt j| S t| t jjr#| S td|  )a  Turn seed into a np.random.RandomState instance.

    Arguments
    ---------
    seed : None | int | instance of RandomState
        If seed is None, return the RandomState singleton used by np.random.
        If seed is an int, return a new RandomState instance seeded with seed.
        If seed is already a RandomState instance, return it.
        Otherwise raise ValueError.

    Returns
    -------
    np.random.RandomState
    Nz:%r cannot be used to seed a np.random.RandomState instance)	rd   randommtrand_rand
isinstancenumbersIntegralRandomState
ValueError)seedr   r   r   _check_random_state  s   
r   c                 C   s"   d}|D ]
}| |v r|d7 }q|S )au  
    Returns actual number of speakers in a recording from the ground-truth.
    This can be used when the condition is oracle number of speakers.

    Arguments
    ---------
    rec_id : str
        Recording ID for which the number of speakers have to be obtained.
    spkr_info : list
        Header of the RTTM file. Starting with `SPKR-INFO`.

    Returns
    -------
    num_spkrs : int

    Example
    -------
    >>> from speechbrain.processing import diarization as diar
    >>> spkr_info = ['SPKR-INFO ES2011a 0 <NA> <NA> <NA> unknown ES2011a.A <NA> <NA>',
    ... 'SPKR-INFO ES2011a 0 <NA> <NA> <NA> unknown ES2011a.B <NA> <NA>',
    ... 'SPKR-INFO ES2011a 0 <NA> <NA> <NA> unknown ES2011a.C <NA> <NA>',
    ... 'SPKR-INFO ES2011a 0 <NA> <NA> <NA> unknown ES2011a.D <NA> <NA>',
    ... 'SPKR-INFO ES2011b 0 <NA> <NA> <NA> unknown ES2011b.A <NA> <NA>',
    ... 'SPKR-INFO ES2011b 0 <NA> <NA> <NA> unknown ES2011b.B <NA> <NA>',
    ... 'SPKR-INFO ES2011b 0 <NA> <NA> <NA> unknown ES2011b.C <NA> <NA>']
    >>> diar.get_oracle_num_spkrs('ES2011a', spkr_info)
    4
    >>> diar.get_oracle_num_spkrs('ES2011b', spkr_info)
    3
    r   r   r   )r1   r,   	num_spkrsr   r   r   r   get_oracle_num_spkrs  s   r      Tc           	      C   s   |r|d }t | std t| |dd\}}t|d|}|d9 }t||ddd\}}|j|d	d }|r:|| }t|}|rG|d| jS |d	| jS )
a  Returns spectral embeddings.

    Arguments
    ---------
    adjacency : array-like or sparse graph
        shape - (n_samples, n_samples)
        The adjacency matrix of the graph to embed.
    n_components : int
        The dimension of the projection subspace.
    norm_laplacian : bool
        If True, then compute normalized Laplacian.
    drop_first : bool
        Whether to drop the first eigenvector.

    Returns
    -------
    embedding : array
        Spectral embeddings for each sample.

    Example
    -------
    >>> import numpy as np
    >>> from speechbrain.processing import diarization as diar
    >>> affinity = np.array([[1, 1, 1, 0.5, 0, 0, 0, 0, 0, 0.5],
    ... [1, 1, 1, 0, 0, 0, 0, 0, 0, 0],
    ... [1, 1, 1, 0, 0, 0, 0, 0, 0, 0],
    ... [0.5, 0, 0, 1, 1, 1, 0, 0, 0, 0],
    ... [0, 0, 0, 1, 1, 1, 0, 0, 0, 0],
    ... [0, 0, 0, 1, 1, 1, 0, 0, 0, 0],
    ... [0, 0, 0, 0, 0, 0, 1, 1, 1, 1],
    ... [0, 0, 0, 0, 0, 0, 1, 1, 1, 1],
    ... [0, 0, 0, 0, 0, 0, 1, 1, 1, 1],
    ... [0.5, 0, 0, 0, 0, 0, 1, 1, 1, 1]])
    >>> embs = diar.spectral_embedding_sb(affinity, 3)
    >>> # Notice similar embeddings
    >>> print(np.around(embs , decimals=3))
    [[ 0.075  0.244  0.285]
     [ 0.083  0.356 -0.203]
     [ 0.083  0.356 -0.203]
     [ 0.26  -0.149  0.154]
     [ 0.29  -0.218 -0.11 ]
     [ 0.29  -0.218 -0.11 ]
     [-0.198 -0.084 -0.122]
     [-0.198 -0.084 -0.122]
     [-0.198 -0.084 -0.122]
     [-0.167 -0.044  0.316]]
    r   zJGraph is not fully connected, spectral embedding may not work as expected.T)normedreturn_diagr   g      ?LM)ksigmawhichN)ry   warningswarncsgraph_laplacianr   r   Tr   )		adjacencyn_componentsr   
drop_firstr   ddvalsdiffusion_map	embeddingr   r   r   spectral_embedding_sb  s0   6

r   
   c                 C   s@   t |}|du r
|n|}t| |dd}t||||d\}}}|S )a  Performs spectral clustering.

    Arguments
    ---------
    affinity : matrix
        Affinity matrix.
    n_clusters : int
        Number of clusters for kmeans.
    n_components : int
        Number of components to retain while estimating spectral embeddings.
    random_state : int
        A pseudo random number generator used by kmeans.
    n_init : int
        Number of time the k-means algorithm will be run with different centroid seeds.

    Returns
    -------
    labels : array
        Cluster label for each sample.

    Example
    -------
    >>> import numpy as np
    >>> from speechbrain.processing import diarization as diar
    >>> affinity = np.array([[1, 1, 1, 0.5, 0, 0, 0, 0, 0, 0.5],
    ... [1, 1, 1, 0, 0, 0, 0, 0, 0, 0],
    ... [1, 1, 1, 0, 0, 0, 0, 0, 0, 0],
    ... [0.5, 0, 0, 1, 1, 1, 0, 0, 0, 0],
    ... [0, 0, 0, 1, 1, 1, 0, 0, 0, 0],
    ... [0, 0, 0, 1, 1, 1, 0, 0, 0, 0],
    ... [0, 0, 0, 0, 0, 0, 1, 1, 1, 1],
    ... [0, 0, 0, 0, 0, 0, 1, 1, 1, 1],
    ... [0, 0, 0, 0, 0, 0, 1, 1, 1, 1],
    ... [0.5, 0, 0, 0, 0, 0, 1, 1, 1, 1]])
    >>> labs = diar.spectral_clustering_sb(affinity, 3)
    >>> # print (labs) # [2 2 2 1 1 1 0 0 0 0]
    NF)r   r   )random_staten_init)r   r   r   )affinity
n_clustersr   r   r   mapsrr   labelsr   r   r   spectral_clustering_sbk  s   ,r   c                   @   s   e Zd ZdZdddZdS )Spec_Clusterz9Performs spectral clustering using sklearn on embeddings.r   c                 C   s4   t ||dd}d||j  | _t| j| jd| _| S )a  
        Performs spectral clustering using sklearn on embeddings.

        Arguments
        ---------
        X : array (n_samples, n_features)
            Embeddings to be clustered.
        n_neighbors : int
            Number of neighbors in estimating affinity matrix.

        Returns
        -------
        Spec_Cluster

        Reference
        ---------
        https://github.com/scikit-learn/scikit-learn/blob/0fb307bf3/sklearn/cluster/_spectral.py
        T)n_neighborsinclude_self      ?)r   )r	   r   affinity_matrix_r   r   labels_)selfXr   connectivityr   r   r   
perform_sc  s   zSpec_Cluster.perform_scN)r   )__name__
__module____qualname____doc__r   r   r   r   r   r     s    r   c                   @   sT   e Zd ZdZdddZdd Zdd	 Zd
d Zdd ZdddZ	dd Z
dd ZdS )Spec_Clust_unormu
  
    This class implements the spectral clustering with unnormalized affinity matrix.
    Useful when affinity matrix is based on cosine similarities.

    Arguments
    ---------
    min_num_spkrs : int
        Minimum number of expected speakers.
    max_num_spkrs : int
        Maximum number of expected speakers.

    Reference
    ---------
    Von Luxburg, U. A tutorial on spectral clustering. Stat Comput 17, 395–416 (2007).
    https://doi.org/10.1007/s11222-007-9033-z

    Example
    -------
    >>> from speechbrain.processing import diarization as diar
    >>> clust = diar.Spec_Clust_unorm(min_num_spkrs=2, max_num_spkrs=10)
    >>> emb = [[ 2.1, 3.1, 4.1, 4.2, 3.1],
    ... [ 2.2, 3.1, 4.2, 4.2, 3.2],
    ... [ 2.0, 3.0, 4.0, 4.1, 3.0],
    ... [ 8.0, 7.0, 7.0, 8.1, 9.0],
    ... [ 8.1, 7.1, 7.2, 8.1, 9.2],
    ... [ 8.3, 7.4, 7.0, 8.4, 9.0],
    ... [ 0.3, 0.4, 0.4, 0.5, 0.8],
    ... [ 0.4, 0.3, 0.6, 0.7, 0.8],
    ... [ 0.2, 0.3, 0.2, 0.3, 0.7],
    ... [ 0.3, 0.4, 0.4, 0.4, 0.7],]
    >>> # Estimating similarity matrix
    >>> sim_mat = clust.get_sim_mat(emb)
    >>> print (np.around(sim_mat[5:,5:], decimals=3))
    [[1.    0.957 0.961 0.904 0.966]
     [0.957 1.    0.977 0.982 0.997]
     [0.961 0.977 1.    0.928 0.972]
     [0.904 0.982 0.928 1.    0.976]
     [0.966 0.997 0.972 0.976 1.   ]]
    >>> # Pruning
    >>> pruned_sim_mat = clust.p_pruning(sim_mat, 0.3)
    >>> print (np.around(pruned_sim_mat[5:,5:], decimals=3))
    [[1.    0.    0.    0.    0.   ]
     [0.    1.    0.    0.982 0.997]
     [0.    0.977 1.    0.    0.972]
     [0.    0.982 0.    1.    0.976]
     [0.    0.997 0.    0.976 1.   ]]
    >>> # Symmetrization
    >>> sym_pruned_sim_mat = 0.5 * (pruned_sim_mat + pruned_sim_mat.T)
    >>> print (np.around(sym_pruned_sim_mat[5:,5:], decimals=3))
    [[1.    0.    0.    0.    0.   ]
     [0.    1.    0.489 0.982 0.997]
     [0.    0.489 1.    0.    0.486]
     [0.    0.982 0.    1.    0.976]
     [0.    0.997 0.486 0.976 1.   ]]
    >>> # Laplacian
    >>> laplacian = clust.get_laplacian(sym_pruned_sim_mat)
    >>> print (np.around(laplacian[5:,5:], decimals=3))
    [[ 1.999  0.     0.     0.     0.   ]
     [ 0.     2.468 -0.489 -0.982 -0.997]
     [ 0.    -0.489  0.975  0.    -0.486]
     [ 0.    -0.982  0.     1.958 -0.976]
     [ 0.    -0.997 -0.486 -0.976  2.458]]
    >>> # Spectral Embeddings
    >>> spec_emb, num_of_spk = clust.get_spec_embs(laplacian, 3)
    >>> print(num_of_spk)
    3
    >>> # Clustering
    >>> clust.cluster_embs(spec_emb, num_of_spk)
    >>> # print (clust.labels_) # [0 0 0 2 2 2 1 1 1 1]
    >>> # Complete spectral clustering
    >>> clust.do_spec_clust(emb, k_oracle=3, p_val=0.3)
    >>> # print(clust.labels_) # [0 0 0 2 2 2 1 1 1 1]
    r    r   c                 C   s   || _ || _d S )Nmin_num_spkrsmax_num_spkrs)r   r   r   r   r   r   __init__  s   
zSpec_Clust_unorm.__init__c           
      C   sN   |  |}| ||}d||j  }| |}| ||\}}	| ||	 dS )a_  Function for spectral clustering.

        Arguments
        ---------
        X : array
            (n_samples, n_features).
            Embeddings extracted from the model.
        k_oracle : int
            Number of speakers (when oracle number of speakers).
        p_val : float
            p percent value to prune the affinity matrix.
        r   N)get_sim_mat	p_pruningr   get_laplacianget_spec_embscluster_embs)
r   r   k_oraclep_valsim_matpruned_sim_matsym_pruned_sim_matr   emb
num_of_spkr   r   r   do_spec_clust   s   

zSpec_Clust_unorm.do_spec_clustc                 C   s   t jj||}|S )a  Returns the similarity matrix based on cosine similarities.

        Arguments
        ---------
        X : array
            (n_samples, n_features).
            Embeddings extracted from the model.

        Returns
        -------
        M : array
            (n_samples, n_samples).
            Similarity matrix with cosine similarities between each pair of embedding.
        )sklearnmetricspairwisecosine_similarity)r   r   Mr   r   r   r   ?  s   zSpec_Clust_unorm.get_sim_matc                 C   s\   t d| |jd  }t|jd D ]}t||ddf }|d| }d|||f< q|S )a  Refine the affinity matrix by zeroing less similar values.

        Arguments
        ---------
        A : array
            (n_samples, n_samples).
            Affinity matrix.
        pval : float
            p-value to be retained in each row of the affinity matrix.

        Returns
        -------
        A : array
            (n_samples, n_samples).
            pruned affinity matrix based on p_val.
        r   r   N)intra   rL   rd   argsort)r   Apvaln_elemsrR   low_indexesr   r   r   r   R  s   zSpec_Clust_unorm.p_pruningc                 C   s>   d|t |jd < t jt |dd}t |}|| }|S )a6  Returns the un-normalized laplacian for the given affinity matrix.

        Arguments
        ---------
        M : array
            (n_samples, n_samples)
            Affinity matrix.

        Returns
        -------
        L : array
            (n_samples, n_samples)
            Laplacian matrix.
        r   r   r   )rd   diag_indicesra   rg   r   diag)r   r   DLr   r   r   r   o  s
   
zSpec_Clust_unorm.get_laplacianrY   c                 C   s   t j|\}}|dur|}n'| |d| j }|r*t|dt| jt| ndd }|| j	k r6| j	}|ddd|f }||fS )a  Returns spectral embeddings and estimates the number of speakers
        using maximum Eigen gap.

        Arguments
        ---------
        L : array (n_samples, n_samples)
            Laplacian matrix.
        k_oracle : int
            Number of speakers when the condition is oracle number of speakers,
            else None.

        Returns
        -------
        emb : array (n_samples, n_components)
            Spectral embedding for each sample with n Eigen components.
        num_of_spk : int
            Estimated number of speakers. If the condition is set to the oracle
            number of speakers then returns k_oracle.
        Nr   r   r    )
scipylinalgeighgetEigenGapsr   rd   r   minrM   r   )r   r   r   lambdaseig_vecsr   lambda_gap_listr   r   r   r   r     s"   

zSpec_Clust_unorm.get_spec_embsc                 C   s   t ||\}| _}dS )a  Clusters the embeddings using kmeans.

        Arguments
        ---------
        emb : array (n_samples, n_components)
            Spectral embedding for each sample with n Eigen components.
        k : int
            Number of clusters to kmeans.
        N)r   r   )r   r   r   rr   r   r   r   r     s   
zSpec_Clust_unorm.cluster_embsc                 C   sD   g }t t|d D ]}t||d  t||  }|| q
|S )a&  Returns the difference (gaps) between the Eigen values.

        Arguments
        ---------
        eig_vals : list
            List of eigen values

        Returns
        -------
        eig_vals_gap_list : list
            List of differences (gaps) between adjacent Eigen values.
        r   )rL   rM   floatr   )r   eig_valseig_vals_gap_listrR   gapr   r   r   r     s
   zSpec_Clust_unorm.getEigenGapsN)r    r   )rY   )r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r     s    
J
-r   c                 C   s  |dkrt ddd}|}|| j|| |j}	nt|dddd}|| j| |j}	| j}
g }t|	jd	 D ]3}|d
 t	|	|  }|
| }|
d
d}t	|d	 }t|d }t|d }||||g}|| q6|jdd d t|}t|}t|| dS )a  Performs spectral clustering on embeddings. This function calls specific
    clustering algorithms as per affinity.

    Arguments
    ---------
    diary_obj : StatObject_SB type
        Contains embeddings in diary_obj.stat1 and segment IDs in diary_obj.segset.
    out_rttm_file : str
        Path of the output RTTM file.
    rec_id : str
        Recording ID for the recording under processing.
    k : int
        Number of speaker (None, if it has to be estimated).
    pval : float
        `pval` for pruning affinity matrix.
    affinity_type : str
        Type of similarity to be used to get affinity matrix (cos or nn).
    n_neighbors : int
        Number of neighbors to use for clustering
    cosr    r   r   kmeansr   nearest_neighbors)r   assign_labelsr   r   r   rr   r   c                 S      t | d S Nr   r   r   r   r   r   r         z$do_spec_clustering.<locals>.<lambda>keyN)r   r   stat1r   r   r   segsetrL   ra   r%   rsplitr   r   sortrT   rV   r^   )	diary_objr[   r1   r   r   affinity_typer   	clust_objr   r   
subseg_idsrN   rR   spkr_idsub_segsplitted
sseg_startsseg_endr0   r   r   r   do_spec_clustering  s8   r  rY   333333?c                 C   s  |dur|}n&t ddd}|| j}|||}d||j  }	||	}
||
|\}}t| j|\}}}| j}g }t	|j
d D ]3}|d t||  }|| }|dd}t|d }t|d }t|d }||||g}|| qB|jd	d
 d t|}t|}t|| dS )ax  Performs kmeans clustering on embeddings.

    Arguments
    ---------
    diary_obj : StatObject_SB type
        Contains embeddings in diary_obj.stat1 and segment IDs in diary_obj.segset.
    out_rttm_file : str
        Path of the output RTTM file.
    rec_id : str
        Recording ID for the recording under processing.
    k_oracle : int
        Number of speaker (None, if it has to be estimated).
    p_val : float
        `pval` for pruning affinity matrix. Used only when number of speakers
        are unknown. Note that this is just for experiment. Prefer Spectral clustering
        for better clustering results.
    Nr    r   r   r   r   rr   r   c                 S   r   r   r   r   r   r   r   r   ^  r   z&do_kmeans_clustering.<locals>.<lambda>r   )r   r   r   r   r   r   r   r   r   rL   ra   r%   r   r   r   r  rT   rV   r^   )r  r[   r1   r   r   r   r  r   r   r   r   rr   r   r  rN   rR   r  r  r  r	  r
  r0   r   r   r   do_kmeans_clustering  s0   
r  c                 C   s
  ddl m} |   |dur|}||ddd| j}|j}n|ddd|d| j}|j}| j}	g }
t|jd D ]3}|d t	||  }|	| }|
dd	}t	|d }t|d
 }t|d	 }||||g}|
| q:|
jdd d t|
}
t|
}
t|
| dS )a  Performs Agglomerative Hierarchical Clustering on embeddings.

    Arguments
    ---------
    diary_obj : StatObject_SB type
        Contains embeddings in diary_obj.stat1 and segment IDs in diary_obj.segset.
    out_rttm_file : str
        Path of the output RTTM file.
    rec_id : str
        Recording ID for the recording under processing.
    k_oracle : int
        Number of speaker (None, if it has to be estimated).
    p_val : float
        `pval` for pruning affinity matrix. Used only when number of speakers
        are unknown. Note that this is just for experiment. Prefer Spectral clustering
        for better clustering results.
    r   )AgglomerativeClusteringNcosineward)r   r   linkage)r   r   r  distance_thresholdrr   r    r   c                 S   r   r   r   r   r   r   r   r     r   zdo_AHC.<locals>.<lambda>r   )sklearn.clusterr  
norm_stat1fitr   r   r   rL   ra   r%   r   r   r   r  rT   rV   r^   )r  r[   r1   r   r   r  r   
clusteringr   r  rN   rR   r  r  r  r	  r
  r0   r   r   r   do_AHCl  sH   r  )r   TT)r   NNr   )rY   r  )-r   r<   r   r   numpyrd   r   r   scipy.sparse.csgraphr   r   r   scipy.sparse.linalgr   r   r   r   r  r   sklearn.cluster._kmeansr   sklearn.neighborsr	   ImportErrorerr_msgr   r3   rF   rI   rT   rV   r^   rv   ry   r   r   r   r   r   r   r   r   r  r  r  r   r   r   r   <module>   st    !4N%*-*
\
<)  G
P