o
    pi!                     @   sd   d dl Zd dlmZ d dlmZmZ 												ddd	ZdddZdd Z	dd Z
dS )    N)eigh)	logsumexpsoftmax      ?
   -C6?Fc                 C   sZ  	 | j d }t|tu rt|| }|du r/tjj|| j d t|fd}||jddd }|j d t|krB|j d | j d ksDJ dtj| d ddd	|t	dtj
    }t|}| | }g }d}t|D ]}|dksy|du sy|du rd
d|| |jddd	j |   }|| | |j| }|||jd||d  |  |  }d}t	|| }t|| dd}tj|dd}t|| |dddf  }tj|dd}||  }||d tt	|| |d  d   }||g |dkr||d d  |k r||d d  dk rtd  nqk|||f|r*||f S d S )a  
    Inputs:
    X           - T x D array, where columns are D dimensional feature vectors
                  (e.g. x-vectors) for T frames
    Phi         - D array with across-class covariance matrix diagonal.
                  The model assumes zero mean, diagonal across-class and
                  identity within-class covariance matrix.
    Fa          - Scale sufficient statistics
    Fb          - Speaker regularization coefficient Fb controls the final number of speakers
    pi          - If integer value, it sets the maximum number of speakers
                  that can be found in the utterance.
                  If vector, it is the initialization for speaker priors (see Outputs: pi)
    gamma       - An initialization for the matrix of responsibilities (see Outputs: gamma)
    maxIters    - The maximum number of VB iterations
    epsilon     - Stop iterating, if the obj. fun. improvement is less than epsilon
    alphaQInit  - Dirichlet concentraion parameter for initializing gamma
    ref         - T dim. integer vector with per frame reference speaker IDs (0:maxSpeakers)
    plot        - If set to True, plot per-frame marginal speaker posteriors 'gamma'
    return_model- Return also speaker model parameter
    alpha, invL - If provided, these are speaker model parameters used in the first iteration

    Outputs:
    gamma       - S x T matrix of responsibilities (marginal posteriors)
                  attributing each frame to one of S possible speakers
                  (S is defined by input parameter pi)
    pi          - S dimensional column vector of ML learned speaker priors.
                  This allows us to estimate the number of speaker in the
                  utterance as the probabilities of the redundant speaker
                  converge to zero.
    Li          - Values of auxiliary function (and DER and frame cross-entropy
                  between gamma and reference, if 'ref' is provided) over iterations.
    alpha, invL - Speaker model parameters returned only if return_model=True

    Reference:
      Landini F., Profant J., Diez M., Burget L.: Bayesian HMM clustering of
      x-vector sequences (VBx) in speaker diarization: theory, implementation
      and analysis on standard tasks
       Nr   )sizeT)keepdimsg         )axisr
   r   g      ?g:0yE>r   z3WARNING: Value of auxiliary function has decreased! )shapetypeintnponesrandomgammalensumlogpisqrtrangeTdotr   expappendprint)XPhiFaFbr   r   maxItersepsilon
alphaQInitrefplotreturn_modelalphainvLDGVrhoLiELBOiilog_p_epslpilog_p_xlog_pX_r   r   L/home/ubuntu/.local/lib/python3.10/site-packages/pyannote/audio/utils/vbx.pyVBx   sR   6
*&
$, r<            @c              
   C   s   t t| |  d f}d|tt| | tf< |dk r |nt|| dd}t|||||j	d ||dd\}}	}
}
}
||	fS )zahc_init (T x N_clusters)r   r   r   r   T)r%   r&   r   r   r'   r,   )
r   zerosr   maxr   astyper   r   r<   r   )ahc_initfear$   r%   r&   r'   init_smoothingqinitr   r   _r   r   r;   cluster_vbx   s   
rG   c                 C   sb   t | jdkr| tj|  S t | jdkr(| tjj| dddddtjf  S tdt | j )zL2 normalization of vector array.

    Args:
        vec_or_matrix (np.array): one vector or array of vectors

    Returns:
        np.array: normalized vector or array of normalized vectors
    r   r   )r   ordNz8Wrong number of dimensions, 1 or 2 is supported, not %i.)r   r   r   linalgnormnewaxis
ValueError)vec_or_matrixr   r   r;   l2_norm   s   	"rN   c                    s   t | }|d |d |d  t |}|d |d |d }t jj}t jj| }t||\}}|ddd }|jddd  fd	d
}	 jd ffdd
	}
|	|
|fS )a'  
    Loads the transformation pipeline for x-vectors into the PLDA space for VBx.

    Args:
        transform_npz (str | Path): Path to the x-vector transformation file.
        plda_npz (str | Path): Path to the PLDA file.

    Returns:
        xvec_tf (function): Transformation function to preprocess x-vectors (centering, whitening, LDA).
        plda_tf (function): Transformation function to map x-vectors into the PLDA latent space.
        plda_psi (np.ndarray): Eigenvalues of the between-class covariance in the PLDA space (reordered).
    mean1mean2ldamutrpsiNr   c                    s@   t  jd t jt  jd t|  j j  S )Nr   r   )r   r   r   rN   r   r   )x)rQ   rO   rP   r   r;   <lambda>   s    *zvbx_setup.<locals>.<lambda>r   c                    s    |    jd d d |f S )N)r   r   )x0lda_dim)plda_muplda_trr   r;   rV      s    )r   loadrI   invr   r   r   r   )transform_npzplda_npzrU   pplda_psiWBacvarwccnxvec_tfplda_tfr   )rQ   rO   rP   rY   rZ   r;   	vbx_setup   s   


rg   )r   r   r   Nr   r   r   NFFNN)r=   r>   )numpyr   scipy.linalgr   scipy.specialr   r   r<   rG   rN   rg   r   r   r   r;   <module>   s&   

q