o
    pi=$                     @   s   d dl mZmZmZmZmZ d dlZd dlm	Z	m
Z
mZ d dlmZ d dlmZ d dlmZ d dlmZ 			ddee d	ee d
ee fddZG dd dZdS )    )DictMappingOptionalTupleUnionN)
AnnotationSlidingWindowSlidingWindowFeature)Label)DiarizationErrorRate)	InferenceBinarizenum_speakersmin_speakersmax_speakersc                 C   sR   | p|pd}| p|pt j}||krtd|dd|dd||kr$|} | ||fS )av  Validate number of speakers

    Parameters
    ----------
    num_speakers : int, optional
        Number of speakers.
    min_speakers : int, optional
        Minimum number of speakers.
    max_speakers : int, optional
        Maximum number of speakers.

    Returns
    -------
    num_speakers : int or None
    min_speakers : int
    max_speakers : int or np.inf
       zQmin_speakers must be smaller than (or equal to) max_speakers (here: min_speakers=gz and max_speakers=z).)npinf
ValueErrorr   r   r    r   ^/home/ubuntu/.local/lib/python3.10/site-packages/pyannote/audio/pipelines/utils/diarization.pyset_num_speakers"   s   
r   c                   @   s   e Zd ZdZe			d!dee dee dee fddZe	d"d	ee	e
f d
e
dedee
ee
eeef f f fddZe	d#dededeeef defddZe		d$dededede
fddZedededefddZdd  ZdS )%SpeakerDiarizationMixinzBDefines a bunch of methods common to speaker diarization pipelinesNr   r   r   c                 C   s   t | ||dS )a  Validate number of speakers

        Parameters
        ----------
        num_speakers : int, optional
            Number of speakers.
        min_speakers : int, optional
            Minimum number of speakers.
        max_speakers : int, optional
            Maximum number of speakers.

        Returns
        -------
        num_speakers : int or None
        min_speakers : int
        max_speakers : int or np.inf
        r   )r   r   r   r   r   r   K   s
   z(SpeakerDiarizationMixin.set_num_speakersF	reference
hypothesisreturn_mappingreturnc                 C   sZ   t | tr| d } d| v r| d nd}nd}t j| ||d}|j|d}|r+||fS |S )a  Find the optimal bijective mapping between reference and hypothesis labels

        Parameters
        ----------
        reference : Annotation or Mapping
            Reference annotation. Can be an Annotation instance or
            a mapping with an "annotation" key.
        hypothesis : Annotation
            Hypothesized annotation.
        return_mapping : bool, optional
            Return the label mapping itself along with the mapped annotation. Defaults to False.

        Returns
        -------
        mapped : Annotation
            Hypothesis mapped to reference speakers.
        mapping : dict, optional
            Mapping between hypothesis (key) and reference (value) labels
            Only returned if `return_mapping` is True.
        
annotation	annotatedN)uem)mapping)
isinstancer   r   optimal_mappingrename_labels)r   r   r   r!   r#   mapped_hypothesisr   r   r   r%   h   s   
z'SpeakerDiarizationMixin.optimal_mapping皙?r)   binarized_segmentationsframeswarm_upc                 C   sH   t j| |d}t jtj|ddd|dddd}t|jtj|_|S )a  Estimate frame-level number of instantaneous speakers

        Parameters
        ----------
        binarized_segmentations : SlidingWindowFeature
            (num_chunks, num_frames, num_classes)-shaped binarized scores.
        warm_up : (float, float) tuple, optional
            Left/right warm up ratio of chunk duration.
            Defaults to (0.1, 0.1), i.e. 10% on both sides.
        frames : SlidingWindow
            Frames resolution. Defaults to estimate it automatically based on
            `segmentations` shape and chunk size. Providing the exact frame
            resolution (when known) leads to better temporal precision.

        Returns
        -------
        count : SlidingWindowFeature
            (num_frames, 1)-shaped instantaneous speaker count
        )r,   T)axiskeepdimsF        hammingmissingskip_average)	r   trim	aggregater   sumrintdataastypeuint8)r*   r+   r,   trimmedcountr   r   r   speaker_count   s   z%SpeakerDiarizationMixin.speaker_countr0   discrete_diarizationmin_duration_onmin_duration_offc                 C   s   t dd||d}|| S )a  

        Parameters
        ----------
        discrete_diarization : SlidingWindowFeature
            (num_frames, num_speakers)-shaped discrete diarization
        min_duration_on : float, optional
            Defaults to 0.
        min_duration_off : float, optional
            Defaults to 0.

        Returns
        -------
        continuous_diarization : Annotation
            Continuous diarization, with speaker labels as integers,
            corresponding to the speaker indices in the discrete diarization.
        g      ?)onsetoffsetr@   rA   r   )r?   r@   rA   binarizer   r   r   to_annotation   s   z%SpeakerDiarizationMixin.to_annotationsegmentationsr=   c                 C   s   t j| |jdddd}|jj\}}t|j}||k r)t|jdd|| ff|_|j|j@ }|j	|dd}|j	|dd}tj
| dd	}t|j}tt||D ]\}	\\}}
}t|
 D ]
}d
||	|| f< q`qRt||jS )a  Build diarization out of preprocessed segmentation and precomputed speaker count

        Parameters
        ----------
        segmentations : SlidingWindowFeature
            (num_chunks, num_frames, num_speakers)-shaped segmentations
        count : SlidingWindow_feature
            (num_frames, 1)-shaped speaker count

        Returns
        -------
        discrete_diarization : SlidingWindowFeature
            Discrete (0s and 1s) diarization.
        Fr0   Tr1   )r   r   r   )return_datar-   )r.   g      ?)r   r6   sliding_windowr9   shaper   maxpadextentcropargsort
zeros_like	enumerateziprangeitemr	   )rF   r=   activations_r   max_speakers_per_framerL   sorted_speakersbinarytcspeakersir   r   r   to_diarization   s.   	z&SpeakerDiarizationMixin.to_diarizationc                 c   s     d}	 d|dV  |d7 }q)Nr   TSPEAKER_02dr   r   )selfspeakerr   r   r   classes  s   zSpeakerDiarizationMixin.classesNNN)F)r(   )r0   r0   )__name__
__module____qualname____doc__staticmethodr   intr   r   r   r   boolr   r   r
   r%   r	   r   floatr>   rE   r]   rb   r   r   r   r   r   H   sp    
,
% 1r   rc   )typingr   r   r   r   r   numpyr   pyannote.corer   r   r	   pyannote.core.utils.typesr
   pyannote.metrics.diarizationr   pyannote.audio.core.inferencer   pyannote.audio.utils.signalr   ri   r   r   r   r   r   r   <module>   s$   
&