o
    9wi!                     @   sz   d dl mZmZmZmZmZ d dlZd dlm	Z	m
Z
mZ d dlmZ d dlmZ d dlmZ d dlmZ G dd	 d	ZdS )
    )DictMappingOptionalTupleUnionN)
AnnotationSlidingWindowSlidingWindowFeature)Label)DiarizationErrorRate)	Inference)Binarizec                   @   s   e Zd ZdZe			d!dee dee dee fddZe	d"d	ee	e
f d
e
dedee
ee
eeef f f fddZe	d#dededeeef defddZe		d$dededede
fddZedededefddZdd  ZdS )%SpeakerDiarizationMixinzBDefines a bunch of methods common to speaker diarization pipelinesNnum_speakersmin_speakersmax_speakersc                 C   sR   | p|pd}| p|pt j}||krtd|dd|dd||kr$|} | ||fS )a  Validate number of speakers

        Parameters
        ----------
        num_speakers : int, optional
            Number of speakers.
        min_speakers : int, optional
            Minimum number of speakers.
        max_speakers : int, optional
            Maximum number of speakers.

        Returns
        -------
        num_speakers : int or None
        min_speakers : int
        max_speakers : int or np.inf
           zQmin_speakers must be smaller than (or equal to) max_speakers (here: min_speakers=gz and max_speakers=z).)npinf
ValueError)r   r   r    r   g/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/pyannote/audio/pipelines/utils/diarization.pyset_num_speakers&   s   
z(SpeakerDiarizationMixin.set_num_speakersF	reference
hypothesisreturn_mappingreturnc                 C   sZ   t | tr| d } d| v r| d nd}nd}t j| ||d}|j|d}|r+||fS |S )a  Find the optimal bijective mapping between reference and hypothesis labels

        Parameters
        ----------
        reference : Annotation or Mapping
            Reference annotation. Can be an Annotation instance or
            a mapping with an "annotation" key.
        hypothesis : Annotation
            Hypothesized annotation.
        return_mapping : bool, optional
            Return the label mapping itself along with the mapped annotation. Defaults to False.

        Returns
        -------
        mapped : Annotation
            Hypothesis mapped to reference speakers.
        mapping : dict, optional
            Mapping between hypothesis (key) and reference (value) labels
            Only returned if `return_mapping` is True.
        
annotation	annotatedN)uem)mapping)
isinstancer   r   optimal_mappingrename_labels)r   r   r   r   r!   mapped_hypothesisr   r   r   r#   L   s   
z'SpeakerDiarizationMixin.optimal_mapping皙?r'   binarized_segmentationsframeswarm_upc                 C   sH   t j| |d}t jtj|ddd|dddd}t|jtj|_|S )a  Estimate frame-level number of instantaneous speakers

        Parameters
        ----------
        binarized_segmentations : SlidingWindowFeature
            (num_chunks, num_frames, num_classes)-shaped binarized scores.
        warm_up : (float, float) tuple, optional
            Left/right warm up ratio of chunk duration.
            Defaults to (0.1, 0.1), i.e. 10% on both sides.
        frames : SlidingWindow
            Frames resolution. Defaults to estimate it automatically based on
            `segmentations` shape and chunk size. Providing the exact frame
            resolution (when known) leads to better temporal precision.

        Returns
        -------
        count : SlidingWindowFeature
            (num_frames, 1)-shaped instantaneous speaker count
        )r*   T)axiskeepdimsF        hammingmissingskip_average)	r   trim	aggregater   sumrintdataastypeuint8)r(   r)   r*   trimmedcountr   r   r   speaker_county   s   z%SpeakerDiarizationMixin.speaker_countr.   discrete_diarizationmin_duration_onmin_duration_offc                 C   s    t dd||d}|| jddS )a  

        Parameters
        ----------
        discrete_diarization : SlidingWindowFeature
            (num_frames, num_speakers)-shaped discrete diarization
        min_duration_on : float, optional
            Defaults to 0.
        min_duration_off : float, optional
            Defaults to 0.

        Returns
        -------
        continuous_diarization : Annotation
            Continuous diarization, with speaker labels as integers,
            corresponding to the speaker indices in the discrete diarization.
        g      ?)onsetoffsetr>   r?   string)	generator)r   rename_tracks)r=   r>   r?   binarizer   r   r   to_annotation   s   z%SpeakerDiarizationMixin.to_annotationsegmentationsr;   c                 C   s   t j| |jdddd}|jj\}}t|j}||k r)t|jdd|| ff|_|j|j@ }|j	|dd}|j	|dd}tj
| dd	}t|j}tt||D ]\}	\\}}
}t|
 D ]
}d
||	|| f< q`qRt||jS )a  Build diarization out of preprocessed segmentation and precomputed speaker count

        Parameters
        ----------
        segmentations : SlidingWindowFeature
            (num_chunks, num_frames, num_speakers)-shaped segmentations
        count : SlidingWindow_feature
            (num_frames, 1)-shaped speaker count

        Returns
        -------
        discrete_diarization : SlidingWindowFeature
            Discrete (0s and 1s) diarization.
        Fr.   Tr/   )r   r   r   )return_datar+   )r,   g      ?)r   r4   sliding_windowr7   shaper   maxpadextentcropargsort
zeros_like	enumerateziprangeitemr	   )rG   r;   activations_r   max_speakers_per_framerM   sorted_speakersbinarytcspeakersir   r   r   to_diarization   s.   	z&SpeakerDiarizationMixin.to_diarizationc                 c   s     d}	 d|dV  |d7 }q)Nr   TSPEAKER_02dr   r   )selfspeakerr   r   r   classes   s   zSpeakerDiarizationMixin.classes)NNN)F)r&   )r.   r.   )__name__
__module____qualname____doc__staticmethodr   intr   r   r   r   boolr   r   r
   r#   r	   r   floatr<   rF   r^   rc   r   r   r   r   r   #   sp    %
,
% 1r   )typingr   r   r   r   r   numpyr   pyannote.corer   r   r	   pyannote.core.utils.typesr
   pyannote.metrics.diarizationr   pyannote.audio.core.inferencer   pyannote.audio.utils.signalr   r   r   r   r   r   <module>   s   