o
    9wi*                     @   s   d Z ddlmZ ddlmZmZmZmZ ddlZ	ddl
mZmZmZ ddlmZ ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZmZ ddlmZ dedefddZG dd deZ G dd deZ!dS )z%Overlapped speech detection pipelines    )partial)CallableOptionalTextUnionN)
AnnotationSlidingWindowFeatureTimeline)get_annotated) DetectionPrecisionRecallFMeasure)Uniform)	Inference)	AudioFile)Pipeline)PipelineModel	get_model)Binarize
annotationreturnc                 C   sj   t | jd}| | D ] \\}}\}}| ||f }| ||f }||kr$q|||@  q| jdddS )zGet overlapped speech regions

    Parameters
    ----------
    annotation : Annotation
        Speaker annotation.

    Returns
    -------
    overlap : Annotation
        Overlapped speech annotation.
    )uristringoverlap)	generatormodality)r	   r   co_iteraddsupportto_annotation)r   r   s1t1s2t2l1l2 r$   q/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/pyannote/audio/pipelines/overlapped_speech_detection.py
to_overlap)   s   r&   c                   @   s"   e Zd ZdZdedefddZdS )OracleOverlappedSpeechDetectionz+Oracle overlapped speech detection pipelinefiler   c                 C   s   t |d S )a
  Return groundtruth overlapped speech detection

        Parameter
        ---------
        file : AudioFile
            Must provide a "annotation" key.

        Returns
        -------
        hypothesis : Annotation
            Overlapped speech regions.
        r   )r&   )selfr(   r$   r$   r%   applyD   s   z%OracleOverlappedSpeechDetection.applyN)__name__
__module____qualname____doc__r   r   r*   r$   r$   r$   r%   r'   A   s    r'   c                       s   e Zd ZdZ				ddedee dee deedf f fdd	Z	d
d Z
dd Zdd ZdZddedee defddZdefddZdededefddZdd Z  ZS )OverlappedSpeechDetectiona  Overlapped speech detection pipeline

    Parameters
    ----------
    segmentation : Model, str, or dict, optional
        Pretrained segmentation (or overlapped speech detection) model.
        Defaults to "pyannote/segmentation".
        See pyannote.audio.pipelines.utils.get_model for supported format.
    precision : float, optional
        Optimize recall at target precision.
        Defaults to optimize precision/recall fscore.
    recall : float, optional
        Optimize precision at target recall
        Defaults to optimize precision/recall fscore
    use_auth_token : str, optional
        When loading private huggingface.co models, set `use_auth_token`
        to True or to a string containing your hugginface.co authentication
        token that can be obtained by running `huggingface-cli login`
    inference_kwargs : dict, optional
        Keywords arguments passed to Inference.

    Hyper-parameters
    ----------------
    onset, offset : float
        Onset/offset detection thresholds
    min_duration_on : float
        Remove speech regions shorter than that many seconds.
    min_duration_off : float
        Fill non-speech regions shorter than that many seconds.
    pyannote/segmentationNsegmentation	precisionrecalluse_auth_tokenc                    s   t    || _t||d}|jdkrdd |d< t|fi || _|jjr-d | _	| _
ntdd| _	tdd| _
tdd| _tdd| _|d urQ|d urQtd	|| _|| _d S )
N)r4      c                 S   s&   t j| dddd d d d dt jf S )N)axis)np	partitionnewaxis)scoresr$   r$   r%   <lambda>   s
    z4OverlappedSpeechDetection.__init__.<locals>.<lambda>pre_aggregation_hookg      ?        g      ?zIOne must choose between optimizing for target precision or target recall.)super__init__r1   r   	dimensionr   _segmentationspecificationspowersetonsetoffsetr   min_duration_onmin_duration_off
ValueErrorr2   r3   )r)   r1   r2   r3   r4   inference_kwargsmodel	__class__r$   r%   rA   t   s$   


z"OverlappedSpeechDetection.__init__c                 C   s2   | j dkrdddddS | j dkrddd	S t )
Nr0   gQ?g{Gz?gK?g;On?rF   rG   rH   rI   zpyannote/segmentation-3.0.0r?   )rH   rI   )r1   NotImplementedErrorr)   r$   r$   r%   default_parameters   s   

z,OverlappedSpeechDetection.default_parametersc                 C   s   dgS )NOVERLAPr$   rQ   r$   r$   r%   classes   s   z!OverlappedSpeechDetection.classesc                 C   s   t | j| j| j| jd| _dS )z2Initialize pipeline with current set of parametersrO   N)r   rF   rG   rH   rI   	_binarizerQ   r$   r$   r%   
initialize   s   z$OverlappedSpeechDetection.initializezcache/segmentation/inferencer(   hookr   c                 C   s   | j ||d}| jr&| j|v r|| j }n| j|t|ddd}||| j< n| j|t|ddd}|d| | |}|d |_|dd | D S )a  Apply overlapped speech detection

        Parameters
        ----------
        file : AudioFile
            Processed file.
        hook : callable, optional
            Callback called after each major steps of the pipeline as follows:
                hook(step_name,      # human-readable name of current step
                     step_artefact,  # artifact generated by current step
                     file=file)      # file being processed
            Time-consuming steps call `hook` multiple times with the same `step_name`
            and additional `completed` and `total` keyword arguments usable to track
            progress of current step.

        Returns
        -------
        overlapped_speech : Annotation
            Overlapped speech regions.
        )rW   r1   Nr   c                 S   s   i | ]}|d qS )rS   r$   ).0labelr$   r$   r%   
<dictcomp>   s    z3OverlappedSpeechDetection.apply.<locals>.<dictcomp>)	
setup_hooktrainingCACHED_SEGMENTATIONrC   r   rU   r   rename_labelslabels)r)   r(   rW   segmentationsoverlapped_speechr$   r$   r%   r*      s"   



zOverlappedSpeechDetection.applyc                 K   s2   | j dus
| jdurtdG dd dt}| S )zGet overlapped speech detection metric

        Returns
        -------
        metric : DetectionPrecisionRecallFMeasure
            Detection metric.
        Nz4pyannote.pipeline should use `loss` method fallback.c                	       s6   e Zd Z	ddededee def fddZ  ZS )	z5OverlappedSpeechDetection.get_metric.<locals>._MetricN	reference
hypothesisuemr   c                    s   t  jt||fd|i|S )Nrd   )r@   compute_componentsr&   )_selfrb   rc   rd   kwargsrM   r$   r%   re      s   zHOverlappedSpeechDetection.get_metric.<locals>._Metric.compute_componentsN)	r+   r,   r-   r   r   r	   dictre   __classcell__r$   r$   rM   r%   _Metric   s    rk   )r2   r3   rP   r   )r)   rg   rk   r$   r$   r%   
get_metric   s   	z$OverlappedSpeechDetection.get_metricrc   c           	      C   s   t  }d|v r|d }n|d }t|}||d< |||t|d}| \}}}| jdur9|| jk r7|| j S |S | jdurJ|| jk rH|| j S |S dS )a  Compute recall at target precision (or vice versa)

        Parameters
        ----------
        file : AudioFile
            Processed file.
        hypothesis : Annotation
            Hypothesized overlapped speech regions.

        Returns
        -------
        recall (or purity) : float
            When optimizing for target precision:
                If precision < target_precision, returns (precision - target_precision).
                If precision > target_precision, returns recall.
            When optimizing for target recall:
                If recall < target_recall, returns (recall - target_recall).
                If recall > target_recall, returns precision.
        overlap_referencer   )rd   N)r   r&   r
   compute_metricsr2   r3   )	r)   r(   rc   fmeasurerm   rb   _r2   r3   r$   r$   r%   loss  s"   






zOverlappedSpeechDetection.lossc                 C   s   dS )Nmaximizer$   rQ   r$   r$   r%   get_direction9  s   z'OverlappedSpeechDetection.get_direction)r0   NNNrh   )r+   r,   r-   r.   r   r   floatr   r   rA   rR   rT   rV   r]   r   r   r   r*   r   rl   rq   rs   rj   r$   r$   rM   r%   r/   T   s.    !
*
0.r/   )"r.   	functoolsr   typingr   r   r   r   numpyr9   pyannote.corer   r   r	   pyannote.databaser
   pyannote.metrics.detectionr   pyannote.pipeline.parameterr   pyannote.audior   pyannote.audio.core.ior   pyannote.audio.core.pipeliner   pyannote.audio.pipelines.utilsr   r   pyannote.audio.utils.signalr   r&   r'   r/   r$   r$   r$   r%   <module>   s    