o
    piy                     @   s   d Z ddlmZ ddlmZ ddlmZmZmZm	Z	 ddl
ZddlmZmZ ddlmZmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZmZ ddlmZ G dd deZG dd deZ dS )z"Voice activity detection pipelines    )partial)Path)CallableOptionalTextUnionN)
AnnotationSlidingWindowFeature)DetectionErrorRate DetectionPrecisionRecallFMeasure)Uniform)	Inference)	AudioFile)Pipeline)PipelineModel	get_model)Binarizec                   @   s&   e Zd ZdZededefddZdS )OracleVoiceActivityDetectionz(Oracle voice activity detection pipelinefilereturnc                 C   s   | d    }|jdddS )a  Return groundtruth voice activity detection

        Parameter
        ---------
        file : AudioFile
            Must provide a "annotation" key.

        Returns
        -------
        hypothesis : `pyannote.core.Annotation`
            Speech regions
        
annotationstringspeech)	generatormodality)get_timelinesupportto_annotation)r   r    r   e/home/ubuntu/.local/lib/python3.10/site-packages/pyannote/audio/pipelines/voice_activity_detection.pyapply/   s   z"OracleVoiceActivityDetection.applyN)__name__
__module____qualname____doc__staticmethodr   r   r    r   r   r   r   r   ,   s    r   c                       s   e Zd ZdZ				ddededeedf deeedf f fd	d
Z	dd Z
dd Zdd ZdZddedee defddZdeeef fddZdd Z  ZS )VoiceActivityDetectiona  Voice activity detection pipeline

    Parameters
    ----------
    segmentation : Model, str, or dict, optional
        Pretrained segmentation (or voice activity detection) model.
        Defaults to "pyannote/segmentation".
        See pyannote.audio.pipelines.utils.get_model for supported format.
    fscore : bool, optional
        Optimize (precision/recall) fscore. Defaults to optimizing detection
        error rate.
    token : str or bool, optional
        Huggingface token to be used for downloading from Huggingface hub.
    cache_dir: Path or str, optional
        Path to the folder where files downloaded from Huggingface hub are stored.
    inference_kwargs : dict, optional
        Keywords arguments passed to Inference.

    Hyper-parameters
    ----------------
    onset, offset : float
        Onset/offset detection thresholds
    min_duration_on : float
        Remove speech regions shorter than that many seconds.
    min_duration_off : float
        Fill non-speech regions shorter than that many seconds.
    pyannote/segmentationFNsegmentationfscoretoken	cache_dirc                    s   t    || _|| _t|||d}dd |d< t|fi || _|jjr,d | _	| _
ntdd| _	tdd| _
tdd| _tdd| _d S )N)r*   r+   c                 S   s   t j| dddS )NT)axiskeepdims)npmax)scoresr   r   r   <lambda>o   s    z1VoiceActivityDetection.__init__.<locals>.<lambda>pre_aggregation_hookg      ?        g      ?)super__init__r(   r)   r   r   _segmentationspecificationspowersetonsetoffsetr   min_duration_onmin_duration_off)selfr(   r)   r*   r+   inference_kwargsmodel	__class__r   r   r6   _   s   
zVoiceActivityDetection.__init__c                 C   s2   | j dkrdddddS | j dkrddd	S t )
Nr'   g%C?gT㥛 ?g rh?gx&?r:   r;   r<   r=   zpyannote/segmentation-3.0.0r4   )r<   r=   )r(   NotImplementedErrorr>   r   r   r   default_parameters   s   

z)VoiceActivityDetection.default_parametersc                 C   s   dgS )NSPEECHr   rE   r   r   r   classes   s   zVoiceActivityDetection.classesc                 C   s   t | j| j| j| jd| _dS )z2Initialize pipeline with current set of parametersrC   N)r   r:   r;   r<   r=   	_binarizerE   r   r   r   
initialize   s   z!VoiceActivityDetection.initializezcache/segmentation/inferencer   hookr   c                 C   s   | j ||d}| jr&| j|v r|| j }n| j|t|ddd}||| j< n| j|t|ddd}|d| | |}|d |_|dd | D S )a  Apply voice activity detection

        Parameters
        ----------
        file : AudioFile
            Processed file.
        hook : callable, optional
            Callback called after each major steps of the pipeline as follows:
                hook(step_name,      # human-readable name of current step
                     step_artefact,  # artifact generated by current step
                     file=file)      # file being processed
            Time-consuming steps call `hook` multiple times with the same `step_name`
            and additional `completed` and `total` keyword arguments usable to track
            progress of current step.

        Returns
        -------
        speech : Annotation
            Speech regions.
        )rK   r(   Nuric                 S   s   i | ]}|d qS )rG   r   ).0labelr   r   r   
<dictcomp>   s    z0VoiceActivityDetection.apply.<locals>.<dictcomp>)	
setup_hooktrainingCACHED_SEGMENTATIONr7   r   rI   rL   rename_labelslabels)r>   r   rK   segmentationsr   r   r   r   r       s   



zVoiceActivityDetection.applyc                 C   s   | j r	tdddS tdddS )z'Return new instance of detection metricr4   F)collarskip_overlap)r)   r   r
   rE   r   r   r   
get_metric   s   z!VoiceActivityDetection.get_metricc                 C   s   | j rdS dS )Nmaximizeminimize)r)   rE   r   r   r   get_direction   s   z$VoiceActivityDetection.get_direction)r'   FNN)N)r!   r"   r#   r$   r   boolr   r   r   r6   rF   rH   rJ   rR   r   r   r   r   r    r
   r   rX   r[   __classcell__r   r   rA   r   r&   B   s,    
!
.r&   )!r$   	functoolsr   pathlibr   typingr   r   r   r   numpyr/   pyannote.corer   r	   pyannote.metrics.detectionr
   r   pyannote.pipeline.parameterr   pyannote.audior   pyannote.audio.core.ior   pyannote.audio.core.pipeliner   pyannote.audio.pipelines.utilsr   r   pyannote.audio.utils.signalr   r   r&   r   r   r   r   <module>   s   