o
    9wi)                     @   s   d Z ddlmZ ddlmZmZmZmZ ddlZ	ddl
mZmZmZ ddlmZ ddlmZ ddlmZmZ dd	lmZ dd
lmZ ddlmZmZmZ ddlmZmZ ddl m!Z! G dd deeZ"dS )zResegmentation pipeline    )partial)CallableOptionalTextUnionN)
AnnotationSegmentSlidingWindowFeature)GreedyDiarizationErrorRate)Uniform)	InferenceModel)	AudioFile)Pipeline)PipelineModelSpeakerDiarizationMixin	get_model)mae_cost_func	permutate)binarizec                       s   e Zd ZdZ				ddededee deedf f fdd	Z	d
d Z
dd ZdZ		ddedee dee defddZdefddZ  ZS )Resegmentationa  Resegmentation pipeline

    This pipeline relies on a pretrained segmentation model to improve an existing diarization
    hypothesis. Resegmentation is done locally by sliding the segmentation model over the whole
    file. For each position of the sliding window, we find the optimal mapping between the input
    diarization and the output of the segmentation model and permutate the latter accordingly.
    Permutated local segmentations scores are then aggregated over time and postprocessed using
    hysteresis thresholding.

    It can also be used with `diarization` set to "annotation" to find a good estimate of optimal
    values for `onset`, `offset`, `min_duration_on`, and `min_duration_off` for any speaker
    diarization pipeline based on the `segmentation` model.

    Parameters
    ----------
    segmentation : Model, str, or dict, optional
        Pretrained segmentation model. Defaults to "pyannote/segmentation".
        See pyannote.audio.pipelines.utils.get_model for supported format.
    diarization : str, optional
        File key to use as input diarization. Defaults to "diarization".
    der_variant : dict, optional
        Optimize for a variant of diarization error rate.
        Defaults to {"collar": 0.0, "skip_overlap": False}. This is used in `get_metric`
        when instantiating the metric: GreedyDiarizationErrorRate(**der_variant).
    use_auth_token : str, optional
        When loading private huggingface.co models, set `use_auth_token`
        to True or to a string containing your hugginface.co authentication
        token that can be obtained by running `huggingface-cli login`

    Hyper-parameters
    ----------------
    onset, offset : float
        Onset/offset detection thresholds
    min_duration_on : float
        Remove speaker turn shorter than that many seconds.
    min_duration_off : float
        Fill same-speaker gaps shorter than that many seconds.
    pyannote/segmentationdiarizationNsegmentationder_variantuse_auth_tokenc                    s   t    || _|| _t||d}t|| _|j| _t	|j
j| _|p'ddd| _tdd| _tdd| _tdd| _tdd| _tdd| _d S )N)r           F)collarskip_overlapg?g      ?)super__init__r   r   r   r   _segmentationaudio_audiolenspecificationsclasses_num_speakersr   r   warm_uponsetoffsetmin_duration_onmin_duration_off)selfr   r   r   r   model	__class__ d/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/pyannote/audio/pipelines/resegmentation.pyr    U   s   

zResegmentation.__init__c                 C   s    | j dkrddddddS t )Nr   g?gQ?gbX9?g)\(?gJ+?)r(   r)   r*   r+   r,   )r   NotImplementedErrorr-   r1   r1   r2   default_parametersw   s   
z!Resegmentation.default_parametersc                 C   s   t  )N)r3   r4   r1   r1   r2   r&      s   zResegmentation.classeszcache/segmentation/inferencefilehookreturnc                 C   s:  | j ||d}| jr&| j|v r|| j }n| j|t|ddd}||| j< n| j|t|ddd}|d| t|| j| jdd}| j|| jj	j
| j| jfd}|d| |pZ|| j }|jtd| j|| jj | jj	j
d	}|d
| tj|| j| jfd}|d| |jj\}}|| jkrt|jddd|| j ff|_n|| jk rt|jdd| j| ff|_| j}t|jtj}	|	j\}}
}t|D ]\}\}}||tjd|
f }t||td\\|	|< }qt |	|j!}	|d|	 | "|	|}| j#|| j$| j%d}|d |_&d|v r|d r| '|d |}|S )aa  Apply speaker diarization

        Parameters
        ----------
        file : AudioFile
            Processed file.
        diarization : Annotation, optional
            Input diarization. Defaults to file[self.diarization].
        hook : callable, optional
            Callback called after each major steps of the pipeline as follows:
                hook(step_name,      # human-readable name of current step
                     step_artefact,  # artifact generated by current step
                     file=file)      # file being processed
            Time-consuming steps call `hook` multiple times with the same `step_name`
            and additional `completed` and `total` keyword arguments usable to track
            progress of current step.

        Returns
        -------
        diarization : Annotation
            Speaker diarization
        )r7   r   NF)r)   r*   initial_state)r(   speaker_countingr   )support
resolutionz@resegmentation/originalz@resegmentation/trim)r   r   r   )	cost_funcz@resegmentation/permutated)r+   r,   uri
annotation)(
setup_hooktrainingCACHED_SEGMENTATIONr!   r   r   r)   r*   speaker_countr.   receptive_fieldr(   r   
discretizer   r#   get_durationstepr   trimdatashaper'   nppad	full_likeNAN	enumeratecropnewaxisr   r   r	   sliding_windowto_diarizationto_annotationr+   r,   r>   optimal_mapping)r-   r6   r   r7   segmentationsbinarized_segmentationscount_num_speakerspermutated_segmentations
num_framescchunkr   local_diarizationdiscrete_diarizationresegmentationr1   r1   r2   apply   s   









zResegmentation.applyc                 C   s   t di | jS )Nr1   )r
   r   r4   r1   r1   r2   
get_metric	  s   zResegmentation.get_metric)r   r   NN)NN)__name__
__module____qualname____doc__r   r   r   dictr   r    r5   r&   rB   r   r   r   rb   r
   rc   __classcell__r1   r1   r/   r2   r   -   s>    )
"
 r   )#rg   	functoolsr   typingr   r   r   r   numpyrK   pyannote.corer   r   r	   pyannote.metrics.diarizationr
   pyannote.pipeline.parameterr   pyannote.audior   r   pyannote.audio.core.ior   pyannote.audio.core.pipeliner   pyannote.audio.pipelines.utilsr   r   r    pyannote.audio.utils.permutationr   r   pyannote.audio.utils.signalr   r   r1   r1   r1   r2   <module>   s   