o
    pis                      @   s  d dl Z d dlZd dlZd dlZd dlmZ d dlmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZ d dlZd dlZd dlZd dlZd dlZd dlmZmZmZ d d	lmZ d d
lmZ d dlmZm Z  d dl!m"Z" d dl#m$Z$ d dl%m&Z& d dl'm(Z( G dd de)eZ*G dd de)eZ+G dd de)eZ,G dd de)eZ-de+dej.fddZ/defddZ0e1 Z2e23de*j4e+j5dde,j5e-jfde(eej6d d!d"d!d!d!d#f d$e(e)ej6d%d&f d'e(e*ej7d(d"d)f de(e+ej7d*d&f d+e(ee ej7d,d!d"d!d!d-f d.e(ee8 ej7d/d&f d0e(e,ej7d1d&f d2e(e-ej7d3d"d)f fd4dZ9e23d5			dXde(e)ej6d6d&f d7e(ee) ej7d8d&f d9e(ee) ej6d:d&f d;e(ee ej7d<d!d!d"d!d!d#f fd=d5Z:e23d>dddde+j5fde(e)ej6d6d&f d?e(eej6d@d!d!d!d!dAf dBe(ee ej7dCd"d!d!d!d!d#f d7e(ee) ej7d8d&f d9e(ee) ej6d:d&f d;e(ee ej7d<d!d!d"d!d!d#f de(e+ej7d*d&f fdDd>Z;G dEdF dFZ<e23dGe*j=ddde+j5de,j5d"d"d"f
de(e)ej6d6d&f d$e(e)ej6dHd&f dBe(eej6dId!d!d"d!d!d#f d'e(e*ej7dJd"d)f d7e(ee) ej7d8d&f d9e(ee) ej6d:d&f d;e(ee ej7d<d!d!d"d!d!d#f de(e+ej7d*d&f d+e(ee ej7d,d!d"d!d!d-f d0e(e,ej7d1d&f de(e>ej7dKd&f dLe(e>ej7dMd&f dNe(e>ej7dOd&f fdPdGZ?e23dQdRe(eej6dSd!d"d!d!dTf dBe(eej6dUd"d"d!d!d!d#f fdVdQZ@eAdWkre2  dS dS )Y    N)nullcontext)datetime)Enum)partial)Path)Optional)AudioModelPipeline)
Annotation)
BaseMetricDiarizationErrorRateJaccardErrorRate)	Optimizer)track)minimize_scalar)	Annotatedc                   @   s   e Zd ZdZdZdZdS )SubsettraindevelopmenttestN)__name__
__module____qualname__r   r   r    r   r   K/home/ubuntu/.local/lib/python3.10/site-packages/pyannote/audio/__main__.pyr   5   s    r   c                   @   s   e Zd ZdZdZdZdZdS )DevicecpucudampsautoN)r   r   r   CPUCUDAMPSAUTOr   r   r   r   r   ;   s
    r   c                   @   s   e Zd ZdZdZdS )NumSpeakersoracler!   N)r   r   r   ORACLEr%   r   r   r   r   r&   B   s    r&   c                   @   s&   e Zd ZdZdZedefddZdS )Metricr   r   metricc                 C   s    |dkrt  S |dkrt S dS )z(Convert a string to a Metric enum value.r   r   Nr   )clsr*   r   r   r   from_strK   s
   zMetric.from_strN)r   r   r   r   r   classmethodstrr,   r   r   r   r   r)   G   s
    r)   devicereturnc                 C   sB   | t jkrtj rt j} ntjj rt j} nt j	} t
| jS N)r   r%   torchr   is_availabler#   backendsr    r$   r"   r/   value)r/   r   r   r   parse_deviceU   s   

r6   c                 C   s&   t | tr| S t| dr| jS td)Nspeaker_diarizationz1Could not find speaker diarization in prediction.)
isinstancer   hasattrr7   
ValueError)
predictionr   r   r   get_diarizationc   s
   

r<   optimizepipelinez(Path to pipeline YAML configuration fileTF)helpexistsdir_okay	file_okaywritableresolve_pathprotocolzProtocol used for optimization)r?   subsetzSubset used for optimization)r?   case_sensitivez#Accelerator to use (CPU, CUDA, MPS)registryzLoaded registry)r?   r@   rA   rB   readablemax_iterationsz:Number of iterations to run. Defaults to run indefinitely.num_speakersz#Number of speakers (oracle or auto)r*   zMetric to optimize againstc              	      sV  t | d}tj|tjd}	W d   n1 sw   Y  t| }
|
du r4td|  d tjddt	|}|

| |rFtjj| dtj i}|tjkrXd	d
 |d< tjjj||d}tt||j } fdd}t||
|
_| d|j }|tjkr|d7 }| d}| d| d}t|
||dddd}|
 dkrdnd}|j}|}z|
 }W n ty   d}Y nw |j||d}t |D ]\\}}|d }|||  dk r|}||j }kr|d |	d< ||j|t!" # dd|	d< t |d}t$|	| W d   n	1 sw   Y  |}|r(|d |kr( dS qdS )z
    Optimize a PIPELINE
    r)LoaderNzCould not load pipeline from .   codeaudioc                 S      dt | d  iS NrK   
annotationlenlabelsprotocol_filer   r   r   <lambda>      zoptimize.<locals>.<lambda>pipeline_kwargspreprocessorsc                    s
   t  S r1   )r)   r,   )selfr*   r   r   _get_metric   s   
zoptimize.<locals>._get_metric.OracleNumSpeakersz.journalz.yamlF)db
study_namesamplerpruneraverage_caseminimize)
warm_startlossr   params)	best_losslast_updated)rE   rF   statusoptimizationw)%openyamlload
SafeLoaderr
   from_pretrainedprinttyperexitr6   topyannotedatabaserH   load_database
FileFinderr&   r(   get_protocollistgetattrr5   types
MethodType
get_metricwith_suffixr   get_directionrn   default_parametersNotImplementedError	tune_iter	enumerater   now	isoformatdump)r>   rE   rF   r/   rH   rJ   rK   r*   fporiginal_configoptimized_pipelinetorch_devicer_   loaded_protocolfilesrb   re   journalresult	optimizer	directionglobal_best_losslocal_best_lossrk   
iterationsirp   rl   r   ra   r   r=   r   s   :




	

downloadzCPretrained pipeline (e.g. pyannote/speaker-diarization-community-1)revisionzPretrained pipeline revision.tokenzHuggingface token.cachezFPath to the folder where files downloaded from Huggingface are stored.c                 C   s:   t j| |||d}|du rtd|  d tjdddS )zG
    Download a pretrained PIPELINE to disk for later offline use.
    r   r   	cache_dirN(Could not load pretrained pipeline from rN   rO   rP   )r
   rw   rx   ry   rz   )r>   r   r   r   pretrained_pipeliner   r   r   r     s   #applyrR   zPath to audio file or directory)r?   r@   rB   rA   rI   intoz2Path to file or directory where results are saved.c              	      s  t j| |||d}|du rtd|  d tjddt|}|| | rY du s0  s;td tjddt	dd	 |
 D }	 fd
d|	D }
 fdd|	D }n# du sl  sltd tjdd|g}	 g}
 ry dndg}t|	|
|D ]V\}}}||}t|}|rt|dnttj}|| W d   n1 sw   Y  t|dr|r| }t|d}tj||dd W d   n1 sw   Y  qdS )zC
    Apply a pretrained PIPELINE to an AUDIO file or directory
    r   Nr   rN   rO   rP   z9When AUDIO is a directory, INTO must also be a directory.c                 s   s    | ]	}|  r|V  qd S r1   )is_file.0pathr   r   r   	<genexpr>  s    zapply.<locals>.<genexpr>c                       g | ]	} |j d   qS ).rttmstemr   r   r   r   
<listcomp>      zapply.<locals>.<listcomp>c                    r   ).jsonr   r   r   r   r   r     r   z/When AUDIO is a file, INTO must also be a file.r   rr   	serialize   indent)r
   rw   rx   ry   rz   r6   r{   is_direchosortediterdirr   r   zipr<   rs   r   sysstdout
write_rttmr9   r   jsonr   )r>   rR   r   r   r   r   r/   r   r   inputsrttmsjsonscurrent_inputcurrent_rttmcurrent_jsonr;   r7   rL   
serializedjr   r   r   r   C  sD   ;


c                	   @   sL   e Zd ZdZdedefddZ	ddedeeef deed	f fd
dZdS )MinDurationOffOptimizeram  Utility to optimize `min_duration_off`

    Depending on the pipeline used for speaker diarization, short breaks within speaker turns
    (e.g. between each word) might lead to unfair missed detection rates.

    This utility aims at finding the best value for `min_duration_off` parameter that controls
    how short a within-speaker gap must be to be filled.
    collarr0   c                 C   s   |   |D ]}|d j|d|d< ||d |d |d d}q| | j|< t|}|| jk r?|| _|D ]	}|d|d< q5|S )Nr7   )r   temporary_speaker_diarizationrU   	annotateduembest_speaker_diarization)resetsupportreport_reportsabs_best_metricpop)r`   r   r*   r   file_current_metricr   r   r   _compute_metric  s&   


z'MinDurationOffOptimizer._compute_metric        g      ?r*   bounds	DataFramec                 C   s`   t d| _t | _| ||d}tt| j|||dd}|| jkr$d}nt |j}|| j| fS )a  Optimize 'min_duration_off' value for `metric`

        Parameters
        ----------
        files : list[dict]
            List of dictionaries containing 'annotation', 'annotated',
            and 'speaker_diarization' keys.
        metric : BaseMetric
            Metric to optimize against (usually a DiarizationErrorRate instance).
        bounds : tuple[float, float], optional
            Lower and upper bounds for the `min_duration_off` parameter (in seconds).
            Defaults to (0.0, 1.0).

        Returns
        -------
        best_min_duration_off : float
            Optimized min_duration_off parameter.
        best_report: pandas.DataFrame
            Corresponding pyannote.metrics report.
        infr   Bounded)r   method)floatr   dictr   r   r   r   x)r`   r   r*   r   no_collar_metricresbest_min_duration_offr   r   r   __call__  s   


z MinDurationOffOptimizer.__call__N)r   )	r   r   r   __doc__r   r   r   tupler   r   r   r   r   r     s    	

r   	benchmarkzBenchmarked protocolz0Directory into which benchmark results are savedzBenchmarked subsetz6Evaluate both original and post-processed predictions.progresszShow progressper_filez1Save one RTTM/JSON file per processed audio file.c           @   
      s  t j| |||d}|du rtd|  d tjddt|}|| |r-tjj	
| dtj i}|	tjkr?dd	 |d
< tjj	j||d}tt||j }d}tdd |D rjtd| d|j d d}| d|j }|	tjkr{|d7 }t }t }t }|st }t  |r|| }| rt| d|d }|jdd n|| d }| rt| dt|| dD ]}|d }t |||< t }||fi |d
i }t }|| ||< t|dr(|r"|d } | jdd t| | d d}!tj |! |!dd  W d   n	1 sw   Y  n|! ||< t"|}"|r6|| d }t||r=dnd!}#|"#|# W d   n	1 sQw   Y  |sf||d" |"|d#dd$}$t$|"% }%t$|d" % }& &|&t &|%d%  |& |%  d7  < |
r|"|d&< q|r|st|| d d}!tj ||!dd  W d   n	1 sw   Y  t }'t'|( }(t'|( })|(|)d'  |'d(< |)|( |'d)< |(|'d*< |j)d+kr#t*j+,|}*i }+t-|*D ]},|,.d,s
t|*|,}-t/|-t0t1t2t3t4tfr
|-|+|,< q|+|'d-< |+d. 5dd/}.|| d|. d0 }/n|| d0 }/t|/d}0t6 |'|0 W d   n	1 sAw   Y  |rMt t|| d1 d}1|7 8|1 W d   n	1 sjw   Y  t|| d2 d}2|29t2| W d   n	1 sw   Y  t: ; }3t: fd3d ; D }4t<j=|3d |4d ft0d4}5 > D ]\}6}7|7> D ]\}8}9|9|5|6|8f< qqt<'d5d6  > D t<'|5 }:t<'t<?|5t<'|5 };t<j@|| d7 |5d8d9d:|;d;d<|:d=d>d? |
rtA }<|<||\}=}>t|| d@ d}1|>8|1 W d   n	1 s)w   Y  t|| dA d}2|29|>jBddBd	 dC W d   n	1 sPw   Y  t|| dD d}0t6 dE|=i|0 W d   n	1 ssw   Y  |s|| dF }?|? rt|? d|D ]3}|r||d  dF }?t|?|rdnd!}#|dG #|# W d   n	1 sw   Y  qdS dS )Ha~  
    Benchmark a pretrained diarization PIPELINE

    This will run the pipeline on all files in the specified protocol and subset,
    save the results in RTTM format, and compute the Diarization Error Rate (DER)
    for each file. If `--optimize` is used, it will also post-process predictions
    by filling short within speaker gaps and save the results in a separate file.
    r   Nr   rN   rO   rP   rR   c                 S   rS   rT   rV   rY   r   r   r   r[   m  r\   zbenchmark.<locals>.<lambda>r]   r^   Fc                 s   s     | ]}| d ddu V  qdS )rU   N)get)r   r   r   r   r   r   z  s    zbenchmark.<locals>.<genexpr>z0Manual annotation is not available for files in  z& subset so skipping metric evaluation.Trc   z already exists.rttm)parentsr   )disableurir   r   )exist_okr   rr   r   r   arU   r   r   r   r7   i  seconds_per_hourtimes_faster_than_realtimetotal_processing_timer   r   r/   name-z.ymlz.csvz.txtc                 3   s     | ]}t  |  V  qd S r1   )maxkeys)r   true_speakersspeaker_countr   r   r     s
    
)dtypec                 S   s2   g | ]\}}|  D ]\}}t|| | q
qS r   )itemsr   )r   r   pred_countspred_speakerscountr   r   r   r     s    zbenchmark.<locals>.<listcomp>z.SpeakerCount.csv,z%3dzAccuracy = z.1%z / Average error = z.2fz speakers off)	delimiterfmtfooterz.OptimizedMinDurationOff.csvz.OptimizedMinDurationOff.txtc                 S   s
   d | S )Nz{0:.2f})format)fr   r   r   r[   8  s   
 )sparsifyfloat_formatz.OptimizedMinDurationOff.ymlmin_duration_offz.OptimizedMinDurationOff.rttmr   )Cr
   rw   rx   ry   rz   r6   r{   r|   r}   rH   r~   r   r&   r(   r   r   r   r5   anyr   r   r@   FileExistsErrormkdirr   r   get_durationtimer   r9   rs   r   r   r   r<   r   rW   rX   
setdefaultsumvaluestyper2   r   get_device_propertiesdir
startswithr8   intr   r.   boolr   replacert   r   to_csvwriter   r   npzerosr  diagsavetxtr   	to_string)@r>   rE   r   rF   r   r   r   r/   rH   rK   r=   r   r   r   r   r_   r   r   skip_metricbenchmark_nameprocessing_timeplaying_timeserialized_predictionsr*   benchmark_dirrttm_dir	rttm_filer   r   ticr;   tacjson_dirr
  r7   r   r   pred_num_speakerstrue_num_speakers
processingr   total_playing_timeprops
props_dictattrr5   device_name	speed_ymlymlcsvtxtmax_true_speakersmax_pred_speakersspeaker_count_matrixr   r  r  r  speaker_count_errorspeaker_count_accuracyminDurationOffOptimizerr   best_reportoptimized_rttm_filer   r   r   r     sX  ]




		
strip
checkpointz'Path to pyannote.audio model checkpoint)r?   r@   rA   rB   rD   zPath to the stripped checkpointc              
      s   g d t j| t ddd} fdd| D }t || zt|}W dS  tyC } zt	d|  W Y d}~dS d}~ww )	zT
    Strip a pretrained CHECKPOINT to only keep the parts needed for inference.
    )zpytorch-lightning_versionhparams_namehyper_parameters
state_dictzpyannote.audior   F)map_locationweights_onlyc                    s   i | ]\}}| v r||qS r   r   )r   keyr5   r   r   r   
<dictcomp>z  s    zstrip.<locals>.<dictcomp>zQSomething went wrong while stripping the checkpoint as it could not be reloaded: N)
r2   ru   r/   r  saver	   rw   	Exceptionr   rz   )rD  r   old_checkpointnew_checkpointr   er   rK  r   rC  S  s    
__main__)NNN)Br   r   r  r   
contextlibr   r   enumr   	functoolsr   pathlibr   typingr   numpyr  pyannote.databaser|   r2   ry   rt   pyannote.audior   r	   r
   pyannote.corer   pyannote.metrics.baser   pyannote.metrics.diarizationr   r   pyannote.pipeline.optimizerr   rich.progressr   scipy.optimizer   typing_extensionsr   r.   r   r   r&   r)   r/   r6   r<   Typerappcommandr   r%   ArgumentOptionr  r=   r   r   r   r   r  r   rC  r   r   r   r   r   <module>   s  

$
(+ &
*"
&1eS

#
'25?BHN  X
4
