o
    pi^                     @   s  d Z ddlZddlZddlZddlZddlZddlZddl	m	Z	 ddl
mZ ddl
mZ ddlmZ ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlm Z  ddl!m"Z" ddl!m#Z# ddl!m$Z$ ddl!m%Z% ddl&m'Z' ej(Z)dd Z(e(e_(de*defddZ+d d! Z,d8d"d#Z-d$d% Z.d&d' Z/d9d*d+Z0d:d-d.Z1	)d;d/d0Z2d9d1d2Z3d<d3d4Z4d5d6 Z5e6d7kre5  dS dS )=aq  
Evaluation

Usage:
  pyannote-metrics detection [--subset=<subset> --collar=<seconds> --skip-overlap] <database.task.protocol> <hypothesis.rttm>
  pyannote-metrics segmentation [--subset=<subset> --tolerance=<seconds>] <database.task.protocol> <hypothesis.rttm>
  pyannote-metrics overlap [--subset=<subset> --collar=<seconds>] <database.task.protocol> <hypothesis.rttm>
  pyannote-metrics diarization [--subset=<subset> --greedy --collar=<seconds> --skip-overlap] <database.task.protocol> <hypothesis.rttm>
  pyannote-metrics identification [--subset=<subset> --collar=<seconds> --skip-overlap] <database.task.protocol> <hypothesis.rttm>
  pyannote-metrics spotting [--subset=<subset> --latency=<seconds>... --filter=<expression>...] <database.task.protocol> <hypothesis.json>
  pyannote-metrics -h | --help
  pyannote-metrics --version

Options:
  <database.task.protocol>   Set evaluation protocol (e.g. "Etape.SpeakerDiarization.TV")
  --subset=<subset>          Evaluated subset (train|developement|test) [default: test]
  --collar=<seconds>         Collar, in seconds [default: 0.0].
  --skip-overlap             Do not evaluate overlap regions.
  --tolerance=<seconds>      Tolerance, in seconds [default: 0.5].
  --greedy                   Use greedy diarization error rate.
  --latency=<seconds>        Evaluate with fixed latency.
  --filter=<expression>      Filter out target trials that do not match the
                             expression; e.g. use --filter="speech>10" to skip
                             target trials with less than 10s of speech from
                             the target.
  -h --help                  Show this screen.
  --version                  Show version.

All modes but "spotting" expect hypothesis using the RTTM file format.
RTTM files contain one line per speech turn, using the following convention:

SPEAKER {uri} 1 {start_time} {duration} <NA> <NA> {speaker_id} <NA> <NA>

    * uri: file identifier (as given by pyannote.database protocols)
    * start_time: speech turn start time in seconds
    * duration: speech turn duration in seconds
    * speaker_id: speaker identifier

"spotting" mode expects hypothesis using the following JSON file format.
It should contain a list of trial hypothesis, using the same trial order as
pyannote.database speaker spotting protocols (e.g. protocol.test_trial())

[
    {'uri': '<uri>', 'model_id': '<model_id>', 'scores': [[<t1>, <v1>], [<t2>, <v2>], ... [<tn>, <vn>]]},
    {'uri': '<uri>', 'model_id': '<model_id>', 'scores': [[<t1>, <v1>], [<t2>, <v2>], ... [<tn>, <vn>]]},
    {'uri': '<uri>', 'model_id': '<model_id>', 'scores': [[<t1>, <v1>], [<t2>, <v2>], ... [<tn>, <vn>]]},
    ...
    {'uri': '<uri>', 'model_id': '<model_id>', 'scores': [[<t1>, <v1>], [<t2>, <v2>], ... [<tn>, <vn>]]},
]

    * uri: file identifier (as given by pyannote.database protocols)
    * model_id: target identifier (as given by pyannote.database protocols)
    * [ti, vi]: [time, value] pair indicating that the system has output the
                score vi at time ti (e.g. [10.2, 0.2] means that the system
                gave a score of 0.2 at time 10.2s).

Calling "spotting" mode will create a bunch of files.
* <hypothesis.det.txt> contains DET curve using the following raw file format:
    <threshold> <fpr> <fnr>
* <hypothesis.lcy.txt> contains latency curves using this format:
    <threshold> <fpr> <fnr> <speaker_latency> <absolute_latency>

    N)docopt)
Annotation)Timeline)get_protocol)get_annotated)	load_rttm)tabulate)DetectionAccuracy)DetectionErrorRate)DetectionPrecision)DetectionRecall)DiarizationCoverage)DiarizationErrorRate)DiarizationPurity)GreedyDiarizationErrorRate)IdentificationErrorRate)IdentificationPrecision)IdentificationRecall)SegmentationCoverage)SegmentationPrecision)SegmentationPurity)SegmentationRecall)LowLatencySpeakerSpottingc                 O   s   t |jd t|  d S )N:)print__name__str)messagecategoryargskwargs r!   H/home/ubuntu/.local/lib/python3.10/site-packages/pyannote/metrics/cli.pyshowwarning   s   r#   current_filereturnc           	      C   sl   | d }t |jd}||D ] \\}}\}}|||f }|||f }||kr(q|||@  q|  S )a  Get overlapped speech reference annotation

    Parameters
    ----------
    current_file : `dict`
        File yielded by pyannote.database protocols.

    Returns
    -------
    overlap : `pyannote.core.Annotation`
        Overlapped speech reference.
    
annotationuri)r   r(   co_iteraddsupportto_annotation)	r$   	referenceoverlaps1t1s2t2l1l2r!   r!   r"   
to_overlap   s   r5   c                    s   |d   | v r|   S  fdd| D }t |dkr,d  d}t| t ddS t |d	kr=| |d  } |_|S d
  d| d}t|j |d)an  Get hypothesis for given file

    Parameters
    ----------
    hypotheses : `dict`
        Speaker diarization hypothesis provided by `load_rttm`.
    current_file : `dict`
        File description as given by pyannote.database protocols.

    Returns
    -------
    hypothesis : `pyannote.core.Annotation`
        Hypothesis corresponding to `current_file`.
    r(   c                    s   g | ]}| v r|qS r!   r!   ).0ur'   r!   r"   
<listcomp>       z"get_hypothesis.<locals>.<listcomp>r   z$Could not find hypothesis for file "z"; assuming empty file.speaker)r(   modality   z)Found too many hypotheses matching file "z" (z).)r(   uris)lenwarningswarnr   r(   
ValueErrorformat)
hypothesesr$   tmp_urimsg
hypothesisr!   r'   r"   get_hypothesis   s   
rG   c                    s4   | d t ||  t|  fdd| D S )Nr&   c                    s    i | ]\}}|| d qS ))uemr!   r6   keymetricrF   r-   rH   r!   r"   
<dictcomp>   s    zprocess_one.<locals>.<dictcomp>)rG   r   items)itemrC   metricsr!   rL   r"   process_one   s   
rQ   c                 C   s<   t jt||d}t| | D ]}|| qdd | D S )N)rC   rP   c                 S   s   i | ]\}}||j d dqS )FdisplayreportrI   r!   r!   r"   rM      s    zget_reports.<locals>.<dictcomp>)	functoolspartialrQ   getattrrN   )protocolsubsetrC   rP   processrO   r!   r!   r"   get_reports   s   
r\   c                 C   s<   t | j}|d}| |d| ||d d  dg S )z.Reindex report so that 'TOTAL' is the last rowTOTALNr<   )listindexreindex)rU   r_   ir!   r!   r"   r`      s   

(r`           Fc                    sv  ||d}t di |tdi |tdi |tdi |d}t| |||}|d jdd |d jdd}|d jdd}	|d jdd}
||d jd	f  d
< |	|d jd	f  d< |
|d jd	f  d< t  t j	} |d g|dd   |dd    d
d| |rdnd}|g fddtdD  dd  j	dd  D  }tt |dddddddd	 d S )Ncollarskip_overlap)erroraccuracy	precisionrecallrf   FrR   rg   rh   ri   %)rg   rj   rh   rj   ri   rj   r   r<   z Detection (collar = {0:g} ms{1})  , no overlap c                       g | ]	} j | d  qS r   columnsr6   ra   rT   r!   r"   r8         zdetection.<locals>.<listcomp>   c                 S   $   g | ]}|d  dkrdn|d qS r<   rj   r   r!   r6   cr!   r!   r"   r8        $ simple.2fdecimalleftdefaultheaderstablefmtfloatfmtnumalignstralign
missingval	showindexdisable_numparser!   )r
   r	   r   r   r\   rU   namer`   r^   rt   rB   ranger   r   )rY   rZ   rC   rd   re   optionsrP   reportsrg   rh   ri   rt   summaryr   r!   rT   r"   	detection   sN   

&r         ?c                 C   s  d|i}t di |tdi |tdi |tdi |d}t| |||}|d jdd}|d jdd}|d jdd}	|d jdd}
||d j }||d j }|	|d j }	|
|d j }
tj|||	|
gd	d
}t	|}d
d| ddddg}tt||dddddddd	 d S )N	tolerance)coveragepurityrh   ri   r   FrR   r   rh   ri   r<   )axisz#Segmentation (tolerance = {0:g} ms)rn   r}   r~   r   r   rp   r   r   r!   )r   r   r   r   r\   rU   r   pdconcatr`   rB   r   r   )rY   rZ   rC   r   r   rP   r   r   r   rh   ri   rU   r   r!   r!   r"   segmentation  sH   r   c                    sl  ||d}t di |tdi |d}|r tdi ||d< n	tdi ||d< t| |||}|d jdd |d jdd}	|d jdd}
|	|d jdf  d	< |
|d jdf  d
< t j} |d g|dd   |dd    t	  d
|rdndd| |rdnd}|g fddtdD  dd  jdd  D  }tt |dddddddd	 d S )Nrc   )r   r   rf   FrR   r   r   rj   )r   rj   )r   rj   r   r<   z'Diarization ({0:s}collar = {1:g} ms{2})zgreedy, rp   rn   ro   c                    rq   rr   rs   ru   rT   r!   r"   r8   l  rv   zdiarization.<locals>.<listcomp>   c                 S   rx   ry   r!   rz   r!   r!   r"   r8   m  r|   r}   r~   r   r   r   r   r!   )r   r   r   r   r\   rU   r   r^   rt   r`   rB   r   r   r   )rY   rZ   rC   greedyrd   re   r   rP   r   r   r   rt   r   r   r!   rT   r"   diarizationG  sP   

&

r   c                    sD  ||d}t di |tdi |tdi |d}t| |||}|d jdd |d jdd}|d jdd}	||d jdf  d	< |	|d jdf  d
< t j}
 |
d g|
dd   |
dd    t  d	d| |rtdnd}|g fddt
dD  dd  jdd  D  }tt |dddddddd	 d S )Nrc   )rf   rh   ri   rf   FrR   rh   ri   rj   rk   rl   r   r   r<   z%Identification (collar = {0:g} ms{1})rn   ro   rp   c                    rq   rr   rs   ru   rT   r!   r"   r8     rv   z"identification.<locals>.<listcomp>r   c                 S   rx   ry   r!   rz   r!   r!   r"   r8     r|   r}   r~   r   r   r   r   r!   )r   r   r   r\   rU   r   r^   rt   r`   rB   r   r   r   )rY   rZ   rC   rd   re   r   rP   r   rh   ri   rt   r   r   r!   rT   r"   identification  sH   

&r   c           /      C   sp  |sg }d| _ t| dj|d }tt||D ]\}\}	}
z|	d |
d ks)J W n tyF } zd}t|j||
d |	d dd }~ww z|	d |
d ksRJ W n tyo } zd}t|j||
d |	d dd }~ww zt|
d	 d
ks{J W n ty } z
d}t|j|dd }~ww t|
d	  \}}|s|| |	d }zt	||j
ksJ W q ty } zd}t|j|t	||j
dd }~ww |st|}tdd tdddD }t|tdddd|d d d  g}t||}|st|d}nt|d}t| dj|d }tt||D ]+\}\}	}
|d ur:|	d  }|d
k}|r:||r:q|	d }|||
d	  q|s|jdd\}}}}}dj|d}d}t|dd'}|d  t|||D ]\}}} |j||| d!}!||! qoW d    n	1 sw   Y  td"j|d# |jd$d\}}}}}}"}#d%j|d}$d&}%t|$dd<}|d' t||||"|#D ]&\}}} }&}'|dkrҐqt|&rڐq|%j||| |&|'d(}!||! qW d    n	1 sw   Y  td)j|$d* t  td+jd| d, d S | }(g })t|(D ]r}*|(|* }+d-|*i},|D ]P}-|+|- \}}}}}||,|-< d.j||*|-d/}d}t|dd'}|d  t|||D ]\}}} |j||| d!}!||! qSW d    n	1 srw   Y  q(|)|, d0j||*d1}td"j|d# qt  dtj|)d-|  }.tt|.d2d-gd3d |D  d4d5d6d7d8dd9	 d S ):NFz{subset}_trial)rZ   model_idzFtarget mismatch in trial #{i} (found: {found}, should be: {should_be}))ra   found	should_ber(   zDfile mismatch in trial #{i} (found: {found}, should be: {should_be})scoresr   z#empty list of scores in trial #{i}.)ra   try_withzQincorrect timestamp in trial #{i} (found: {found:g}, should be: >= {should_be:g})c                 S   s*   g | ]}t d dD ]	}|d|   q	qS )r<   
   )r   )r6   enr!   r!   r"   r8     s   * zspotting.<locals>.<listcomp>rw   r<   g?g      Y@d   )
thresholds)	latenciesr-   )return_latencyz{output_prefix}.det.txt)output_prefixz{t:.9f} {p:.9f} {n:.9f}
wmodez4# threshold false_positive_rate false_negative_rate
)tpr   z> {det_path})det_pathTz{output_prefix}.lcy.txtz({t:.9f} {p:.9f} {n:.9f} {s:.6f} {a:.6f}
zU# threshold false_positive_rate false_negative_rate speaker_latency absolute_latency
)r   r   r   saz> {lcy_path})lcy_pathzEER% = {eer:.2f})eerlatencyz*{output_prefix}.det.{key}.{latency:g}s.txt)r   rJ   r   z!{output_prefix}.det.{key}.XXs.txt)r   rJ   r}   c                 S   s   g | ]}d j |dqS )zEER% @ {l:g}s)l)rB   r6   r   r!   r!   r"   r8   T  r9   r~   r   r   rp   r   )r   r   r   r   r   r   r   r   )r   rX   rB   	enumeratezipAssertionErrorrA   r>   appendminstartnpconcatenatearrayr   arange
percentiler   duration	det_curveopenwriter   isnansortedr   	DataFrame	from_dict	set_indexr   )/rY   rZ   r   rC   r   filter_funcScorestrialsra   current_trialrF   r   rE   
timestampsr   r   epsilonsr   r   rK   speechtarget_trialr-   fprfnrr   _r   det_tmplfpr   r   r   linespeaker_lcyabsolute_lcyr   lcy_tmplr   r   resultslogsrJ   resultlogr   dfr!   r!   r"   spotting  s   

	
 





r   c                     s6  t tdd} t| d }| d }t| d }| d }t }| d r.|r*d}t| d	ti}t||d
}| d }| d r| d }	t|	dd}
t	
|
}W d    n1 sVw   Y  |	d d }dd | d D }| d }|rddlmmm} |dg  fdd|D   fdd}nd }t||||||d td | d }zt|}W n" ty   d| d}t| Y n   d| d }t| Y | d! rt|||||d" | d rt|||||d" | d# rt||||d$ | d% r	| d& }t||||||d' | d( rt|||||d" d S d S ))N
Evaluation)versionz--collarz--skip-overlapz--tolerancez<database.task.protocol>r.   zSOption --skip-overlap is not supported when evaluating overlapped speech detection.r&   )preprocessorsz--subsetr   z<hypothesis.json>rr   c                 S   s   g | ]}t |qS r!   )floatr   r!   r!   r"   r8     s    zmain.<locals>.<listcomp>z	--latencyz--filterr   )sympifylambdifysymbolsr   c                    s   g | ]
} g|qS r!   r!   )r6   
expression)r   r   r   r!   r"   r8     s    c                    s   t  fddD S )Nc                 3   s    | ]}|  V  qd S Nr!   )r6   funcr   r!   r"   	<genexpr>  s    z)main.<locals>.<lambda>.<locals>.<genexpr>)anyr   )filter_funcsr   r"   <lambda>  s    zmain.<locals>.<lambda>)r   z<hypothesis.rttm>zCould not find file .zFailed to load z:, please check its format (only RTTM files are supported).r   rc   r   )r   r   z--greedy)r   rd   re   r   )r   __doc__r   dictsysexitr5   r   r   jsonloadsympyr   r   r   r   r   FileNotFoundErrorr   r   r   r   )	argumentsrd   re   r   protocol_namer   rE   rY   rZ   hypothesis_jsonr   rC   r   r   filtersr   r   hypothesis_rttmr   r!   )r   r   r   r   r"   main_  s   

	




	

r  __main__)NN)rb   F)r   )Frb   Fr   )7r   rV   r   r   r?   numpyr   pandasr   r   pyannote.corer   r   pyannote.databaser   pyannote.database.utilr   r   r   pyannote.metrics.detectionr	   r
   r   r   pyannote.metrics.diarizationr   r   r   r   pyannote.metrics.identificationr   r   r   pyannote.metrics.segmentationr   r   r   r   pyannote.metrics.spottingr   r#   showwarning_origr   r5   rG   rQ   r\   r`   r   r   r   r   r   r  r   r!   r!   r!   r"   <module>   sb   @
*		

30

8
0 1i
