o
    piA2                     @   s   d dl Z d dlmZmZmZmZmZmZ d dlZ	d dl
mZ d dlmZmZmZmZ ddlmZ ddlmZ ddlmZmZ d	Zd
ZdZdZdZdZG dd deZdS )    N)UnionIterableOptionalTupleListDict)	ArrayLike)Segment
AnnotationSlidingWindowFeatureTimeline   )
BaseMetric)	det_curve)MetricComponentsDetailstargetspeaker_latency	spk_scoreabsolute_latency	abs_scorescorec                	       s(  e Zd ZdZedefddZdeeef fddZ			d'de
e d	e
e f fd
dZdefddZdedee dee defddZdeeef dee dee defddZdeeef deeeeeef  f defddZedd Zedd Z				 d(d!ed"ed#ed$efd%d&Z  ZS ))LowLatencySpeakerSpottinga  Evaluation of low-latency speaker spotting (LLSS) systems

    LLSS systems can be evaluated in two ways: with fixed or variable latency.

    * When latency is fixed a priori (default), only scores reported by the
    system within the requested latency range are considered. Varying the
    detection threshold has no impact on the actual latency of the system. It
    only impacts the detection performance.

    * In variable latency mode, the whole stream of scores is considered.
    Varying the detection threshold will impact both the detection performance
    and the detection latency. Each trial will result in the alarm being
    triggered with a different latency. In case the alarm is not triggered at
    all (missed detection), the latency is arbitrarily set to the value one
    would obtain if it were triggered at the end of the last target speech
    turn. The reported latency is the average latency over all target trials.

    Parameters
    ----------
    latencies : float iterable, optional
        Switch to fixed latency mode, using provided `latencies`.
        Defaults to [1, 5, 10, 30, 60] (in seconds).
    thresholds : float iterable, optional
        Switch to variable latency mode, using provided detection `thresholds`.
        Defaults to fixed latency mode.
    returnc                 C   s   dS )NzLow-latency speaker spotting )clsr   r   M/home/ubuntu/.local/lib/python3.10/site-packages/pyannote/metrics/spotting.pymetric_nameL   s   z%LowLatencySpeakerSpotting.metric_namec                 C   s   ddiS )Nr   g        r   )selfr   r   r   metric_componentsP   s   z+LowLatencySpeakerSpotting.metric_componentsN
thresholds	latenciesc                    sj   t    |d u r|d u rg d}|d ur|d urtd|d ur't|| _|d ur0t|}|| _d S )N)r      
      <   z3One must choose between fixed and variable latency.)super__init__
ValueErrornpsortr    r!   )r   r    r!   	__class__r   r   r'   S   s   


z"LowLatencySpeakerSpotting.__init__detailc                 C   s   d S )Nr   )r   r-   r   r   r   compute_metricg   s   z(LowLatencySpeakerSpotting.compute_metric	reference
timestampsscoresc              
   C   sf  |sd}t |t t| jdf }|}nd}t dd |D }g }g }tt || j| jD ]c\}}	z&|| j|| |	  }
t ||
}|dk rQt	j
j }n	t |d | }W n tyi   t |}Y nw || |d j|	 }
t ||
}|dk rt	j
j }n	t |d | }|| q1t |d}t |d}t|t| jt|t| jt|iS )NFr   Tc                 S   s   g | ]}|j qS r   )duration).0segmentr   r   r   
<listcomp>v   s    z<LowLatencySpeakerSpotting._fixed_latency.<locals>.<listcomp>r   r   )r)   maxoneslenr!   cumsumzipsearchsortedendsys
float_info
IndexErrorappendstartarrayreshapeSPOTTING_TARGETSPOTTING_SPK_LATENCYSPOTTING_SCORESPOTTING_ABS_LATENCYSPOTTING_ABS_SCORE)r   r/   r0   r1   target_trialr   r   totalilatencytup_tosr   r   r   _fixed_latencyj   sH   
z(LowLatencySpeakerSpotting._fixed_latencyc              
      sN  t jt t|df }t jt t|df }t|tr#|jdd}|rI|d j}t|D ]\}}	t	||	}
|
|
 ||< td|
j||< q.t j|d}|| jk t  fddt| jD }|rd}t j||d	d
}t j||d	d
} dd d f }| j|| < | || < nd}t j}t j}t|t|t|tt |iS )Nr   F)copyr   r6   c                    s(   g | ]\}}t  d d |f dqS )NT)r)   r=   )r3   rM   _	triggeredr   r   r5      s    z?LowLatencySpeakerSpotting._variable_latency.<locals>.<listcomp>Tclipmoder7   )r)   nanr9   r:   
isinstancer
   get_timelinerC   	enumerater	   cropr2   r8   maximum
accumulaterE   r    rD   takeextentrF   rI   rG   rH   )r   r/   r0   r1   kwargsr   r   
first_timerM   rO   so_farmaxcumindicesrK   positiver   rU   r   _variable_latency   s<   




z+LowLatencySpeakerSpotting._variable_latency
hypothesisc                 K   sJ   t |trdd |D }t| \}}| jdu r| |||S | |||S )z

        Parameters
        ----------
        reference : Timeline or Annotation
        hypothesis : SlidingWindowFeature or (time, score) iterable
        c                 S   s   g | ]	\}}|j |fqS r   )r>   )r3   windowvaluer   r   r   r5      s    z@LowLatencySpeakerSpotting.compute_components.<locals>.<listcomp>N)r[   r   r<   r!   ri   rR   )r   r/   rj   rc   r0   r1   r   r   r   compute_components   s   

z,LowLatencySpeakerSpotting.compute_componentsc                 C      dd | D }t j|ddS )Nc                 S       g | ]\}}|t  r|t qS r   )rF   rI   r3   rT   trialr   r   r   r5          z>LowLatencySpeakerSpotting.absolute_latency.<locals>.<listcomp>r   axisr)   nanmeanr   r!   r   r   r   r         z*LowLatencySpeakerSpotting.absolute_latencyc                 C   rn   )Nc                 S   ro   r   )rF   rG   rp   r   r   r   r5      rr   z=LowLatencySpeakerSpotting.speaker_latency.<locals>.<listcomp>r   rs   ru   rw   r   r   r   r      rx   z)LowLatencySpeakerSpotting.speaker_latencyd   r   {Gz?F	cost_misscost_faprior_targetreturn_latencyc                 C   s  | j du rtdd | D }tdd | D }t||dd\}}}	}
|ddd |ddd |	ddd }}}	|| | || d|   }|rtj|	| jd	d
}tj|	|dd}	tj||dd}tj||dd}tj||dd}|	|||
|| j| jfS |	|||
|fS tdd | D }tdd | D }tdd | D }i }||d	 D ]S\}}i ||< t
| j D ]E\}}t||dd|f dd\}}}}
|ddd |ddd |ddd }}}|| | || d|   }||||
|f|| |< qq|S )a  DET curve

        Parameters
        ----------
        cost_miss : float, optional
            Cost of missed detections. Defaults to 100.
        cost_fa : float, optional
            Cost of false alarms. Defaults to 1.
        prior_target : float, optional
            Target trial prior. Defaults to 0.5.
        return_latency : bool, optional
            Set to True to return latency.
            Has no effect when latencies are given at initialization time.

        Returns
        -------
        thresholds : numpy array
            Detection thresholds
        fpr : numpy array
            False alarm rate
        fnr : numpy array
            False rejection rate
        eer : float
            Equal error rate
        cdet : numpy array
            Cdet cost function
        speaker_latency : numpy array
        absolute_latency : numpy array
            Speaker and absolute latency when return_latency is set to True.
        Nc                 S      g | ]\}}|t  qS r   rF   rp   r   r   r   r5   $      z7LowLatencySpeakerSpotting.det_curve.<locals>.<listcomp>c                 S   r   r   )rH   rp   r   r   r   r5   %  r   F)	distancesr7   g      ?left)siderW   rX   c                 S   r   r   r   rp   r   r   r   r5   <  r   c                 S   r   r   )SPOTTING_SPK_SCORErp   r   r   r   r5   =  r   c                 S   r   r   )rJ   rp   r   r   r   r5   >  r   )speakerabsolute)r!   r)   rD   r   r=   r    ra   r   r   itemsr]   )r   r{   r|   r}   r~   y_truer1   fprfnrr    eercdetrg   
spk_scores
abs_scoresresultkeyrM   rN   thetar   r   r   r      sN   
$.


.
z#LowLatencySpeakerSpotting.det_curve)NN)ry   r   rz   F) __name__
__module____qualname____doc__classmethodstrr   r   floatr   r   r   r'   r   r.   r   r   r   rR   r   r
   ri   r   r   r   rm   propertyr   r   boolr   __classcell__r   r   r+   r   r   0   sf    
:
7


r   )r?   typingr   r   r   r   r   r   numpyr)   numpy.typingr   pyannote.corer	   r
   r   r   baser   binary_classificationr   typesr   r   rF   rG   r   rI   rJ   rH   r   r   r   r   r   <module>   s    