o
    siI8                     @   s   d Z ddlZddlmZ ddlZddlmZ ddlm	Z	 dej
dej
fdd	Zd
d ZdddZ	ddee fddZdd Zdd ZdS )u	  
Alignment models are given a sequence of events along with a piece of audio, and then return a
sequence of timestamps, with one timestamp for each event, indicating the position of this event
in the audio. The events are listed in order of occurrence in the audio, so that output
timestamps have to be monotonically increasing.
Evaluation usually involves taking the series of predicted and ground truth timestamps and
comparing their distance, usually on a pair-wise basis, e.g. taking the median absolute error in
seconds.

Conventions
-----------
Timestamps should be provided in the form of a 1-dimensional array of onset
times in seconds in increasing order.

Metrics
-------
* :func:`mir_eval.alignment.absolute_error`: Median absolute error and average absolute error
* :func:`mir_eval.alignment.percentage_correct`: Percentage of correct timestamps,
  where a timestamp is counted
  as correct if it lies within a certain tolerance window around the ground truth timestamp
* :func:`mir_eval.alignment.pcs`: Percentage of correct segments: Percentage of overlap between
  predicted segments and ground truth segments, where segments are defined by (start time,
  end time) pairs
* :func:`mir_eval.alignment.perceptual_metric`: metric based on human synchronicity perception as
  measured in the paper "User-centered evaluation of lyrics to audio alignment",
  N. Lizé-Masclef, A. Vaglio, M. Moussallam, ISMIR 2021

References
----------
  .. [#lizemasclef2021] N. Lizé-Masclef, A. Vaglio, M. Moussallam.
    "User-centered evaluation of lyrics to audio alignment",
    International Society for Music Information Retrieval (ISMIR) conference,
    2021.

  .. [#mauch2010] M. Mauch, F: Hiromasa, M. Goto.
    "Lyrics-to-audio alignment and phrase-level segmentation using
    incomplete internet-style chord annotations",
    Frontiers in Proceedings of the Sound Music Computing Conference (SMC), 2010.

  .. [#dzhambazov2017] G. Dzhambazov.
    "Knowledge-Based Probabilistic Modeling For Tracking Lyrics In Music Audio Signals",
    PhD Thesis, 2017.

  .. [#fujihara2011] H. Fujihara, M. Goto, J. Ogata, H. Okuno.
    "LyricSynchronizer: Automatic synchronization system between musical audio signals and lyrics",
    IEEE Journal of Selected Topics in Signal Processing, VOL. 5, NO. 6, 2011

    N)Optional)skewnorm)filter_kwargsreference_timestampsestimated_timestampsc                 C   s0  t | tjstdt|  t |tjstdt| | jdkr,td| j d|jdkr:td|j d| jdkrCtd|j| jkrVtd	|j d
| j dt| dd | dd  dksktdt|dd |dd  dkstdt| dkstdt|dkstddS )aI  Check that the input annotations to a metric look like valid onset time
    arrays, and throws helpful errors if not.

    Parameters
    ----------
    reference_timestamps : np.ndarray
        reference timestamp locations, in seconds
    estimated_timestamps : np.ndarray
        estimated timestamp locations, in seconds
    z7Reference timestamps need to be a numpy array, but got z7Estimated timestamps need to be a numpy array, but got    zBReference timestamps need to be a one-dimensional vector, but got z dimensionszBEstimated timestamps need to be a one-dimensional vector, but got r   zReference timestamps are empty.zPNumber of timestamps must be the same in prediction and ground truth, but found z in prediction and z in ground truthNz6Reference timestamps are not monotonically increasing!z6Estimated timestamps are not monotonically increasing!z(Reference timestamps can not be below 0!z(Estimated timestamps can not be below 0!)
isinstancenpndarray
ValueErrortypendimsizeall)r   r    r   F/home/ubuntu/.local/lib/python3.10/site-packages/mir_eval/alignment.pyvalidate;   sV   


""r   c                 C   s,   t | | t| | }t|t|fS )a  Compute the absolute deviations between estimated and reference timestamps,
    and then returns the median and average over all events

    Examples
    --------
    >>> reference_timestamps = mir_eval.io.load_events('reference.txt')
    >>> estimated_timestamps = mir_eval.io.load_events('estimated.txt')
    >>> mae, aae = mir_eval.align.absolute_error(reference_onsets, estimated_timestamps)

    Parameters
    ----------
    reference_timestamps : np.ndarray
        reference timestamps, in seconds
    estimated_timestamps : np.ndarray
        estimated timestamps, in seconds

    Returns
    -------
    mae : float
        Median absolute error
    aae: float
        Average absolute error
    )r   r
   absmedianmean)r   r   
deviationsr   r   r   absolute_errors   s   
r   333333?c                 C   s&   t | | t| | }t||kS )a5  Compute the percentage of correctly predicted timestamps. A timestamp is predicted
    correctly if its position doesn't deviate more than the window parameter from the ground
    truth timestamp.

    Examples
    --------
    >>> reference_timestamps = mir_eval.io.load_events('reference.txt')
    >>> estimated_timestamps = mir_eval.io.load_events('estimated.txt')
    >>> pc = mir_eval.align.percentage_correct(reference_onsets, estimated_timestamps, window=0.2)

    Parameters
    ----------
    reference_timestamps : np.ndarray
        reference timestamps, in seconds
    estimated_timestamps : np.ndarray
        estimated timestamps, in seconds
    window : float
        Window size, in seconds
        (Default value = .3)

    Returns
    -------
    pc : float
        Percentage of correct timestamps
    )r   r
   r   r   )r   r   windowr   r   r   r   percentage_correct   s   
r   durationc           
      C   sH  t | | |durat|}|dkrtd| t| |kr,tdt|  d| t||kr@tdt| d| tdg| g}t| |gg}tdg|g}t||gg}n(| d | d  }|dkrqtd| dd }| d	d }|dd }|d	d }t||}t||}tt|| d}	|	| S )
a	  Calculate the percentage of correct segments (PCS) metric.

    It constructs segments out of predicted and estimated timestamps separately
    out of each given timestamp vector and calculates the percentage of overlap between correct
    segments compared to the total duration.

    WARNING: This metrics behaves differently depending on whether "duration" is given!

    If duration is not given (default case), the computation follows the MIREX lyrics alignment
    challenge 2020. For a timestamp vector with entries (t1,t2, ... tN), segments with
    the following (start, end) boundaries are created: (t1, t2), ... (tN-1, tN).
    After the segments are created, the overlap between the reference and estimated segments is
    determined and divided by the total duration, which is the distance between the
    first and last timestamp in the reference.

    If duration is given, the segment boundaries are instead (0, t1), (t1, t2), ... (tN, duration).
    The overlap is computed in the same way, but then divided by the duration parameter given to
    this function.
    This method follows the original paper [#fujihara2011] more closely, where the metric was
    proposed.
    As a result, this variant of the metrics punishes cases where the first estimated timestamp
    is too early or the last estimated timestamp is too late, whereas the MIREX variant does not.
    On the other hand, the MIREX metric is invariant to how long the eventless beginning and end
    parts of the audio are, which might be a desirable property.

    Examples
    --------
    >>> reference_timestamps = mir_eval.io.load_events('reference.txt')
    >>> estimated_timestamps = mir_eval.io.load_events('estimated.txt')
    >>> pcs = mir_eval.align.percentage_correct_segments(reference_timestamps, estimated_timestamps)

    Parameters
    ----------
    reference_timestamps : np.ndarray
        reference timestamps, in seconds
    estimated_timestamps : np.ndarray
        estimated timestamps, in seconds
    duration : float
        Optional. Total duration of audio (seconds). WARNING: Metric is computed differently
        depending on whether this is provided or not - see documentation above!

    Returns
    -------
    pcs : float
        Percentage of time where ground truth and predicted segments overlap
    Nr   z0Positive duration needs to be provided, but got z$Expected largest reference timestampz  to not be larger than duration z%Expected largest estimated timestamp r   zCReference timestamps are all identical, can not compute PCS metric!r   )	r   floatr   r
   maxconcatenatemaximumminimumsum)
r   r   r   
ref_startsref_ends
est_startsest_endsoverlap_startsoverlap_endsoverlap_durationr   r   r   percentage_correct_segments   sP   
1r*   c                 C   sF   t | | ||  }d}d}d}d}d| tj||||d }t|S )a  Metric based on human synchronicity perception as measured in the paper
    "User-centered evaluation of lyrics to audio alignment" [#lizemasclef2021]

    The parameters of this function were tuned on data collected through a user Karaoke-like
    experiment
    It reflects human judgment of how "synchronous" lyrics and audio stimuli are perceived
    in that setup.
    Beware that this metric is non-symmetrical and by construction it is also not equal to 1 at 0.

    Examples
    --------
    >>> reference_timestamps = mir_eval.io.load_events('reference.txt')
    >>> estimated_timestamps = mir_eval.io.load_events('estimated.txt')
    >>> score = mir_eval.align.karaoke_perceptual_metric(reference_onsets, estimated_timestamps)

    Parameters
    ----------
    reference_timestamps : np.ndarray
        reference timestamps, in seconds
    estimated_timestamps : np.ndarray
        estimated timestamps, in seconds

    Returns
    -------
    perceptual_score : float
        Perceptual score, averaged over all timestamps
    gDqG?gݫl̿g-o?g-?g      ?)locscale)r   r   pdfr
   r   )r   r   offsetsskewnesslocalisationr,   normalisation_factorperceptual_scoresr   r   r   karaoke_perceptual_metric  s   


r3   c                 K   s`   t  }tt| |fi ||d< t| |\|d< |d< tt| |fi ||d< t| ||d< |S )a  Compute all metrics for the given reference and estimated annotations.

    Examples
    --------
    >>> reference_timestamps = mir_eval.io.load_events('reference.txt')
    >>> estimated_timestamps = mir_eval.io.load_events('estimated.txt')
    >>> duration = max(np.max(reference_timestamps), np.max(estimated_timestamps)) + 10
    >>> scores = mir_eval.align.evaluate(reference_onsets, estimated_timestamps, duration)

    Parameters
    ----------
    reference_timestamps : np.ndarray
        reference timestamp locations, in seconds
    estimated_timestamps : np.ndarray
        estimated timestamp locations, in seconds
    **kwargs
        Additional keyword arguments which will be passed to the
        appropriate metric or preprocessing functions.

    Returns
    -------
    scores : dict
        Dictionary of scores, where the key is the metric name (str) and
        the value is the (float) score achieved.
    pcmaeaaepcs
perceptual)collectionsOrderedDictr   r   r   r*   r3   )r   r   kwargsscoresr   r   r   evaluate8  s(   

r=   )r   )N)__doc__r9   typingr   numpyr
   scipy.statsr   mir_eval.utilr   r   r   r   r   r   r*   r3   r=   r   r   r   r   <module>   s    18
 
^+