o
    pi(                     @   s   d dl mZmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ d dl	m
Z
 d d	l	mZ d d
l	mZ d dlZ	ddedee defddZ	ddedee de
fddZG dd deZdS )    )DictOptional   )Protocol)ProtocolFile)Subset)Preprocessor)Preprocessors)
Annotation)Timeline)SegmentNcurrent_fileexisting_preprocessorreturnc                 C   sn   |du r|  dd}n|| }|du rdS |  dd}|du r!|S td|}|r5| |vr5|j|ddS |S )a  Preprocessor that crops 'annotated' according to 'duration'

    Returns 'annotated' unchanged if 'duration' is not available

    Parameters
    ----------
    current_file : ProtocolFile
        Protocol file.
    existing_preprocessor : Preprocessor, optional
        When provided, this preprocessor must be used to get the initial
        'annotated' instead of getting it from 'current_file["annotated"]'

    Returns
    -------
    cropped_annotated : Timeline
        "annotated" cropped by "duration".
    N	annotatedduration        intersectionmode)getr   extentcrop)r   r   r   r    r   [/home/ubuntu/.local/lib/python3.10/site-packages/pyannote/database/protocol/segmentation.pycrop_annotated*   s   
r   c                 C   sf   |du r|  dd}n|| }|du rdS |  dd}|du r!|S |r1|| s1|j|ddS |S )a  Preprocessor that crops 'annotation' by 'annotated'

    Returns 'annotation' unchanged if 'annotated' is not available

    Parameters
    ----------
    current_file : ProtocolFile
        Protocol file.
    existing_preprocessor : Preprocessor, optional
        When provided, this preprocessor must be used to get the initial
        'annotation' instead of getting it from 'current_file["annotation"]'

    Returns
    -------
    cropped_annotation : Annotation
        "annotation" cropped by "annotated".
    N
annotationr   r   r   )r   coversget_timeliner   )r   r   r   r   r   r   r   crop_annotationT   s   r   c                       s@   e Zd ZdZddee f fddZddedefd	d
Z	  Z
S )SegmentationProtocola  A protocol for segmentation experiments

    A segmentation protocol can be defined programmatically by creating
    a class that inherits from SegmentationProtocol and implements at
    least one of `train_iter`, `development_iter` and `test_iter` methods:

        >>> class MySegmentationProtocol(SegmentationProtocol):
        ...     def train_iter(self) -> Iterator[Dict]:
        ...         yield {"uri": "filename1",
        ...                "annotation": Annotation(...),
        ...                "annotated": Timeline(...)}
        ...         yield {"uri": "filename2",
        ...                "annotation": Annotation(...),
        ...                "annotated": Timeline(...)}

    `{subset}_iter` should return an iterator of dictionnaries with
        - "uri" key (mandatory) that provides a unique file identifier (usually
          the filename),
        - "annotation" key (mandatory for train and development subsets) that
          provides reference segmentation as a `pyannote.core.Annotation`
          instance,
        - "annotated" key (recommended) that describes which part of the file
          has been annotated, as a `pyannote.core.Timeline` instance. Any part
          of "annotation" that lives outside of the provided "annotated" will
          be removed. This is also used by `pyannote.metrics` to remove
          un-annotated regions from its evaluation report, and by
          `pyannote.audio` to not consider empty un-annotated regions as
          non-speech.
        - any other key that the protocol may provide.

    It can then be used in Python like this:

        >>> protocol = MySegmentationProtocol()
        >>> for file in protocol.train():
        ...    print(file["uri"])
        filename1
        filename2

    A segmentation protocol can also be defined using `pyannote.database`
    configuration file, whose (configurable) path defaults to "~/database.yml".

    ~~~ Content of ~/database.yml ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    Protocols:
      MyDatabase:
        Segmentation:
          MyProtocol:
            train:
                uri: /path/to/collection.lst
                annotation: /path/to/reference.rttm
                annotated: /path/to/reference.uem
                any_other_key: ... # see custom loader documentation
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    where "/path/to/collection.lst" contains the list of identifiers of the
    files in the collection:

    ~~~ Content of "/path/to/collection.lst ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    filename1
    filename2
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    "/path/to/reference.rttm" contains the reference segmentation using
    RTTM format:

    ~~~ Content of "/path/to/reference.rttm ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    SPEAKER filename1 1 3.168 0.800 <NA> <NA> music <NA> <NA>
    SPEAKER filename1 1 5.463 0.640 <NA> <NA> music <NA> <NA>
    SPEAKER filename1 1 5.496 0.574 <NA> <NA> speech <NA> <NA>
    SPEAKER filename1 1 10.454 0.499 <NA> <NA> speech <NA> <NA>
    SPEAKER filename2 1 2.977 0.391 <NA> <NA> noise <NA> <NA>
    SPEAKER filename2 1 18.705 0.964 <NA> <NA> noise <NA> <NA>
    SPEAKER filename2 1 22.269 0.457 <NA> <NA> music <NA> <NA>
    SPEAKER filename2 1 28.474 1.526 <NA> <NA> music <NA> <NA>
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    "/path/to/reference.uem" describes the annotated regions using UEM format:

    ~~~ Content of "/path/to/reference.uem ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    filename1 NA 0.000 30.000
    filename2 NA 0.000 30.000
    filename2 NA 40.000 70.000
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    It can then be used in Python like this:

        >>> from pyannote.database import registry
        >>> protocol = registry.get_protocol('MyDatabase.SpeakerDiarization.MyProtocol')
        >>> for file in protocol.train():
        ...    print(file["uri"])
        filename1
        filename2
    Npreprocessorsc                    sT   |d u rt  }tjt|dd d|d< tjt|dd d|d< t j|d d S )Nr   )r   r   )r!   )dict	functoolspartialr   r   r   super__init__)selfr!   	__class__r   r   r&      s   

zSegmentationProtocol.__init__trainsubsetr   c                 C   s   ddl m} d}d}d}i }t| | D ]5}||}|| 7 }|d }	||	  7 }|	 D ]\}
}|
|vr<d||
< ||
  |7  < q0|d7 }q||||d}|S )ab  Obtain global statistics on a given subset

        Parameters
        ----------
        subset : {'train', 'development', 'test'}

        Returns
        -------
        stats : dict
            Dictionary with the followings keys:
            * annotated: float
            total duration (in seconds) of the parts that were manually annotated
            * annotation: float
            total duration (in seconds) of actual annotations
            * n_files: int
            number of files in the subset
            * labels: dict
            maps classes with their total duration (in seconds)
           )get_annotatedr   r   r   r   )r   r   n_fileslabels)utilr-   getattrr   r   chart)r'   r+   r-   annotated_durationannotation_durationr.   r/   itemr   r   labelr   statsr   r   r   r7      s*   
zSegmentationProtocol.statsN)r*   )__name__
__module____qualname____doc__r   r	   r&   r   r   r7   __classcell__r   r   r(   r   r    |   s    ]r    r8   )typingr   r   protocolr   r   r   r   r	   pyannote.corer
   r   r   r#   r   r   r    r   r   r   r   <module>   s6   
+
(