o
    pi                     @   sH   d dl mZmZ ddlmZ ddlmZ ddlmZ G dd deZdS )	    )DictIterator   )SpeakerDiarizationProtocol)Subset)LEGACY_SUBSET_MAPPINGc                   @   s   e Zd ZdZdedee fddZdee fddZdee fdd	Z	dee fd
dZ
dee fddZdee fddZdee fddZdS )SpeakerVerificationProtocola  A protocol for speaker verification experiments

    A speaker verification protocol can be defined programmatically by creating
    a class that inherits from `SpeakerVerificationProtocol` and implement at
    least one of `train_trial_iter`, `development_trial_iter` and
    `test_trial_iter` methods:

        >>> class MySpeakerVerificationProtocol(SpeakerVerificationProtocol):
        ...     def train_trial_iter(self) -> Iterator[Dict]:
        ...         yield {"reference": 0,
        ...                "file1": {
        ...                     "uri":"filename1",
        ...                     "try_with":Timeline(...)
        ...                },
        ...                "file2": {
        ...                     "uri":"filename3",
        ...                     "try_with":Timeline(...)
        ...                },
        ...         }

    `{subset}_trial_iter` should return an iterator of dictionnaries with

    - `reference` key (mandatory) that provides an int portraying whether
      `file1` and `file2` are uttered by the same speaker (1 is same, 0 is
      different),
    - `file1` key (mandatory) that provides the first file,
    - `file2` key (mandatory) that provides the second file.

    Both `file1` and `file2` should be provided as dictionaries or ProtocolFile
    instances with

    - `uri` key (mandatory),
    - `try_with` key (mandatory) that describes which part of the file should
      be used in the validation process, as a `pyannote.core.Timeline` instance.
    - any other key that the protocol may provide.

    It can then be used in Python like this:

        >>> protocol = MySpeakerVerificationProtocol()
        ... for trial in protocol.train_trial():
        ...     print(f"{trial['reference']} {trial['file1']['uri']} {trial['file2']['uri']}")
        1 filename1 filename2
        0 filename1 filename3

    A speaker verification protocol can also be defined using `pyannote.database`
    configuration file, whose (configurable) path defaults to "~/database.yml".

    ~~~ Content of ~/database.yml ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    Protocols:
        MyDatabase:
        SpeakerVerification:
            MyProtocol:
            train:
                uri: /path/to/train.lst
                duration: /path/to/duration.map
                trial: /path/to/trial.txt
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    where `/path/to/train.lst` contains the list of identifiers of the
    files in the collection:

    ~~~ Content of /path/to/train.lst~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    filename1
    filename2
    filename3
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    `/path/to/duration.map` contains the duration of the files:

    ~~~ Content of /path/to/duration.map ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    filename1 30.000
    filename2 30.000
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    `/path/to/trial.txt` contains a list of trials :

    ~~~ Content of /path/to/trial ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    1 filename1 filename2
    0 filename1 filename3
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    `1` stands for _target_ trials and `0` for _non-target_ trials.
    In the example below, it means that the same speaker uttered files
    `filename1` and `filename2` and that `filename1` and `filename3` are from
    two different speakers.

    It can then be used in Python like this:

        >>> from pyannote.database import registry
        >>> protocol = registry.get_protocol('MyDatabase.SpeakerVerification.MyProtocol')
        >>> for trial in protocol.train_trial():
        ...     print(f"{trial['reference']} {trial['file1']['uri']} {trial['file2']['uri']}")
        1 filename1 filename2
        0 filename1 filename3

    Note that speaker verification protocols (`SpeakerVerificationProtocol`)
    are a subclass of speaker diarization protocols (`SpeakerDiarizationProtocol`).
    As such, they also define regular `{subset}` methods.
    subsetreturnc                 c   s    zt | | d }W n+ ttfy7   t| }zt | | d }W n ty4   | d}t|w Y nw |D ]}| |d |d< | |d |d< |V  q:d S )N_trial_iter	_try_iterz_trial_iter is not implemented.file1file2)getattrAttributeErrorNotImplementedErrorr   
preprocess)selfr	   trialssubset_legacymsgtrial r   c/home/ubuntu/.local/lib/python3.10/site-packages/pyannote/database/protocol/speaker_verification.pysubset_trial_helper   s$   
z/SpeakerVerificationProtocol.subset_trial_helperc                 C      t  )z'Iterate over trials in the train subsetr   r   r   r   r   train_trial_iter      z,SpeakerVerificationProtocol.train_trial_iterc                 C   r   )z-Iterate over trials in the development subsetr   r   r   r   r   development_trial_iter   r   z2SpeakerVerificationProtocol.development_trial_iterc                 C   r   )z&Iterate over trials in the test subsetr   r   r   r   r   test_trial_iter   r   z+SpeakerVerificationProtocol.test_trial_iterc                 C   
   |  dS )Ntrainr   r   r   r   r   train_trial      
z'SpeakerVerificationProtocol.train_trialc                 C   r"   )Ndevelopmentr$   r   r   r   r   development_trial   r&   z-SpeakerVerificationProtocol.development_trialc                 C   r"   )Ntestr$   r   r   r   r   
test_trial   r&   z&SpeakerVerificationProtocol.test_trialN)__name__
__module____qualname____doc__r   r   r   r   r   r    r!   r%   r(   r*   r   r   r   r   r   $   s    dr   N)	typingr   r   speaker_diarizationr   protocolr   r   r   r   r   r   r   <module>   s
   