o
    8wi9                     @   s   d Z ddlZddlZddlZddlZddlmZmZmZm	Z	m
Z
mZmZ zddlmZ W n ey;   ddlmZ Y nw ed ZddddZed	 Ze	d
ge
f Zeeef ZG dd
 d
ejjZG dd dZdS )z 
#########
Protocols
#########

    N)UnionDictIteratorCallableAnyTextOptional)Literal)traindevelopmenttesttrndevtst)filedatabaseglobalProtocolFilec                   @   s~   e Zd ZdZddeed f defddZdd Zd	d
 Zdd Z	dd Z
dd Zdd Zdd Zdd Zded  fddZdS )r   a  Protocol file with lazy preprocessors

    This is a dict-like data structure where some values may depend on other
    values, and are only computed if/when requested. Once computed, they are
    cached and never recomputed again.

    Parameters
    ----------
    precomputed : dict
        Regular dictionary with precomputed values
    lazy : dict, optional
        Dictionary describing how lazy value needs to be computed.
        Values are callable expecting a dictionary as input and returning the
        computed value.

    Nprecomputedlazyc                 C   s   |d u rt  }t|tr2t|| _t|jt|@ D ]	}|| | j|< qt |j}|| || _n
t || _t || _t	 | _
t | _d S N)dict
isinstancer   abs_storesetr   update	threadingRLocklock_collectionsCounterevaluating_)selfr   r   keycompound_lazy r&   `/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/pyannote/database/protocol/protocol.py__init__I   s   






zProtocolFile.__init__c                 C   s   t | j}|d= |S )Nr   )r   __dict__r#   dr&   r&   r'   __getstate__o   s   
zProtocolFile.__getstate__c                 C   s   | j | t | _d S r   )r)   r   r   r   r   r*   r&   r&   r'   __setstate__u   s   zProtocolFile.__setstate__c                 C   s4   | j  t| jW  d    S 1 sw   Y  d S r   )r   r   r   r#   r&   r&   r'   __abs__y   s   $zProtocolFile.__abs__c                 C   s   | j N || jv rE| j| dkrE| j|g | j| | }| j|= || jv r9|| j| kr9d}t|j|d || j|< | j|g | j| W  d    S 1 sTw   Y  d S )Nr   zEExisting precomputed key "{key}" has been modified by a preprocessor.)r$   )	r   r   r"   r   r   warningswarnformatsubtract)r#   r$   valuemsgr&   r&   r'   __getitem__}   s   
$zProtocolFile.__getitem__c                 C   sH   | j  || jv r| j|= || j|< W d    d S 1 sw   Y  d S r   r   r   r   )r#   r$   r4   r&   r&   r'   __setitem__   s
   
"zProtocolFile.__setitem__c                 C   sF   | j  || jv r| j|= | j|= W d    d S 1 sw   Y  d S r   r7   )r#   r$   r&   r&   r'   __delitem__   s
   

"zProtocolFile.__delitem__c                 c   sn    | j ) t| j}|D ]}|V  qt| j}|D ]}|| jv r!q|V  qW d    d S 1 s0w   Y  d S r   )r   listr   r   )r#   
store_keysr$   	lazy_keysr&   r&   r'   __iter__   s   


"zProtocolFile.__iter__c                 C   sB   | j  tt| jt| jB W  d    S 1 sw   Y  d S r   )r   lenr   r   r   r.   r&   r&   r'   __len__   s   $zProtocolFile.__len__returnc           
      c   s    | d }t |ts| V  dS t|}d|i}t|  D ]2\}}|dkr&qt |ts3t|||< qt||krKd| d| dt| d}t||||< qt| }t	|
  D ]}tt	||}	t|	| jV  q\dS )a>  Iterate over all files

        When `current_file` refers to only one file,
            yield it and return.
        When `current_file` refers to a list of file (i.e. 'uri' is a list),
            yield each file separately.

        Examples
        --------
        >>> current_file = ProtocolFile({
        ...     'uri': 'my_uri',
        ...     'database': 'my_database'})
        >>> for file in current_file.files():
        ...     print(file['uri'], file['database'])
        my_uri my_database

        >>> current_file = {
        ...     'uri': ['my_uri1', 'my_uri2', 'my_uri3'],
        ...     'database': 'my_database'}
        >>> for file in current_file.files():
        ...     print(file['uri'], file['database'])
        my_uri1 my_database
        my_uri2 my_database
        my_uri3 my_database

        uriNz#Mismatch between number of "uris" (z) and number of "z" (z).)r   r:   r>   r   items	itertoolsrepeat
ValueErrorkeyszipvaluesr   r   r   )
r#   urisn_urisr   r$   r4   r5   rF   rH   precomputed_oner&   r&   r'   files   s4   


zProtocolFile.filesr   )__name__
__module____qualname____doc__r   r   r(   r,   r-   r/   r6   r8   r9   r=   r?   r   rL   r&   r&   r&   r'   r   7   s    &c                       s   e Zd ZdZddee f fddZdeee	f de	fdd	Z
d
d Zdeeee	f  fddZdeeee	f  fddZdeeee	f  fddZdedee	 fddZdee	 fddZdee	 fddZdee	 fddZdee	 fddZ  ZS )Protocola	  Experimental protocol

    An experimental protocol usually defines three subsets: a training subset,
    a development subset, and a test subset.

    An experimental protocol can be defined programmatically by creating a
    class that inherits from Protocol and implements at least
    one of `train_iter`, `development_iter` and `test_iter` methods:

        >>> class MyProtocol(Protocol):
        ...     def train_iter(self) -> Iterator[Dict]:
        ...         yield {"uri": "filename1", "any_other_key": "..."}
        ...         yield {"uri": "filename2", "any_other_key": "..."}

    `{subset}_iter` should return an iterator of dictionnaries with
        - "uri" key (mandatory) that provides a unique file identifier (usually
          the filename),
        - any other key that the protocol may provide.

    It can then be used in Python like this:

        >>> protocol = MyProtocol()
        >>> for file in protocol.train():
        ...    print(file["uri"])
        filename1
        filename2

    An experimental protocol can also be defined using `pyannote.database`
    configuration file, whose (configurable) path defaults to "~/database.yml".

    ~~~ Content of ~/database.yml ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    Protocols:
      MyDatabase:
        Protocol:
          MyProtocol:
            train:
                uri: /path/to/collection.lst
                any_other_key: ... # see custom loader documentation
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    where "/path/to/collection.lst" contains the list of identifiers of the
    files in the collection:

    ~~~ Content of "/path/to/collection.lst ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    filename1
    filename2
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    It can then be used in Python like this:

        >>> from pyannote.database import registry
        >>> protocol = registry.get_protocol('MyDatabase.Protocol.MyProtocol')
        >>> for file in protocol.train():
        ...    print(file["uri"])
        filename1
        filename2

    This class is usually inherited from, but can be used directly.

    Parameters
    ----------
    preprocessors : dict
        Preprocess protocol files so that `file[key] = preprocessors[key](file)`
        for each key in `preprocessors`. In case `preprocessors[key]` is not
        callable, it should be a string containing placeholders for `file` keys
        (e.g. {'audio': '/path/to/{uri}.wav'})
    Npreprocessorsc                    s   t    |d u rt }t | _| D ],\}}t|r"|| j|< qt|tr7t|  fdd}|| j|< qd| d}t|d S )Nc                    s    j di | S )Nr&   )r2   )current_filepreprocessor_copyr&   r'   funcK  s   zProtocol.__init__.<locals>.func"z2" preprocessor is neither a callable nor a string.)	superr(   r   rR   rB   callabler   strrE   )r#   rR   r$   preprocessorrV   r5   	__class__rT   r'   r(   :  s   

zProtocol.__init__rS   r@   c                 C   s   t || jdS )N)r   )r   rR   )r#   rS   r&   r&   r'   
preprocessT  s   zProtocol.preprocessc                 C   s   | j S r   )rP   r.   r&   r&   r'   __str__W  s   zProtocol.__str__c                 C      t  )z)Iterate over files in the training subsetNotImplementedErrorr.   r&   r&   r'   
train_iterZ     zProtocol.train_iterc                 C   r`   )z,Iterate over files in the development subsetra   r.   r&   r&   r'   development_iter^  rd   zProtocol.development_iterc                 C   r`   )z%Iterate over files in the test subsetra   r.   r&   r&   r'   	test_iterb  rd   zProtocol.test_itersubsetc                 c   s    zt | | d }W n, ttfy8   t| }zt | | d }W n ty5   d| d}t|w Y nw |D ]}| |V  q;d S )N_iterzProtocol does not implement a z subset.)getattrAttributeErrorrb   LEGACY_SUBSET_MAPPINGr^   )r#   rg   rL   subset_legacyr5   r   r&   r&   r'   subset_helperf  s    zProtocol.subset_helperc                 C   
   |  dS )Nr
   rm   r.   r&   r&   r'   r
   y     
zProtocol.trainc                 C   rn   )Nr   ro   r.   r&   r&   r'   r   |  rp   zProtocol.developmentc                 C   rn   )Nr   ro   r.   r&   r&   r'   r     rp   zProtocol.testc                 #   s    ddl m} t }dD ]2 t sq fdd}| D ]}d|vr%q| D ]}||}||v r4q)|V  || q)qqdS )z$Iterate over all files in `protocol`r   )get_unique_identifier)	r   development_enrolmentdevelopment_trialr   test_enrolment
test_trialr
   train_enrolmenttrain_trialc               	   3   s:    zt   D ]} | V  qW d S  ttfy   Y d S w r   )ri   rj   rb   )r   methodr#   r&   r'   iterate  s   zProtocol.files.<locals>.iteraterA   N)pyannote.database.utilrq   r   hasattrrL   add)r#   rq   yielded_urisrz   rS   current_file_rA   r&   rx   r'   rL     s&   

zProtocol.filesr   )rM   rN   rO   rP   r   Preprocessorsr(   r   r   r   r^   r_   r   rc   re   rf   Subsetrm   r
   r   r   rL   __classcell__r&   r&   r\   r'   rQ      s    DrQ   )rP   r0   r    r   rC   typingr   r   r   r   r   r   r   r	   ImportErrortyping_extensionsr   rk   ScopePreprocessorr   abcMutableMappingr   rQ   r&   r&   r&   r'   <module>   s&   $ ?