o
    piz=                     @   s  d Z ddlmZ ddlZddlmZ ddlmZ ddl	Z	ddl
mZ ddlmZmZmZmZmZ ddlZdd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZ ddlmZmZ dd eddD Z dededeegef fddZ!dd Z"dededefddZ#dededed ed!edefd"d#Z$d$edede%fd%d&Z&						d6d'ed(ed)ed*ed$edefd+d,Z'						d6d'ed(ed)ed*ed$edefd-d.Z(d/d0 Z)d'ed(ed)efd1d2Z*d'ed(ed)ed3ededee+df fd4d5Z,dS )7zCustom protocols

Protocols:
  MyDatabase:
    Collection:
      MyProtocol:
        train:
          uris: xxx.lst
          annotation: xxx.rttm
          annotated: xxx.uem
    )PathN   )protocolProtocolFile)Number)TextDictCallableAnyUnion)Subset)SegmentationProtocol)SpeakerDiarizationProtocol)entry_points)get_annotated)load_lst
load_trialc                 C   s   i | ]}|j |qS  )name).0epr   r   L/home/ubuntu/.local/lib/python3.10/site-packages/pyannote/database/custom.py
<dictcomp>F   s    r   zpyannote.database.loader)grouptemplatedatabase_ymlreturnc                    sP   t }|jtvrd|j d}t|t|j   dtf fdd}|S )af  Get data loader based on template

    Parameters
    ----------
    template : str
        Path format template (e.g. "/path/to/{uri}.csv").
        Extension (here ".csv") determined which data loader to use.
    database_yml : Path
        Path to YAML configuration file, to which `template` is relative.
        Defaults to assume that `template` is absolute or relative to 
        current working directory.

    Returns
    -------
    data_loader : Callable[[ProtocolFile], Any]
        Callable that takes a ProtocolFile and returns some data.

    See also
    --------
    pyannote.database.loader
    zNo loader for files with '' suffixcurrent_filec                    sP   t tjdi t| }| s d| d d}t| |}|| S )NNo such file or directory: 'z' (via 'z' template).r   )resolve_pathr   formatabsis_fileFileNotFoundError)r   pathmsgloaderLoaderr   r   r   r   loadj   s   zTemplate.<locals>.load)r   suffixLOADERS
ValueErrorr+   r   )r   r   r&   r'   r+   r   r)   r   TemplateL   s   
r/   c                    s   dt f fdd}|S )Nr   c                    s    S Nr   )r   valuer   r   r+   y   s   zNumericValue.<locals>.loadr   )r2   r+   r   r1   r   NumericValuex   s   r3   r&   c                 C   s>   |   } |  r
| S |j|  }| r|S d|  d}t|)aV  Resolve path

    Parameters
    ----------
    path : `Path`
        Path. Can be either absolute, relative to current working directory, 
        or relative to `database_yml` parent directory.
    database_yml : `Path`
        Path to YAML configuration file. 

    Returns
    -------
    resolved_path: `Path`
        Resolved path.
    zCould not find file "z".)
expanduserr$   parentr%   )r&   r   relative_pathr'   r   r   r   r!   ~   s   
r!   meta_database	meta_taskmeta_protocolmeta_subsetsubset_entriesc                 c   sZ    ddl m} | D ]\}}||}	|D ]}
|
 d}t|	| D ]}|V  q#qqdS )a8  Meta-protocol method that iterates over a subset

    Parameters
    ----------
    meta_database : str
        "X"
    meta_task : str
        Task name (e.g. SpeakerDiarization, SpeakerVerification)
    meta_protocol : str
        Protocol name (e.g. MyProtocol)
    meta_subset : {"train", "development", "test"}
        Subset
    subset_entries : dict
        Subset entries.
            Etape.SpeakerDiarization.TV: [train]
            REPERE.SpeakerDiarization.Phase1: [train, development]
            REPERE.SpeakerDiarization.Phase2: [train, development]
    r   )registry_iterN) r<   itemsget_protocolgetattr)r7   r8   r9   r:   r;   r   r<   r   subsetspartial_protocolsubsetmethod_namefiler   r   r   meta_subset_iter   s   

rG   entriesc                 C   s  t  }|  D ]\}}|dks|dkrqt|tr t|||< qtt | \}}}}t	t
|t
dg dk}|rV|drNd}t| |dd }t||||< qtt||}	|	 skd|	 d	}t||	jtvr{d
|	j d}t|t|	j  }
|
|	||< q|S )a  Loads all Loaders for data type specified in 'entries' into a dict.

    Parameters
    ----------
    entries : Dict, optional
        Subset entries (eg 'uri', 'annotated', 'annotation', ...)
    database_yml : Path, optional
        Path to the 'database.yml' file

    Returns
    -------
    dict
        A dictionary mapping each key of entry (except 'uri' and 'trial')
        to a function that given a ProtocolFile returns the data type
        related to this entry.
    uritrialNr   _zSince version 4.1, pyannote.database is smart enough to know when paths defined in 'database.yml' contains placeholders. Remove the underscore (_) prefix to get rid of this warning.r   r    'zNo loader for file with 'r   )dictr?   
isinstancer   r3   zipstring	Formatterparselenset
startswithwarningswarnr/   r!   r   r$   r%   r,   r-   	TypeErrorr+   )rH   r   lazy_loaderkeyr2   rK   placeholdersis_templater'   r&   r*   r   r   r   gather_loaders   s4   




r]   databasetaskr   rD   c                 k   s    d|v r
|d }n3d|v r+|d }d| d| d| d| d| d}	t |	t nd| d| d| d| }	t|	ttt||}
t||d}|
D ]}t|||d	||d
V  qNdS )a  

    Parameters
    ----------
    database : str
        Database name (e.g. MyDatabase)
    task : str
        Task name (e.g. SpeakerDiarization, SpeakerVerification)
    protocol : str
        Protocol name (e.g. MyProtocol)
    subset : {"train", "development", "test"}
        Subset
    entries : dict
        Subset entries.
    database_yml : `Path`
        Path to the 'database.yml' file
    metadata : dict
        Additional metadata to be added to each ProtocolFile (such
        as "scope" or "classes")
    rI   urisz!Found deprecated 'uris' entry in .z+. Please use 'uri' (singular) instead, in 'z'.z!Missing mandatory 'uri' entry in rH   r   rI   r^   rD   lazyN)	rV   rW   DeprecationWarningr.   r   r!   r   r]   r   )selfr^   r_   r   rD   rH   r   metadatarI   r'   r`   rY   r   r   r   subset_iter  s&   

ri   c                 c   s    t ||d}t|d< t }ttt|d |D ]>}	|	d |	d }
}|
|vr7| t|
||d|d||
< ||vrJ| t|||d|d||< |	d ||
 || d	V  qd
S )a  

    Parameters
    ----------
    database : str
        Database name (e.g. MyDatabase)
    task : str
        Task name (e.g. SpeakerDiarization, SpeakerVerification)
    protocol : str
        Protocol name (e.g. MyProtocol)
    subset : {"train", "development", "test"}
        Subset
    entries : dict
        Subset entries.
    database_yml : `Path`
        Path to the 'database.yml' file
    rb   try_withrJ   uri1uri2rc   rd   	reference)rm   file1file2N)r]   r   rM   r   r!   r   
preprocessr   )rg   r^   r_   r   rD   rH   r   rY   filesrJ   rk   rl   r   r   r   subset_trialI  s2   


rr   c                    s    fdd}|S )Nc                    s(   t | j|    D ]}| j|  q
d S r0   )super	__class____init__register_protocol)rg   r   	protocolsr   r   init  s   zget_init.<locals>.initr   )rx   ry   r   rw   r   get_init  s   rz   c                 C   s   |  d| d| S )N__r   )r^   r_   r   r   r   r   get_custom_protocol_class_name  s   r|   protocol_entriesc                 C   s  zt t|dkr	dn| d}W n ty-   d|  d| d| d| d	}t| Y dS w |dkr6d	|i}t }t|trId
|v rI|d
|d
< t|trx|dd}|du rt| dkrtd|  d| d| d| d	}t| d|d< n||d< t }	|	 D ]c\}
}|
dvrd|  d| d| d|
 d| d|
 d}t
| q|
 d}| dkrtt| |||
|||	|< qtjtf| |||
||d||	|< d| v rtjt| |||
||d|	|
 d< qt| ||}t||f|	}|t |< |S )zCreate new protocol class

    Parameters
    ----------
    database : str
    task : str
    protocol : str
    protocol_entries : dict

    Returns
    -------
    CustomProtocol : type or None

    Protocolz
Ignoring 'ra   z' protocols found in z
 because 'z' tasks are not supported yet.N
Collectionrq   classesscopeXrL   z' found in za does not define the 'scope' of speaker labels (file, database, or global). Setting it to 'file'.rF   )rq   traindevelopmenttesttrain_trialdevelopment_trial
test_trialz ' entries are not supported yet.r=   )r^   r_   r   rD   rH   r   rJ   _trial)rA   protocol_moduleAttributeErrorprintrM   
issubclassr   popr   r?   rV   rW   	functoolspartialrG   partialmethodri   keysrr   r|   typeglobals)r^   r_   r   r}   r   
base_classr'   rh   r   methodsrD   r;   rE   custom_protocol_class_nameCustomProtocolClassr   r   r   create_protocol  s   


 





r   )NNNNNN)-__doc__pathlibr   rP   r>   r   r   #pyannote.database.protocol.protocolr   rV   numbersr   typingr   r	   r
   r   r   r   protocol.protocolr   protocol.segmentationr   protocol.speaker_diarizationr   importlib.metadatar   utilr   r(   r   r   r-   r/   r3   r!   rG   rM   r]   ri   rr   rz   r|   r   r   r   r   r   r   <module>   s     ,
&
Q
9
;	
