o
    pi'G                     @   s   d Z ddlZddlZddlZddlZddlmZ ddlm	Z	 ddl
m
Z
 ddlZddlmZ ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ G dd dZdd ZdS )ah  
Pipeline

Usage:
  pyannote-pipeline train [options] [(--forever | --iterations=<iterations>)] <experiment_dir> <database.task.protocol>
  pyannote-pipeline best [options] <experiment_dir> <database.task.protocol>
  pyannote-pipeline apply [options] <train_dir> <database.task.protocol>
  pyannote-pipeline -h | --help
  pyannote-pipeline --version

Common options:
  <database.task.protocol>   Experimental protocol (e.g. "Etape.SpeakerDiarization.TV")
  --registry=<db.yml>        Path to, comma-separated, database configuration files.
                             [default: ~/.pyannote/db.yml]
  --subset=<subset>          Set subset. Defaults to 'development' in "train"
                             mode, and to 'test' in "apply" mode.

"train" mode:
  <experiment_dir>           Set experiment root directory. This script expects
                             a configuration file called "config.yml" to live
                             in this directory. See "Configuration file"
                             section below for more details.
  --iterations=<iterations>  Number of iterations. [default: 1]
  --forever                  Iterate forever.
  --sampler=<sampler>        Choose sampler between RandomSampler or TPESampler
                             [default: TPESampler].
  --pruner=<pruner>          Choose pruner between MedianPruner or
                             SuccessiveHalvingPruner. Defaults to no pruning.
  --pretrained=<train_dir>   Use parameters in existing training directory to
                             bootstrap the optimization process. In practice,
                             this will simply run a first trial with this set
                             of parameters.
  --average-case             Optimize for average case instead of worst case.

"apply" mode:
  <train_dir>                Path to the directory containing trained hyper-
                             parameters (i.e. the output of "train" mode).

  --use-filter               Apply pipeline only to files that pass the filter.

Configuration file:
    The configuration of each experiment is described in a file called
    <experiment_dir>/config.yml that describes the pipeline.

    ................... <experiment_dir>/config.yml ...................
    pipeline:
       name: Yin2018
       params:
          sad: tutorials/pipeline/sad
          scd: tutorials/pipeline/scd
          emb: tutorials/pipeline/emb
          metric: angular

    # preprocessors can be used to automatically add keys into
    # each (dict) file obtained from pyannote.database protocols.
    preprocessors:
       audio: ~/.pyannote/db.yml   # load template from YAML file
       video: ~/videos/{uri}.mp4   # define template directly

    # filters can be used to filter out some files from the protocol
    # (e.g. to only keep files with a specific number of speakers)
    filters:
        pyannote.audio.utils.protocol.FilterByNumberOfSpeakers:
            num_speakers: 2

    # one can freeze some hyper-parameters if needed (e.g. when
    # only part of the pipeline needs to be updated)
    freeze:
       speech_turn_segmentation:
          speech_activity_detection:
              onset: 0.5
              offset: 0.5

    # pyannote.audio pipelines will run on CPU by default.
    # use `device` key to send it to GPU.
    device: cuda
    ...................................................................

"train" mode:
    Tune the pipeline hyper-parameters
        <experiment_dir>/<database.task.protocol>.<subset>.yml

"best" mode:
    Display current best loss and corresponding hyper-paramters.

"apply" mode
    Apply the pipeline (with best set of hyper-parameters)

    N)Optional)Path)docopt)tqdm)datetime)
FileFinder)registry)get_annotated)get_class_by_name   )	Optimizerc                       s   e Zd ZdZdZdZdZed!dede	dd fd	d
Z
d!dede	f fddZ						d"dedee dee dedee dee de	fddZd#dedefddZ		d$dededee de	fdd Z  ZS )%
ExperimentzPipeline experiment

    Parameters
    ----------
    experiment_dir : `Path`
        Experiment root directory.
    training : `bool`, optional
        Switch to training mode
    z{experiment_dir}/config.ymlz*{experiment_dir}/train/{protocol}.{subset}z{train_dir}/apply/{date}F	train_dirtrainingreturnc                 C   sB   |j d }| ||d}|d }ttj||_|j| |S )a6  Load pipeline from train directory

        Parameters
        ----------
        train_dir : `Path`
            Path to train directory
        training : `bool`, optional
            Switch to training mode.

        Returns
        -------
        xp : `Experiment`
            Pipeline experiment.
        r   r   
params.yml)	parentsr   fromtimestampospathgetmtimemtime_	pipeline_load_params)clsr   r   experiment_dirxp
params_yml r   P/home/ubuntu/.local/lib/python3.10/site-packages/pyannote/pipeline/experiment.pyfrom_train_dir   s   
zExperiment.from_train_dirr   c                    s  t    || _| jj| jd}t|d}tj|tjd| _	W d    n1 s)w   Y  i }| j	
di  D ]@\}}t|trXt|d dd}|di |
di ||< q9z	t|d	||< W q9 tyy }	 z|}
|
||< W Y d }	~	q9d }	~	ww || _g  | j	
d
i  D ]\}}t|} |di | qdtf fdd}|| _| j	d d }t|dd}|di | j	d 
di | _d| j	v r| j	d }| j| d| j	v rdd l}|| j	d }| j| d S d S )N)r   rLoaderpreprocessorsnamezpyannote.pipeline)default_module_nameparams)database_ymlfiltersr   c                    s   t  fddD S )Nc                 3   s    | ]}| V  qd S )Nr   ).0fir   r    	<genexpr>   s    z;Experiment.__init__.<locals>.all_filters.<locals>.<genexpr>)allr-   r*   r-   r    all_filters   s   z(Experiment.__init__.<locals>.all_filterspipelinezpyannote.pipeline.blocksfreezedevicer   r   )super__init__r   
CONFIG_YMLformatopenyamlload
SafeLoaderconfig_getitems
isinstancedictr
   r   FileNotFoundErrorpreprocessors_appendboolfilters_r   r4   torchr5   to)selfr   r   
config_ymlfpr%   keypreprocessorKlassetemplater(   r2   pipeline_namerH   r5   	__class__r1   r    r7      sR   




zExperiment.__init__developmentNr   protocol_namesubset
pretrainedn_iterationssamplerpruneraverage_casec              
   C   s  t | jj| j||d}|jddd tj|| jd}	d}
t| j	|d |
|||d}| j	
 dkr3d	nd
}|d }tdddd}|d |d |rs|d }t|dd}tj|tjd}W d   n1 siw   Y  |d }nd}tt| jt|	| }|j||dd}z|j}W n ty } z|tj }W Y d}~nd}~ww |dk rt nt|}t||D ]3\}}|d }|| || k r|d }|}| j	j|||d dd| dd}|j|d |d	 qdS )ay  Train pipeline

        Parameters
        ----------
        protocol_name : `str`
            Name of pyannote.database protocol to use.
        subset : `str`, optional
            Use this subset for training. Defaults to 'development'.
        pretrained : Path, optional
            Use parameters in "pretrained" training directory to bootstrap the
            optimization process. In practice this will simply run a first trial
            with this set of parameters.
        n_iterations : `int`, optional
            Number of iterations. Defaults to 1.
        sampler : `str`, optional
            Choose sampler between RandomSampler and TPESampler
        pruner : `str`, optional
            Choose between MedianPruner or SuccessiveHalvingPruner.
        average_case : `bool`, optional
            Optimise for average case. Defaults to False (i.e. worst case).
        r   protocolrW   Tr   exist_okr%   defaulttrials.journal)db
study_namerZ   r[   r\   minimizer   r   trialr   )unitpositionleavezFirst trial in progressr"   moder#   Nr(   )
warm_startshow_progressloss)r(   rp   zBest trial: d   g%)desc) r   	TRAIN_DIRr9   r   mkdirr   get_protocolrD   r   r   get_directionr   set_descriptionupdater:   r;   r<   r=   listfilterrG   getattr	tune_iter	best_loss
ValueErrornpinf	itertoolscountrangezipdump_params)rJ   rV   rW   rX   rY   rZ   r[   r\   r   r^   re   	optimizer	directionr   progress_barpre_params_ymlrL   
pre_paramsrn   inputs
iterationsr   rP   r   r.   statusrp   best_paramsrt   r   r   r    train   sn   	



zExperiment.trainc           
   
   C   s   t | jj| j||d}d}t| j|d |d}z|j}W n ty4 } ztd W Y d}~dS d}~ww |j	}tdd| d	d
 t
j|dd}	t|	 dS )a  Print current best pipeline

        Parameters
        ----------
        protocol_name : `str`
            Name of pyannote.database protocol used for training.
        subset : `str`, optional
            Subset used for training. Defaults to 'development'.
        r]   rb   rc   )rd   re   z4Still waiting for at least one iteration to succeed.NzLoss = rq   rr   z&% with the following hyper-parameters:F)default_flow_style)r   ru   r9   r   r   r   r   r   printr   r;   dump)
rJ   rV   rW   r   re   r   r   rP   r   contentr   r   r    bestX  s,   
zExperiment.besttest
output_dir
use_filterc              
   C   s  t j|| jd}z| j }W n ty# } zd}W Y d}~nd}~ww |jddd |r<|| d| d| jj  }n|| d| d| jj  }t|ddS}	t	t
|| }
|rat| j|
}
d	| d
| d}t|
|ddD ]*}| |}| j|	| |dd}|du rd}|du rqqt|}||||d}qqW d   n1 sw   Y  |jd }| r|  || |du rd| d}t| dS |r|| d| d }n
|| d| d }t|d}	|	t| W d   dS 1 sw   Y  dS )zApply current best pipeline

        Parameters
        ----------
        protocol_name : `str`
            Name of pyannote.database protocol to process.
        subset : `str`, optional
            Subset to process. Defaults to 'test'
        ra   NTr_   .z_INCOMPLETE.wrl   zProcessing z ()file)iterablert   ri   
annotation)uemlatestzWFor some (possibly good) reason, the output of this pipeline could not be evaluated on z_INCOMPLETE.evalz.eval)r   rw   rD   r   
get_metricNotImplementedErrorrv   write_formatr:   r{   r}   r|   rG   r   writer?   r	   parentexistsunlink
symlink_tor   str)rJ   rV   r   rW   r   r^   metricrP   
output_extrL   filesrt   current_fileoutput	referencer   _r   msgoutput_evalr   r   r    apply}  sf   


"zExperiment.apply)F)rU   Nr   NNF)rU   )r   F)__name__
__module____qualname____doc__r8   ru   	APPLY_DIRclassmethodr   rF   r!   r7   r   r   intr   r   r   __classcell__r   r   rS   r    r      sR    
H
b)r   c               	   C   s  t tdd} | d dD ]}t| q| d }| d }| d rp|d u r'd}| d	 r.d
}nt| d }| d }| d }| d }|rLt| jdd}| d }t| d }	|	 jdd}	t	|	dd}
|
j
|||||||d | d r|d u rzd}t| d }	|	 jdd}	t	|	dd}
|
j||d | d r|d u rd}| d }t| d }| jdd}t	j|dd}
t|
jj||
jdd}|
j||||d d S d S )NzTunable pipelines)versionz
--registry,z<database.task.protocol>z--subsetr   rU   z	--foreverrg   z--iterationsz	--samplerz--prunerz--pretrainedT)strictz--average-casez<experiment_dir>r   )rW   rY   rX   rZ   r[   r\   r   F)rW   r   r   z--use-filterz<train_dir>z%Y%m%d-%H%M%S)r   date)rW   r   )r   r   splitr   load_databaser   r   
expanduserresolver   r   r   r!   r   r9   r   strftimer   )	argumentsr)   rV   rW   r   rZ   r[   rX   r\   r   
experimentr   r   r   r   r   r    main  sh   

r   )r   r   os.pathr;   numpyr   typingr   pathlibr   r   r   r   r   pyannote.databaser   r   r	   pyannote.core.utils.helperr
   r   r   r   r   r   r   r   r    <module>   s(   Z  J