o
    8wiZB                     @   s   d Z ddlZddlZddlZddlZddlmZ ddlm	Z	 ddl
m
Z
 ddlZddlmZ ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ G dd dZdd ZdS )a+  
Pipeline

Usage:
  pyannote-pipeline train [options] [(--forever | --iterations=<iterations>)] <experiment_dir> <database.task.protocol>
  pyannote-pipeline best [options] <experiment_dir> <database.task.protocol>
  pyannote-pipeline apply [options] <train_dir> <database.task.protocol>
  pyannote-pipeline -h | --help
  pyannote-pipeline --version

Common options:
  <database.task.protocol>   Experimental protocol (e.g. "Etape.SpeakerDiarization.TV")
  --registry=<db.yml>        Path to, comma-separated, database configuration files.
                             [default: ~/.pyannote/db.yml]
  --subset=<subset>          Set subset. Defaults to 'development' in "train"
                             mode, and to 'test' in "apply" mode.
  
"train" mode:
  <experiment_dir>           Set experiment root directory. This script expects
                             a configuration file called "config.yml" to live
                             in this directory. See "Configuration file"
                             section below for more details.
  --iterations=<iterations>  Number of iterations. [default: 1]
  --forever                  Iterate forever.
  --sampler=<sampler>        Choose sampler between RandomSampler or TPESampler
                             [default: TPESampler].
  --pruner=<pruner>          Choose pruner between MedianPruner or
                             SuccessiveHalvingPruner. Defaults to no pruning.
  --pretrained=<train_dir>   Use parameters in existing training directory to
                             bootstrap the optimization process. In practice,
                             this will simply run a first trial with this set
                             of parameters.
  --average-case             Optimize for average case instead of worst case.

"apply" mode:
  <train_dir>                Path to the directory containing trained hyper-
                             parameters (i.e. the output of "train" mode).

Configuration file:
    The configuration of each experiment is described in a file called
    <experiment_dir>/config.yml that describes the pipeline.

    ................... <experiment_dir>/config.yml ...................
    pipeline:
       name: Yin2018
       params:
          sad: tutorials/pipeline/sad
          scd: tutorials/pipeline/scd
          emb: tutorials/pipeline/emb
          metric: angular

    # preprocessors can be used to automatically add keys into
    # each (dict) file obtained from pyannote.database protocols.
    preprocessors:
       audio: ~/.pyannote/db.yml   # load template from YAML file
       video: ~/videos/{uri}.mp4   # define template directly

    # one can freeze some hyper-parameters if needed (e.g. when
    # only part of the pipeline needs to be updated)
    freeze:
       speech_turn_segmentation:
          speech_activity_detection:
              onset: 0.5
              offset: 0.5
    
    # pyannote.audio pipelines will run on CPU by default.
    # use `device` key to send it to GPU.
    device: cuda
    ...................................................................

"train" mode:
    Tune the pipeline hyper-parameters
        <experiment_dir>/<database.task.protocol>.<subset>.yml

"best" mode:
    Display current best loss and corresponding hyper-paramters.

"apply" mode
    Apply the pipeline (with best set of hyper-parameters)

    N)Optional)Path)docopt)tqdm)datetime)
FileFinder)registry)get_annotated)get_class_by_name   )	Optimizerc                       s   e Zd ZdZdZdZdZed dede	dd fd	d
Z
d dede	f fddZ						d!dedee dee dedee dee de	fddZd"dedefddZ	d#dededee fddZ  ZS )$
ExperimentzPipeline experiment

    Parameters
    ----------
    experiment_dir : `Path`
        Experiment root directory.
    training : `bool`, optional
        Switch to training mode
    z{experiment_dir}/config.ymlz*{experiment_dir}/train/{protocol}.{subset}z{train_dir}/apply/{date}F	train_dirtrainingreturnc                 C   sB   |j d }| ||d}|d }ttj||_|j| |S )a6  Load pipeline from train directory

        Parameters
        ----------
        train_dir : `Path`
            Path to train directory
        training : `bool`, optional
            Switch to training mode.

        Returns
        -------
        xp : `Experiment`
            Pipeline experiment.
        r   r   
params.yml)	parentsr   fromtimestampospathgetmtimemtime_	pipeline_load_params)clsr   r   experiment_dirxp
params_yml r   Y/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/pyannote/pipeline/experiment.pyfrom_train_dir   s   
zExperiment.from_train_dirr   c                    s  t    || _| jj| jd}t|d}tj|tjd| _	W d    n1 s)w   Y  i }| j	
di  D ]@\}}t|trXt|d dd}|di |
di ||< q9z	t|d	||< W q9 tyy }	 z|}
|
||< W Y d }	~	q9d }	~	ww || _| j	d
 d }t|dd}|di | j	d
 
di | _d| j	v r| j	d }| j| d| j	v rdd l}|| j	d }| j| d S d S )N)r   rLoaderpreprocessorsnamezpyannote.pipeline)default_module_nameparams)database_ymlpipelinezpyannote.pipeline.blocksfreezedevicer   r   )super__init__r   
CONFIG_YMLformatopenyamlload
SafeLoaderconfig_getitems
isinstancedictr
   r   FileNotFoundErrorpreprocessors_r   r+   torchr,   to)selfr   r   
config_ymlfpr%   keypreprocessorKlassetemplatepipeline_namer(   r<   r,   	__class__r   r    r.      sF   




zExperiment.__init__developmentNr   protocol_namesubset
pretrainedn_iterationssamplerpruneraverage_casec              
   C   s  t | jj| j||d}|jddd tj|| jd}	d}
t| j	|d |
|||d}| j	
 dkr3d	nd
}|d }tdddd}|d |d |rs|d }t|dd}tj|tjd}W d   n1 siw   Y  |d }nd}tt|	| }|j||dd}z|j}W n ty } z|tj }W Y d}~nd}~ww |dk rt nt|}t||D ]3\}}|d }|| || k r|d }|}| j	j|||d dd| dd}|j|d |d	 qdS )ay  Train pipeline

        Parameters
        ----------
        protocol_name : `str`
            Name of pyannote.database protocol to use.
        subset : `str`, optional
            Use this subset for training. Defaults to 'development'.
        pretrained : Path, optional
            Use parameters in "pretrained" training directory to bootstrap the
            optimization process. In practice this will simply run a first trial
            with this set of parameters.
        n_iterations : `int`, optional
            Number of iterations. Defaults to 1.
        sampler : `str`, optional
            Choose sampler between RandomSampler and TPESampler
        pruner : `str`, optional
            Choose between MedianPruner or SuccessiveHalvingPruner.
        average_case : `bool`, optional
            Optimise for average case. Defaults to False (i.e. worst case).
        r   protocolrK   Tr   exist_okr%   defaulttrials.journal)db
study_namerN   rO   rP   minimizer   r   trialr   )unitpositionleavezFirst trial in progressr"   moder#   Nr(   )
warm_startshow_progressloss)r(   rd   zBest trial: d   g%)desc)r   	TRAIN_DIRr0   r   mkdirr   get_protocolr;   r   r   get_directionr   set_descriptionupdater1   r2   r3   r4   listgetattr	tune_iter	best_loss
ValueErrornpinf	itertoolscountrangezipdump_params)r>   rJ   rK   rL   rM   rN   rO   rP   r   rR   rY   	optimizer	directionr   progress_barpre_params_ymlr@   
pre_paramsrb   inputs
iterationsrr   rD   rw   istatusrd   best_paramsrh   r   r   r    train   sn   	



zExperiment.trainc           
   
   C   s   t | jj| j||d}d}t| j|d |d}z|j}W n ty4 } ztd W Y d}~dS d}~ww |j	}tdd| d	d
 t
j|dd}	t|	 dS )a  Print current best pipeline

        Parameters
        ----------
        protocol_name : `str`
            Name of pyannote.database protocol used for training.
        subset : `str`, optional
            Subset used for training. Defaults to 'development'.
        rQ   rV   rW   )rX   rY   z4Still waiting for at least one iteration to succeed.NzLoss = re   rf   z&% with the following hyper-parameters:F)default_flow_style)r   ri   r0   r   r   r   rr   rs   printr   r2   dump)
r>   rJ   rK   r   rY   r{   rr   rD   r   contentr   r   r    bestD  s,   
zExperiment.besttest
output_dirc              
   C   s  t j|| jd}z| j }W n ty# } zd}W Y d}~nd}~ww |jddd || d| d| jj  }t|ddK}t	t
|| }	d| d	| d
}
t|	|
ddD ]*}| |}| j|| |dd}|du rrd}|du rwqXt|}||||d}qXW d   n1 sw   Y  |jd }| r|  || |du rd| d}t| dS || d| d }t|d}|t| W d   dS 1 sw   Y  dS )zApply current best pipeline

        Parameters
        ----------
        protocol_name : `str`
            Name of pyannote.database protocol to process.
        subset : `str`, optional
            Subset to process. Defaults to 'test'
        rU   NTrS   .wr`   zProcessing z ()file)iterablerh   r]   
annotation)uemlatestzWFor some (possibly good) reason, the output of this pipeline could not be evaluated on z.eval)r   rk   r;   r   
get_metricNotImplementedErrorrj   write_formatr1   ro   rp   r   writer6   r	   parentexistsunlink
symlink_tor   str)r>   rJ   r   rK   rR   metricrD   
output_extr@   filesrh   current_fileoutput	referencer   _r   msgoutput_evalr   r   r    applyi  sT   


"zExperiment.apply)F)rI   Nr   NNF)rI   )r   )__name__
__module____qualname____doc__r/   ri   	APPLY_DIRclassmethodr   boolr!   r.   r   r   intr   r   r   __classcell__r   r   rG   r    r      sL    
=
a&r   c               	   C   s  t tdd} | d dD ]}t| q| d }| d }| d rp|d u r'd}| d	 r.d
}nt| d }| d }| d }| d }|rLt| jdd}| d }t| d }	|	 jdd}	t	|	dd}
|
j
|||||||d | d r|d u rzd}t| d }	|	 jdd}	t	|	dd}
|
j||d | d r|d u rd}t| d }| jdd}t	j|dd}
t|
jj||
jdd}|
j|||d d S d S )NzTunable pipelines)versionz
--registry,z<database.task.protocol>z--subsetr   rI   z	--foreverr[   z--iterationsz	--samplerz--prunerz--pretrainedT)strictz--average-casez<experiment_dir>r   )rK   rM   rL   rN   rO   rP   r   F)rK   r   r   z<train_dir>z%Y%m%d-%H%M%S)r   date)r   r   splitr   load_databaser   r   
expanduserresolver   r   r   r!   r   r0   r   strftimer   )	argumentsr)   rJ   rK   r   rN   rO   rL   rP   r   
experimentr   r   r   r   r    main  sb   
r   )r   r   os.pathr2   numpyrt   typingr   pathlibr   r   rv   r   r   pyannote.databaser   r   r	   pyannote.core.utils.helperr
   r{   r   r   r   r   r   r   r    <module>   s(   R  -