o
    pi~                     @  s  d dl mZ d dlZd dlZd dlZd dlZd dlmZ d dlm	Z	 d dl
mZ d dlmZmZ d dlmZ d dlmZ d d	lmZ d d
lmZmZmZmZmZmZmZmZ d dlZd dlZ d dl!Z"d dl#Z#d dl$m%Z%m&Z& d dl'm(Z( d dl)m*Z* d dl+m,Z,m-Z- d dl.m/Z/m0Z0m1Z1 d dl2m3Z3 d dl4m5Z5 d dl6m7Z7m8Z8 e9e-j:Z;e9e,j:Z<G dd deZ=G dd deZ>G dd de?Z@e	G dd dZAG dd de1ZBG dd de0ZCd'd#d$ZDG d%d& d&ejEZFdS )(    )annotationsN)defaultdict)	dataclass)Enum)cached_propertypartial)Number)Path)mkstemp)DictListLiteralOptionalSequenceTextTupleUnion)binary_cross_entropynll_loss)check_protocol)Protocol)ScopeSubset)
DataLoaderDatasetIterableDataset)Identity)BaseWaveformTransform)MetricMetricCollectionc                   @  s    e Zd ZdZdZdZdZdZdS )Problemr               N)__name__
__module____qualname__BINARY_CLASSIFICATIONMONO_LABEL_CLASSIFICATIONMULTI_LABEL_CLASSIFICATIONREPRESENTATION
REGRESSION r-   r-   L/home/ubuntu/.local/lib/python3.10/site-packages/pyannote/audio/core/task.pyr    ;   s    r    c                   @  s   e Zd ZdZdZdS )
Resolutionr!   r"   N)r%   r&   r'   FRAMECHUNKr-   r-   r-   r.   r/   G   s    r/   c                   @  s   e Zd ZdS )UnknownSpecificationsErrorN)r%   r&   r'   r-   r-   r-   r.   r2   L   s    r2   c                   @  s   e Zd ZU ded< ded< ded< dZded	< d
Zded< dZded< dZded< dZded< e	dddZ
e	dddZdd Zdd ZdS ) Specificationsr    problemr/   
resolutionfloatdurationNOptional[float]min_duration)        r:   zOptional[Tuple[float, float]]warm_upzOptional[List[Text]]classesOptional[int]powerset_max_classesFboolpermutation_invariantreturnc                 C  s&   | j d u rdS | jtjkrtddS )NFzQ`powerset_max_classes` only makes sense with multi-class classification problems.T)r>   r4   r    r)   
ValueErrorselfr-   r-   r.   powersetm   s   
zSpecifications.powersetintc                   s&   t t fddtd jd D S )Nc                 3  s$    | ]}t jt j|V  qd S N)scipyspecialbinomlenr<   ).0irC   r-   r.   	<genexpr>   s
    
z6Specifications.num_powerset_classes.<locals>.<genexpr>r   r!   )rF   sumranger>   rC   r-   rC   r.   num_powerset_classesy   s
   z#Specifications.num_powerset_classesc                 C     dS )Nr!   r-   rC   r-   r-   r.   __len__      zSpecifications.__len__c                 c  s    | V  d S rG   r-   rC   r-   r-   r.   __iter__   s   
zSpecifications.__iter__rA   r?   )rA   rF   )r%   r&   r'   __annotations__r9   r;   r<   r>   r@   r   rE   rQ   rS   rU   r-   r-   r-   r.   r3   P   s   
 r3   c                      .   e Zd Zd	 fddZdd Zdd Z  ZS )
TrainDatasettaskTaskc                      t    || _d S rG   super__init__rZ   rD   rZ   	__class__r-   r.   r_         

zTrainDataset.__init__c                 C  
   | j  S rG   )rZ   train__iter__rC   r-   r-   r.   rU         
zTrainDataset.__iter__c                 C  rd   rG   )rZ   train__len__rC   r-   r-   r.   rS      rf   zTrainDataset.__len__rZ   r[   )r%   r&   r'   r_   rU   rS   __classcell__r-   r-   ra   r.   rY          rY   c                      rX   )

ValDatasetrZ   r[   c                   r\   rG   r]   r`   ra   r-   r.   r_      rc   zValDataset.__init__c                 C  s   | j |S rG   )rZ   val__getitem__)rD   idxr-   r-   r.   __getitem__   s   zValDataset.__getitem__c                 C  rd   rG   )rZ   
val__len__rC   r-   r-   r.   rS      rf   zValDataset.__len__rh   )r%   r&   r'   r_   rn   rS   ri   r-   r-   ra   r.   rk      rj   rk   valuerF   rA   strc                   s.   g d} fdd|D }|sdS |d d S )aM  Return the most suitable type for storing the
    value passed in parameter in memory.

    Parameters
    ----------
    value: int
        value whose type is best suited to storage in memory

    Returns
    -------
    str:
        numpy formatted type
        (see https://numpy.org/doc/stable/reference/arrays.dtypes.html)
    ))   b)i   i2)l        rM   c                   s$   g | ]\}}|t  kr||fqS r-   )abs)rL   max_valtyperp   r-   r.   
<listcomp>   s    zget_dtype.<locals>.<listcomp>i8r   r!   r-   )rp   
types_listfiltered_listr-   rx   r.   	get_dtype   s   
r}   c                      s,  e Zd ZdZ									dYdZ fddZdd Zd[d!d"Zd\d#d$Zed]d&d'Z	e	j
d^d*d'Z	ed_d,d-Zej
d`d/d-Zd0d1 Zd2d3 Zd4d5 Zdad7d8Zdbd:d;Z	d\dcd>d?ZdddCdDZdedEdFZdGdH ZdIdJ ZdfdLdMZdedNdOZdgdPdQZedhdSdTZdUdV ZedWdX Z  ZS )ir[   u
  Base task class

    A task is the combination of a "problem" and a "dataset".
    For example, here are a few tasks:
    - voice activity detection on the AMI corpus
    - speaker embedding on the VoxCeleb corpus
    - end-to-end speaker diarization on the VoxConverse corpus

    A task is expected to be solved by a "model" that takes an
    audio chunk as input and returns the solution. Hence, the
    task is in charge of generating (input, expected_output)
    samples used for training the model.

    Parameters
    ----------
    protocol : Protocol
        pyannote.database protocol
    cache : str, optional
        As (meta-)data preparation might take a very long time for large datasets,
        it can be cached to disk for later (and faster!) re-use.
        When `cache` does not exist, `Task.prepare_data()` generates training
        and validation metadata from `protocol` and save them to disk.
        When `cache` exists, `Task.prepare_data()` is skipped and (meta)-data
        are loaded from disk. Defaults to a temporary path.
    duration : float, optional
        Chunks duration in seconds. Defaults to two seconds (2.).
    min_duration : float, optional
        Sample training chunks duration uniformely between `min_duration`
        and `duration`. Defaults to `duration` (i.e. fixed length chunks).
    warm_up : float or (float, float), optional
        Use that many seconds on the left- and rightmost parts of each chunk
        to warm up the model. This is mostly useful for segmentation tasks.
        While the model does process those left- and right-most parts, only
        the remaining central part of each chunk is used for computing the
        loss during training, and for aggregating scores during inference.
        Defaults to 0. (i.e. no warm-up).
    batch_size : int, optional
        Number of training samples per batch. Defaults to 32.
    num_workers : int, optional
        Number of workers used for generating training samples.
        Defaults to multiprocessing.cpu_count() // 2.
    pin_memory : bool, optional
        If True, data loaders will copy tensors into CUDA pinned
        memory before returning them. See pytorch documentation
        for more details. Defaults to False.
    augmentation : BaseWaveformTransform, optional
        torch_audiomentations waveform transform, used by dataloader
        during training.
    metric : optional
        Validation metric(s). Can be anything supported by torchmetrics.MetricCollection.
        Defaults to value returned by `default_metric` method.

    Attributes
    ----------
    specifications : Specifications or tuple of Specifications
        Task specifications (available after `Task.setup` has been called.)

    N       @r:       Fprotocolr   cacheOptional[Union[str, None]]r7   r6   r9   r8   r;   !Union[float, Tuple[float, float]]
batch_sizerF   num_workersr=   
pin_memoryr?   augmentationOptional[BaseWaveformTransform]metric2Union[Metric, Sequence[Metric], Dict[str, Metric]]c                   s  t    t|\| _}|d | _|d | _| jstd|d | _|r(t|n|| _	|| _
|d u r4|n|| _|| _t|trC||f}|| _|d u rPt d }|dkrntjdkrntjd dkrntjd	 d
krntd d}|| _|| _|	pztdd| _|
| _d S )Nhas_validation	has_scopezQProtocol must provide 'scope' information (e.g. 'file', 'database', or 'global').has_classesr"   r   darwinr#   r!      zUnum_workers > 0 is not supported with macOS and Python 3.8+: setting num_workers = 0.dict)output_type)r^   r_   r   r   r   r   rB   r   r	   r   r7   r9   r   
isinstancer   r;   multiprocessing	cpu_countsysplatformversion_infowarningswarnr   r   r   r   _metric)rD   r   r   r7   r9   r;   r   r   r   r   r   checksra   r-   r.   r_      s<   






zTask.__init__c           &   
     s  | j r| j  r| j  jdkrdS | j jjddd ntt d | _ tt	t
d< td< t	 }t	  t	 }t	 }t	 }t	 }t	 }t	 }i }| jrftttd| j ttd	| j }	nttd| j }	d}
d}t|	D ]C\}\}}t }|d
 d
 vrd
 |d
  d
 |d
 |d
< t
||d< t|d |d< t|tg d }|D ]<}|| }t|tr|| vrӈ| | | |||< qt|tr|||< qtjd| dt| dtd q | t	 }|t|d  |
}d}|d D ]!}|j | j k rq||j |j!f}|| ||j 7 }|
d7 }
q|| |||
f |}|d j"ddD ]l\}}}t|d }||vr_|| ||}d }}|dkr|d
 }||vrzg ||< ||| vr|| | || |}|dkr||vr|| ||}|||j!|j#|||f |d7 }qI|||f qyfdd D   fddtD } dt$t%dd |D fddg}!dt$t%dd |D fddd t$t%d!d |D fd"t$t%d#d |D fd$t$t%d%d |D fg}"i }#| jj&|#d&< t'j(|t'j)d'|#d(< |*  t'j( | d'|#d)<  *  t'(||#d*< |*  t'j(||!d'|#d+< |*  t'j(|d,d-gd'|#d.< |*  t'j(||"d'|#d/< |*  t'j(|d,d-gd'|#d0< |*  |#d1< |+ D ]\}}$t'j(|$t'j)d'|#d2| d3< q|*  t'j(|t'j)d'|#d4< |*  | jr| ,|# | -|# t.| j d5}%t'j/|%fi |# W d   dS 1 sw   Y  dS )6a  Use this to prepare data from task protocol

        Notes
        -----
        Called only once on the main process (and only on it), for global_rank 0.

        After this method is called, the task should have a `prepared_data` attribute
        with the following dictionary structure:

        prepared_data = {
            'protocol': name of the protocol
            'audio-path': array of N paths to audio
            'audio-metadata': array of N audio infos such as audio subset, scope and database
            'audio-annotated': array of N annotated duration (usually equals file duration but might be shorter if file is not fully annotated)
            'annotations-regions': array of M annotated regions
            'audio-regions-ids': array of N start/end indices of annotated regions
            'annotations-segments': array of M' annotated segments
            'audio-segments-ids': array of N start/end indices of annotated segments
            'metadata-values': dict of lists of values for subset, scope and database
            'metadata-`database-name`-labels': array of `database-name` labels. Each database with "database" scope labels has it own array.
            'metadata-labels': array of global scope labels
        }

        r   NT)parentsexist_okr!   subsetscopetraindevelopmentdatabase)urir   r   audior   r<   
annotation	annotatedz
Ignoring 'z ' metadata because of its type (z*). Only str and int are supported for now.)categoryr   r:   r   r   )yield_labelc                   s"   g | ] t  fd dD qS )c                 3  s    | ]	}  |d V  qdS )r   N)getrL   key	metadatumr-   r.   rN   	  s    /Task.prepare_data.<locals>.<listcomp>.<genexpr>)tuple)rL   )metadata_unique_valuesr   r.   ry     s    z%Task.prepare_data.<locals>.<listcomp>c                   s.   g | ]\ }|t t fd dD fqS )c                 3  s    | ]}|  V  qd S rG   r-   )rL   mrM   r-   r.   rN         r   )r}   maxr   )metadatar   r.   ry     s    file_idc                 s      | ]}|d  V  qdS r   Nr-   )rL   arr-   r-   r.   rN     r   z$Task.prepare_data.<locals>.<genexpr>)r7   f)startr   c                 s  r   r   r-   rL   ar-   r-   r.   rN     r   )endr   file_label_idxc                 s  r   )r#   Nr-   r   r-   r-   r.   rN   #  r   database_label_idxc                 s  r   )r$   Nr-   r   r-   r-   r.   rN   $  r   global_label_idxc                 s  r   )   Nr-   r   r-   r-   r.   rN   %  r   r   )dtypez
audio-pathzaudio-metadatazaudio-annotatedzannotations-regions)r   rM   )r   rM   zaudio-regions-idszannotations-segmentszaudio-segments-idszmetadata-valuesz	metadata-z-labelszmetadata-labelswb)0r   existsstatst_sizeparentmkdirr	   r
   r   listSubsetsScopesr   	itertoolschainziprepeatr   r   r   	enumerater   appendindexsetr   rq   rF   r   r   rw   UserWarningr7   r   
itertracksr   r}   r   namenparraystr_clearitemsprepare_validationpost_prepare_dataopensavez_compressed)&rD   audiosannotated_durationannotated_regionsaudio_regions_idsr   audio_segments_idsunique_labelsdatabase_unique_labels
files_iter
regions_idsegments_idr   r   filer   remaining_metadata_keysr   rp   file_unique_labels_regions_id_annotated_durationsegmentannotated_region_segments_id_labelr   r   r   r   r   metadata_dtyperegion_dtypesegment_dtypeprepared_datalabels
cache_filer-   )r   r   r.   prepare_data5  s6  























$zTask.prepare_datar   r   c                 C  rR   )a  Method for completing `prepared_data` with task-specific data.
        For instance, for a classification task, this could be a list of
        possible classes.

        Parameters
        ----------
        prepared_data: dict
            dictionnary containing protocol data prepared by
            `prepare_data()`
        Note
        ----
        This method does not return anything. Thus, user have to directly modify
        `prepared_data`, for updates to be taken into account
        Nr-   )rD   r   r-   r-   r.   r   _  s   zTask.post_prepare_datac                 C  s   |dkr| j j| j| _z"t| jd}ttj|dd| _W d   n1 s)w   Y  W n t	y<   t
dd  w | jj| jd krVtd	| jj d
| jd  ddS )z>Setup data cached by prepare_data into the task on each devicefitrbT)allow_pickleNzICached data for protocol not found. Ensure that prepare_data() was calledzJ and executed correctly or/and that the path to the task cache is correct.r   z!Protocol specified for the task (z)) does not correspond to the cached one ())trainerstrategy	broadcastr   r   r   r   loadr   FileNotFoundErrorprintr   r   rB   )rD   stager   r-   r-   r.   setupp  s*   z
Task.setuprA   c                 C  s   | j jS rG   modelautomatic_optimizationrC   r-   r-   r.   r    s   zTask.automatic_optimizationautomatic_optimisationNonec                 C  s   || j _d S rG   r	  )rD   r  r-   r-   r.   r    s   ,Union[Specifications, Tuple[Specifications]]c                 C  s   t | ds	td| jS )N_specificationsa  Task specifications are not available. This is most likely because they depend on the content of the training subset. Use `task.prepare_data()` and `task.setup()` to go over the training subset and fix this, or let lightning trainer do that for you in `trainer.fit(model)`.)hasattrr2   r  rC   r-   r-   r.   specifications  s
   
zTask.specificationsr  c                 C  s
   || _ d S rG   )r  )rD   r  r-   r-   r.   r    s   
c                 C  s   d S rG   r-   rC   r-   r-   r.   setup_loss_func  rT   zTask.setup_loss_funcc                 C     d| j j d}t|)N	Missing 'z.train__iter__' method.rb   r%   NotImplementedErrorrD   msgr-   r-   r.   re        zTask.train__iter__c                 C  r  )Nr  z.train__len__' method.r  r  r-   r-   r.   rg     r  zTask.train__len__r   c                 C  s   d| j j d}t|)Nr  z.collate_fn' method.r  )rD   batchr  r  r-   r-   r.   
collate_fn  s   zTask.collate_fnr   c              
   C  s(   t t| | j| j| jdt| jdddS )NTr   r  r   r   r   	drop_lastr  )r   rY   r   r   r   r   r  rC   r-   r-   r.   train_dataloader  s   zTask.train_dataloaderr3   torch.Tensorc                 C  sH   |j tjtjfv rt|||dS |j tjfv rt|||dS d}t|)a8  Guess and compute default loss according to task specification

        Parameters
        ----------
        specifications : Specifications
            Task specifications
        target : torch.Tensor
            * (batch_size, num_frames) for binary classification
            * (batch_size, num_frames) for multi-class classification
            * (batch_size, num_frames, num_classes) for multi-label classification
        prediction : torch.Tensor
            (batch_size, num_frames, num_classes)
        weight : torch.Tensor, optional
            (batch_size, num_frames, 1)

        Returns
        -------
        loss : torch.Tensor
            Binary cross-entropy loss in case of binary and multi-label classification,
            Negative log-likelihood loss in case of multi-class classification.

        weightz+TODO: implement for other types of problems)r4   r    r(   r*   r   r)   r   r  )rD   r  target
predictionr"  r  r-   r-   r.   default_loss  s   zTask.default_loss	batch_idxr  Literal['train', 'val']c              	   C  s  t | jtr
td| |d }|j\}}}|d }|dkr%t| ddnd}	||	tj	||d| jj
d}
t| jd	 | j | }d
|
ddd|f< t| jd | j | }d
|
dd|| df< | j| j|||
d}t|rudS | jjd| |ddddd d|iS )a  Default training or validation step according to task specification

            * binary cross-entropy loss for binary or multi-label classification
            * negative log-likelihood loss for regular classification

        If "weight" attribute exists, batch[self.weight] is also passed to the loss function
        during training (but has no effect in validation).

        Parameters
        ----------
        batch : (usually) dict of torch.Tensor
            Current batch.
        batch_idx: int
            Batch index.
        stage : {"train", "val"}
            "train" for training step, "val" for validation step

        Returns
        -------
        loss : {str: torch.tensor}
            {"loss": loss}
        zCDefault training/validation step is not implemented for multi-task.Xyr   r"  Nr!   )devicer   r:   r!  zloss/FT)on_stepon_epochprog_barloggerloss)r   r  r   r  r
  shapegetattrr   torchonesr*  roundr;   r7   r%  isnanlog)rD   r  r&  r  y_predr   
num_framesr   r)  
weight_keyr"  warm_up_leftwarm_up_rightr/  r-   r-   r.   common_step  s8   
zTask.common_stepc                 C     |  ||dS )Nr   r<  rD   r  r&  r-   r-   r.   training_step+     zTask.training_stepc                 C  s   d| j j d}t|)Nr  z.val__getitem__' method.r  )rD   rm   r  r-   r-   r.   rl   .  r  zTask.val__getitem__c                 C  r  )Nr  z.val__len__' method.r  r  r-   r-   r.   ro   3  r  zTask.val__len__Optional[DataLoader]c              
   C  s2   | j rtt| | j| j| jdt| jdddS d S )NFvalr  r  )r   r   rk   r   r   r   r   r  rC   r-   r-   r.   val_dataloader8  s   	zTask.val_dataloaderc                 C  r=  )NrC  r>  r?  r-   r-   r.   validation_stepG  rA  zTask.validation_stepc                 C  r  )zDefault validation metricr  z.default_metric' method.r  r  r-   r-   r.   default_metricJ  r  zTask.default_metricr   c                 C  s   | j d u r
|  | _ t| j S rG   )r   rF  r   rC   r-   r-   r.   r   O  s   


zTask.metricc                 C  s0   | j }|d ur|| j_| jj| jj d S d S rG   )r   r
  validation_metrictor*  )rD   r   r-   r-   r.   setup_validation_metricV  s
   zTask.setup_validation_metricc                 C  s*   t t| j \}}||jrdfS dfS )a  Quantity (and direction) to monitor

        Useful for model checkpointing or early stopping.

        Returns
        -------
        monitor : str
            Name of quantity to monitor.
        mode : {'min', 'max}
            Minimize

        See also
        --------
        lightning.pytorch.callbacks.ModelCheckpoint
        lightning.pytorch.callbacks.EarlyStopping
        r   min)nextiterr   r   higher_is_better)rD   r   r   r-   r-   r.   val_monitor\  s   zTask.val_monitor)	Nr~   Nr:   r   NFNN)r   r   r   r   r7   r6   r9   r8   r;   r   r   rF   r   r=   r   r?   r   r   r   r   )r   r   rG   rV   )r  r?   rA   r  )rA   r  )r  r  )r   )rA   r   )r  r3   rA   r   )r&  rF   r  r'  )r&  rF   )rA   rB  )rA   r   )rA   r   )r%   r&   r'   __doc__r_   r   r   r  propertyr  setterr  r  re   rg   r  r  r%  r<  r@  rl   ro   rD  rE  rF  r   r   rI  rN  ri   r-   r-   ra   r.   r[      sX    ><  
,




'
G


r[   )rp   rF   rA   rq   )G
__future__r   r   r   r   r   collectionsr   dataclassesr   enumr   	functoolsr   r   numbersr   pathlibr	   tempfiler
   typingr   r   r   r   r   r   r   r   	lightningnumpyr   scipy.specialrH   r2  pyannote.audio.utils.lossr   r   pyannote.audio.utils.protocolr   pyannote.databaser   #pyannote.database.protocol.protocolr   r   torch.utils.datar   r   r   torch_audiomentationsr   /torch_audiomentations.core.transforms_interfacer   torchmetricsr   r   r   __args__r   r   r    r/   	Exceptionr2   r3   rY   rk   r}   LightningDataModuler[   r-   r-   r-   r.   <module>   sH   (

;
