o
    piq=                     @   s   d dl Z d dlZd dlmZmZmZmZmZmZm	Z	 d dl
Zd dlZd dlm  mZ d dlmZmZmZ d dlmZ d dlmZmZ d dlmZ d dlmZ d dlmZ d d	l m!Z! G d
d deZ"dS )    N)DictListOptionalSequenceTextTupleUnion)Problem
ResolutionSpecifications)SegmentationTask)SegmentSlidingWindowFeature)Protocol)SegmentationProtocol)BaseWaveformTransform)Metricc                       s  e Zd ZdZ											d%dedeeedf  d	eee  d
e	dee	e
e	e	f f deee  dee dedee dedee deeee eeef f f fddZdefddZd& fdd	Zdede	d
e	fddZdefdd Zdefd!d"Zed#d$ Z  ZS )'MultiLabelSegmentationa
  Generic multi-label segmentation

    Multi-label segmentation is the process of detecting temporal intervals
    when a specific audio class is active.

    Example use cases include speaker tracking, gender (male/female)
    classification, or audio event detection.

    Parameters
    ----------
    protocol : Protocol
    cache : str, optional
        As (meta-)data preparation might take a very long time for large datasets,
        it can be cached to disk for later (and faster!) re-use.
        When `cache` does not exist, `Task.prepare_data()` generates training
        and validation metadata from `protocol` and save them to disk.
        When `cache` exists, `Task.prepare_data()` is skipped and (meta)-data
        are loaded from disk. Defaults to a temporary path.
    classes : List[str], optional
        List of classes. Defaults to the list of classes available in the training set.
    duration : float, optional
        Chunks duration. Defaults to 2s.
    warm_up : float or (float, float), optional
        Use that many seconds on the left- and rightmost parts of each chunk
        to warm up the model. While the model does process those left- and right-most
        parts, only the remaining central part of each chunk is used for computing the
        loss during training, and for aggregating scores during inference.
        Defaults to 0. (i.e. no warm-up).
    balance: Sequence[Text], optional
        When provided, training samples are sampled uniformly with respect to these keys.
        For instance, setting `balance` to ["database","subset"] will make sure that each
        database & subset combination will be equally represented in the training samples.
    weight: str, optional
        When provided, use this key to as frame-wise weight in loss function.
    batch_size : int, optional
        Number of training samples per batch. Defaults to 32.
    num_workers : int, optional
        Number of workers used for generating training samples.
        Defaults to multiprocessing.cpu_count() // 2.
    pin_memory : bool, optional
        If True, data loaders will copy tensors into CUDA pinned
        memory before returning them. See pytorch documentation
        for more details. Defaults to False.
    augmentation : BaseWaveformTransform, optional
        torch_audiomentations waveform transform, used by dataloader
        during training.
    metric : optional
        Validation metric(s). Can be anything supported by torchmetrics.MetricCollection.
        Defaults to AUROC (area under the ROC curve).
    N       @            Fprotocolcacheclassesdurationwarm_upbalanceweight
batch_sizenum_workers
pin_memoryaugmentationmetricc                    sR   t |tstdt| dt j|||||	|
|||d	 || _|| _|| _d S )NzHMultiLabelSegmentation task expects a SegmentationProtocol but you gave z. )r   r   r   r   r    r!   r"   r   )	
isinstancer   
ValueErrortypesuper__init__r   r   r   )selfr   r   r   r   r   r   r   r   r   r    r!   r"   	__class__ `/home/ubuntu/.local/lib/python3.10/site-packages/pyannote/audio/tasks/segmentation/multilabel.pyr'   \   s$   

zMultiLabelSegmentation.__init__prepared_datac                    s
  j d u rjstd}jrtj j	 }nj }j d u rvt
  t
 }|D ]8}|dd }|sMtd|d  d|d  d}t||D ]}| vrZ | qO| fdd	|D  q/tj tjd
|d<  _ ngt
 }|D ]V}|dd }|std|d  d|d  d}t|t|tj  }|rtd|d  d|d  dd| d}t| |fdd	t|tj @ D  q{tjj tjd
|d< tjt|tj ftjd
}	t|D ]
\}
 d|	|
 f< q|	|d< |  d S )NaE  
                Could not infer list of classes. Either provide a list of classes when
                instantiating the task, or make sure that the training protocol provides
                a 'classes' entry. See https://github.com/pyannote/pyannote-database#segmentation
                for more details.
                r   z
                        File "uriz" (from databaseaW   database) does not
                        provide a 'classes' entry. Please make sure the corresponding
                        training protocol provides a 'classes' entry for all files. See
                        https://github.com/pyannote/pyannote-database#segmentation for more
                        details.
                        c                    s   g | ]}  |qS r+   )index.0klass)r   r+   r,   
<listcomp>       z<MultiLabelSegmentation.post_prepare_data.<locals>.<listcomp>dtypeclasses-listz; database) provides
                        extra classes (z, z,) that are ignored.
                        c                    s   g | ]} j |qS r+   )r   r0   r1   r(   r+   r,   r4      s    
Tclasses-annotated)r   has_classestextwrapdedenthas_validation	itertoolschainr   traindevelopmentlistgetr$   appendnparraystr_setjoinprintzeroslenbool_	enumerateclear)r(   r-   msg
files_iterannotated_classesfilefile_classesr3   extra_classesannotated_classes_arrayfile_idr+   )r   r(   r,   post_prepare_data   s   	

	
	
z(MultiLabelSegmentation.post_prepare_datac                    s6   t  | t| jd tjtj| j| j	| j
d| _d S )Nr8   )r   problem
resolutionr   min_durationr   )r&   setupr   r-   r	   MULTI_LABEL_CLASSIFICATIONr
   FRAMEr   r\   r   specifications)r(   stager)   r+   r,   r]      s   zMultiLabelSegmentation.setuprX   
start_timec                    s  |  |}t||| }t }| jj||\|d< }| jd | jd d |k }||d |jk |d |jk@  }	| jj	j
}
d| jj	j }t|	d |j|j | }tdt||
 t}t|	d |j|j | }t||
 t}| jt|| jjj }tj|t| jd ftjd	 }d|d
d
| jd | f< t|||	d D ]\}}}d|||d |f< qt|| jj	| jd|d< | jd |   fdd jjD |d< ||d d< |S )a  Prepare chunk for multi-label segmentation

        Parameters
        ----------
        file_id : int
            File index
        start_time : float
            Chunk start time
        duration : float
            Chunk duration.

        Returns
        -------
        sample : dict
            Dictionary containing the chunk data with the following keys:
            - `X`: waveform
            - `y`: target (see Notes below)
            - `meta`:
                - `database`: database index
                - `file`: file index

        Notes
        -----
        y is a trinary matrix with shape (num_frames, num_classes):
            -  0: class is inactive
            -  1: class is active
            - -1: we have no idea

        Xzannotations-segmentsrX   startendg      ?r   r8   r6   Nr:   global_label_idx   )labelsyzaudio-metadatac                    s   i | ]}| | qS r+   r+   )r2   keymetadatar+   r,   
<dictcomp>F  r5   z8MultiLabelSegmentation.prepare_chunk.<locals>.<dictcomp>metarT   )get_filer   dictmodelaudiocropr-   re   rd   receptive_fieldstepr   rF   maximumroundastypeintminimum
num_frameshparamssample_rateonesrM   int8zipr   r   r7   names)r(   rX   rb   r   rT   chunksample_annotationschunk_annotationsru   halfrd   	start_idxre   end_idxr{   ri   labelr+   rk   r,   prepare_chunk   sJ   



z$MultiLabelSegmentation.prepare_chunk	batch_idxc                 C   s   |d }|  |}|d }|j|jksJ |dk}|| }|| }t||tj}t|r2d S | j jd|ddddd d|iS )	Nrc   ri   z
loss/trainFTon_stepon_epochprog_barloggerloss)	rq   shapeFbinary_cross_entropyr%   torchfloatisnanlogr(   batchr   rc   y_predy_truemaskr   r+   r+   r,   training_stepK  s&   

z$MultiLabelSegmentation.training_stepc                 C   sv   |d }|  |}|d }|j|jksJ |dk}|| }|| }t||tj}| j jd|ddddd d|iS )	Nrc   ri   r   loss/valFTr   r   )rq   r   r   r   r%   r   r   r   r   r+   r+   r,   validation_steph  s"   
z&MultiLabelSegmentation.validation_stepc                 C   s   dS )a  Quantity (and direction) to monitor

        Useful for model checkpointing or early stopping.

        Returns
        -------
        monitor : str
            Name of quantity to monitor.
        mode : {'min', 'max}
            Minimize

        See also
        --------
        lightning.pytorch.callbacks.ModelCheckpoint
        lightning.pytorch.callbacks.EarlyStopping
        )r   minr+   r9   r+   r+   r,   val_monitor  s   z"MultiLabelSegmentation.val_monitor)NNr   r   NNr   NFNN)N)__name__
__module____qualname____doc__r   r   r   strr   r   r   r   r   ry   boolr   r   r   r'   rY   r]   r   r   r   propertyr   __classcell__r+   r+   r)   r,   r   (   sZ    6

	
(gTr   )#r?   r<   typingr   r   r   r   r   r   r   numpyrF   r   torch.nn.functionalnn
functionalr   pyannote.audio.core.taskr	   r
   r   (pyannote.audio.tasks.segmentation.mixinsr   pyannote.corer   r   pyannote.databaser   pyannote.database.protocolr   /torch_audiomentations.core.transforms_interfacer   torchmetricsr   r   r+   r+   r+   r,   <module>   s   $