o
    7wiP                     @   s^   d Z ddlZddlZddlm  mZ ddlmZ ddl	m
Z
 e
eZG dd dejjZdS )zWClasses for implementing data augmentation pipelines.

Authors
 * Mirco Ravanelli 2022
    N)lengths_arg_exists)
get_loggerc                       sp   e Zd ZdZdddddddddddde df fdd	Zd	d
 Zdd Zdd Zdd Z	dd Z
dd Z  ZS )	Augmenteras  Applies pipelines of data augmentation.

    Arguments
    ---------
    parallel_augment: bool
        If False, the augmentations are applied sequentially with
        the order specified in the pipeline argument.
        When True, all the N augmentations are concatenated in the output
        on the batch axis.
    parallel_augment_fixed_bs: bool
        If False, each augmenter (performed in parallel) generates a number of
        augmented examples equal to the batch size. Thus, overall, with this
        option N*batch size artificial data are
        generated, where N is the number of augmenters.
        When True, the number of total augmented examples is kept fixed at
        the batch size, thus, for each augmenter, fixed at batch size // N examples.
        This option is useful to keep controlled the number of synthetic examples
        with respect to the original data distribution, as it keep always
        50% of original data, and 50% of augmented data.
    concat_original: bool
        if True, the original input is concatenated with the
        augmented outputs (on the batch axis).
    min_augmentations: int
        The number of augmentations applied to the input signal is randomly
        sampled between min_augmentations and max_augmentations. For instance,
        if the augmentation dict contains N=6 augmentations and we set
        select min_augmentations=1 and max_augmentations=4 we apply up to
        M=4 augmentations. The selected augmentations are applied in the order
        specified in the augmentations dict. If shuffle_augmentations = True,
        a random set of M augmentations is selected.
    max_augmentations: int
        Maximum number of augmentations to apply. See min_augmentations for
        more details.
    shuffle_augmentations:  bool
        If True, it shuffles the entries of the augmentations dictionary.
        The effect is to randomply select the order of the augmentations
        to apply.
    repeat_augment: int
        Applies the augmentation algorithm N times. This can be used to
        perform more data augmentation.
    augment_start_index: int
        The index of the first element in the input batch from which data
        augmentation should begin.
        This argument allows you to specify the starting point for applying
        data augmentation.
    augment_end_index: int
        The index of the last element in the input batch at which data
        augmentation should stop.
        You can use this argument to define the endpoint for applying data
        augmentation within the batch.
    concat_start_index: int
        If `concat_original` is set to True, you can specify a subpart of the
        original batch to concatenate in the output.
        Use this argument to select the index of the first element from the
        original input batch to start copying from.
    concat_end_index: int
        If `concat_original` is set to True, you can specify a subpart of the
        original batch to concatenate in the output. Use this argument to select
        the index of the last element from the original input batch to end the
        copying process.
    augment_prob: float
        The probability (0.0 to 1.0) of applying data augmentation. When set to 0.0,
        the original signal is returned without any augmentation. When set to 1.0,
        augmentation is always applied. Values in between determine the likelihood
        of augmentation.
    augmentations: list
        List of augmentater objects to combine to perform data augmentation.
    enable_augmentations: list
        A list of booleans used to selectively enable or disable specific augmentation
        techniques within the 'augmentations' list.
        Each boolean corresponds to an augmentation object in the 'augmentations' list
        and should be of the same length and order.
        This feature is useful for performing ablations on augmentation techniques to
        tailor them for a specific task.

    Example
    -------
    >>> from speechbrain.augment.time_domain import DropFreq, DropChunk
    >>> freq_dropper = DropFreq()
    >>> chunk_dropper = DropChunk(drop_start=100, drop_end=16000)
    >>> augment = Augmenter(parallel_augment=False, concat_original=False, augmentations=[freq_dropper, chunk_dropper])
    >>> signal = torch.rand([4, 16000])
    >>> output_signal, lengths = augment(signal, lengths=torch.tensor([0.2,0.5,0.7,1.0]))
    FN   r   g      ?c                    s  t    || _|| _|| _|| _|| _|| _|| _|| _	|	| _
|
| _|| _|| _|| _|   d | _d| _t| jts@td| jdk rItd| j
d urX| j
| j	k rXtd| jd urg| j| jk rgtd|d u rsdgt| }nt|ts|tdt|t|krtdd	d
 t||D }dd t|D | _t| jdkrtd | jdkrtd | jdk rd| _td | j| jkrtd | j| _i | _| j D ]\}}t|j| j|< qd S )NTz"repeat_augment must be an integer.r   z&repeat_augment must be greater than 0.zBaugment_end_index must be smaller or equal to augment_start_index.z@concat_end_index must be smaller or equal to concat_start_index.z$enable_augmentations must be a list.z@enable_augmentations must have the same length as augmentations.c                 S   s   g | ]\}}|r|qS  r   ).0augenabledr   r   Z/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/speechbrain/augment/augmenter.py
<listcomp>   s    z&Augmenter.__init__.<locals>.<listcomp>c                 S   s"   i | ]\}}|j jt| |qS r   )	__class____name__str)r   iaugmentationr   r   r
   
<dictcomp>       z&Augmenter.__init__.<locals>.<dictcomp>zBNo augmentation is applied because the augmentation list is empty.zCNo augmentations applied because max_augmentations is non-positive.z;min_augmentations is negative. Modified to be non-negative.z`min_augmentations is greater than max_augmentations. min_augmentations set to max_augmentations.)super__init__parallel_augmentparallel_augment_fixed_bsconcat_originalaugmentationsmin_augmentationsmax_augmentationsshuffle_augmentationsaugment_start_indexaugment_end_indexconcat_start_indexconcat_end_indexrepeat_augmentaugment_probcheck_min_max_augmentationsnum_augmentations
do_augment
isinstanceint
ValueErrorlenlistzip	enumerateloggerwarningrequire_lengthsitemsr   forward)selfr   r   r   r   r   r   r    r   r   r   r   r!   r   enable_augmentationsaug_keyaug_funr   r   r
   r   h   s   






zAugmenter.__init__c                 C   sB  |}|}g }g }|}t |D ]{\}	}
| j|
 }t|jd }| jrG| jrGtd|jd t|d 	tj
}||	 }||	d  }||| }| j|
 rY|||df || d}n|||df }t|trut|dkrq|\}}ntd| js|}|| }q|| || q| jr| ||\}}||fS |}|}||fS )a-  Applies data augmentation on the selected augmentations.

        Arguments
        ---------
        x : torch.Tensor (batch, time, channel)
            input to augment.
        lengths : torch.Tensor
            The length of each sequence in the batch.
        selected_augmentations: dict
            Dictionary containing the selected augmentation to apply.

        Returns
        -------
        output : torch.Tensor
            Augmented outputs.
        output_lengths : torch.Tensor
            The corresponding length of each output.
        r   r   .)lengths   zEThe function must return max two arguments (Tensor, Length[optional]))r+   r   torcharangeshaper   r   linspacer(   tor&   r.   r%   tupler'   appendconcatenate_outputs)r1   xr6   selected_augmentations
next_inputnext_lengthsoutputoutput_lengthsout_lengthskaugment_nameaugment_funidxidx_startstop	idx_startidx_stopoutr   r   r
   augment   sR   





zAugmenter.augmentc                 C   s  d| _ t | jkrd| _ ||fS |}|}| jdur#t| j|jd n|jd | _| j|jd kr=d| _ t	d ||fS t
j| j| jd d|jd| _t| j }| jdksc| jdksct|dkrjd| _ ||fS | jrrt| |d| j }|| j| j }|| j| j }g }g }| j | _| jr| j|jd krd| _n-d| _| jdurt| j|jd n|jd | _||| j| j  ||| j| j  t| jD ]}	| |||\}
}||
 || q| ||\}
}|
|fS )	a  Applies data augmentation.

        Arguments
        ---------
        x : torch.Tensor (batch, time, channel)
            input to augment.
        lengths : torch.Tensor
            The length of each sequence in the batch.

        Returns
        -------
        output : torch.Tensor
            Augmented outputs.
        output_lengths : torch.Tensor
            The corresponding length of each output.
        TFNr   zNo augmentation is applied because the augmentation start index is greater than or equal to the number of examples in the input batch.r   r   )lowhighsizedevice) r$   randomr!   r   minr:   augment_end_index_batchr   r,   r-   r8   randintr   r   rT   	N_augmentr)   r   keysr    r(   r   shuffler   skip_concatr   r   concat_end_index_batchr>   rangerO   r?   )r1   r@   r6   
x_originallen_originalaugmentations_lstrA   
output_lstoutput_len_lstr   rD   rE   r   r   r
   r0     s   









zAugmenter.forwardc                    s`   t dd |D   fddt||D } fdd|D }tj|dd}tj|dd}||fS )a  
        Concatenate a list of augmented signals, accounting for varying temporal lengths.
        Padding is applied to ensure all signals can be concatenated.

        Arguments
        ---------
        augment_lst : List of torch.Tensor
            List of augmented signals to be concatenated.
        augment_len_lst : List of torch.Tensor
            List of lengths corresponding to the augmented signals.

        Returns
        -------
        concatenated_signals : torch.Tensor
            A tensor containing the concatenated signals.
        concatenated_lengths : torch.Tensor
            A tensor containing the concatenated signal lengths.

        Notes
        -----
        This function takes a list of augmented signals, which may have different temporal
        lengths due to variations such as speed changes. It pads the signals to match the
        maximum temporal dimension found among the input signals and rescales the lengths
        accordingly before concatenating them.
        c                 s   s    | ]}|j d  V  qdS )r   Nr:   )r   rO   r   r   r
   	<genexpr>  s    z0Augmenter.concatenate_outputs.<locals>.<genexpr>c                    s"   g | ]\}}||j d     qS rP   rd   )r   lengthrD   max_lenr   r
   r     r   z1Augmenter.concatenate_outputs.<locals>.<listcomp>c              	      s&   g | ]}t |d  |jd  fqS )r   r   )Fpadr:   )r   rD   rg   r   r
   r     s    r   dim)maxr*   r8   cat)r1   augment_lstaugment_len_lstrD   rE   r   rg   r
   r?     s   

zAugmenter.concatenate_outputsc                 G   s,   | j s|S g }|D ]
}|| | q	|S )a9  
        Replicates the labels along the batch axis a number of times that
        corresponds to the number of augmentations. Indeed parallel and
        concatenation augmentations alter the time dimension.

        Arguments
        ---------
        *args : tuple
            Input label tensors to be replicated. Can be a uniq or a list of
            torch.Tensors.

        Returns
        -------
        augmented_labels: torch.Tensor
            Labels corresponding to the augmented input. Returns as many torch.Tensor
            as given in input.
        )r$   r>   replicate_labels)r1   argslist_of_augmented_labelslabelsr   r   r
   replicate_multiple_labels  s   z#Augmenter.replicate_multiple_labelsc                 C   sz   | j s|S g }| jr| js|| j| j g}|| j| j }| jr,tj	|g| j
 dd}||g| j  }tj	|dd}|S )a  
        Replicates the labels along the batch axis a number of times that
        corresponds to the number of augmentations. Indeed parallel and
        concatenation augmentations alter the time dimension.

        Arguments
        ---------
        labels : torch.Tensor
            Input label tensors to be replicated.

        Returns
        -------
        augmented_labels: torch.Tensor
            Labels corresponding to the augmented input. Returns as many torch.Tensor
            as given in input.
        r   rk   )r$   r   r\   r   r]   r   rW   r   r8   rn   rY   r    )r1   rt   augmented_labelsselected_labelsr   r   r
   rq     s"   
zAugmenter.replicate_labelsc                 C   sf   | j du rd| _ | jdu rt| j| _| jt| jkr!t| j| _| j t| jkr1t| j| _ dS dS )z=Checks the min_augmentations and max_augmentations arguments.Nr   )r   r   r(   r   )r1   r   r   r
   r"     s   

z%Augmenter.check_min_max_augmentations)r   
__module____qualname____doc__r)   r   rO   r0   r?   ru   rq   r"   __classcell__r   r   r5   r
   r      s.    WjI|1,r   )rz   rU   r8   torch.nn.functionalnn
functionalri   speechbrain.utils.callchainsr   speechbrain.utils.loggerr   r   r,   Moduler   r   r   r   r
   <module>   s    