o
    Si*                     @   st   d dl mZ d dlmZmZ d dlZd dlZd dlm	Z	m
Z
mZ G dd dZdejdefd	d
ZG dd dZdS )    )sqrt)ListOptionalN)DecibelsSecondscompute_num_samplesc                	   @   s   e Zd ZdZ		ddejdedee de	fdd	Z
	dd
ejdedee dejfddZedefddZedeej fddZedejfddZedejfddZ		dd
ejdee de	fddZdS )
AudioMixera  
    Utility class to mix multiple waveforms into a single one.
    It should be instantiated separately for each mixing session (i.e. each ``MixedCut``
    will create a separate ``AudioMixer`` to mix its tracks).
    It is initialized with a numpy array of audio samples (typically float32 in [-1, 1] range)
    that represents the "reference" signal for the mix.
    Other signals can be mixed to it with different time offsets and SNRs using the
    ``add_to_mix`` method.
    The time offset is relative to the start of the reference signal
    (only positive values are supported).
    The SNR is relative to the energy of the signal used to initialize the ``AudioMixer``.

    .. note:: Both single-channel and multi-channel signals are supported as reference
        and added signals. The only requirement is that the when mixing 2 multi-channel
        signals, they must have the same number of channels.

    .. note:: When the AudioMixer contains multi-channel tracks, 2 types of mixed signals
        can be generated:
        - `mixed_audio` mixes each channel independently, and returns a multi-channel signal.
          If there is a mono track, it is added to all the channels.
        - `mixed_mono_audio` mixes all channels together, and returns a single-channel signal.
    N        
base_audiosampling_ratereference_energybase_offsetc                 C   sV   |g| _ t||g| _|| _|jd | _| j d j| _|du r&t|| _dS || _dS )a  
        AudioMixer's constructor.

        :param base_audio: A numpy array with the audio samples for the base signal
            (all the other signals will be mixed to it).
        :param sampling_rate: Sampling rate of the audio.
        :param reference_energy: Optionally pass a reference energy value to compute SNRs against.
            This might be required when ``base_audio`` corresponds to zero-padding.
        :param base_offset: Optionally pass a time offset for the base signal.
        r   N)	tracksr   offsetsr   shapenum_channelsdtypeaudio_energyr   )selfr
   r   r   r    r   F/home/ubuntu/.local/lib/python3.10/site-packages/lhotse/audio/mixer.py__init__"   s   
zAudioMixer.__init__audiooffsettotalreturnc                 C   s   |j dksJ d|j  |d u r|jd | }|jd | |ks0J |jd  d| d| tj|d|||jd  | ffdS )N   zaudio.ndim=    +  <= )r   r   )	pad_width)ndimr   nppad)r   r   r   r   r   r   r   
_pad_track@   s   zAudioMixer._pad_trackc                 C   s4   d}t | j| jD ]\}}t|||jd  }q	|S )Nr   r   zipr   r   maxr   )r   longestr   r   r   r   r   num_samples_totalM      zAudioMixer.num_samples_totalc                    $    j  fddt j jD S )
        Return a list of numpy arrays with the shape (C, num_samples), where each track is
        zero padded and scaled adequately to the offsets and SNR used in ``add_to_mix`` call.
        c                        g | ]\}} j ||d qS )r   r   r$   .0r   trackr   r   r   r   
<listcomp>[       z,AudioMixer.unmixed_audio.<locals>.<listcomp>)r)   r&   r   r   r   r   r3   r   unmixed_audioT      zAudioMixer.unmixed_audioc                 C   s   | j }tj| j|f| jd}t| j| jD ],\}}|jd dkr.| jdkr.t	|| jdf}|dd|||jd  f  |7  < q|S )
        Return a numpy ndarray with the shape (num_channels, num_samples) - a mix of the tracks
        supplied with ``add_to_mix`` calls.
        r   r   r   N)
r)   r"   zerosr   r   r&   r   r   r   tiler   r   mixedr   r2   r   r   r   mixed_audio`   s   (zAudioMixer.mixed_audioc                 C   sz   | j }tjd|f| jd}t| j| jD ]&\}}|jd dkr'tj|ddd}|dd|||jd  f  |7  < q|S )z
        Return a numpy ndarray with the shape (1, num_samples) - a mix of the tracks
        supplied with ``add_to_mix`` calls.
        r   r:   r   T)axiskeepdimsN)	r)   r"   r;   r   r&   r   r   r   sumr=   r   r   r   mixed_mono_audior   s   (zAudioMixer.mixed_mono_audiosnrc                 C   s   |j dkrdS |dksJ dt|| j}d}|dur8| jdkr8t|}|dkr8| jd| d   }t|| }| j||  | j| |j	d | j
kri| j
dkri|j	d dkritd	|j	d  d
| j
 dt| j
|j	d | _
dS )a  
        Add audio of a new track into the mix.
        :param audio: An array of audio samples to be mixed in.
        :param snr: Signal-to-noise ratio, assuming `audio` represents noise (positive SNR - lower `audio` energy,
        negative SNR - higher `audio` energy)
        :param offset: How many seconds to shift `audio` in time. For mixing, the signal will be padded before
        the start with low energy values.
        :return:
        r   Nr	   +Negative offset in mixing is not supported.g      ?g      $@
   r   zCannot mix audios with z and z
 channels.)sizer   r   r   r   r   r   appendr   r   r   
ValueErrorr'   )r   r   rD   r   num_samples_offsetgainadded_audio_energytarget_energyr   r   r   
add_to_mix   s&   

zAudioMixer.add_to_mix)Nr	   N)__name__
__module____qualname____doc__r"   ndarrayintr   floatr   r   r$   propertyr)   r   r7   r?   rC   r   rN   r   r   r   r   r   
   sP    

r   r   r   c                 C   s   t t| d S )Nr   )rV   r"   average)r   r   r   r   r      s   r   c                	   @   s   e Zd ZdZ	ddejdedefddZ	dd	ejd
e	de
e	 dejfddZede	fddZedeej fddZedejfddZ	dd	ejd
efddZdS )
VideoMixerze
    Simple video "mixing" class that actually does not mix anything but supports concatenation.
    r	   
base_videofpsr   c                 C   sf   ddl m} |g| _t||g| _|| _| jd j| _| | _| j| jd | jd |j	d   d S )Nr   )IntervalTree)
intervaltreer\   r   r   r   r[   r   treeaddir   )r   rZ   r[   r   r\   r   r   r   r      s   (zVideoMixer.__init__Nvideor   r   r   c                 C   sz   |d u r|j d | }|j d | |ks#J |j d  d| d| tjjj|dddddd|||j d  | fdddS )Nr   r   r   constant)modevalue)r   torchnn
functionalr#   )r   r`   r   r   r   r   r   r$      s    zVideoMixer._pad_trackc                 C   s4   d}t | j| jD ]\}}t|||jd  }q	|S )Nr   r%   )r   r(   r   r`   r   r   r   num_frames_total   r*   zVideoMixer.num_frames_totalc                    r+   )r,   c                    r-   r.   r/   r0   r3   r   r   r4      r5   z,VideoMixer.unmixed_video.<locals>.<listcomp>)rg   r&   r   r   r6   r   r3   r   unmixed_video   r8   zVideoMixer.unmixed_videoc                 C   s^   | j }| jd |f| jd jdd  }t| j| jD ]\}}|||||jd  < q|S )r9   r   r   N)rg   r   	new_zerosr   r&   r   r=   r   r   r   mixed_video   s
   &zVideoMixer.mixed_videoc                 C   s   |j dkrd S |dksJ dt|| j}ddlm} ||||jd  }| j|r8J d| d| j  | j	
| | j
| | j| d S )Nr   r	   rE   )Intervalz%Cannot add an overlapping video. Got z( while we have the following intervals: )rG   r   r[   r]   rk   r   r^   overlapsall_intervalsr   rH   r   add)r   r`   r   frame_offsetrk   intervalr   r   r   rN      s   
zVideoMixer.add_to_mix)r	   rO   )rP   rQ   rR   rS   rd   TensorrV   r   r   rU   r   r$   rW   rg   r   rh   rj   rN   r   r   r   r   rY      s@    

rY   )mathr   typingr   r   numpyr"   rd   lhotse.utilsr   r   r   r   rT   rV   r   rY   r   r   r   r   <module>   s     &