o
    Si]                     @   sF   d dl mZ d dlZd dlmZ d dlmZmZm	Z	 G dd dZ
dS )    )OptionalN)FeatureExtractor)DecibelsSecondscompute_num_framesc                   @   s   e Zd ZdZ		ddedejdedede	e f
d	d
Z
edd ZedejfddZedejfddZdedejfddZ		ddejdede	e defddZdS )FeatureMixera^  
    Utility class to mix multiple feature matrices into a single one.
    It should be instantiated separately for each mixing session (i.e. each ``MixedCut``
    will create a separate ``FeatureMixer`` to mix its tracks).
    It is initialized with a numpy array of features (typically float32)
    that represents the "reference" signal for the mix.
    Other signals can be mixed to it with different time offsets and SNRs using the
    ``add_to_mix`` method.
    The time offset is relative to the start of the reference signal
    (only positive values are supported).
    The SNR is relative to the energy of the signal used to initialize the ``FeatureMixer``.

    It relies on the ``FeatureExtractor`` to have defined ``mix`` and ``compute_energy`` methods,
    so that the ``FeatureMixer`` knows how to scale and add two feature matrices together.
         @Nfeature_extractor
base_featsframe_shiftpadding_valuereference_energyc                 C   sj   || _ |g| _|jdkrdn|jd | _g | _|| _|| _| jd j| _|du r0|	|| _
dS || _
dS )ah  
        FeatureMixer's constructor.

        :param feature_extractor: The ``FeatureExtractor`` instance that specifies how to mix the features.
        :param base_feats: The features used to initialize the ``FeatureMixer`` are a point of reference
            in terms of energy and offset for all features mixed into them.
        :param frame_shift: Required to correctly compute offset and padding during the mix.
        :param padding_value: The value used to pad the shorter features during the mix.
            This value is adequate only for log space features. For non-log space features,
            e.g. energies, use either 0 or a small positive value like 1e-5.
        :param reference_energy: Optionally pass a reference energy value to compute SNRs against.
            This might be required when ``base_feats`` correspond to padding energies.
              r   N)r	   tracksndimshapenum_channelsgainsr   r   dtypecompute_energyr   )selfr	   r
   r   r   r    r   I/home/ubuntu/.local/lib/python3.10/site-packages/lhotse/features/mixer.py__init__   s   
zFeatureMixer.__init__c                 C   s   | j d jd S )Nr   r   )r   r   r   r   r   r   num_features>   s   zFeatureMixer.num_featuresreturnc                 C   s   t | jS )z
        Return a numpy ndarray with the shape (num_tracks, num_frames, num_features), where each track's
        feature matrix is padded and scaled adequately to the offsets and SNR used in ``add_to_mix`` call.
        )npstackr   r   r   r   r   unmixed_featsB   s   zFeatureMixer.unmixed_featsc                 C   s@   | j d }t| j dd | jD ]\}}| jj|||d}q|S )z
        Return a numpy ndarray with the shape (num_frames, num_features) - a mono mixed feature matrix
        of the tracks supplied with ``add_to_mix`` calls.
        r   r   N)
features_a
features_benergy_scaling_factor_b)r   zipr   r	   mix)r   resultfeats_to_addgainr   r   r   mixed_featsJ   s   
zFeatureMixer.mixed_feats
num_framesc                 C   s2   t j| jdkr|| jfn|| j| jf| j| jdS )Nr   )r   
fill_valuer   )r   fullr   r   r   r   )r   r+   r   r   r   _get_dummy_arrayW   s   

zFeatureMixer._get_dummy_array        featssampling_ratesnroffsetc                 C   s\  t |dkrdS |dksJ d| jd j|jksJ d| jd }t|| j|d}|jd }|jd | }t||}	|}
||	k r_tt | jD ]}t	| j| | 
|	| g}|| j|< qH|dkrmt	| 
||
g}
||	k r}t	|
| 
|	| g}
d}|dur| jdkr| j|}|dkr| jd| d	   }|| }| j|
 | j| dS )
a  
        Add feature matrix of a new track into the mix.
        :param feats: A 2D feature matrix to be mixed in.
        :param sampling_rate: The sampling rate of ``feats``
        :param snr: Signal-to-noise ratio, assuming ``feats`` represents noise (positive SNR - lower ``feats`` energy,
        negative SNR - higher ``feats`` energy)
        :param offset: How many seconds to shift ``feats`` in time. For mixing, the signal will be padded before
        the start with low energy values.
        r   Nr/   z+Negative offset in mixing is not supported.z%Feature dimensions mismatch in mixing)durationr   r1   g      ?g      $@
   )lenr   r   r   r   r   maxranger   vstackr.   r   r	   r   appendr   )r   r0   r1   r2   r3   reference_featsnum_frames_offsetcurrent_num_framesincoming_num_framesmix_num_framesr(   idxpadded_trackr)   added_feats_energytarget_energyr   r   r   
add_to_mixd   sT   


zFeatureMixer.add_to_mix)r   N)Nr/   )__name__
__module____qualname____doc__r   r   ndarrayr   floatr   r   propertyr   r!   r*   intr.   r   rD   r   r   r   r   r   	   sB    
$
r   )typingr   numpyr   lhotse.features.baser   lhotse.utilsr   r   r   r   r   r   r   r   <module>   s
    