o
    }oi-                     @   st   d dl mZmZ d dlZd dlmZ d dlmZ d dlm	Z	m
Z
 d dlmZmZmZ d dlmZ G dd	 d	e	ZdS )
    )DictOptionalN)calculate_mean)
wrap_to_pi)NeuralModule	typecheck)LengthsType
NeuralTypeSpectrogramType)loggingc                       s  e Zd ZdZ							d*dedee dee dee d	ed
ee dee def fddZ	e
deeef fddZe
deeef fddZe
defddZe
defddZed+dejdeej dejfddZe	d,dejdeej dedejfddZeede eed e d!d"ede id#dejdejdejfd$d%Zeede eed e d!d"ede id#dejdejdejfd&d'Ze dejdejdejfd(d)Z  ZS )-!SpectrogramToMultichannelFeaturesa  Convert a complex-valued multi-channel spectrogram to
    multichannel features.

    Args:
        num_subbands: Expected number of subbands in the input signal
        num_input_channels: Optional, provides the number of channels
                            of the input signal. Used to infer the number
                            of output channels.
        mag_reduction: Reduction across channels. Default `None`, will calculate
                       magnitude of each channel.
        mag_power: Optional, apply power on the magnitude.
        use_ipd: Use inter-channel phase difference (IPD).
        mag_normalization: Normalization for magnitude features
        ipd_normalization: Normalization for IPD features
        eps: Small regularization constant.
    NF:0yE>num_subbandsnum_input_channelsmag_reduction	mag_poweruse_ipdmag_normalizationipd_normalizationepsc	           	         s  t    || _|| _|| _|dvrtd| || _|dvr'td| || _| jr6d| | _|| _	n|| _| jd u r@|nd| _	|| _
td| jj td| td| j td	| j td
| j td| j td| j td| j
 td| j td| j	 d S )N)Nmeanmean_varz Unknown magnitude normalization zUnknown ipd normalization       zInitialized %s withz	num_subbands:      %dz	mag_reduction:     %sz	mag_power:         %sz	use_ipd:           %sz	mag_normalization: %sz	ipd_normalization: %sz	eps:               %fz	_num_features:     %sz	_num_channels:     %s)super__init__r   r   r   NotImplementedErrorr   r   _num_features_num_channelsr   r   debug	__class____name__)	selfr   r   r   r   r   r   r   r   r     [/home/ubuntu/.local/lib/python3.10/site-packages/nemo/collections/audio/modules/features.pyr   ,   s4   

z*SpectrogramToMultichannelFeatures.__init__returnc                 C      t dt t dt dS )+Returns definitions of module output ports.BCDTr*   inputinput_lengthr	   r
   r   r"   r$   r$   r%   input_typesX      

z-SpectrogramToMultichannelFeatures.input_typesc                 C   r'   )r(   r)   r.   )outputoutput_lengthr2   r3   r$   r$   r%   output_types`   r5   z.SpectrogramToMultichannelFeatures.output_typesc                 C   s   | j S )zConfigured number of features)r   r3   r$   r$   r%   num_featuresh   s   z.SpectrogramToMultichannelFeatures.num_featuresc                 C   s   | j dur| j S td)zConfigured number of channelsNzvNum channels is not configured. To configure this, `num_input_channels` must be provided when constructing the object.)r   
ValueErrorr3   r$   r$   r%   num_channelsm   s
   
z.SpectrogramToMultichannelFeatures.num_channelsr0   r1   c                 C   sZ   | j dksJ d| j  |du rtj| ddd}|S t| |ddd}tj|ddd}|S )	a,  Calculate mean across time and channel dimensions.

        Args:
            input: tensor with shape (B, C, F, T)
            input_length: tensor with shape (B,)

        Returns:
            Mean of `input` calculated across time and channel dimension
            with shape (B, 1, F, 1)
           )Expected input to have 4 dimensions, got NTdimkeepdimr?   r@   )ndimtorchr   r   )r0   r1   r   r$   r$   r%   get_mean_time_channelx   s   z7SpectrogramToMultichannelFeatures.get_mean_time_channel绽|=c                 C   s   |j dksJ d|j  |du r tj|dddd\}}||fS | ||}|| d}t||d	dd
}tj|ddd
}t||}||fS )ae  Calculate mean and standard deviation across time and channel dimensions.

        Args:
            input: tensor with shape (B, C, F, T)
            input_length: tensor with shape (B,)

        Returns:
            Mean and standard deviation of the `input` calculated across time and
            channel dimension, each with shape (B, 1, F, 1).
        r<   r=   Nr>   FT)rB   unbiasedrC   r   r?   rA   r@   )	rD   rE   std_meanrF   powr   r   sqrtclamp)clsr0   r1   r   stdr   r$   r$   r%   get_mean_std_time_channel   s   z;SpectrogramToMultichannelFeatures.get_mean_std_time_channelr)   r*   r/   r6   )r4   r8   c                 C   s   | j ||d}|| }|S )zMean normalization for the input tensor.

        Args:
            input: input tensor
            input_length: valid length for each example

        Returns:
            Mean normalized input.
        r/   )rF   )r"   r0   r1   r   r6   r$   r$   r%   normalize_mean   s   z0SpectrogramToMultichannelFeatures.normalize_meanc                 C   s&   | j ||| jd\}}|| | }|S )zMean and variance normalization for the input tensor.

        Args:
            input: input tensor
            input_length: valid length for each example

        Returns:
            Mean and variance normalized input.
        )r0   r1   r   )rO   r   )r"   r0   r1   r   rN   r6   r$   r$   r%   normalize_mean_var   s   z4SpectrogramToMultichannelFeatures.normalize_mean_varc                 C   s  | d}| jdu rt|}n@| jdkr!ttj|ddd}n/| jdkr2tjt|ddd}n| jdkrHttjt|d ddd}ntd	| j | jdur\t|| j}| j	d
kri| j
||d}n| j	dkru| j||d}|}| jr|dkrtj||j|jd}n/tj|ddd}t|t| }t|}| jd
kr| j
||d}n| jdkr| j||d}tj||j|gdd}| jdur| d| jkrtd| d d| j ||fS )a\  Convert input batch of C-channel spectrograms into
        a batch of time-frequency features with dimension num_feat.
        The output number of channels may be the same as input, or
        reduced to 1, e.g., if averaging over magnitude and not appending individual IPDs.

        Args:
            input: Spectrogram for C channels with F subbands and N time frames, (B, C, F, N)
            input_length: Length of valid entries along the time dimension, shape (B,)

        Returns:
            num_feat_channels channels with num_feat features, shape (B, num_feat_channels, num_feat, N)
        r   Nabs_meanT)axisrC   mean_absrmsr   zUnexpected magnitude reduction r   r/   r   )dtypedevice)rS   zNumber of channels in features z5 is different than the configured number of channels )sizer   rE   absr   rK   r:   r   rJ   r   rP   rQ   r   
zeros_likerV   rW   angler   r   catexpandshaper   RuntimeError)r"   r0   r1   r   magfeaturesipd	spec_meanr$   r$   r%   forward   sB   




"




z)SpectrogramToMultichannelFeatures.forward)NNNFNNr   )N)NrG   )r!   
__module____qualname____doc__intr   strfloatboolr   propertyr   r	   r4   r8   r9   r;   staticmethodrE   TensorrF   classmethodrO   r   r
   tupler   rP   rQ   rd   __classcell__r$   r$   r#   r%   r      s    	,
$
	
	&r   )typingr   r   rE   #nemo.collections.audio.losses.audior   (nemo.collections.audio.parts.utils.audior   nemo.core.classesr   r   nemo.core.neural_typesr   r	   r
   
nemo.utilsr   r   r$   r$   r$   r%   <module>   s   