o
    Si                     @   s   d dl mZmZ d dlmZmZ d dlZd dlm	Z	m
Z
 d dlmZmZmZmZmZ eG dd dZdefd	ejd
ededefddZdddddddefdejdededededededededefddZe
G d d! d!e	ZdS )"    )asdict	dataclass)AnyDictN)FeatureExtractorregister_extractor)EPSILONLOG_EPSILONSecondscompute_num_framesis_module_availablec                   @   s   e Zd ZU dZdZeed< dZeed< dZeed< dZ	eed	< d
Z
eed< dZeed< dZeed< dZeed< deeef fddZedeeef dd fddZdS )LibrosaFbankConfiga  Default librosa config with values consistent with various TTS projects.

    This config is intended for use with popular TTS projects such as [ParallelWaveGAN](https://github.com/kan-bayashi/ParallelWaveGAN)
    Warning: You may need to normalize your features.
    i"V  sampling_rate   fft_size   hop_sizeN
win_lengthhannwindowP   num_mel_binsfmin  fmaxreturnc                 C   s   t | S N)r   self r   Q/home/ubuntu/.local/lib/python3.10/site-packages/lhotse/features/librosa_fbank.pyto_dict!      zLibrosaFbankConfig.to_dictdatac                 C   s   t di | S )Nr   )r   )r#   r   r   r    	from_dict$   s   zLibrosaFbankConfig.from_dict)__name__
__module____qualname____doc__r   int__annotations__r   r   r   r   strr   r   r   r   r   r!   staticmethodr$   r   r   r   r    r      s   
  r      featsexpected_num_framesabs_tol	pad_valuec                 C   s   | j d | }d|  k r|krn n| d | } | S | |  kr&dk r8n ntj| d| fdfdtd} | S t||krKtd| d| j d  | S )Nr   )r   r   constant)modeconstant_valuesz	Expected z  source_feats; feats.shape[0] = )shapenppadr	   abs
ValueError)r.   r/   r0   r1   frames_diffr   r   r    pad_or_truncate_features)   s"   r;   r   r   r   r   r   audior   r   r   r   r   r   r   r   epsc
              	   C   s  t dr	ddl}
ntdt| jdkr)| jd dks$J d| j d| d } nt| jdks9J d| j d|
j| ||||d	d
}t|j}|du rPdn|}|du rZ|d n|}|
j	j
|||||d}tt|	t||j}tt| | || |d}t||}|S )a  Compute log-Mel filterbank feature.

    Args:
        audio (ndarray): Audio signal (T,).
        sampling_rate (int): Sampling rate.
        fft_size (int): FFT size.
        hop_size (int): Hop size.
        win_length (int): Window length. If set to None, it will be the same as fft_size.
        window (str): Window function type.
        num_mel_bins (int): Number of mel basis.
        fmin (int): Minimum frequency in mel basis calculation.
        fmax (int): Maximum frequency in mel basis calculation.
        eps (float): Epsilon value to avoid inf in log calculation.
    Returns:
        ndarray: Log Mel filterbank feature (#source_feats, num_mel_bins).
    librosar   NzULibrosa is not installed. Please install librosa before using LibrosaFbank extractor.   r-   z?LibrosaFbank works only with single-channel recordings (shape: )reflect)n_fft
hop_lengthr   r   pad_mode)srrB   n_melsr   r   )durationframe_shiftr   )r   r>   ImportErrorlenr5   stftr6   r8   Tfiltersmellog10maximumdotr   r;   )r<   r   r   r   r   r   r   r   r   r=   r>   x_stftspc	mel_basisr.   r/   r   r   r    logmelfilterbankB   sF   




rU   c                	   @   s   e Zd ZdZdZeZedefddZ	de
de
fddZd	ejde
dejfd
dZedejdejdedejfddZedejdefddZdS )LibrosaFbankzLibrosa fbank feature extractor

    Differs from Fbank extractor in that it uses librosa backend for stft and mel scale calculations.
    It can be easily configured to be compatible with existing speech-related projects that use librosa features.
    zlibrosa-fbankr   c                 C   s   | j j| j j S r   )configr   r   r   r   r   r    rH      s   zLibrosaFbank.frame_shiftr   c                 C   s   | j jS r   )rW   r   )r   r   r   r   r    feature_dim   r"   zLibrosaFbank.feature_dimsamplesc                 C   s&   || j jksJ t|fi t| j S r   )rW   r   rU   r   )r   rY   r   r   r   r    extract   s   zLibrosaFbank.extract
features_a
features_benergy_scaling_factor_bc              
   C   s&   t t tt | |t |  S r   )r6   logrP   r   exp)r[   r\   r]   r   r   r    mix   s   zLibrosaFbank.mixfeaturesc                 C   s   t tt| S r   )floatr6   sumr_   )ra   r   r   r    compute_energy   s   zLibrosaFbank.compute_energyN)r%   r&   r'   r(   namer   config_typepropertyr
   rH   r)   rX   r6   ndarrayrZ   r,   rb   r`   rd   r   r   r   r    rV      s(    rV   )dataclassesr   r   typingr   r   numpyr6   lhotse.features.baser   r   lhotse.utilsr   r	   r
   r   r   r   rh   r)   rb   r;   r+   rU   rV   r   r   r   r    <module>   sb    	
	

H