o
    [i*                     @   s   d dl mZ d dlZd dlmZ d dlmZ ddddd	d
d dddddd fddZdddd	d
d dddd f	ddZ		d%ddZ	dddd	d
d dddd f	ddZ
dd Zdd Zd&dd Zd'd!d"Zd#d$ ZdS )(    )divisionN)sigproc)dct>  皙?{Gz?         
ףp=
?   Tc                 C      t | fS Nnumpyonesx r   Y/home/ubuntu/SoloSpeech/.venv/lib/python3.10/site-packages/python_speech_features/base.py<lambda>
       r   c                 C   sp   t | ||||||||	|
\}}t|}t|ddddddd|f }t||
}|r6t||dddf< |S )aX  Compute MFCC features from an audio signal.

    :param signal: the audio signal from which to compute features. Should be an N*1 array
    :param samplerate: the samplerate of the signal we are working with.
    :param winlen: the length of the analysis window in seconds. Default is 0.025s (25 milliseconds)
    :param winstep: the step between successive windows in seconds. Default is 0.01s (10 milliseconds)
    :param numcep: the number of cepstrum to return, default 13
    :param nfilt: the number of filters in the filterbank, default 26.
    :param nfft: the FFT size. Default is 512.
    :param lowfreq: lowest band edge of mel filters. In Hz, default is 0.
    :param highfreq: highest band edge of mel filters. In Hz, default is samplerate/2
    :param preemph: apply preemphasis filter with preemph as coefficient. 0 is no filter. Default is 0.97.
    :param ceplifter: apply a lifter to final cepstral coefficients. 0 is no lifter. Default is 22.
    :param appendEnergy: if this is true, the zeroth cepstral coefficient is replaced with the log of the total frame energy.
    :param winfunc: the analysis window to apply to each frame. By default no window is applied. You can use numpy window functions here e.g. winfunc=numpy.hamming
    :returns: A numpy array of size (NUMFRAMES by numcep) containing features. Each row holds 1 feature vector.
          ortho)typeaxisnormNr   )fbankr   logr   lifter)signal
sampleratewinlenwinstepnumcepnfiltnfftlowfreqhighfreqpreemph	ceplifterappendEnergywinfuncfeatenergyr   r   r   mfcc   s   
 
r0   c                 C   r   r   r   r   r   r   r   r   %   r   c
                 C   s   |p|d }t | |} t | || || |	}
t |
|}t|d}t|dkttj	|}t
|||||}t||j}t|dkttj	|}||fS )a  Compute Mel-filterbank energy features from an audio signal.

    :param signal: the audio signal from which to compute features. Should be an N*1 array
    :param samplerate: the samplerate of the signal we are working with.
    :param winlen: the length of the analysis window in seconds. Default is 0.025s (25 milliseconds)
    :param winstep: the step between successive windows in seconds. Default is 0.01s (10 milliseconds)
    :param nfilt: the number of filters in the filterbank, default 26.
    :param nfft: the FFT size. Default is 512.
    :param lowfreq: lowest band edge of mel filters. In Hz, default is 0.
    :param highfreq: highest band edge of mel filters. In Hz, default is samplerate/2
    :param preemph: apply preemphasis filter with preemph as coefficient. 0 is no filter. Default is 0.97.
    :param winfunc: the analysis window to apply to each frame. By default no window is applied. You can use numpy window functions here e.g. winfunc=numpy.hamming
    :returns: 2 values. The first is a numpy array of size (NUMFRAMES by nfilt) containing features. Each row holds 1 feature vector. The
        second return value is the energy in each frame (total energy, unwindowed)
    r   r   r   )r   preemphasisframesigpowspecr   sumwherefinfofloatepsget_filterbanksdotT)r!   r"   r#   r$   r&   r'   r(   r)   r*   r-   framespspecr/   fbr.   r   r   r   r   #   s   r   c	              
   C   s&   t | ||||||||	\}	}
t|	S )a  Compute log Mel-filterbank energy features from an audio signal.

    :param signal: the audio signal from which to compute features. Should be an N*1 array
    :param samplerate: the samplerate of the signal we are working with.
    :param winlen: the length of the analysis window in seconds. Default is 0.025s (25 milliseconds)
    :param winstep: the step between successive windows in seconds. Default is 0.01s (10 milliseconds)
    :param nfilt: the number of filters in the filterbank, default 26.
    :param nfft: the FFT size. Default is 512.
    :param lowfreq: lowest band edge of mel filters. In Hz, default is 0.
    :param highfreq: highest band edge of mel filters. In Hz, default is samplerate/2
    :param preemph: apply preemphasis filter with preemph as coefficient. 0 is no filter. Default is 0.97.
    :returns: A numpy array of size (NUMFRAMES by nfilt) containing features. Each row holds 1 feature vector.
    )r   r   r   )r!   r"   r#   r$   r&   r'   r(   r)   r*   r.   r/   r   r   r   logfbankB   s   
r?   c                 C   r   r   r   r   r   r   r   r   V   r   c
              
   C   s   |p|d }t | |} t | || || |	}
t |
|}t|dkttj|}t	|||||}t
||j}ttd|d t|dt|ddf}t
|| |j| S )aJ  Compute Spectral Subband Centroid features from an audio signal.

    :param signal: the audio signal from which to compute features. Should be an N*1 array
    :param samplerate: the samplerate of the signal we are working with.
    :param winlen: the length of the analysis window in seconds. Default is 0.025s (25 milliseconds)
    :param winstep: the step between successive windows in seconds. Default is 0.01s (10 milliseconds)
    :param nfilt: the number of filters in the filterbank, default 26.
    :param nfft: the FFT size. Default is 512.
    :param lowfreq: lowest band edge of mel filters. In Hz, default is 0.
    :param highfreq: highest band edge of mel filters. In Hz, default is samplerate/2
    :param preemph: apply preemphasis filter with preemph as coefficient. 0 is no filter. Default is 0.97.
    :param winfunc: the analysis window to apply to each frame. By default no window is applied. You can use numpy window functions here e.g. winfunc=numpy.hamming
    :returns: A numpy array of size (NUMFRAMES by nfilt) containing features. Each row holds 1 feature vector.
    r   r   r   )r   r1   r2   r3   r   r5   r6   r7   r8   r9   r:   r;   tilelinspacesize)r!   r"   r#   r$   r&   r'   r(   r)   r*   r-   r<   r=   r>   r.   Rr   r   r   sscT   s   .rD   c                 C   s   dt d| d   S )zConvert a value in Hertz to Mels

    :param hz: a value in Hz. This can also be a numpy array, conversion proceeds element-wise.
    :returns: a value in Mels. If an array was passed in, an identical sized array is returned.
    i#
  r   g     @)r   log10)hzr   r   r   hz2melq   s   rG   c                 C   s   dd| d  d  S )zConvert a value in Mels to Hertz

    :param mel: a value in Mels. This can also be a numpy array, conversion proceeds element-wise.
    :returns: a value in Hertz. If an array was passed in, an identical sized array is returned.
    i  
   g     F@r   r   )melr   r   r   mel2hzy   s   rJ      c                 C   s*  |p|d }||d ksJ dt |}t |}t||| d }t|d t| | }t| |d d g}	td| D ]T}
tt||
 t||
d  D ]}|||
  ||
d  ||
   |	|
|f< qOtt||
d  t||
d  D ]}||
d  | ||
d  ||
d    |	|
|f< qwq>|	S )ax  Compute a Mel-filterbank. The filters are stored in the rows, the columns correspond
    to fft bins. The filters are returned as an array of size nfilt * (nfft/2 + 1)

    :param nfilt: the number of filters in the filterbank, default 20.
    :param nfft: the FFT size. Default is 512.
    :param samplerate: the samplerate of the signal we are working with. Affects mel spacing.
    :param lowfreq: lowest band edge of mel filters, default 0 Hz
    :param highfreq: highest band edge of mel filters, default samplerate/2
    :returns: A numpy array of size nfilt * (nfft/2 + 1) containing filterbank. Each row holds 1 filter.
    r   z%highfreq is greater than samplerate/2r   r   )rG   r   rA   floorrJ   zerosrangeint)r&   r'   r"   r(   r)   lowmelhighmel	melpointsbinr   jir   r   r   r9      s   "*&2r9   c                 C   sL   |dkr$t | \}}t |}d|d t t j| |   }||  S | S )a8  Apply a cepstral lifter the the matrix of cepstra. This has the effect of increasing the
    magnitude of the high frequency DCT coeffs.

    :param cepstra: the matrix of mel-cepstra, will be numframes * numcep in size.
    :param L: the liftering coefficient to use. Default is 22. L <= 0 disables lifter.
    r   r   g       @)r   shapearangesinpi)cepstraLnframesncoeffnliftr   r   r   r       s   
 r    c              	   C   s   |dk rt dt| }dtdd td|d D  }t| }tj| ||fdfdd}t|D ]}tt| |d |||d|  d  | ||< q1|S )	a  Compute delta features from a feature vector sequence.

    :param feat: A numpy array of size (NUMFRAMES by number of features) containing features. Each row holds 1 feature vector.
    :param N: For each frame, calculate delta features based on preceding and following N frames
    :returns: A numpy array of size (NUMFRAMES by number of features) containing delta features. Each row holds 1 delta feature vector.
    r   zN must be an integer >= 1r   c                 S   s   g | ]}|d  qS )r   r   ).0rU   r   r   r   
<listcomp>   s    zdelta.<locals>.<listcomp>)r   r   edge)mode)	
ValueErrorlenr4   rN   r   
empty_likepadr:   rW   )r.   N	NUMFRAMESdenominator
delta_featpaddedtr   r   r   delta   s    
8rn   )r   r   r   r	   r
   r   Nr   )rK   r
   r   r   N)r   )
__future__r   r   python_speech_featuresr   scipy.fftpackr   r0   r   r?   rD   rG   rJ   r9   r    rn   r   r   r   r   <module>   s0   







