o
    SiB                     @   s  d dl Z d dlmZ d dlmZmZmZmZmZm	Z	 d dl
Zd dlZd dlmZmZ d dlmZmZmZmZ d dlmZmZmZmZ eG dd dZeG d	d
 d
eZeG dd dZeG dd deZeG dd dZeG dd deZeG dd dZ eG dd deZ!			d"dede	ej"ej#eej" eej# f de$dedee	ej"ej#f  de	e%ej&f de	ej"ej#eej" eej# f fd d!Z'dS )#    N)	dataclass)AnyDictListOptionalSequenceUnion)FeatureExtractorregister_extractor)Wav2LogFilterBankWav2LogSpecWav2MFCCWav2Spec)EPSILONSecondsasdict_nonullcompute_num_frames_from_samplesc                   @   s>  e Zd ZU dZeed< dZeed< dZeed< dZ	e
ed< dZe
ed	< d
Zeed< dZeed< dZeed< dZe
ed< eZeed< dZe
ed< dZe
ed< dZe
ed< dZeed< dZeed< dZeed< dZee ed< dZe
ed< dZe
ed< d Zeed!< d"d# Zd$e ee!f fd%d&Z"e#d'e ee!f d$d fd(d)Z$dS )*FbankConfig>  sampling_rate皙?frame_length{Gz?frame_shiftTround_to_power_of_tworemove_dc_offset
ףp=
?preemph_coeffpoveywindow_type        ditherF
snip_edgesenergy_floor
raw_energy
use_energyuse_fft_mag      4@low_freq      y	high_freqP   num_filtersNnum_mel_binsnorm_filterstorchaudio_compatible_mel_scalecpudevicec                 C   0   | j d ur| j | _d | _ | jrtd d S d S Nzc`snip_edges` is set to True, which may cause issues in duration to num-frames conversion in Lhotse.r-   r,   r"   warningswarnself r9   T/home/ubuntu/.local/lib/python3.10/site-packages/lhotse/features/kaldi/extractors.py__post_init__.      
zFbankConfig.__post_init__returnc                 C      t | S Nr   r7   r9   r9   r:   to_dict:      zFbankConfig.to_dictdatac                 C      t di | S Nr9   )r   rC   r9   r9   r:   	from_dict=      zFbankConfig.from_dict)%__name__
__module____qualname__r   int__annotations__r   r   r   r   boolr   r   floatr   strr!   r"   r   r#   r$   r%   r&   r(   r*   r,   r-   r   r.   r/   r1   r;   r   r   rA   staticmethodrG   r9   r9   r9   r:   r      s2   
  r   c                       sN  e Zd ZdZeZddee f fddZede	e
ejf fddZedefd	d
Zde
fddZdedefddZde	ejejf dede	ejejf fddZ	dde	ejejeej eej f dedee	ejejf  de	ejejeej eej f fddZedejdejdedejfddZedejdefddZ  ZS ) Fbankzkaldi-fbankNconfigc                    B   t  j|d | j }|d tdi || j | _	d S N)rS   r1   r9   )
super__init__rS   rA   popr   tor1   eval	extractorr8   rS   config_dict	__class__r9   r:   rW   G      

 zFbank.__init__r=   c                 C      | j jS r?   rS   r1   r7   r9   r9   r:   r1   M      zFbank.devicec                 C   ra   r?   rS   r   r7   r9   r9   r:   r   Q   rc   zFbank.frame_shiftr1   c                 C   s   || j _| j| d S r?   )rS   r1   r[   rY   )r8   r1   r9   r9   r:   rY   U   s   zFbank.tor   c                 C   ra   r?   )rS   r,   r8   r   r9   r9   r:   feature_dimY   rB   zFbank.feature_dimsamplesc                 C      || j jksJ d| j j d| dd}t|tjs"t|}d}|jdkr,|d}| |	| j
d }|r?|  S |S )Nz)Fbank was instantiated for sampling_rate , but sampling_rate=l was passed to extract(). Note you can use CutSet/RecordingSet.resample() to change the audio sampling rate.FT   r   rS   r   
isinstancetorchTensor
from_numpyndim	unsqueezer[   rY   r1   r0   numpyr8   rg   r   is_numpyfeatsr9   r9   r:   extract\   "   


zFbank.extractlengthsc                 C      t | j||| j|| jdS N)r   ry   r1   _extract_batchr[   r   r1   r8   rg   r   ry   r9   r9   r:   extract_batchu      zFbank.extract_batch
features_a
features_benergy_scaling_factor_bc              
   C   s&   t t tt | |t |  S r?   )nplogmaximumr   expr   r   r   r9   r9   r:   mix   s   z	Fbank.mixfeaturesc                 C   s   t tt| S r?   )rO   r   sumr   r   r9   r9   r:   compute_energy   s   zFbank.compute_energyr?   )rI   rJ   rK   namer   config_typer   rW   propertyr   rP   rn   r1   r   r   rY   rL   rf   r   ndarrayro   rw   r   r   r   rQ   rO   r   r   __classcell__r9   r9   r^   r:   rR   B   sR    

rR   c                   @   sV  e Zd ZU dZeed< dZeed< dZeed< dZ	e
ed< dZe
ed	< d
Zeed< dZeed< dZeed< dZe
ed< eZeed< dZe
ed< dZe
ed< dZe
ed< dZeed< dZeed< dZeed< dZe
ed< dZee ed< dZe
ed< d Zeed!< d"Zeed#< d$Z eed%< d&d' Z!d(e"ee#f fd)d*Z$e%d+e"ee#f d(d fd,d-Z&dS ).
MfccConfigr   r   r   r   r   r   Tr   r   r   r   r   r   r    r!   Fr"   r#   r$   r%   r&   r'   r(   r)   r*      r,   r/   Nr-   r.      num_ceps   cepstral_lifterr0   r1   c                 C   r2   r3   r4   r7   r9   r9   r:   r;      r<   zMfccConfig.__post_init__r=   c                 C   r>   r?   r@   r7   r9   r9   r:   rA      rB   zMfccConfig.to_dictrC   c                 C   rD   rE   )r   rF   r9   r9   r:   rG      rH   zMfccConfig.from_dict)'rI   rJ   rK   r   rL   rM   r   r   r   r   rN   r   r   rO   r   rP   r!   r"   r   r#   r$   r%   r&   r(   r*   r,   r/   r-   r   r.   r   r   r1   r;   r   r   rA   rQ   rG   r9   r9   r9   r:   r      s6   
  r   c                       s  e Zd ZdZeZddee f fddZede	e
ejf fddZedefd	d
ZdedefddZde	ejejf dede	ejejf fddZ	dde	ejejeej eej f dedee	ejejf  de	ejejeej eej f fddZ  ZS )Mfccz
kaldi-mfccNrS   c                    rT   rU   )
rV   rW   rS   rA   rX   r   rY   r1   rZ   r[   r\   r^   r9   r:   rW      r`   zMfcc.__init__r=   c                 C   ra   r?   rb   r7   r9   r9   r:   r1      rc   zMfcc.devicec                 C   ra   r?   rd   r7   r9   r9   r:   r      rc   zMfcc.frame_shiftr   c                 C   ra   r?   )rS   r   re   r9   r9   r:   rf      rB   zMfcc.feature_dimrg   c                 C   rh   )Nz(Mfcc was instantiated for sampling_rate ri   rj   FTrk   r   rl   rt   r9   r9   r:   rw      rx   zMfcc.extractry   c                 C   rz   r{   r|   r~   r9   r9   r:   r      r   zMfcc.extract_batchr?   )rI   rJ   rK   r   r   r   r   rW   r   r   rP   rn   r1   r   r   rL   rf   r   r   ro   rw   r   r   r   r   r9   r9   r^   r:   r      s8    
r   c                   @      e Zd ZU dZeed< dZeed< dZeed< dZ	e
ed< dZe
ed	< d
Zeed< dZeed< dZeed< dZe
ed< eZeed< dZe
ed< dZe
ed< dZe
ed< dZeed< dd Zdeeef fddZedeeef dd fddZd S )!SpectrogramConfigr   r   r   r   r   r   Tr   r   r   r   r   r   r    r!   Fr"   r#   r$   r%   r&   r0   r1   c                 C      | j r
td d S d S r3   r"   r5   r6   r7   r9   r9   r:   r;     
   zSpectrogramConfig.__post_init__r=   c                 C   r>   r?   r@   r7   r9   r9   r:   rA     rB   zSpectrogramConfig.to_dictrC   c                 C   rD   rE   )r   rF   r9   r9   r:   rG     rH   zSpectrogramConfig.from_dictNrI   rJ   rK   r   rL   rM   r   r   r   r   rN   r   r   rO   r   rP   r!   r"   r   r#   r$   r%   r&   r1   r;   r   r   rA   rQ   rG   r9   r9   r9   r:   r     &   
  r   c                       @  e Zd ZdZeZddee f fddZede	e
ejf fddZedefd	d
ZdedefddZde	ejejf dede	ejejf fddZ	dde	ejejeej eej f dedee	ejejf  de	ejejeej eej f fddZedejdejdedejfddZedejdefddZ  ZS )Spectrogramzkaldi-spectrogramNrS   c                    rT   rU   )
rV   rW   rS   rA   rX   r   rY   r1   rZ   r[   r\   r^   r9   r:   rW   )  r`   zSpectrogram.__init__r=   c                 C   ra   r?   rb   r7   r9   r9   r:   r1   /  rc   zSpectrogram.devicec                 C   ra   r?   rd   r7   r9   r9   r:   r   3  rc   zSpectrogram.frame_shiftr   c                 C      | j jd d S N   rk   r[   
fft_lengthre   r9   r9   r:   rf   7     zSpectrogram.feature_dimrg   c                 C      || j jksJ d| j j d| dd}t|tjs"t|}d}|jdkr,|d}| |	| j
d }|r?|  S | S Nz/Spectrogram was instantiated for sampling_rate ri   rj   FTrk   r   rl   rt   r9   r9   r:   rw   :  "   


zSpectrogram.extractry   c                 C   rz   r{   r|   r~   r9   r9   r:   r   S  r   zSpectrogram.extract_batchr   r   r   c                 C      | ||  S r?   r9   r   r9   r9   r:   r   d     zSpectrogram.mixr   c                 C      t t| S r?   rO   r   r   r   r9   r9   r:   r   j  rH   zSpectrogram.compute_energyr?   )rI   rJ   rK   r   r   r   r   rW   r   r   rP   rn   r1   r   r   rL   rf   r   r   ro   rw   r   r   r   rQ   rO   r   r   r   r9   r9   r^   r:   r   $  P    

r   c                   @   r   )!LogSpectrogramConfigr   r   r   r   r   r   Tr   r   r   r   r   r   r    r!   Fr"   r#   r$   r%   r&   r0   r1   c                 C   r   r3   r   r7   r9   r9   r:   r;     r   z"LogSpectrogramConfig.__post_init__r=   c                 C   r>   r?   r@   r7   r9   r9   r:   rA     rB   zLogSpectrogramConfig.to_dictrC   c                 C   rD   rE   )r   rF   r9   r9   r:   rG     rH   zLogSpectrogramConfig.from_dictNr   r9   r9   r9   r:   r   o  r   r   c                       r   )LogSpectrogramzkaldi-log-spectrogramNrS   c                    rT   rU   )
rV   rW   rS   rA   rX   r   rY   r1   rZ   r[   r\   r^   r9   r:   rW     r`   zLogSpectrogram.__init__r=   c                 C   ra   r?   rb   r7   r9   r9   r:   r1     rc   zLogSpectrogram.devicec                 C   ra   r?   rd   r7   r9   r9   r:   r     rc   zLogSpectrogram.frame_shiftr   c                 C   r   r   r   re   r9   r9   r:   rf     r   zLogSpectrogram.feature_dimrg   c                 C   r   r   rl   rt   r9   r9   r:   rw     r   zLogSpectrogram.extractry   c                 C   rz   r{   r|   r~   r9   r9   r:   r     r   zLogSpectrogram.extract_batchr   r   r   c                 C   r   r?   r9   r   r9   r9   r:   r     r   zLogSpectrogram.mixr   c                 C   r   r?   r   r   r9   r9   r:   r     rH   zLogSpectrogram.compute_energyr?   )rI   rJ   rK   r   r   r   r   rW   r   r   rP   rn   r1   r   r   rL   rf   r   r   ro   rw   r   r   r   rQ   rO   r   r   r   r9   r9   r^   r:   r     r   r   r   r0   r[   rg   r   r   ry   r1   r=   c           	         sb  d}d}|d urfdd|D t |tjsJ dn?t |tr%d}n|jdkr/t|}n|ddg}tdd	 |D rAd}d
d |D } fdd|D tjjj	j
|dd}|j} ||| fddtt|D |sdd D tdkr|rS d S tfdd	dd  D r|rtjddS tjddS S )NFc                    s   g | ]}t | qS r9   )r   ).0l)r   r   r9   r:   
<listcomp>  s    
z"_extract_batch.<locals>.<listcomp>zNIf `lengths` is provided, `samples` must be a batched and padded torch.Tensor.Trk   c                 s   s    | ]	}t |tjV  qd S r?   )rm   rn   ro   r   xr9   r9   r:   	<genexpr>  s    z!_extract_batch.<locals>.<genexpr>c                 S   s.   g | ]}t |tjrt| n| qS r9   )rm   r   r   rn   rp   squeezer   r9   r9   r:   r     s     c                    s    g | ]}t t| jd qS ))num_samplesr   r   )r   lenr   r   )r[   r   r9   r:   r     s    )batch_firstc                    s    g | ]}|d  | f qS r?   r9   )r   i)	feat_lensrv   r9   r:   r     s     c                 S   s   g | ]}|  qS r9   )rs   r   r9   r9   r:   r     s    r   c                 3   s     | ]}|j  d  j kV  qdS )r   N)shape)r   item)resultr9   r:   r     s    )dim)axis)rm   rn   ro   listrq   reshapeanynnutilsrnnpad_sequencer1   rY   ranger   allstackr   )	r[   rg   r   r   ry   r1   input_is_listinput_is_torchinput_devicer9   )r[   r   rv   r   r   r   r:   r}     sR   




r}   )r   Nr0   )(r5   dataclassesr   typingr   r   r   r   r   r   rs   r   rn   lhotse.features.baser	   r
   lhotse.features.kaldi.layersr   r   r   r   lhotse.utilsr   r   r   r   r   rR   r   r   r   r   r   r   r   ro   rL   rP   r1   r}   r9   r9   r9   r:   <module>   sX     *T,@JP	