o
    :i                     @   sz   d dl mZ d dlmZ d dlZd dlZd dlZd dlm	Z	m
Z
 d dlmZmZ eG dd dZe
G dd	 d	e	ZdS )
    )	dataclass)UnionN)FeatureExtractorregister_extractor)Secondscompute_num_framesc                   @   s>   e Zd ZU dZeed< dZeed< dZeed< dZeed< d	S )
VocosFbankConfigi]  sampling_rated   n_melsi   n_fft   
hop_lengthN)	__name__
__module____qualname__r	   int__annotations__r   r   r    r   r   -/home/ubuntu/LuxTTS/zipvoice/utils/feature.pyr      s
   
 r   c                       s   e Zd Zd ZeZddef fddZdd Ze	de
eejf fdd	Zd
edefddZde
ejejf d
ede
ejejf fddZe	defddZ  ZS )
VocosFbank   num_channelsc                    sR   t }t j|d |dv sJ || _tjj| jj| jj	| jj
| jjddd| _d S )N)config)r      Tr   )sample_rater   r   r   centerpower)r   super__init__r   
torchaudio
transformsMelSpectrogramr   r	   r   r   r   fbank)selfr   r   	__class__r   r   r   *   s   zVocosFbank.__init__c                 C   s   |  |}|jdd }|S )NgHz>)min)r#   clamplog)r$   samplemellogmelr   r   r   _feature_fn8   s   
zVocosFbank._feature_fnreturnc                 C      | j jS N)r   devicer$   r   r   r   r1   >   s   zVocosFbank.devicer	   c                 C   r/   r0   )r   r   )r$   r	   r   r   r   feature_dimB   s   zVocosFbank.feature_dimsamplesc                 C   s\  | j j}||ksJ d| d| d}t|tjs!t|}d}t|jdkr.|d}n
|j	dks8J |j| j
dkrL|jd dkrK|jddd}n|jd dksXJ |j| |}|d	|jd	  }t|jd | | j|}|jd |kr|d | }n"|jd |k r|d}tjjj|ddd||jd  fd
dd}|r|  S |S )Nz,Mismatched sampling rate: extractor expects z, got FTr   r   r   )dimkeepdims	replicate)mode)r   r	   
isinstancetorchTensor
from_numpylenshape	unsqueezendimr   meanr-   reshapetr   frame_shiftnn
functionalpadsqueezecpunumpy)r$   r4   r	   expected_sris_numpyr+   
num_framesr   r   r   extractE   sF   




zVocosFbank.extractc                 C   s   | j j| j j S r0   )r   r   r	   r2   r   r   r   rE   v   s   zVocosFbank.frame_shift)r   )r   r   r   namer   config_typer   r   r-   propertyr   strr;   r1   r3   npndarrayr<   rO   r   rE   __classcell__r   r   r%   r   r   $   s"    
1r   )dataclassesr   typingr   rK   rT   r;   r    lhotse.features.baser   r   lhotse.utilsr   r   r   r   r   r   r   r   <module>   s   