o
    Si                     @   sv   d dl mZ d dlZd dlZd dlmZ d dlmZ d dlmZ G dd dej	Z
G dd	 d	e
ZG d
d de
ZdS )    )ListN)EncodecModel)nn)safe_logc                   @   s&   e Zd ZdZdejdejfddZdS )FeatureExtractorz"Base class for feature extractors.audioreturnc                 K   s   t d)a.  
        Extract features from the given audio.

        Args:
            audio (Tensor): Input audio waveform.

        Returns:
            Tensor: Extracted features of shape (B, C, L), where B is the batch size,
                    C denotes output features, and L is the sequence length.
        z-Subclasses must implement the forward method.)NotImplementedError)selfr   kwargs r   L/home/ubuntu/.local/lib/python3.10/site-packages/vocos/feature_extractors.pyforward   s   zFeatureExtractor.forwardN)__name__
__module____qualname____doc__torchTensorr   r   r   r   r   r      s    r   c                       s&   e Zd Zd
 fdd	Zdd	 Z  ZS )MelSpectrogramFeatures]        d   centerc                    sB   t    |dvrtd|| _tjj|||||dkdd| _d S )N)r   samez#Padding must be 'center' or 'same'.r      )sample_raten_fft
hop_lengthn_melsr   power)super__init__
ValueErrorpadding
torchaudio
transformsMelSpectrogrammel_spec)r
   r   r   r   r    r%   	__class__r   r   r#      s   
zMelSpectrogramFeatures.__init__c                 K   sP   | j dkr| jj| jj }tjjj||d |d fdd}| |}t|}|S )Nr      reflect)mode)	r%   r)   
win_lengthr   r   r   
functionalpadr   )r
   r   r   r1   melfeaturesr   r   r   r   +   s   
 
zMelSpectrogramFeatures.forward)r   r   r   r   r   )r   r   r   r#   r   __classcell__r   r   r*   r   r      s    r   c                       sZ   e Zd Zdg ddfdedee def fddZe	 d	d
 Z
dejfddZ  ZS )EncodecFeaturesencodec_24khz)g      ?g      @g      @g      (@Fencodec_model
bandwidthstrain_codebooksc                    s   t    |dkrtj}n|dkrtj}ntd| d|dd| _| j D ]}d|_q(| jj	j
| jjt|d| _tjd	d
 | jj	jjd | j D dd}tjj||d| _|| _d S )Nr6   encodec_48khzzUnsupported encodec_model: z<. Supported options are 'encodec_24khz' and 'encodec_48khz'.T)
pretrainedF)	bandwidthc                 S   s   g | ]}|j qS r   )codebook).0vqr   r   r   
<listcomp>J   s    z,EncodecFeatures.__init__.<locals>.<listcomp>r   dim)requires_grad)r"   r#   r   encodec_model_24khzencodec_model_48khzr$   encodec
parametersrC   	quantizer get_num_quantizers_for_bandwidth
frame_ratemaxnum_qr   catr?   layersr   	Parametercodebook_weightsr8   )r
   r7   r8   r9   rF   paramrP   r*   r   r   r#   5   s"   

*
zEncodecFeatures.__init__c                 C   s4   | d}| j|}| jj|| jj| jj}|S )Nr   )	unsqueezerF   encoderrH   encoderJ   r<   )r
   r   embcodesr   r   r   get_encodec_codesN   s   
z!EncodecFeatures.get_encodec_codesr   c                 K   s   | d}|d u rtd| j  | j| j|  | |}tjd| jj	j
t| | jj	j
|jd}||ddd }tjj|| jjdd}|ddS )	Nbandwidth_idz'The 'bandwidth_id' argument is requiredr   )devicer   rA   r,   )getr$   rF   evalset_target_bandwidthr8   rW   r   arangerH   binslenrY   viewr   r0   	embeddingrP   sum	transpose)r
   r   r   rX   rV   offsetsembeddings_idxsr3   r   r   r   r   U   s   


zEncodecFeatures.forward)r   r   r   strr   floatboolr#   r   no_gradrW   r   r   r4   r   r   r*   r   r5   4   s    
r5   )typingr   r   r&   rF   r   r   vocos.modulesr   Moduler   r   r5   r   r   r   r   <module>   s    