o
    ~Ÿi,  ã                	   @   s|  d Z ddlZddlmZ dgZG dd„ deƒZedkr¼ddlZej 	¡ r&dndZ
d	Zd
ZdgZdD ]ZZeeeed ¡  e
¡ZedkrNe edeeƒ¡ ¡ ne ee¡ e
¡Ze ¡ ( eeƒZeejƒ e ¡ Zeejƒ edv r|e e¡Zeejƒ W d  ƒ n1 s†w   Y  q1e d¡\ZZeeed ¡ Ze ¡  eeƒZW d  ƒ n1 s®w   Y  e  dee¡ dS dS )z7WavLM + K-means (see https://arxiv.org/abs/2312.09747).é    N)ÚCodecÚWavLMKmeansc                       sN   e Zd ZddgZd‡ fdd„	Ze ¡ dd„ ƒZdd	„ Zd
d„ Z	dd„ Z
‡  ZS )r   ©é   )é   é   r   Úreconstructc                    sˆ   zdd l }W n ty   tdƒ‚w tƒ  |d|¡ || _d| _tjjdd|d| _	|dkr8d | j	_
d | j	_d S |d	krBd | j	_d S d S )
Nr   z‰`pip install git+https://github.com/lucadellalib/speechbrain@50ffdc772c0d977390025ee7787735db9b92488c#egg=speechbrain` to use this modulei€>  i   z!lucadellalib/discrete-wavlm-codecÚdiscrete_wavlm_large)Úrepo_or_dirÚmodelÚ	layer_idsÚencodeÚdecode)ÚspeechbrainÚImportErrorÚsuperÚ__init__r   Ú
vocab_sizeÚtorchÚhubÚloadr   ÚdequantizerÚvocoderÚencoder)ÚselfÚsample_rateÚmoder   r   ©Ú	__class__© úL/home/ubuntu/.local/lib/python3.10/site-packages/audiocodecs/wavlm_kmeans.pyr      s*   ÿÿýÿzWavLMKmeans.__init__c                 C   sf   t t| j ¡  ¡ ƒƒj}tj| j|d}|d d …d f  	dt
| jƒ¡ ¡ }| j |¡}| dd¡}|S )N)Údeviceéÿÿÿÿr   )ÚnextÚiterr   Ú
state_dictÚvaluesr!   r   Úaranger   ÚexpandÚlenr   ÚcloneÚtoks_to_qfeatsÚmovedim)r   r!   ÚtoksÚembsr   r   r    r.   6   s   "zWavLMKmeans.embsc                 C   s   | j  |¡}| j  |¡}|S )N)r   Úsig_to_featsÚfeats_to_toks)r   ÚsigÚlengthÚfeatsr-   r   r   r    Ú_sig_to_toks@   s   zWavLMKmeans._sig_to_toksc                 C   s   | j  |¡jdd}|S )Nr"   )Údim)r   r/   Úmean)r   r1   r2   r3   r   r   r    Ú_sig_to_featsG   s   zWavLMKmeans._sig_to_featsc                 C   s4   | j  |¡}| j  |¡}| j  |¡d d …df }|S )Nr   )r   r+   Úqfeats_to_featsÚfeats_to_sig)r   r-   r2   Úqfeatsr3   r1   r   r   r    Ú_toks_to_sigM   s   zWavLMKmeans._toks_to_sig)r   r   )Ú__name__Ú
__module__Ú__qualname__Ú	LAYER_IDSr   r   Úno_gradr.   r4   r7   r;   Ú__classcell__r   r   r   r    r      s    
	Ú__main__ÚcudaÚcpui'  é   r   )r   r   r   )r   r   r   é
   )r   r   zexample.wav)r   zreconstruction.wav)!Ú__doc__r   Úaudiocodecs.codecr   Ú__all__r   r<   Ú
torchaudiorC   Úis_availabler!   r   Ú
batch_sizer   r   ÚevalÚtoÚcodecÚzerosr)   ÚlongÚrandnÚinputr@   ÚoutputÚprintÚshaper.   r/   r   r1   Úrec_sigÚsaver   r   r   r    Ú<module>   sJ   ;ÿÿ
ü




€ù€	

ÿâ