o
    ~i                  	   @   s  d Z ddlZddlZddlZddlmZ ddlmZ dgZG dd deZ	e
dkrddlZej r4dnd	Zd
ZdZdD ]WZe	eed eZedkrVeedd neeeeZe ( eeZeej e Zeej edv reeZeej W d   n1 sw   Y  q<ed\Z Ze	e Ze  ee Z!W d   n1 sw   Y  e"de!e dS dS )z4WavTokenizer (see https://arxiv.org/abs/2408.16532).    N)snapshot_download)CodecWavTokenizerc                       sf   e Zd ZddgZddgZddgZ				d fdd		Ze d
d Z	dd Z
dd Zdd Z  ZS )r   )novateur/WavTokenizer-large-unify-40tokenz*novateur/WavTokenizer-large-speech-75tokenIwavtokenizer_smalldata_frame40_3s_nq1_code4096_dim512_kmeans200_attn.yamlzIwavtokenizer_smalldata_frame75_3s_nq1_code4096_dim512_kmeans200_attn.yaml%wavtokenizer_large_unify_600_24k.ckptz%wavtokenizer_large_speech_320_v2.ckptreconstructc                    s   z&t jt jt dd tjD } fddtjD t_dd l}|t_W n ty1   tdw t 	|d| d| _
d| _t|d	}t j||}	td
d	}t j||}
|j|
|	| _|dkrpd | jjj_d | j_d S |dkr|d | jjj_d S d S )Nc                 S   s   g | ]}|qS  r	   .0xr	   r	   L/home/ubuntu/.local/lib/python3.10/site-packages/audiocodecs/wavtokenizer.py
<listcomp>8   s    z)WavTokenizer.__init__.<locals>.<listcomp>c                    s   g | ]} |vr|qS r	   r	   r
   root_dirr	   r   r   9   s    r   zZ`pip install git+https://github.com/lucadellalib/WavTokenizer.git@main` to use this modulei]     i   )repo_idznovateur/WavTokenizerencodedecode)ospathdirnamerealpath__file__syswavtokenizerImportErrorsuper__init__num_codebooks
vocab_sizer   joinr   from_pretrained0802modelfeature_extractorencodecdecoderheadencoder)selfsample_ratemodesourceconfig
checkpointsys_pathr   r   checkpoint_pathconfig_path	__class__r   r   r   -   s6   


zWavTokenizer.__init__c                 C   s"   | j jjjjjd j}|d  }|S )Nr   )r#   r$   r%   	quantizervqlayerscodebook)r)   embsr	   r	   r   r8   U   s   zWavTokenizer.embsc                 C   s$   | j j|dd\}}|dd}|S )Nr   bandwidth_id)r#   r   movedim)r)   siglength_toksr	   r	   r   _sig_to_toks\   s   zWavTokenizer._sig_to_toksc                 C   s,   | j jj|d d d f }|dd}|S )Nr;   )r#   r$   r%   r(   r<   )r)   r=   r>   featsr	   r	   r   _sig_to_featsc   s   zWavTokenizer._sig_to_featsc                 C   s4   | j |dd}| j j|tjd|jdd}|S )Nr;   r   )devicer9   )r#   codes_to_featuresr<   r   torchtensorrE   )r)   r@   r>   rC   r=   r	   r	   r   _toks_to_sigj   s
   zWavTokenizer._toks_to_sig)r   r   r   r   )__name__
__module____qualname__SOURCESCONFIGSCHECKPOINTSr   rG   no_gradr8   rA   rD   rI   __classcell__r	   r	   r2   r   r      s(    (
__main__cudacpui'     )r   r   r   )r+   r   
   r   )r   r   zexample.wavzreconstruction.wav)#__doc__r   r   rG   huggingface_hubr   audiocodecs.codecr   __all__r   rJ   
torchaudiorS   is_availablerE   r*   
batch_sizer+   evaltocodeczeroslongrandninputrP   outputprintshaper8   sig_to_featsloadr=   rec_sigsaver	   r	   r	   r   <module>   sL   U





	

