o
    @Ti	                     @   sR   d dl Z d dlmZ d dlmZ ddlmZ ddlmZm	Z	m
Z
 G dd dZdS )	    N)Vocos)snapshot_download   )LinaCodecModel)
load_audioload_vocodervocodec                   @   sN   e Zd ZdddZe dd Ze ejdejddd	 Z	d
d Z
dS )	LinaCodecNc                 C   s   |d u rt d}tj| d| dd  }|| d |j  ddg|_t	| d }|
t| d	 || _|| _d S )
NzYatharthS/LinaCodecz/config.yamlz/model.safetensors)config_pathweights_pathz/wavlm_encoder.pth   	   z/vocoder/config.yamlz/vocoder/pytorch_model.bin)r   r   from_pretrainedevalcudaload_distilled_wavlmwavlm_modeldistilled_layersr   from_hparamsload_state_dicttorchloadmodelvocos)self
model_pathr   r    r   C/home/ubuntu/.local/lib/python3.10/site-packages/linacodec/codec.py__init__   s   "


zLinaCodec.__init__c                 C   s.   t || jjjd }| j|}|j|jfS )zxencodes audio into discrete content tokens at a rate of 12.5 t/s or 25 t/s and 128 dim global embedding, single codebook)sample_rate)r   r   configr   r   encodecontent_token_indicesglobal_embedding)r   
audio_pathaudiofeaturesr   r   r   r!      s   zLinaCodec.encoder   )device_typedtypec                 C   s&   | j j||d}t| j|d}|S )z0decodes tokens and embedding into 48khz waveform)r"   r#   r   )r   decoder   r   	unsqueeze)r   content_tokensr#   mel_spectrogramwaveformr   r   r   r)   %   s   zLinaCodec.decodec                 C   s,   |  |\}}|  |\}}| ||}|S )zTconverts voice timbre, will keep content of source file but timbre of reference file)r!   r)   )r   source_filereference_filespeech_tokensr#   ref_speech_tokensref_global_embeddingr%   r   r   r   convert_voice0   s   zLinaCodec.convert_voice)N)__name__
__module____qualname__r   r   no_gradr!   autocastfloat16r)   r3   r   r   r   r   r	      s    

	r	   )r   linacodec.vocoder.vocosr   huggingface_hubr   r   r   utilr   r   r   r	   r   r   r   r   <module>   s    