o
    oi                     @   s.   d dl Z d dlZd dlmZ G dd dZdS )    N)AudioCodecModelc                   @   s.   e Zd Zd
dedefddZdefddZd	S )	NanoCoder,nvidia/nemo-nano-codec-22khz-0.6kbps-12.5fpsdevice_rancmodel_idc                 C   sD   t | | _td| | _tj| j | j| j| _d S )Nzcuda:)	r   from_pretrainedevalnemo_codec_modeltorchdevicecuda
set_deviceto)selfr   r    r   E/home/ubuntu/kanitts-2-dataset-pipeline/utils/nanocodec/nano_codec.py__init__   s   zNanoCoder.__init__returnc                 C   s   t |jdd}|jt jd}|| j}t |d jd g| j}t   | j	j
||d\}}W d    n1 s>w   Y  | }|d d |d d |d d |d d | d	}|S )
Nr   )dim)dtype)audio	audio_lencpu         )nano_layer_1nano_layer_2nano_layer_3nano_layer_4encoded_len)r
   
from_numpy	unsqueezer   float32r   tensorshapeinference_moder	   encodesqueezenumpyitem)r   waveformaudio_tensorr   encoded_tokensr    encoded_audior   r   r   __call__   s   
zNanoCoder.__call__N)r   )__name__
__module____qualname__intstrr   dictr/   r   r   r   r   r      s    r   )r
   r)   npnemo.collections.tts.modelsr   r   r   r   r   r   <module>   s    