o
    ~i                  
   @   s  d Z ddlZddlZddlZddlmZ dgZG dd deZedkrddl	Z	ej
 r.dndZd	Zd
ZdD ]eZeejejD ][\ZZeeeeed eZedkr]eede neeeeZe ( eeZeej e  Z ee j edv re!eZeej W d   n1 sw   Y  q?q6e	"d\Z#Ze#eZ#ee eZe  ee#Z$W d   n1 sw   Y  e	%de$& e dS dS )z4Stable Codec (see https://arxiv.org/abs/2411.19842).    N)CodecStableCodecc                       sp   e Zd ZdgZg dZg dZddddZ					
d fdd	Ze	 dd Z
dd Zdd Zdd Z  ZS )r   #stabilityai/stable-codec-speech-16k)         )@  	=    1x46656_400bps2x15625_700bps4x729_1000bps))r   r   )r   r	   )r   r
   reconstructr   r	   c                    s   z&t jt jt dd tjD } fddtjD t_dd l}|t_W n ty1   tdw t 	|d| || j
v sAJ || jv sHJ || _|| _|j|d| _| j| j||f  |dkrjd | j_d S |d	krtd | j_d S d S )
Nc                 S   s   g | ]}|qS  r   .0xr   r   K/home/ubuntu/.local/lib/python3.10/site-packages/audiocodecs/stablecodec.py
<listcomp>3   s    z(StableCodec.__init__.<locals>.<listcomp>c                    s   g | ]} |vr|qS r   r   r   root_dirr   r   r   4   s    r   zjpip install git+https://github.com/lucadellalib/stable-codec.git@main#egg=stable_codec` to use this modulei>  )pretrained_modelencodedecode)ospathdirnamerealpath__file__sysstable_codecImportErrorsuper__init__NUM_CODEBOOKSVOCAB_SIZESnum_codebooks
vocab_sizer   modelset_posthoc_bottleneckCONFIGSdecoderencoder)selfsample_ratemodesourcer&   r'   sys_pathr    	__class__r   r   r#   (   s.   
zStableCodec.__init__c                    s   t t| j  j}tj| j|d  d d d d f 	dd| j
   fddt| jjjD }tj|dd}|dd}|S )N)devicec                    s$   g | ]\}}|  d |df qS ).N)indices_to_codes)r   k	quantizertoksr   r   r   S   s    z$StableCodec.embs.<locals>.<listcomp>r   dimr   )nextiterr(   
state_dictvaluesr4   torcharanger'   expandr&   clone	enumerateresidual_fsq
quantizerscatmovedim)r-   r4   embsr   r9   r   rJ   L   s    
zStableCodec.embsc                 C   sj   d}|j d | dkrtjj|d||j d |  g}| jj|d d d f dd\}}tj|dd}|S )Ni@  r5   r   Tposthoc_bottleneckr;   )shaperA   nn
functionalpadr(   r   rH   )r-   siglengthwindow_size_r:   r   r   r   _sig_to_toks\   s   
zStableCodec._sig_to_toksc                 C   s0   | j j|d d d f dd\}}|dd}|S )NTrK   r5   )r(   r   rI   )r-   rQ   rR   pre_bottleneck_latentsrT   r   r   r   _sig_to_featsm   s
   
zStableCodec._sig_to_featsc                 C   s:   | j j|dd d d f jdddd}|d d df }|S )N.r5   r;   TrK   r   )r(   r   unbind)r-   r:   rR   rQ   r   r   r   _toks_to_sigv   s
   zStableCodec._toks_to_sig)r   r   r   r	   )__name__
__module____qualname__SOURCESr$   r%   r*   r#   rA   no_gradrJ   rU   rX   rZ   __classcell__r   r   r2   r   r      s$    	$
	__main__cudacpui'  r   )r   r   r   )r/   r&   r'   r   
   )r   r   zexample.wavzreconstruction.wav)'__doc__r   r   rA   audiocodecs.codecr   __all__r   r[   
torchaudiorb   is_availabler4   r.   
batch_sizer/   zipr$   r%   r&   r'   evaltocodeczeroslongrandninputr_   outputprintrM   rJ   sig_to_featsloadrQ   rec_sigsaverc   r   r   r   r   <module>   sd   b








