o
    㥵i                     @   s:  d dl mZ d dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
Z
d dlmZmZ d dlmZ d dlmZ d dlmZ ejeddd	 d d
lmZ ede d"ddZe	 e ejdddejdeddejdddejeddejdddejdddejdddddd  Zed!kre  dS dS )#    )PathN)compose
initialize)instantiate)logger)	OmegaConfz.project-rootT)	indicator
pythonpath)AUDIO_EXTENSIONSevalcudac                 C   s   t jjj   tddd t| d}W d    n1 sw   Y  t|}t	j
||ddd}d|v r9|d }tdd	 |D rKd
d | D }|j|ddd}|  || td|  |S )Nz1.3z../../configs)version_baseconfig_path)config_nameT)map_locationmmapweights_only
state_dictc                 s   s    | ]}d |v V  qdS )	generatorN ).0kr   r   T/home/ubuntu/.local/lib/python3.10/site-packages/fish_speech/models/dac/inference.py	<genexpr>#   s    zload_model.<locals>.<genexpr>c                 S   s&   i | ]\}}d |v r| d d|qS )z
generator. )replace)r   r   vr   r   r   
<dictcomp>$   s
    zload_model.<locals>.<dictcomp>F)strictassignzLoaded model: )hydracoreglobal_hydraGlobalHydrainstanceclearr   r   r   torchloadanyitemsload_state_dictr   tor   info)r   checkpoint_pathdevicecfgmodelr   resultr   r   r   
load_model   s&   
r2   z--input-pathz-iztest.wav)exists	path_type)defaulttypez--output-pathz-ozfake.wav)r4   z--config-namemodded_dac_vq)r5   z--checkpoint-pathz'checkpoints/openaudio-s1-mini/codec.pthz--devicez-dc                 C   s  t |||d}| jtv rtd|   tt| \}}|jd dkr+|j	ddd}tj
|||j}|d  |}td|jd |j d	d
 tj|jd g|tjd}	|||	\}
}|
jdkri|
d }
td|
j  t|d|
   n>| jdkrtd|   t| }
t|
| }
|
jdksJ d|
j tj|
jd g|tjd}ntd|  ||
|\}}	|jd |j }td|j d|d	d|
jd  d|
jd | d	 |d    }t|||j td|  d S )N)r.   z&Processing in-place reconstruction of r      T)keepdimzLoaded audio with    z.2fz seconds)r.   dtype   zGenerated indices of shape z.npyz$Processing precomputed indices from zExpected 2D indices, got zUnknown input type: zGenerated audio of shape z, equivalent to z seconds from z features, features/second: )r   r   zSaved audio to )r2   suffixr
   r   r,   
torchaudior'   strshapemean
functionalresamplesample_rater+   r&   tensorlongencodendimnpsavewith_suffixcpunumpy
from_numpy
ValueErrordecodefloatsfwrite)
input_pathoutput_pathr   r-   r.   r0   audiosraudiosaudio_lengthsindicesindices_lensfake_audios
audio_time
fake_audior   r   r   main2   s@   



0r`   __main__)r   )pathlibr   clickr    rN   rJ   pyrootutils	soundfilerS   r&   r?   r   r   hydra.utilsr   logurur   	omegaconfr   
setup_root__file__fish_speech.utils.filer
   register_new_resolverr   r2   no_gradcommandoptionr`   __name__r   r   r   r   <module>   sR    
3
