o
    i*                      @   s  d dl Z de jd< d dlmZ d dlZd dlm  mZ d dl	Z	d dl
m
Z
 d dlmZ d dlmZ d dlmZmZmZ d d	lmZ d d
lmZmZ ej rQdnej rXdn	ejj r`dndZdZdZdZ dZ!dZ"dZ#dZ$dZ%dZ&e'e(ed)de dZ*ede*j+j, Z-e*j+j.Z/e*j0j1Z2e*j+j3Z3e*j+j4j5Z5e*j+j4j6Z6e*j+j4j7Z7e*j+j4j8Z8e*j+j4j9Z9e*j+j4j:Z:e(e
de de  dZ;dZ<e(ed)dZ=d Z>d!Z?d"d#gd$d%ggZ@d&d'gZAd(ZBe jCDe<se Ee< d)ZFe5d*krd+ZGne5d,krd-ZGee5eFeGd.ZHee2e3\ZIZJee-dCi e/eJe7d/eKe:e8e9e7e6e5d0eKe#d1eId2LeZ+e5d,kr1ejMndZNee+e;eeNeBd3Z+e	'e=\ZOZPeOjQd  d'krSejReOd d(d4ZOeSeReTeOZUeUe&k rieOe& eU ZOePe6krye	jVWePe6ZXeXeOZOeOLeZOeY  e+4eOZZeZ[d d5d'ZZW d   n	1 sw   Y  d Z\ej]d'd e7ed6Z^ej]d'd ej_ed7Z`eAdureAa ndZbe@D ]iZcec\ZdZeebdu reeed nebfd Zgehede6 e8 Zieheee6 e8 Zjehege6 e8 Zkeie\ Zlejme^eZdde\eiddf ej]d'eke7ed6fd'd8Z^ejme`ejnd'elej_ed7ej]d'ekej_ed7fd9d8Z`ejZ\qejme^eZdde\dddf fd'd8Z^ejoe`d e^jQd' e`jQd9  fd(d:Z`e?gZpe3d;kr^eepZqnepgZqerd<ep  erd=eq  e^jQd' ZseY s e+jte^eqese!e"e$ee`d>\ZuZverd?eujQ  euLejMZueu[d d5d'Zwe5d*kreHxewy Zzne5d,kreHew{d y ZzeUe&k rezeU e& Zzeewd  y | e< d@ e	}e< dAeze6 erdBezjQ  W d   dS 1 sw   Y  dS )D    N1PYTORCH_ENABLE_MPS_FALLBACK)files)cached_path)	get_class)	OmegaConf)load_checkpointload_vocodersave_spectrogram)CFM)convert_char_to_pinyinget_tokenizercudaxpumpscpuF5TTS_v1_Basei     g       @eulerg      g      ?g?f5_ttszconfigs/z.yamlzf5_tts.model.zhf://SWivid/F5-TTS/z/model_z.safetensorstestsz%infer/examples/basic/basic_ref_en.wavz2Some call me nature, others call me mother nature.z.Some call me optimist, others call me realist.gQ?gQ@g)\(@g@g333333?   TFvocosz'../checkpoints/charactr/vocos-mel-24khzbigvganz,../checkpoints/bigvgan_v2_24khz_100band_256x)vocoder_nameis_local
local_path)text_num_embedsmel_dim)n_fft
hop_length
win_lengthn_mel_channelstarget_sample_ratemel_spec_type)method)transformermel_spec_kwargsodeint_kwargsvocab_char_map)dtypeuse_ema)dimkeepdim   )device)r*   r/   )r,   )valuepinyinztext  : zpinyin: )condtextdurationstepscfg_strengthsway_sampling_coefseed	edit_maskzGenerated mel: z/speech_edit_out.pngz/speech_edit_out.wavzGenerated wav:  )~osenvironimportlib.resourcesr   torchtorch.nn.functionalnn
functionalF
torchaudior   hydra.utilsr   	omegaconfr   f5_tts.infer.utils_inferr   r	   r
   f5_tts.modelr   f5_tts.model.utilsr   r   r   is_availabler   backendsr   r/   r9   exp_name	ckpt_stepnfe_stepr7   
ode_methodr8   speed
target_rmsloadstrjoinpath	model_cfgmodelbackbone	model_clsarch	model_arcdatasetsnamedataset_name	tokenizermel_specr$   r#   r"   r    r!   r   	ckpt_path
output_diraudio_to_editorigin_texttarget_textparts_to_editfix_durationr+   pathexistsmakedirslocalvocoder_local_pathvocoderr)   
vocab_sizedicttofloat32r*   audiosrshapemeansqrtsquarerms
transformsResample	resamplerinference_modeoriginal_melpermuteoffset_framezerosmel_condboolr:   copyfix_dur_listpartstartendpoppart_dur_secroundstart_frame	end_framepart_dur_frameskeep_framescatonespad	text_listfinal_text_listprintr5   sample	generated
trajectorygen_mel_specdecoder   generated_wavesqueezenumpysaver;   r;   r;   L/home/ubuntu/.local/lib/python3.10/site-packages/f5_tts/infer/speech_edit.py<module>   s2   

 











($








$