o
    in#                  	   @   s  d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	m
Z
 d dlZd dlZd dlZd dlmZ d dlmZ d dlmZ d dlmZmZmZmZmZ ejejd eeZG dd	 d	ej Z!G d
d dZ"dd Z#dd Z$edkre % Z&e&j'ddd e&j'ddd e&j'dddd e&j'de(eddddd e&j'dd d!d e&j'd"e(e
d#)d$d%d e&j'd&d d'd e&j'd(dd)d e&j'd*ej*d+d e&+ Z,ze"e,j-e,j.e,j/e,j0e,j1e,j2e,j3d,Z4e$e,j5e,j6e4 W dS  e7y   e8  Y dS w dS )-    N)files)hf_hub_download)	get_class)	OmegaConf)
chunk_textinfer_batch_process
load_modelload_vocoderpreprocess_ref_audio_text)levelc                       s8   e Zd ZdZ fddZdd Zdd Zdd	 Z  ZS )
AudioFileWriterThreadzAThreaded file writer to avoid blocking the TTS streaming process.c                    s4   t    || _|| _t | _t | _g | _	d S )N)
super__init__output_filesampling_ratequeueQueue	threadingEvent
stop_event
audio_data)selfr   r   	__class__ H/home/ubuntu/.local/lib/python3.10/site-packages/f5_tts/socket_server.pyr   #   s   



zAudioFileWriterThread.__init__c              	   C   s   t d t| jda}|d |d || j | j	
 r'| j sez!| jjdd}|durGt|d }| j| ||  W n
 tjyR   Y qw | j	
 r'| j r'W d   dS W d   dS 1 spw   Y  dS )	z1Process queued audio data and write it to a file.zAudioFileWriterThread started.wb      g?)timeoutNi  )loggerinfowaveopenr   setnchannelssetsampwidthsetframerater   r   is_setr   emptygetnpint16r   appendwriteframestobytesEmpty)r   wfchunkr   r   r   run+   s(   


"zAudioFileWriterThread.runc                 C   s   | j | dS )zAdd a new chunk to the queue.N)r   put)r   r1   r   r   r   	add_chunk=   s   zAudioFileWriterThread.add_chunkc                 C   s    | j   |   td dS )z3Stop writing and ensure all queued data is written.zAudio writing completed.N)r   setjoinr    r!   r   r   r   r   stopA   s   
zAudioFileWriterThread.stop)	__name__
__module____qualname____doc__r   r2   r4   r8   __classcell__r   r   r   r   r       s    r   c                   @   sD   e Zd ZdejfddZdd Zdd Zdd	 Zd
d Z	dd Z
dS )TTSStreamingProcessorNc           	      C   s   |pt j r	dnt j rdn	t jj rdnd| _tt	t
dd| d}td|jj | _|jj| _|jjj| _|jjj| _| |||| _|  | _| || |   d | _d	| _d S )
Ncudaxpumpscpuf5_ttszconfigs/z.yamlzf5_tts.model.T)torchr?   is_availabler@   backendsrA   devicer   loadstrr   joinpathr   modelbackbone	model_clsarch	model_arcmel_specmel_spec_typetarget_sample_rater   load_ema_modelload_vocoder_modelvocoderupdate_reference_warm_upfile_writer_threadfirst_package)	r   rK   	ckpt_file
vocab_file	ref_audioref_textrG   dtype	model_cfgr   r   r   r   I   s(   
 	


zTTSStreamingProcessor.__init__c              
   C   s,   t | j| j|| j|dd| jdj| j|dS )NeulerT)	ckpt_pathrQ   r[   
ode_methoduse_emarG   )r^   )r   rM   rO   rQ   rG   to)r   rZ   r[   r^   r   r   r   rS   a   s   	z$TTSStreamingProcessor.load_ema_modelc                 C   s   t | jdd | jdS )NF)vocoder_nameis_local
local_pathrG   )r	   rQ   rG   r7   r   r   r   rT   m   s   z(TTSStreamingProcessor.load_vocoder_modelc                 C   s   t ||\| _| _t| j\| _| _| jjd | j }t| j	d}t
|| d|  | _t
|| d|  d | _t
|| d|  d | _d S )Nutf-8   r      )r
   r\   r]   
torchaudiorH   audiosrshapelenencodeint	max_chars	few_chars	min_chars)r   r\   r]   ref_audio_durationref_text_byte_lenr   r   r   rV   p   s   z&TTSStreamingProcessor.update_referencec              
   C   sL   t d d}t| j| jf| j|g| j| jd | jddD ]}qt d d S )NzWarming up the model...zWarm-up text for the model.T)progressrG   	streamingzWarm-up completed.)	r    r!   r   rm   rn   r]   rK   rU   rG   )r   gen_text_r   r   r   rW   z   s   



zTTSStreamingProcessor._warm_upc                 C   s*  t || jd}| jr+t |d | jd|dd   }t |d | jd|dd   }d| _t| j| jf| j|| j	| j
d | jddd	}| jd urJ| j  td| j| _| j  |D ]+\}}t|dkrtd	t|  |tjt| d
g|R   | j| qXtd |d | j  d S )N)rs   r   r   FTi   )rx   rG   ry   
chunk_sizez
output.wavzGenerated audio chunk of size: fzFinished sending audio stream.s   END)r   rs   rY   rt   ru   r   rm   rn   r]   rK   rU   rG   rX   r8   r   r   startrp   r    r!   sendallstructpackr4   )r   textconntext_batchesaudio_streamaudio_chunkr{   r   r   r   generate_stream   s:   



"

z%TTSStreamingProcessor.generate_stream)r9   r:   r;   rD   float32r   rS   rT   rV   rW   r   r   r   r   r   r>   H   s    
r>   c                 C   s  zl| _ |  tjtjd 	 | d}|sd|_n7|d }t	d|  z|
||  W n tyN } ztd|  t  W Y d }~nd }~ww qW d    W d S W d    W d S 1 sew   Y  W d S  ty } ztd|  t  W Y d }~d S d }~ww )Nr   Ti   ri   zReceived text: zError during processing: zError handling client: )
setsockoptsocketIPPROTO_TCPTCP_NODELAYrecvrY   decodestripr    r!   r   	Exceptionerror	traceback	print_exc)r   	processordatadata_strinner_eer   r   r   handle_client   s8   
&r   c                 C   s|   t  t jt j,}|| |f |  td|  d|  	 | \}}td|  t|| q 1 s7w   Y  d S )NzServer started on :TzConnected by )	r   AF_INETSOCK_STREAMbindlistenr    r!   acceptr   )hostportr   sr   addrr   r   r   start_server   s   
r   __main__z--hostz0.0.0.0)defaultz--porti'  z--modelF5TTS_v1_Basez"The model name, e.g. F5TTS_v1_Base)r   helpz--ckpt_filezSWivid/F5-TTSz'F5TTS_v1_Base/model_1250000.safetensors)repo_idfilenamez!Path to the model checkpoint filez--vocab_file z$Path to the vocab file if customizedz--ref_audiorC   z%infer/examples/basic/basic_ref_en.wavz=Reference audio to provide model with speaker characteristicsz
--ref_textz8Reference audio subtitle, leave empty to auto-transcribez--devicezDevice to run the model onz--dtypez$Data type to use for model inference)rK   rZ   r[   r\   r]   rG   r^   )9argparsegcloggingr   r   r   r   r   r"   importlib.resourcesr   numpyr*   rD   rl   huggingface_hubr   hydra.utilsr   	omegaconfr   f5_tts.infer.utils_inferr   r   r   r	   r
   basicConfigINFO	getLoggerr9   r    Threadr   r>   r   r   ArgumentParserparseradd_argumentrI   rJ   r   
parse_argsargsrK   rZ   r[   r\   r]   rG   r^   r   r   r   KeyboardInterruptcollectr   r   r   r   <module>   s    	
(l
