o
    i                     @   sH  d Z ddlZddlZddlZddlZddlZddlm	Z	 ddl
mZ ejddZejddd	d
 ejdddg ddd ejdddg ddd ejddeddd ejddeddd e Zed e	ejejejejejdZejZejZed  d!Zd"ed#ed$ed%ed&ed'efd(d)Zejd*d+Ze d,e!  d-e d. e"  ej#d/d0b ej$d1d2d3d4d5d6Z%ej&d7d8d9d:Z'ej(d;d8d<1 ej)d=d>d=d?d@dAZ*ej)dBd>dCd?dDdAZ+ej)d>dEdFdGdHdAZ,ej)dd5ddddIdJZ-W d   n1 sw   Y  ej.dKdLdMdNZ/W d   n	1 sw   Y  ej#dd0 ej0dOdPdQdRZ1ej$dSd8dTd5dUZ2W d   n	1 s=w   Y  W d   n	1 sMw   Y  ej3g dVg dWg dXg dYge%e*e+e,gdZd[ e/j4ee%e*e+e,e-e'ge1e2gd\ e d] W d   n	1 sw   Y  ddd_d`Z5dadb Z6e7dckre6  dS dS )ez&
Gradio Web Interface for Soprano TTS
    N)
SopranoTTS)play_streamz#Soprano Text-to-Speech Gradio WebUI)descriptionz--model-pathz-mz(Path to local model directory (optional))helpz--devicez-dauto)r   cudacpumpszDevice to use for inference)defaultchoicesr   z	--backendz-b)r   transformerslmdeployzBackend to use for inferencez--cache-sizez-cd   z'Cache size in MB (for lmdeploy backend))typer
   r   z--decoder-batch-sizez-bs   zBatch size when decoding audiozLoading Soprano TTS model...)backenddevicecache_size_mbdecoder_batch_size
model_pathzModel loaded successfully!i }  texttemperaturetop_prepetition_penalty
chunk_size	streamingc              
   c   s,   |   s
dV  d S zn|r-tj| ||||d}dV  t|}d d|d ddfV  W d S t }tj| |||d}	t | }
|	  }|d	 	t
j}t|t }|
d
kr\||
 ntd}d|dd|
dd|dd}t|f|fV  W d S  ty } zd dt| fV  W Y d }~d S d }~ww )N)Nz*Please enter some text to generate speech.)r   r   r   r   )Nu   ⏳ Streaming...u   ✓ Streaming complete | i  z.2fz ms latency)r   r   r   i  r   infu   ✓ Generated z s audio | Generation time: z.3fz s (zx realtime)u   ✗ Error: )stripmodelinfer_streamr   timeperf_counterinferr   numpyastypenpint16lenSAMPLE_RATEfloat	Exceptionstr)r   r   r   r   r   r   streamlatency
start_timeaudiogen_timeaudio_npaudio_int16audio_secondsrtfstatuse r7   A/home/ubuntu/.local/lib/python3.10/site-packages/soprano/webui.pygenerate_speech.   sZ   
"r9   zSoprano TTS)titleu   # 🗣️ Soprano TTS

<div align="center">
<img width="300" height="300" alt="soprano-github" src="https://github.com/user-attachments/assets/4d612eac-23b8-44e6-8c59-d7ac14ebafd1" />
</div>

**Device:** z | **Backend:** z

**Model Weights:** https://huggingface.co/ekwek/Soprano-1.1-80M  
**Model Demo:** https://huggingface.co/spaces/ekwek/Soprano-TTS  
**GitHub:** https://github.com/ekwek1/soprano  
   )scalezText to SynthesizezEnter text here...z|Soprano is an extremely lightweight text to speech model designed to produce highly realistic speech at unprecedented speed.   
   )labelplaceholdervaluelines	max_lineszStream AudioFzTNote: This bypasses the Gradio interface and streams audio directly to your speaker.)r?   rA   infozAdvanced Settings)open        g      ?g?Temperature)minimummaximumrA   stepr?   g      ?ffffff?zTop Pg       @333333?g?zRepetition PenaltyzChunk Size (Streaming only))rH   rI   rA   rJ   	precisionr?   zGenerate Speechprimarylg)variantsizezGenerated Speechr#   T)r?   r   autoplayStatus   )r?   interactiverB   rC   )z9Soprano is an extremely lightweight text to speech model.rF   rK   rL   )z2Artificial intelligence is transforming the world.rF   rK   rL   )z"I'm so excited, I can't even wait!rF   rK   rL   )z"Why don't you go ahead and try it?rF   rK   rL   zExample Prompts)examplesinputsr?   )fnrW   outputsa  
### Usage tips:

- When quoting, use double quotes instead of single quotes.
- Soprano works best when each sentence is between 2 and 30 seconds long.
- Although Soprano recognizes numbers and some special characters, it occasionally mispronounces them.
Best results can be achieved by converting these into their phonetic form.
(1+1 -> one plus one, etc)
- If Soprano produces unsatisfactory results, you can easily regenerate it for a new, potentially better generation.
You may also change the sampling settings for more varied results.
  c              	   C   sz   t | | | D ]1}z&ttjtj}|d|f |W  d    W   S 1 s)w   Y  W q ty8   Y qw td)N zCould not find a free port)rangesocketAF_INETSOCK_STREAMbindOSError)
start_port	max_triesportsr7   r7   r8   find_free_port   s   *rf   c                  C   s8   t d} td|   tjd| dtjjdddd d S )	NrZ   z"Starting Gradio interface on port z	127.0.0.1Fgreen)primary_huezQ
a {
    color: var(--primary-600);
}
a:hover {
    color: var(--primary-700);
}
)server_nameserver_portsharethemecss)rf   printdemolaunchgrthemesSoft)rd   r7   r7   r8   main   s   
rt   __main__)rZ   r   )8__doc__argparser]   r    gradiorq   r#   r%   sopranor   soprano.utils.streamingr   ArgumentParserparseradd_argumentint
parse_argsargsrn   r   r   
cache_sizer   r   r   r(   r+   r)   boolr9   Blocksro   MarkdownupperRowColumnTextbox
text_inputCheckboxr   	AccordionSliderr   r   r   r   Buttongenerate_btnAudioaudio_outputstatus_outputExamplesclickrf   rt   __name__r7   r7   r7   r8   <module>   s"  


>
,9


f


