o
    iq                     @   s  d Z ddlZddlZddlZddlmZ ddlmZmZm	Z	m
Z
mZ ddlZddlZddlZddlmZmZ dedefd	d
Zde
e	e  dee	e eeef f fddZdedejfddZdd ZdejfddZdejdefddZdejdeeef fddZd.ddZdede
eej e!f  fddZ"d ej d!e!dee!ej f fd"d#Z#d$ed%edefd&d'Z$d%ed$ed(eeef dej%fd)d*Z&d/de!fd+d,Z'e(d-kre)e' dS )0z%
A gradio demo for Qwen3 TTS models.
    Nasdict)AnyDictListOptionalTuple   )Qwen3TTSModelVoiceClonePromptItemsreturnc                 C   s0   | pd  } | dd} ddd |  D S )N _ c                 S   s0   g | ]}|r|d d   |dd   ndqS )N   r   )upper).0w r   E/home/ubuntu/.local/lib/python3.10/site-packages/qwen_tts/cli/demo.py
<listcomp>$   s   0 z'_title_case_display.<locals>.<listcomp>)stripreplacejoinsplitr   r   r   r   _title_case_display!   s   r   itemsc                 C   s6   | sg i fS dd | D }dd t || D }||fS )Nc                 S      g | ]}t |qS r   )r   r   xr   r   r   r   *       z*_build_choices_and_map.<locals>.<listcomp>c                 S   s   i | ]\}}||qS r   r   )r   drr   r   r   
<dictcomp>+   s    z*_build_choices_and_map.<locals>.<dictcomp>)zip)r   displaymappingr   r   r   _build_choices_and_map'   s
   r)   c                 C   sJ   | pd   } | dv rtjS | dv rtjS | dv rtjS td|  d)Nr   )bf16bfloat16)fp16float16half)fp32float32zUnsupported torch dtype: z. Use bfloat16/float16/float32.)r   lowertorchr+   r-   r0   
ValueErrorr   r   r   r   _dtype_from_str/   s   r4   c                 C   s   | d ur| S t  S N)grupdate)vr   r   r   _maybe:   s   r9   c                  C   s  t jddt jdd} | jddd dd | jd	d
d dd | jdddd | jddg ddd | jdddt jdd | jdddd | jdtddd | jd d!d"t jd#d | jd$td%d&d | jd'd d(d | jd)d d*d | jd+d,dt jd-d | jd.td d/d | jd0td d1d | jd2td d3d | jd4td d5d | jd6td d7d | jd8td d9d | jd:td d;d | jd<td d=d | S )>Nzqwen-tts-demoas  Launch a Gradio demo for Qwen3 TTS models (CustomVoice / VoiceDesign / Base).

Examples:
  qwen-tts-demo Qwen/Qwen3-TTS-12Hz-1.7B-CustomVoice
  qwen-tts-demo Qwen/Qwen3-TTS-12Hz-1.7B-VoiceDesign --port 8000 --ip 127.0.0.01
  qwen-tts-demo Qwen/Qwen3-TTS-12Hz-1.7B-Base --device cuda:0
  qwen-tts-demo Qwen/Qwen3-TTS-12Hz-1.7B-CustomVoice --dtype bfloat16 --no-flash-attn
T)progdescriptionformatter_classadd_helpcheckpoint_pos?z:Model checkpoint path or HuggingFace repo id (positional).)nargsdefaulthelpz-cz--checkpointzRModel checkpoint path or HuggingFace repo id (optional if positional is provided).)rA   rB   z--devicezcuda:0z@Device for device_map, e.g. cpu, cuda, cuda:0 (default: cuda:0).z--dtyper+   )r+   r*   r-   r,   r0   r/   z6Torch dtype for loading the model (default: bfloat16).)rA   choicesrB   z--flash-attn/--no-flash-attn
flash_attnz+Enable FlashAttention-2 (default: enabled).)destrA   actionrB   z--ipz0.0.0.0z-Server bind IP for Gradio (default: 0.0.0.0).z--porti@  z'Server port for Gradio (default: 8000).)typerA   rB   z--share/--no-shareshareFz;Whether to create a public Gradio link (default: disabled).z--concurrency   z'Gradio queue concurrency (default: 16).z--ssl-certfilez2Path to SSL certificate file for HTTPS (optional).z--ssl-keyfilez*Path to SSL key file for HTTPS (optional).z--ssl-verify/--no-ssl-verify
ssl_verifyz5Whether to verify SSL certificate (default: enabled).z--max-new-tokensz)Max new tokens for generation (optional).z--temperaturez Sampling temperature (optional).z--top-kzTop-k sampling (optional).z--top-pzTop-p sampling (optional).z--repetition-penaltyzRepetition penalty (optional).z--subtalker-top-kz2Subtalker top-k (optional, only for tokenizer v2).z--subtalker-top-pz2Subtalker top-p (optional, only for tokenizer v2).z--subtalker-temperaturez8Subtalker temperature (optional, only for tokenizer v2).)argparseArgumentParserRawTextHelpFormatteradd_argumentBooleanOptionalActionintfloat)parserr   r   r   build_parser>   s   		rS   argsc                 C   s   | j p| j}|std|S )Nr   )
checkpointr>   
SystemExit)rT   ckptr   r   r   _resolve_checkpoint   s   rX   c              	   C   s8   | j | j| j| j| j| j| j| jd}dd | D S )N)max_new_tokenstemperaturetop_ktop_prepetition_penaltysubtalker_top_ksubtalker_top_psubtalker_temperaturec                 S   s   i | ]\}}|d ur||qS r5   r   )r   kr8   r   r   r   r%      s    z'_collect_gen_kwargs.<locals>.<dictcomp>)	rY   rZ   r[   r\   r]   r^   r_   r`   r   )rT   r(   r   r   r   _collect_gen_kwargs   s   
rb   -q=Tc                 C   s  t | }t |jt jr:t |j}|jdk r(|t jt	t
|j|j	 }nA|j	d d }|t j| | }n/t |jt jra|t j}|jrSt 	t 
|nd}|dkrZn|||  }ntd|j |rrt |dd}|jdkrt j|d	d
t j}|S )Nr   r   g       @g        gzo ?zUnsupported dtype: g      g      ?)axis)npasarray
issubdtypedtypeintegeriinfominastyper0   maxabsfloatingsize	TypeErrorclipndimmean)wavepsrs   r!   infoymidmr   r   r   _normalize_audio   s&   

 
r|   audioc                 C   s   | d u rd S t | tr&t| dkr&t | d tr&| \}}t|}|t|fS t | trCd| v rCd| v rCt| d }t| d }||fS d S )Nr	   r   sampling_ratedata)
isinstancetuplelenrP   r|   dict)r}   srrv   r   r   r   _audio_to_tuple   s   $r   rv   r   c                 C   s   t j| t jd} || fS )N)ri   )rf   rg   r0   )rv   r   r   r   r   _wav_to_gradio_audio   s   r   rW   ttsc                 C   s(   t |jdd }|dv r|S td| )Ntts_model_type)custom_voicevoice_designbasezUnknown Qwen-TTS model type: )getattrmodelr3   )rW   r   mtr   r   r   _detect_model_kind   s   r   gen_kwargs_defaultc           &         s  t |}d }ttjdd rj }d }ttjdd r%j }tdd |p,g D \}tdd |p9g D \}dttt	f ffdd t
jjt
jd	d
dgd}d}	t
j||	dQ}
t
d| d| d |dkr#t
  t
jddE t
jdddd}t
  t
jd|ddd}t
jd|ddd}W d    n1 sw   Y  t
jddd d}t
jd!d"d#}W d    n1 sw   Y  t
jd$d t
jd%d&d'}t
jd(dd)}W d    n1 sw   Y  W d    n1 sw   Y  d*td+td,td-tf fd.d/}|j|||||g||gd0 n|d1krt
 y t
jdd= t
jddd2d3}t
  t
jd|ddd}W d    n	1 sUw   Y  t
jd4d$d5d3}t
jd!d"d#}W d    n	1 stw   Y  t
jd$d t
jd%d&d'}t
jd(dd)}W d    n	1 sw   Y  W d    n	1 sw   Y  d*td+td6tf fd7d8}|j||||g||gd0 nt
  t
d9 t
  t
jdd t
jd:d;}t
jd<dd=d}t
jd>d?d@}W d    n	1 sw   Y  t
jdd  t
jdAddd}t
jd|ddd}t
jd!d"d#}W d    n	1 s3w   Y  t
jd$d t
jd%d&d'}t
jd(dd)}W d    n	1 sXw   Y  W d    n	1 shw   Y  dBtdCtd*td+tf fdDdE}|j||||||g||gd0 W d    n	1 sw   Y  t
dF t
  t
jdd0 t
dG t
jd:d&d'}t
jd<dd=d}t
jd>d?d@}t
jdHd"d#}t
jdId;}W d    n	1 sw   Y  t
jdd+ t
dJ t
jdKd;}t
jdAddd}t
jd|ddd} t
jd!d"d#}!W d    n	1 sw   Y  t
jd$d t
jd%d&d'}"t
jd(dd)}#W d    n	1 sAw   Y  W d    n	1 sQw   Y  dBtdCtffdLdM}$d*td+tf fdNdO}%|j|$|||g||#gd0 |!j|%||| g|"|#gd0 W d    n	1 sw   Y  W d    n	1 sw   Y  t
dP W d    |
S 1 sw   Y  |
S )QNget_supported_languagesget_supported_speakersc                 S      g | ]}|qS r   r   r    r   r   r   r   	      zbuild_demo.<locals>.<listcomp>c                 S   r   r   r   r    r   r   r   r   
  r   r   c                      s   t  S r5   )r   r   )r   r   r   _gen_common_kwargs  s   z&build_demo.<locals>._gen_common_kwargszSource Sans ProArialz
sans-serif)fontz/.gradio-container {max-width: none !important;})themecssz#
# Qwen3 TTS Demo
**Checkpoint:** `z`  
**Model Type:** `z`  
r   r	   )scaleu   Text (待合成文本)   u4   Enter text to synthesize (输入要合成的文本).)labellinesplaceholderu   Language (语种)AutoT)r   rC   valueinteractiveu   Speaker (说话人)Vivianu4   Instruction (Optional) (控制指令，可不输入)uH   e.g. Say it in a very angry tone (例如：用特别伤心的语气说).u   Generate (生成)primary)variant   u   Output Audio (合成结果)numpy)r   rG   u   Status (状态))r   r   text	lang_dispspk_dispinstructc           
   
      s   z>| r|   s
W dS |sW dS |d}||}  }jd	|   |||p)d  p-d d|\}}t|d |dfW S  ty\ }	 zd t|	j d|	 fW  Y d }	~	S d }	~	ww )
NNu&   Text is required (必须填写文本).)Nu,   Speaker is required (必须选择说话人).r   r   )r   languagespeakerr   r      Finished. (生成完成): r   )r   getgenerate_custom_voicer   	ExceptionrG   __name__)
r   r   r   r   r   r   kwargswavsr   e)r   lang_mapspk_mapr   r   r   run_instruct=  s*   
$z build_demo.<locals>.run_instruct)inputsoutputsr   z_It's in the top drawer... wait, it's empty? No way, that's impossible! I'm sure I put it there!)r   r   r   u'   Voice Design Instruction (音色描述)zZSpeak in an incredulous tone, but with a hint of panic beginning to creep into your voice.designc              
      s   z7| r|   s
W dS |r|  sW dS |d}  }jd|   ||  d|\}}t|d |dfW S  tyU } zd t|j d| fW  Y d }~S d }~ww )	Nr   )Nu@   Voice design instruction is required (必须填写音色描述).r   )r   r   r   r   r   r   r   )r   r   generate_voice_designr   r   rG   r   )r   r   r   r   r   r   r   r   r   r   r   r   r   run_voice_designl  s&   
$z$build_demo.<locals>.run_voice_designu"   Clone & Generate (克隆并合成)u   Reference Audio (参考音频))r   u#   Reference Text (参考音频文本)uL   Required if not set use x-vector only (不勾选use x-vector only时必填).u\   Use x-vector only (仅用说话人向量，效果有限，但不用传入参考音频文本)F)r   r   u   Target Text (待合成文本)ref_txtuse_xvecc              
      s   zL|r|  s
W dS t| }|d u rW dS |s |r|  s W dS |d}  }jd	|  |||r7|  nd t|d|\}}	t|d |	dfW S  tyj }
 zd t|
j d|
 fW  Y d }
~
S d }
~
ww )
NNu6   Target text is required (必须填写待合成文本).Nu7   Reference audio is required (必须上传参考音频).Nu   Reference text is required when use x-vector only is NOT enabled.
(未勾选 use x-vector only 时，必须提供参考音频文本；否则请勾选 use x-vector only，但效果会变差.)r   )r   r   	ref_audioref_textx_vector_only_moder   r   r   r   )	r   r   r   generate_voice_cloneboolr   r   rG   r   )ref_audr   r   r   r   atr   r   r   r   r   r   r   r   run_voice_clone  s0   
$z#build_demo.<locals>.run_voice_cloneu-   Save / Load Voice (保存/加载克隆音色)u  
### Save Voice (保存音色)
Upload reference audio and text, choose use x-vector only or not, then save a reusable voice prompt file.  
(上传参考音频和参考文本，选择是否使用 use x-vector only 模式后保存为可复用的音色文件)
u$   Save Voice File (保存音色文件)u   Voice File (音色文件)u   
### Load Voice & Generate (加载音色并合成)
Upload a previously saved voice file, then synthesize new text.  
(上传已保存提示文件后，输入新文本进行合成)
u'   Upload Prompt File (上传提示文件)c           	   
      s   zHt | }|d u rW dS |s|r| sW dS  j||r | nd t|d}ddd |D i}tjddd	\}}t| t	|| |d
fW S  t
yf } zd t|j d| fW  Y d }~S d }~ww )Nr   r   )r   r   r   r   c                 S   r   r   r   )r   itr   r   r   r     r"   z3build_demo.<locals>.save_prompt.<locals>.<listcomp>voice_clone_prompt_z.pt)prefixsuffixr   r   )r   r   create_voice_clone_promptr   tempfilemkstemposcloser2   saver   rG   r   )	r   r   r   r   r   payloadfdout_pathr   )r   r   r   save_prompt  s*   

$zbuild_demo.<locals>.save_promptc                    s  z| d u rW dS |r|  sW dS t| dd p t| dd p t| }tj|ddd}t|tr2d|vr5W d	S |d }t|trDt|d
krGW dS g }|D ]Z}t|tsV W dS |	dd }|d urjt
|sjt|}|	dd }	|	d u rx W dS t
|	st|	}	|t||	t|	ddt|	dt|	dd |	dd d qK	|d}
  }jd|  |
|d|\}}t|d
 |dfW S  ty } zd dt|j d| fW  Y d }~S d }~ww )N)Nu2   Voice file is required (必须上传音色文件).r   namepathcpuT)map_locationweights_onlyr   )Nu,   Invalid file format (文件格式不正确).r   )Nu!   Empty voice items (音色为空).)Nu7   Invalid item format in file (文件内部格式错误).ref_coderef_spk_embedding)Nu2   Missing ref_spk_embedding (缺少说话人向量).r   Ficl_moder   )r   r   r   r   r   r   )r   r   voice_clone_promptr   u   Failed to read or use voice file. Check file format/content.
(读取或使用音色文件失败，请检查文件格式或内容)
r   r   )r   r   strr2   loadr   r   listr   r   	is_tensortensorappendr   r   r   r   r   rG   r   )file_objr   r   r   r   	items_rawr   r#   r   ref_spkr   r   r   r   r   r   r   r   load_prompt_and_gen  sj    






z'build_demo.<locals>.load_prompt_and_genu,  
**Disclaimer (免责声明)**  
- The audio is automatically generated/synthesized by an AI model solely to demonstrate the model’s capabilities; it may be inaccurate or inappropriate, does not represent the views of the developer/operator, and does not constitute professional advice. You are solely responsible for evaluating, using, distributing, or relying on this audio; to the maximum extent permitted by applicable law, the developer/operator disclaims liability for any direct, indirect, incidental, or consequential damages arising from the use of or inability to use the audio, except where liability cannot be excluded by law. Do not use this service to intentionally generate or replicate unlawful, harmful, defamatory, fraudulent, deepfake, or privacy/publicity/copyright/trademark‑infringing content; if a user prompts, supplies materials, or otherwise facilitates any illegal or infringing conduct, the user bears all legal consequences and the developer/operator is not responsible.
- 音频由人工智能模型自动生成/合成，仅用于体验与展示模型效果，可能存在不准确或不当之处；其内容不代表开发者/运营方立场，亦不构成任何专业建议。用户应自行评估并承担使用、传播或依赖该音频所产生的一切风险与责任；在适用法律允许的最大范围内，开发者/运营方不对因使用或无法使用本音频造成的任何直接、间接、附带或后果性损失承担责任（法律另有强制规定的除外）。严禁利用本服务故意引导生成或复制违法、有害、诽谤、欺诈、深度伪造、侵犯隐私/肖像/著作权/商标等内容；如用户通过提示词、素材或其他方式实施或促成任何违法或侵权行为，相关法律后果由用户自行承担，与开发者/运营方无关。
)r   callabler   r   r   r   r)   r   r   r   r6   themesSoft
GoogleFontBlocksMarkdownRowColumnTextboxDropdownButtonAudioclickTabsTabCheckboxr   File)&r   rW   r   
model_kindsupported_langs_rawsupported_spks_rawlang_choices_dispspk_choices_dispr   r   demotext_inlang_inspk_ininstruct_inbtn	audio_outerrr   	design_inr   r   r   	xvec_onlyr   ref_audio_s
ref_text_sxvec_only_ssave_btnprompt_file_outprompt_file_intext_in2lang_in2gen_btn2
audio_out2err2r   r   r   )r   r   r   r   r   r   
build_demo   s  





$ 



"!B
07  H  
  =    =r  c           
      C   s   t  }|| }|js|js|  dS t|}t|j}|jr"dnd }t	j
||j||d}t|}t|||}t|j|j|j|jrDdndd}	|jd urR|j|	d< |jd ur\|j|	d< |jt|jd	jd
i |	 dS )Nr   flash_attention_2)
device_mapri   attn_implementationTF)server_nameserver_portrH   rJ   ssl_certfilessl_keyfile)default_concurrency_limitr   )rS   
parse_argsrU   r>   
print_helprX   r4   ri   rD   r
   from_pretraineddevicerb   r  r   ipportrH   rJ   r  r   queuerP   concurrencylaunch)
argvrR   rT   rW   ri   	attn_implr   r   r  launch_kwargsr   r   r   mainS  s8   





r.  __main__)rc   Tr5   )*__doc__rK   r   r   dataclassesr   typingr   r   r   r   r   gradior6   r   rf   r2   r   r
   r   r   r   r)   ri   r4   r9   rL   rS   	NamespacerX   rb   r|   ndarrayrP   r   r   r   r   r  r.  r   rV   r   r   r   r   <module>   s8   .m
  "$  W&
