o
    ciG=                     @   s   d Z ddlZddlZddlZddlZddlZddlZddl	Z	ddl
mZ dededefddZd	ed
ejdefddZdd Zdd Zdd Zdd Zdd Zdd Zdd Zedkr`e  dS dS )zCLI for FasterQwen3TTS.    N)FasterQwen3TTSmodel_iddevicedtypec                 C   s:   |dkrt j}n|dkrt j}nt j}tj| ||dddS )Nbf16fp16sdpa   )r   r   attn_implementationmax_seq_len)torchbfloat16float16float32r   from_pretrained)r   r   r   torch_dtype r   J/home/ubuntu/vllm_env/lib/python3.10/site-packages/faster_qwen3_tts/cli.py_load_model   s   r   out_pathaudiosrc                 C   s,   t jt j| p	ddd t| || d S )N.T)exist_ok)osmakedirspathdirnamesfwrite)r   r   r   r   r   r   _write_audio   s   r    c                 C   sH   g }d }| D ]
\}}}| | q|stjdtjddfS t||fS )N   )r   i.  )appendnpzerosr   concatenate)genchunksr   audio_chunk_r   r   r   _stream_to_audio$   s   r*   c           
      C   sR  t | j| j| j}| jrOt }|j| j| j	| j
| j| j| j| j| j| j | j| j| jd}t|\}}t | }|rBt|| nd}|dkrL|| nd}n@t }|j| j| j	| j
| j| j| j| j| j | j| j| jd\}	}|	d }t | }|rt|| nd}|dkr|| nd}t| j|| td| j d|dd|dd	 d S )
Ntextlanguage	ref_audioref_text
chunk_sizemax_new_tokenstemperaturetop_k	do_samplerepetition_penalty	xvec_onlynon_streaming_mode        r   r,   r-   r.   r/   r1   r2   r3   r4   r5   r6   r7   Wrote  (dur .2fs, RTF ))r   modelr   r   	streamingtimeperf_countergenerate_voice_clone_streamingr,   r-   r.   r/   r0   r1   r2   r3   greedyr5   r6   r7   r*   lengenerate_voice_cloner    outputprint
argsr?   startr&   r   r   
total_time	audio_durrtf
audio_listr   r   r   	cmd_clone.   sR   
&rP   c                 C   s  t | j| j| j}| jr|j pg }td| d S | js(td t	
d | jrjt }|j| j| j| j| j| j| j| j| j| j | jd
}t|\}}t | }|r]t|| nd}|dkrg|| nd}	n<t }|j| j| j| j| j| j| j| j| j | jd	\}
}|
d }t | }|rt|| nd}|dkr|| nd}	t| j|| td| j d	|d
d|	d
d d S )N
z2ERROR: --speaker is required (use --list-speakers)   
r,   speakerr-   instructr0   r1   r2   r3   r4   r5   r8   r   	r,   rT   r-   rU   r1   r2   r3   r4   r5   r:   r;   r<   r=   r>   )r   r?   r   r   list_speakersget_supported_speakersrH   joinrT   sysexitr@   rA   rB   generate_custom_voice_streamingr,   r-   rU   r0   r1   r2   r3   rD   r5   r*   rE   generate_custom_voicer    rG   )rJ   r?   speakersrK   r&   r   r   rL   rM   rN   rO   r   r   r   
cmd_custom]   sX   

&r_   c           
      C   s:  t | j| j| j}| jrIt }|j| j| j	| j
| j| j| j| j| j | jd	}t|\}}t | }|r<t|| nd}|dkrF|| nd}n:t }|j| j| j	| j
| j| j| j| j | jd\}	}|	d }t | }|rwt|| nd}|dkr|| nd}t| j|| td| j d|dd|dd	 d S )
N	r,   rU   r-   r0   r1   r2   r3   r4   r5   r8   r   r,   rU   r-   r1   r2   r3   r4   r5   r:   r;   r<   r=   r>   )r   r?   r   r   r@   rA   rB   generate_voice_design_streamingr,   rU   r-   r0   r1   r2   r3   rD   r5   r*   rE   generate_voice_designr    rG   rH   rI   r   r   r   
cmd_design   sF   

&rd   c                 C   s  t | j| j| j}| jdkr| jr| jstd t	d | jdkr.| j
s.td t	d | jdkr?| js?td t	d td d	}tjD ])}| }|sRqH| d
v r[ d S tj| jd|dd}|d	7 }t }| jdkr| jr|j|| j| j| j| j| j| j| j| j | jd| jd}t|\}}	n|j || j| j| j| j| j| j| j | jd| jd\}
}	|
d }n| jdkr| jr|j!|| j
| j| j| j| j| j| j| j | jd
}t|\}}	n[|j"|| j
| j| j| j| j| j| j | jd	\}
}	|
d }n=| jr%|j#|| j| j| j| j| j| j| j | jd	}t|\}}	n|j$|| j| j| j| j| j| j | jd\}
}	|
d }t%|||	 t | }|	rUt&||	 nd}|dkr`|| nd}td| d|dd|dd qHd S )Nclonez=ERROR: --ref-audio and --ref-text are required for clone moderR   customz,ERROR: --speaker is required for custom modedesignz-ERROR: --instruct is required for design modezCServer started. Enter text per line. Type 'exit' or 'quit' to stop.r!   )r[   quitstopout_04dz.wavFr+   r9   r   rS   rV   r`   ra   r8   r:   r;   r<   r=   r>   )'r   r?   r   r   moder.   r/   rH   rZ   r[   rT   rU   stdinstriplowerr   r   rY   
output_dirrA   rB   r@   rC   r-   r0   r1   r2   r3   rD   r5   r7   r*   rF   r\   r]   rb   rc   r    rE   )rJ   r?   idxliner,   r   rK   r&   r   r   rO   rL   rM   rN   r   r   r   	cmd_serve   s   










"rs   c                  C   sd  t jddd} | jdddd | jdd	g d
dd | jddd}dd }|jddd}|| |jdddd |jdddd |jdddd |jtd |jdd d}|| |jd!d"d |jd#d$d%d |jd&dd'd |jtd |jd(d)d}|| |jd#dd*d |jtd |jd+d,d}|jd-dg d.d/ |jd0dd1d |jd2d3d4d |jdd5d |jdd6d |jd!d7d |jd#d$d8d |jd9dd:d |	 }|jd;d<dd=d> |jd?d<d@dAd> |jddB |jdCt
dDdEdF |jdGt
dHdI |jdJtdKdI |jdLt
dMdI |jdNtdOdI |jdPddQd |jdRdSdTd |jtd | S )UNzfaster-qwen3-ttszFasterQwen3TTS CLI)progdescriptionz--devicecudazDevice (cuda or cpu)defaulthelpz--dtyper   )r   r   fp32zModel dtype)rx   choicesry   commandT)destrequiredc                 S   s   | j dddd | j dddd | j d	dd
d | j dddd | j dtdd | j dtdd | j dtdd | j dtdd | j dddd | j dddd |  }|j ddddd |j d dd!d"d | jdd# | j d$td%d&d' d S )(Nz--textTzText to synthesizer~   ry   
--languageAuto%Language (Auto, English, French, ...)rw   z--outputzOutput wav path--modelModel id or local path--max-new-tokensr	   typerx   --temperature?--top-k2   --repetition-penalty?--greedy
store_trueDisable samplingactionry   --streamingUse streaming generation--non-streaming-moder7   +Prefill full text for non-streaming qualityr}   r   ry   --no-non-streaming-modestore_false?Disable full-text prefill (match upstream non-streaming layout)r7   --chunk-size   Streaming chunk sizer   rx   ry   )add_argumentintfloatadd_mutually_exclusive_groupset_defaults)sp	nsm_groupr   r   r   
add_common:  s2   z build_parser.<locals>.add_commonre   zVoice cloning (reference audio))ry   z--ref-audiozReference audio pathr   z
--ref-textzReference transcriptz--xvec-onlyr   zUse speaker embedding onlyr   )fnrf   zCustomVoice model (speaker IDs)z	--speakerz
Speaker IDz
--instruct zOptional instructionz--list-speakerszList available speaker IDsrg   z%VoiceDesign model (instruction-based)zVoice/style instructionservez8Keep model hot and generate multiple requests from stdinz--mode)re   rf   rg   )r~   r{   r   r   r   r   r   zReference audio path (clone)zReference transcript (clone)zSpeaker ID (custom)zInstruction (custom/design)r   r   r   r7   r   r   r   r   r   r   r   r   r   r   r   r	   r   r   r   r   r   r   r   r   r   z--output-diroutputszDirectory for output wavs)argparseArgumentParserr   add_subparsers
add_parserr   rP   r_   rd   r   r   r   rs   )psubr   r   r   r   r   r   build_parser4  sj   r   c                  C   s   t  } |  }|| d S )N)r   
parse_argsr   )parserrJ   r   r   r   main  s   r   __main__)__doc__r   r   rZ   rA   numpyr#   	soundfiler   r   faster_qwen3_ttsr   strr   ndarrayr   r    r*   rP   r_   rd   rs   r   r   __name__r   r   r   r   <module>   s*   
/4)zW
