o
    i1                     @   s   d Z ddlZddlZddlZddlmZ ddlmZmZ ddl	Z
ddlmZ ddeded	efd
dZded	efddZd	efddZdd Zdd Zdd Zdd Zdd Zedkr_e  dS dS )a  
VoxCPM Command Line Interface

Unified CLI for voice cloning, direct TTS synthesis, and batch processing.

Usage examples:
    # Direct synthesis (single sample)
    voxcpm --text "Hello world" --output output.wav

    # Voice cloning (with reference audio and text)
    voxcpm --text "Hello world" --prompt-audio voice.wav --prompt-text "reference text" --output output.wav --denoise

    # Batch processing (each line in the file is one sample)
    voxcpm --input texts.txt --output-dir ./outputs/
    N)Path)OptionalList)VoxCPMfile	file_path	file_typereturnc                 C   s(   t | }| st| d|  d|S )zValidate that a file exists.z 'z' does not exist)r   existsFileNotFoundError)r   r   path r   >/home/ubuntu/.local/lib/python3.10/site-packages/voxcpm/cli.pyvalidate_file_exists   s   r   output_pathc                 C   s   t | }|jjddd |S )zAValidate the output path and create parent directories if needed.Tparentsexist_ok)r   parentmkdir)r   r   r   r   r   validate_output_path$   s   r   c                 C   s  t d t| ddptjdd}d}t| dd}|r[ddlm} |t| dd	t| d
d	t| ddt| ddt| ddt| ddd}t d|j d|j d|j	 d|j
 d|j 
 t| ddrzt| j|t| dd ||d}t d |W S  ty } zt d|  td W Y d}~nd}~ww z$tjt| dd t| dd |t| d!dt| d"d||d#}t d$ |W S  ty } zt d%|  td W Y d}~dS d}~ww )&zbLoad VoxCPM model.

    Prefer --model-path if provided; otherwise use from_pretrained (Hub).
    zLoading VoxCPM model...zipenhancer_pathNZIPENHANCER_MODEL_PATH	lora_pathr   )
LoRAConfiglora_enable_lmTlora_enable_ditlora_enable_projFlora_r    
lora_alpha   lora_dropout        )	enable_lm
enable_ditenable_projralphadropoutzLoRA config: r=z, alpha=z, lm=z, dit=z, proj=
model_pathno_denoiser)voxcpm_model_pathzipenhancer_model_pathenable_denoiserlora_configlora_weights_pathzModel loaded (local).zFailed to load model (local):    hf_model_idopenbmb/VoxCPM1.5	cache_dirlocal_files_only)r2   load_denoiserzipenhancer_model_idr4   r5   r/   r0   zModel loaded (from_pretrained).z(Failed to load model (from_pretrained): )printgetattrosenvirongetvoxcpm.model.voxcpmr   r'   r(   r$   r%   r&   r   r*   	Exceptionsysexitfrom_pretrained)argsr   r/   r0   r   modeler   r   r   
load_model+   sp   








	rE   c              	   C   s  | j std td | jstd td | js$td td t| jd}t| j}t	| }td| j   td|  td| j  |j
| j t|| j| j| j| j| jd	}tt|||jj td
|  t||jj }td|dd dS )zVoice cloning command.1Error: Please provide text to synthesize (--text)r1   z@Error: Voice cloning requires a reference audio (--prompt-audio)z>Error: Voice cloning requires a reference text (--prompt-text)reference audio fileSynthesizing text: zReference audio: zReference text: textprompt_wav_pathprompt_text	cfg_valueinference_timesteps	normalizedenoiseSaved audio to: 
Duration: .2fsN)rJ   r8   r?   r@   prompt_audiorL   r   r   outputrE   generatestrrM   rN   rO   rP   sfwrite	tts_modelsample_ratelen)rB   prompt_audio_pathr   rC   audio_arraydurationr   r   r   	cmd_clonei   s8   



ra   c              	   C   s   | j std td t| j}t| }td| j   |j| j dd| j| j	| j
dd}tt|||jj td|  t||jj }td|d	d
 dS )zDirect TTS synthesis command.rF   r1   rH   NFrI   rQ   rR   rS   rT   )rJ   r8   r?   r@   r   rV   rE   rW   rM   rN   rO   rY   rZ   rX   r[   r\   r]   )rB   r   rC   r_   r`   r   r   r   cmd_synthesize   s&   

rb   c                 C   s  t | jd}t| j}|jddd z t|ddd}dd |D }W d	   n1 s,w   Y  W n tyP } ztd
|  t	d W Y d	}~nd	}~ww |s\td t	d tdt
| d t| }d	}| jrwtt | jd}d}t|dD ]t\}	}
td|	 dt
| d|
d	d  d zC|j|
|| j| j| j| j| jo|d	ud}|d|	dd }tt|||jj t
||jj }td| d|dd |d7 }W q~ ty } ztd|  W Y d	}~q~d	}~ww td| dt
| d  d	S )!zBatch synthesis command.z
input fileTr   r'   utf-8encodingc                 S   s   g | ]
}|  r|  qS r   )strip).0liner   r   r   
<listcomp>   s    zcmd_batch.<locals>.<listcomp>NzFailed to read input file: r1   z5Error: Input file is empty or contains no valid lineszFound z lines to processrG   r   z
Processing /z: 2   z...rI   output_03dz.wavz	  Saved: z (rS   zs)z
  Failed: z
Batch finished: z
 succeeded)r   inputr   
output_dirr   openr>   r8   r?   r@   r]   rE   rU   rX   	enumeraterW   rL   rM   rN   rO   rP   rY   rZ   r[   r\   )rB   
input_filero   ftextsrD   rC   r^   success_countirJ   r_   output_filer`   r   r   r   	cmd_batch   sZ   

(	rx   c                  C   s  t jdt jdd} | jdddd | jdd	d
d | jdddd | jdddd | jdddd | jdddd | jdddd | jdddd | jdtdd d! | jd"td#d$d! | jd%dd&d | jd'td(d) | jd*td+d,d! | jd-td.d) | jd/dd0d | jd1dd2d | jd3td4d5d! | jd6td7d) | jd8td9d:d! | jd;td<d=d! | jd>td?d@d! | jdAddBdCdD | jdEddBdFdD | jdGddHdIdD | S )Jz>Build unified argument parser (no subcommands, route by args).zLVoxCPM CLI (single parser) - voice cloning, direct TTS, and batch processinga  
Examples:
  # Direct synthesis (single sample)
  voxcpm --text "Hello world" --output out.wav

  # Voice cloning (reference audio + text)
  voxcpm --text "Hello world" --prompt-audio voice.wav --prompt-text "reference text" --output out.wav --denoise

  # Batch processing
  voxcpm --input texts.txt --output-dir ./outs

  # Select model (from Hub)
  voxcpm --text "Hello" --output out.wav --hf-model-id openbmb/VoxCPM-0.5B
        )descriptionformatter_classepilogz--inputz-iz%Input text file (one line per sample))helpz--output-dirz-odz!Output directory (for batch mode)z--textz-tz'Text to synthesize (single-sample mode)z--outputz-oz+Output audio file path (single-sample mode)z--prompt-audioz-pazReference audio file pathz--prompt-textz-ptz)Reference text corresponding to the audioz--prompt-filez-pfz.Reference text file corresponding to the audioz	--denoise
store_truez,Enable prompt speech enhancement (denoising))actionr|   z--cfg-valueg       @z!CFG guidance scale (default: 2.0))typedefaultr|   z--inference-timesteps
   zInference steps (default: 10)z--normalizezEnable text normalizationz--model-pathz0Local VoxCPM model path (overrides Hub download))r   r|   z--hf-model-idr3   zEHugging Face repo id (e.g., openbmb/VoxCPM1.5 or openbmb/VoxCPM-0.5B)z--cache-dirz!Cache directory for Hub downloadsz--local-files-onlyz!Use only local files (no network)z--no-denoiserzDisable denoiser model loadingz--zipenhancer-pathz-iic/speech_zipenhancer_ans_multiloss_16k_basez;ZipEnhancer model id or local path (default reads from env)z--lora-pathzJPath to LoRA weights (.pth file or directory containing lora_weights.ckpt)z--lora-rr   zLoRA rank (default: 32)z--lora-alphar!   z'LoRA alpha scaling factor (default: 16)z--lora-dropoutr#   z LoRA dropout rate (default: 0.0)z--lora-enable-lmTz'Apply LoRA to LM layers (default: True))r~   r   r|   z--lora-enable-ditz(Apply LoRA to DiT layers (default: True)z--lora-enable-projFz0Apply LoRA to projection layers (default: False))argparseArgumentParserRawDescriptionHelpFormatteradd_argumentfloatintrX   )parserr   r   r   _build_unified_parser   s<   r   c                  C   s   t  } |  }|jr|jstd |   td t|S |j	r$|j
s1td |   td |js7|jrx|jse|jretj|jsHJ dt|jddd}| |_W d   n1 s`w   Y  |jrk|jsttd	 td t|S t|S )
z4Unified CLI entrypoint: route by provided arguments.z'Error: Batch mode requires --output-dirr1   z6Error: Single-sample mode requires --text and --outputz0Prompt file does not exist or is not accessible.r'   rc   rd   NzCError: Voice cloning requires both --prompt-audio and --prompt-text)r   
parse_argsrn   ro   r8   
print_helpr?   r@   rx   rJ   rV   rU   rL   prompt_filer:   r   isfilerp   readra   rb   )r   rB   rs   r   r   r   main#  s.   


r   __main__)r   )__doc__r   r:   r?   pathlibr   typingr   r   	soundfilerY   voxcpm.corer   rX   r   r   rE   ra   rb   rx   r   r   __name__r   r   r   r   <module>   s&   >. 2:$
