o
    i?                     @   s  d Z ddlZddlZddlZddlZddlZddlZddlmZ ddl	Z
ddlZddlZddlm  mZ ddlmZ dd Zdd Zdd
dZ				d dededejjjdejdededededefddZdd Zdd Zdd Ze dkre!e  dS dS )!ao  
This script supports to load dataset from huggingface and sends it to the server
for decoding, in parallel.

Usage:
num_task=2

# For offline F5-TTS
python3 client_grpc.py     --server-addr localhost     --model-name f5_tts     --num-tasks $num_task     --huggingface-dataset yuekai/seed_tts     --split-name test_zh     --log-dir ./log_concurrent_tasks_${num_task}
    N)Path)np_to_triton_dtypec                 C   sz  t |d,}| d }|d |d |d |d |d |d |d	 |d
 |D ]}d|vr<q5|d|d  d |d }t|d d d }t|d d d }t|d d d }t|d d d }	|d|dd|dd|dd|	dd	 |d }
|
D ]}t|d }|d }|d }|d }t|d }|d |d   kr|d ksJ  J t|d d }t|d d }t|d d }|d |d!d"|d#d$|d%d&|d%d'|d#d(|| d)d*|d%d'|d#d'| d(|| | d)d+ |d,|d%d-|| d)d. |d/|d%d-|| d)d+ qq5W d    d S 1 s6w   Y  d S )0Nwmodel_statsz`The log is parsing from triton_client.get_inference_statistics(), to better human readability. 
z/To learn more about the log, please refer to: 
z[1. https://github.com/triton-inference-server/server/blob/main/docs/user_guide/metrics.md 
zC2. https://github.com/triton-inference-server/server/issues/5374 

zTo better improve throughput, we always would like let requests wait in the queue for a while, and then execute them with a larger batch size. 
z^However, there is a trade-off between the increased queue time and the increased batch size. 
z{You may change 'max_queue_delay_microseconds' and 'preferred_batch_size' in the model configuration file to achieve this. 
zSee https://github.com/triton-inference-server/server/blob/main/docs/user_guide/model_configuration.md#delayed-batching for more details. 

last_inferencezmodel name is namez 
inference_statsqueuensg    eAcompute_infercompute_inputcompute_outputzqueue time z<5.2fz s, compute infer time z s, compute input time z s, compute output time z s 
batch_stats
batch_sizecountg    .Az#execuate inference with batch_size z<2z total z<5z times, total_infer_time z<9.2fz ms, avg_infer_time /=.2fz ms, avg_infer_time_per_sample z ms 
zinput z	 ms, avg z ms, zoutput )openwriteint)statssummary_file	summary_fr   model_statemodel_inference_statstotal_queue_time_stotal_infer_time_stotal_input_time_stotal_output_time_smodel_batch_statsbatchr   r   r   r   batch_countcompute_infer_time_mscompute_input_time_mscompute_output_time_ms r&   \/home/ubuntu/.local/lib/python3.10/site-packages/f5_tts/runtime/triton_trtllm/client_grpc.pywrite_triton_stats2   sn   


$(^$r(   c                  C   s  t jt jd} | jdtddd | jdtddd | jd	td d
d | jdtddd | jdtddd | jdtddd | jdtdg ddd | jdtd dd | jdtddd | jdtddd | jdtdd d | jd!d"d#d$d% | jd&td#d'd(d) | jd*tdd+d |  S ),N)formatter_classz--server-addr	localhostzAddress of the server)typedefaulthelpz--server-portiA  z/Grpc port of the triton server, default is 8001z--reference-audiozWPath to a single audio file. It can't be specified at the same time with --manifest-dirz--reference-text z--target-textz--huggingface-datasetzyuekai/seed_ttsz'dataset name in huggingface dataset hubz--split-namewenetspeech4tts)r/   test_zhtest_en	test_hardz%dataset split name, default is 'test')r+   r,   choicesr-   z--manifest-pathz@Path to the manifest dir which includes wav.scp trans.txt files.z--model-namef5_ttsz(triton model_repo module name to requestz--num-tasks   z&Number of concurrent tasks for sendingz--log-interval   z)Controls how frequently we print the log.z--compute-wer
store_trueFzTrue to compute WER.
        )actionr,   r-   z	--log-dirz./tests/client_grpczlog directory)r+   requiredr,   r-   z--batch-sizez2Inference batch_size per request for offline mode.)argparseArgumentParserArgumentDefaultsHelpFormatteradd_argumentstrr   
parse_args)parserr&   r&   r'   get_argsj   s   rA   ]  c                 C   sp   |dksJ dt | tr| d }| d }nt| \}}||kr4ddlm} ||tt|||  }||fS )NrB   zhard coding in serverarraysampling_rater   )resample)
isinstancedictsfreadscipy.signalrE   r   len)wav_pathtarget_sample_ratewaveformsample_raterE   r&   r&   r'   
load_audio   s   

rP   ./manifest_item_listr   triton_clientprotocol_clientlog_interval
model_namepadding_durationaudio_save_dirsave_sample_ratec	               	      s  d}	g }
t |dd  }td|   t| D ]%\}}|| dkr1t| d| dt|   t|d dd	\}}t|| }tjt|ggtjd
}|d |d }}|t| t| }|rtjd|| t || | d  ftj	d
}||dd t|f< n|}|
ddtj	}|d|jt|j|d|jt|j|dddgd|dddgdg}|d | |d | tj|gtd
}|
d}|d | tj|gtd
}|
d}|d | |dg}d| |d  }t }|j||t||dI d H }|d
d}t | }tj||d  d}t|||d t|| }|
||f |	|7 }	q|	|
fS )N        r6   zmanifest_item_list: r   z: r   audio_filepathrB   )rM   dtypereference_texttarget_textr5   reference_wavreference_wav_lenBYTES)r5   r5         rN   i 
   )
request_idoutputstarget_audio_pathz.wavPCM_16)r   print	enumeraterK   rP   nprC   int32zerosfloat32reshapeastype
InferInputshaper   r]   set_data_from_numpyobjectInferRequestedOutputtimeinferr>   as_numpyospathjoinrH   r   append) rR   r   rS   rT   rU   rV   rW   rX   rY   total_durationlatency_datatask_idiitemrN   rO   durationlengthsr^   r_   estimated_target_durationsamplesinputsinput_data_numpyrh   sequence_idstartresponseaudioendaudio_save_pathactual_durationr&   r&   r'   send   sh   



r   c              	   C   s   t | dH}g }|D ]:}t| ddksJ | d\}}}}t|j}tj|s:tj	tj
| |}|||||d q
W d    |S 1 sPw   Y  |S )Nr|   )r[   r^   r_   ri   )r   rK   stripsplitr   stemr{   r|   isabsr}   dirnamer~   )manifest_pathfmanifest_listlineuttprompt_text
prompt_wavgt_textr&   r&   r'   load_manifests5  s(   

r   c           	      C   s   t | }||k rtd| d| d| d |}|| }|| }g }d}t|D ]}||k r5|| d }n|| }|| ||  |}q(|S )Nz'Warning: the length of the input list (z) is less than k (z). Setting k to .r   r5   )rK   rk   ranger~   )	dataknquotient	remainderresultr   r   r   r&   r&   r'   
split_dataJ  s   r   c                     s  t  } | j d| j }tj|dd}t}| jr+d| _d| _| j| j	| jddg}n<| j
rbdd l}|j| j
| jdd	}g }tt|D ]}||| d
 || d || d || d d qDnt| j}t| jt|| _t|| j}tj| jdd g }t }	t| jD ]}tt|| d| ||| j| j| jddd	}
||
 qtj| I d H }t }||	 }d}g }|D ]}||d 7 }||d 7 }q|| }d|dd}|d|dd7 }|d|d dd7 }|d|dd|d dd7 }d d! |D }t|tt| d" }t j!|t j"d#d" }|d$|dd7 }|d%t #|d&d" dd7 }|d't #|d(d" dd7 }|d)t #|d*d" dd7 }|d+t #|d,d" dd7 }|d-|dd7 }t$| | jrvt%| jj&}n| jr}| j}t'| j d.| d/d0}|(| W d    n	1 sw   Y  |j)d1dd2I d H }t*|| j d3| d/ |j+| jdd2I d H }t'| j d4| d5d0}t,j-||d6d7 W d    d S 1 sw   Y  d S )8N:F)urlverboser5   test)r^   r_   r[   ri   r   T)r   trust_remote_codeprompt_audior   idr_   )r[   r^   ri   r_   )exist_okztask-rB   )r   rS   rT   rU   rV   rX   rW   rY   rZ   zRTF: z.4f
ztotal_duration: z.3fz	 seconds
(i  r   z hours)
zprocessing time: z
 seconds (c                 S   s   g | ]\}}|qS r&   r&   ).0	chunk_endchunk_durationr&   r&   r'   
<listcomp>  s    zmain.<locals>.<listcomp>g     @@r\   zlatency_variance: zlatency_50_percentile_ms: 2   zlatency_90_percentile_ms: Z   zlatency_95_percentile_ms: _   zlatency_99_percentile_ms: c   zaverage_latency_ms: z/rtf-z.txtr   r.   )rV   as_jsonz/stats_summary-z/model_config-z.jsonr   )indent).rA   server_addrserver_port
grpcclientInferenceServerClientreference_audio	num_tasksrU   r^   r_   huggingface_datasetdatasetsload_dataset
split_namer   rK   r~   r   r   minr   r{   makedirslog_dirrx   asynciocreate_taskr   rV   gathersumfloatrm   varfloat64
percentilerk   r   r   r   r   get_inference_statisticsr(   get_model_configjsondump)argsr   rS   rT   rR   r   datasetr   tasks
start_timetaskans_listend_timeelapsedr   r   ansrtfslatency_list
latency_mslatency_variancer   r   r   metadatar&   r&   r'   maina  s   





$r   __main__)rB   )NrQ   rB   )"__doc__r:   r   r   r{   rx   typespathlibr   numpyrm   	soundfilerH   tritonclienttritonclient.grpc.aiogrpcaior   tritonclient.utilsr   r(   rA   rP   listr>   r   
ModuleTyper   r   r   r   r   __name__runr&   r&   r&   r'   <module>   sX   8
k	
Ri