o
    ٷi_a                  8   @   s  d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
mZ d dlmZ d dlmZ d dlmZ d dlZd dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZmZmZm Z m!Z!m"Z"m#Z#m$Z$m%Z% d dl&m'Z' e'e(Z)d dl*m+Z+ ej,Z-dd Z,e,e_,eG dd de Z.		dAdedej/dedB ded de.f
ddZ0	dBdedej/dedB de.fddZ1e0ed< devre2d e1ed< devre2d d dlm3Z3 d d l4m5Z5m6Z6m7Z7m8Z8 d d!l9m:Z:m;Z; e3j<Z=				"	dCd#e5d$e>d%e>d&e>d'e>d(e>d)ed*e?e d+e@dB d,eAd-eAd.eBd/e@d0eBd1e?e> d2e?eA d3eBd4eCe>eAf d5e@dB d6ee> dB d7eCdB d8eCdB d9ed: dB d;e@dB d<e@dB d=e@d>ejDeBB dB f6d?d@Z<e<e3_<dS )D    N)Iterable)	dataclass)datetime)Literal)AudioSegment)tqdm)PreTrainedTokenizerBase)datasets)SampleRequest)	ASYNC_REQUEST_FUNCSOPENAI_COMPATIBLE_BACKENDSRequestFuncInputRequestFuncOutputStreamedResponseHandler_get_chat_content_update_headers_common_update_payload_common_validate_api_url)init_logger)OmniRandomMultiModalDatasetc                 C   st   | j dvr
t| |S | jdkr5t| j| jd}|j|| j| j| j	| j
| j| j| j| j| j| j| jd}|S t| |S )N)openai-chat-omniopenai-audio-speechz	random-mm)random_seeddataset_path)	tokenizernum_requests
prefix_lenrange_ratio	input_len
output_lenbase_items_per_requestlimit_mm_per_promptnum_mm_items_range_ratiobucket_configrequest_id_prefixno_oversample)backendget_samples_olddataset_namer   seedr   samplenum_promptsrandom_prefix_lenrandom_range_ratiorandom_input_lenrandom_output_len random_mm_base_items_per_requestrandom_mm_limit_mm_per_prompt"random_mm_num_mm_items_range_ratiorandom_mm_bucket_configr$   r%   )argsr   datasetinput_requests r7   T/home/ubuntu/.local/lib/python3.10/site-packages/vllm_omni/benchmarks/patch/patch.pyget_samples)   s(   



r9   c                   @   sJ   e Zd ZU dZeed< dZeed< dZeed< dZ	eed< dZ
eed< dS )	MixRequestFuncOutput        
audio_ttfpaudio_durationr   audio_frames	audio_rtftext_latencyN)__name__
__module____qualname__r<   float__annotations__r=   r>   intr?   r@   r7   r7   r7   r8   r:   D   s   
 r:   lastrequest_func_inputsessionpbarmm_position)firstrG   returnc              	      s  | j }t|dd t| |d}| jr| jn| jd|dgd| jdddid	}t||  |d
d}|dkr:tdddt	j
d d}t||  t }	| j|	_d}
d }d}t }||	_|}|}d}z9|j|||d4 I d H }|jdkr~t }|j 2 z3 d H W }| }|sq||}|D ]}|drq|d}|dkr3t }t|}|d }r$|d}|d d d}|dkr|dkr|| }||	_n|	j||  |
|pd7 }
|}|| |	_n6|dkr$|	j dkr|| |	_ || }|dkr$t!"|}t#$t%&|}|d ur$|d u r |}n|| }|d }r3|d d|	_'qq6 || |	_(|
|	_)|d urzt*|d! |	_+|j,}|dkr[t*|j-| |	_.nd|	_.t/0d" |	j+}|dkrr||	j+ |	_1nd|	_1t/0d# d|	_2n
|j3pd|	_4d$|	_2W d   I d H  n1 I d H sw   Y  W n t5y   d$|	_2t67 |	_4t/4d%|	j4  Y nw |r|8d& |	S )'NzOpenAI Chat Completions APIzchat/completions)rK   user)rolecontentr;   Tinclude_usage)modelmessagestemperature
max_tokensstreamstream_optionsresponse_formatwavpcmgpcm response format is not supported yet.         Please use other formats like wav, mp3, etc. instead.application/jsonBearer OPENAI_API_KEYzContent-TypeAuthorization urljsonheaders   :zdata: z[DONE]choicesmodalityr   deltarP   textaudiometricsnum_tokens_out     @@Audio frame width is zeroAudio duration is zeroF'ERROR: send request failed, reason is:    )9api_urlr   r   
model_namerR   r   r   get
ValueErrorosenvironr   r:   
prompt_lentimeperf_counter
start_timepoststatusr   rP   iter_anystrip	add_chunk
startswithremoveprefixrd   loadsttftitlappendr@   r<   base64	b64decoder   	from_fileioBytesIOoutput_tokenslatencygenerated_textlenr=   frame_widthraw_datar>   loggerwarningr?   successreasonerror	Exception	traceback
format_excupdate)rH   rI   rJ   rK   rt   rP   payloadrX   re   outputr   generated_audior   stmost_recent_timestamp	timestampaudio_generate_timeresponsehandlerchunk_bytesrS   messagechunkdatarh   ri   audio_bytessegrm   r   r=   r7   r7   r8   *async_request_openai_chat_omni_completionsM   s   















.




*E

r   c                    s<  | j }t|dd | jr| jn| j| jd}t||  |dd}|dkr*tddd	tj	d
 d}t
||  t }| j|_t }||_z|j|||d4 I dH }	|	jdkr|	 I dH }
t }|| |_|j|_|j|_zAtt|
}t|d |_|j}|dkrt|j| |_nd|_t d |jdkr|j|j |_!nd|_!t d d|_"W n, t#y } zd|_"d| |_$t$d|  W Y d}~nd}~ww |	j%pd|_$d|_"W d  I dH  n1 I dH sw   Y  W n t#y   d|_"t&' |_$t$d|j$  Y nw |r|(d |S )zNon-streaming request to /v1/audio/speech endpoint.

    The endpoint returns raw audio bytes (e.g. WAV). Pass voice, instructions,
    and other TTS-specific fields via ``extra_body``.
    zOpenAI Audio Speech APIzaudio/speech)rR   inputrX   rY   rZ   r[   r\   r]   r^   r_   rb   Nrf   ro   r   rp   rq   TFz Failed to parse audio response: z'ERROR: Failed to parse audio response: ra   rr   rs   ))rt   r   ru   rR   promptr   rv   rw   rx   ry   r   r:   rz   r{   r|   r}   r~   r   readr   r   r<   r   r   r   r   r   r=   r   r   r>   r   r   r?   r   r   r   r   r   r   r   )rH   rI   rJ   rt   r   rX   re   r   r   r   r   end_timeaudio_segmentr   er7   r7   r8   !async_request_openai_audio_speech   sv   







(#

r   r   r   )serve)TaskType calculate_metrics_for_embeddingsget_requestwait_for_endpoint)MultiModalsBenchmarkMetricscalculate_metricsX  	task_typeendpoint_typert   base_urlmodel_idru   r   r6   logprobsrequest_rate
burstinessdisable_tqdmnum_warmupsprofileselected_percentile_metricsselected_percentiles
ignore_eosgoodput_config_dictmax_concurrencylora_modulesextra_headers
extra_bodyramp_up_strategy)linearexponentialramp_up_start_rpsramp_up_end_rpsready_check_timeout_secssl_contextc           B         s  zt | W n ty   td| d w |d ur|nd|v }tj|p&d|p)dddddd|d}tj|dtjd	d
dtd |d j|d j	|d j
|d jf\}}}} | d u sst| tsst| trotdd | D ssJ dt|||||||| |||d|dkrt|dI d H }!|!jstd|!j td ntd |dkrtd| d |rd nt|d
|rt|nt 	g }"	fdd}#t|D ]}$t|# }%|"|% qtj|" I d H }$d ur  td td  rt fddtt|D  |r6td t||||d |||| |||d}&|&dI d H }'|'jr6td  |
d!kr=d"nd#}(|d urXtd$| d% td&| d'| d( ntd)|	  td*|
 d+|( d, td-|  |rvd ntt|d
})|rt|nt fd.d/}*t ! }+g },g }-d0}.|d ur|d ur|}.|-|.t"# $ d1 t%||	|
|||2 zs3 d H W \}/}0|d urt&|0}1|1|.krt"# $ }2t|.d2 |1d2 D ]}3|-|3|2d1 q|1}.|/j|/j	|/j
|/j|/j'f\}4}5}6}7}8||}9}: rt( };|;|;}9}:t|9|:|4||5|6||7||||8d3}<|,t|*|<|)d4 q6 tj|, I d H }=|)d urA|)  t ! |+ }>| t)j*kr_t+||=|>|||| ||	|>d5\}?n	t,|=|>|d6d}?tt-ri d7|>d8j.d9j/d:j0d;j1d<j2d=|rj3nd d>j4d?j5d@j6dAj7dBj8dCdDd |=D dE|?dFdGd |=D dHdId |=D dJdKd |=D dLd |=D j9j:dMn|>j.j0j2j5dNd |=D dOd |=D dP|-r|-dQ< dRt;ffdSdT}@| t)j*krD ]}A|@|A qn|@dU |r8tdV t|||dW |||dX}&|&dI d H }'|'jr8tdY  I d H  S )ZNzUnknown backend: zhttps://r   i,  T<   F)limitlimit_per_hostttl_dns_cacheuse_dns_cachekeepalive_timeoutenable_cleanup_closedforce_closessli`T  )total)	connector	trust_envtimeoutz*Starting initial single prompt test run...c                 s   s    | ]}t |tV  qd S N)
isinstancedict).0itemr7   r7   r8   	<genexpr>s  s    zbenchmark.<locals>.<genexpr>z-multi_modal_data must be a dict or list[dict])rR   ru   r   rt   rz   r   r   multi_modal_contentr   r   r   )timeout_secondsz_Initial test run failed - Please make sure benchmark arguments are correctly specified. Error: zInitial test run completed.zSkipping endpoint ready check.zWarming up with z requests...c                	      sR   4 I d H   dI d H W  d   I d H  S 1 I d H s"w   Y  d S NrH   rI   rJ   r7   r7   )request_funcrI   
test_inputwarmup_pbarwarmup_semaphorer7   r8   warmup_limited_request_func     0z.benchmark.<locals>.warmup_limited_request_funczWarmup run completed.zStarting main benchmark run...c                    s   g | ]}t  qS r7   )randomchoice)r   _)r   r7   r8   
<listcomp>  s    zbenchmark.<locals>.<listcomp>zStarting profiler...z/start_profile)rH   rI   zProfiler startedg      ?zPoisson processzGamma distributionzTraffic ramp-up strategy: .zWill increase RPS from z to z( RPS over the duration of the benchmark.zTraffic request rate: zBurstiness factor: z ()zMaximum request concurrency: c              	      sR   4 I d H   | ||dI d H W  d   I d H  S 1 I d H s"w   Y  d S r   r7   r   )r   	semaphorer7   r8   limited_request_func  r   z'benchmark.<locals>.limited_request_func)rpsr   rs   )rR   ru   r   rt   rz   r   r   r   r   r   r   
request_idr   )r6   outputsdur_sr   r   r   r   r   r   r   benchmark_duration)r   r   r   duration	completedfailedtotal_input_tokenstotal_output_tokensrequest_throughputrequest_goodputoutput_throughputtotal_token_throughputtotal_audio_duration_stotal_audio_framesaudio_throughput
input_lensc                 S      g | ]}|j qS r7   rz   r   r   r7   r7   r8   r   ;      output_lensttftsc                 S   r  r7   )r   r  r7   r7   r8   r   =  r  itlsc                 S   r  r7   )r   r  r7   r7   r8   r   >  r  generated_textsc                 S   r  r7   )r   r  r7   r7   r8   r   ?  r  c                 S   r  r7   r   r  r7   r7   r8   r   @  r  )errorsmax_output_tokens_per_smax_concurrent_requestsc                 S   r  r7   r  r  r7   r7   r8   r   K  r  c                 S   r  r7   r  r  r7   r7   r8   r   L  r  )r   r  r  r  r  r  r  rps_change_eventsmetric_attribute_namec                    s   | vrd S | dk}| dk}d}|rd}n|rd}d|  | }t  |d}||< d|  | }t  |d}||< t  d	|  | D ] \}}	t||krUtt|nt|}
|	d
|
 d|  | < qEd S )Nr?   r=   _ms_sra   mean_r;   median_percentiles_pr   )getattrrF   str)r  is_audio_rtfis_audio_durationsuffixmean_attr_name
mean_valuemedian_attr_namemedian_valuer   valuep_word)rm   resultr   r7   r8   process_one_metricR  s&    z%benchmark.<locals>.process_one_metrice2elzStopping profiler...z/stop_profile)rR   r   rt   rz   r   r   zProfiler stopped)<r   KeyErrorrw   aiohttpTCPConnectorClientSessionClientTimeoutprintr   rz   expected_output_lenmulti_modal_datar   r   listallr   r   r   r   r   asyncio	Semaphore
contextlibnullcontextrangecreate_taskr   gathercloseiterr   r{   r|   r   now	isoformatr   rF   r   nextr   
GENERATIONr   r   r   r  r  total_inputtotal_outputr  r  r  r  r	  r
  r  r  r  r"  )Br   r   rt   r   r   ru   r   r6   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   ssl_settingr   test_prompttest_prompt_lentest_output_lentest_mm_contenttest_outputwarmup_tasksr   r   request_taskprofile_inputprofile_outputdistributionrJ   r   benchmark_start_timetasksr  last_int_rpsrequestcurrent_request_ratecurrent_int_rpsr   rps_valr   rz   r   
mm_contentr   req_model_idreq_model_namereq_lora_modulerH   r   r   actual_output_lensr-  metricr7   )
r   rm   r   r,  r   r   rI   r   r   r   r8   	benchmark3  s  







,
	


r`  )NrG   r   )NNNr   N)Er9  r   r;  r   rd   rx   r   r   r{   r   collections.abcr   dataclassesr   r   typingr   r0  pydubr   tqdm.asyncior   transformersr   vllm.benchmarksr	   vllm.benchmarks.datasetsr
   )vllm.benchmarks.lib.endpoint_request_funcr   r   r   r   r   r   r   r   r   vllm.loggerr   rA   r   <vllm_omni.benchmarks.data_modules.random_multi_modal_datasetr   r9   r'   r:   r2  r   r   r   r   vllm.benchmarks.server   r   r   r   $vllm_omni.benchmarks.metrics.metricsr   r   r`  benchmark_oldr"  r7  rF   rD   boolr   
SSLContextr7   r7   r7   r8   <module>   s    ,
 
Q

	




  
T