o
    ie                     @   s  U d Z ddlZddlZddlZddlZddlZddlZddlmZ ddl	m
Z
mZ ddlmZmZmZ ddlZddlZddlmZ ejddZG d	d
 d
Ze
G dd dZe
G dd dZG dd deZdededeee B ddfddZdeeef deddfddZdeeef deddfddZ dHdedB deeef fdd Z!	dHded!ej"d"edB defd#d$Z#	%dIded&ed' de$eeef  fd(d)Z%		%dJded!ej"d"edB d&ed' def
d*d+Z&	dHded!ej"d"edB defd,d-Z'	dHd!ej"dedeeef deeef d"edB defd.d/Z(	dHded!ej"d"edB defd0d1Z)	dHded!ej"d"edB defd2d3Z*		%dJded!ej"d"edB d&ed' def
d4d5Z+defd6d7Z,defd8d9Z-defd:d;Z.	dHded!ej"d"edB defd<d=Z/	dHded!ej"d"edB defd>d?Z0	dHded!ej"d"edB defd@dAZ1	dHded!ej"d"edB defdBdCZ2e#e#e&e'e)e+e/e0e1e2e*dDZ3eeef e4dE< dFdG e35 D Z6dS )Kz'The request function for API endpoints.    N)	Awaitable)	dataclassfield)AnyLiteralProtocol)tqdmi`T  )totalc                   @   s.   e Zd ZdZdd Zdedee fddZdS )	StreamedResponseHandlerzbHandles streaming HTTP responses by accumulating chunks until complete
    messages are available.c                 C   s
   d| _ d S N )buffer)self r   _/home/ubuntu/vllm_env/lib/python3.10/site-packages/vllm/benchmarks/lib/endpoint_request_func.py__init__   s   
z StreamedResponseHandler.__init__chunk_bytesreturnc                 C   s   | d}|  j|7  _g }d| jv r-| jdd\}| _| }|r(|| d| jv s| jdrm| jd }|dkrL|| j  d| _|S |rmzt| || j  d| _W |S  tj	yl   Y |S w |S )zLAdd a chunk of bytes to the buffer and return any complete
        messages.utf-8z

   data: [DONE]r   )
decoder   splitstripappend
startswithremoveprefixjsonloadsJSONDecodeError)r   r   	chunk_strmessagesmessagemessage_contentr   r   r   	add_chunk   s4   





z!StreamedResponseHandler.add_chunkN)	__name__
__module____qualname____doc__r   bytesliststrr%   r   r   r   r   r
      s    r
   c                   @   s   e Zd ZU dZeee B ed< eed< eed< eed< eed< dZedB ed< dZ	edB ed	< dZ
edB ed
< dZedB ed< dZeee B dB ed< dZeed< dZedB ed< dZedB ed< dS )RequestFuncInputz#The input for the request function.promptapi_url
prompt_len
output_lenmodelN
model_namelogprobsextra_headers
extra_bodymulti_modal_contentF
ignore_eoslanguage
request_id)r&   r'   r(   r)   r,   r+   __annotations__intr3   r4   r5   dictr6   r7   r8   boolr9   r:   r   r   r   r   r-   ?   s   
 r-   c                   @   s   e Zd ZU dZdZeed< dZeed< dZ	e
ed< dZeed	< dZe
ed
< eedZee
 ed< dZe
ed< dZeed< dZeed< dZe
ed< dS )RequestFuncOutputz5The output of the request function including metrics.r   generated_textFsuccess        latencyr   output_tokensttft)default_factoryitltpotr0   error
start_timeN)r&   r'   r(   r)   r@   r,   r;   rA   r>   rC   floatrD   r<   rE   r   r+   rG   rH   r0   rI   rJ   r   r   r   r   r?   R   s   
 r?   c                
   @   s4   e Zd Z	ddedejdedB dee fddZ	dS )	RequestFuncNrequest_func_inputsessionpbarr   c                 C   s   d S Nr   )r   rM   rN   rO   r   r   r   __call__c   s   zRequestFunc.__call__rP   )
r&   r'   r(   r-   aiohttpClientSessionr   r   r?   rQ   r   r   r   r   rL   b   s    rL   r/   api_nameexpected_suffixesr   c                 C   sB   t |tr|h}h |d}| t|st| d| dd S )Nprofilez URL must end with one of: .)
isinstancer,   endswithtuple
ValueError)r/   rT   rU   r   r   r   _validate_api_urlk   s   
r\   payloadrM   c                 C   s*   |j r|j | d< |jr| |j d S d S )Nr8   )r8   r6   update)r]   rM   r   r   r   _update_payload_commony   s
   
r_   headersc                 C   s(   |j r| |j O } |jr|j| d< d S d S )Nzx-request-id)r5   r:   )r`   rM   r   r   r   _update_headers_common   s
   
ra   content_typec                 C   s2   i }| r| |d< t jd}|rd| |d< |S )NzContent-TypeOPENAI_API_KEYzBearer Authorization)osenvironget)rb   r`   api_keyr   r   r   _get_headers   s   ri   rN   rO   c              	      s@  | j }t|dd | jr| jn| j| jd| j| jdddid}t||  t }t	||  t
 }| j|_d}t }||_|}	z|j|||d4 I d	H }
|
jd
krd}t }|
j 2 zi3 d	H W }| }|siq\||}|D ]T}|drxqp|d}|dkrt|}|d }r|d d}t }|sd}t | }||_n|j||	  |}	||pd7 }qp|d }r|d|_qpq\6 |rd|_nd|_d|_ ||_!|	| |_"n	|
j#pd|_ d|_W d	  I d	H  n1 I d	H sw   Y  W n t$y   d|_t%& }d't(j)| |_ Y nw |r|*d |S )zThe async request function for the OpenAI Completions API.

    Args:
        request_func_input: The input for the request function.
        pbar: The progress bar to display the progress.

    Returns:
        The output of the request function.
    zOpenAI Completions APIcompletionsg      ?Tinclude_usage)r2   r.   repetition_penalty
max_tokensr4   streamstream_optionsr   urlr   r`   N   F:r   r   choicesr   textusagecompletion_tokenszVNever received a valid chunk to calculate TTFT.This response will be marked as failed!r   )+r/   r\   r3   r2   r.   r1   r4   r_   ri   ra   r?   r0   timeperf_counterrJ   poststatusr
   contentiter_anyr   r%   r   r   r   r   rg   rE   rG   r   rD   rA   rI   r@   rC   reason	Exceptionsysexc_infojoin	tracebackformat_exceptionr^   )rM   rN   rO   r/   r]   r`   outputr@   stmost_recent_timestampresponsefirst_chunk_receivedhandlerr   r"   r#   chunkdatart   ru   	timestamprE   rv   r   r   r   r    async_request_openai_completions   s   






((:
r   lastmm_position)firstr   c                 C   sn   d| j dg}g }| jr+| j}t|tr|| j nt|tr'|| j ntd|dkr3|| S || S )Nru   typeru   z@multi_modal_content must be a dict or list[dict] for openai-chatr   )r.   r7   rX   r+   extendr=   r   	TypeError)rM   r   text_contentsmm_contents
mm_contentr   r   r   _get_chat_content  s   

r   c              	      s<  | j }t|dd t| |d}| jr| jn| jd|dg| jdddid}t||  td	}t||  t	 }| j
|_
d
}	d}
t }||_|}z|j|||d4 I d H }|jdkrt }|j 2 zi3 d H W }| }|spqc||}|D ]T}|drqw|d}|dkrt }t|}|d }r|d d d}|
dkr|| }
|
|_n|j||  |	|pd
7 }	n|d }r|d|_|}qwqc6 |	|_d|_|| |_ n	|j!pd
|_"d|_W d   I d H  n1 I d H sw   Y  W n t#y   d|_t$% }d
&t'j(| |_"Y nw |r|)d |S )NzOpenAI Chat Completions APIzchat/completionsr   userroler|   Trk   )r2   r"   max_completion_tokensrn   ro   application/jsonr   rB   rp   rr   rs   r   r   rt   r   deltar|   rv   rw   Fr   )*r/   r\   r   r3   r2   r1   r_   ri   ra   r?   r0   rx   ry   rJ   rz   r{   r
   r|   r}   r   r%   r   r   r   r   rg   rE   rG   r   rD   r@   rA   rC   r~   rI   r   r   r   r   r   r   r^   )rM   rN   rO   r   r/   r|   r]   r`   r   r@   rE   r   r   r   r   r   r"   r#   r   r   r   rt   rv   r   r   r   r   %async_request_openai_chat_completions  s   






$(-
r   c              
      s  dd l  | j}t|dddh d| jdg}| jr| jn| j| jddddd	}t||  t }t	||   fd
d}| j
}t|trGd|vrKtd||d  }	t }
|
jd|	dd | D ]\}}|
|t| qct }| j|_d}d}t }||_|}z|j||
|d4 I d H }|jdkrt }|j 2 zh3 d H W }| }|sq||}|D ]S}|d d}|dkrt }t!"|}|#d }r|d d #d}|dkr|| }||_$n|j%&||  ||pd7 }n|#d }r|#d|_'|}qq6 ||_(d|_)|| |_*n
|j+pd|_,d|_)W d   I d H  n1 I d H s1w   Y  W n t-yQ   d|_)t./ }d0t1j2| |_,Y nw W d    n	1 s]w   Y  |rj|3d |S )Nr   zOpenAI Audio APItranscriptionstranslationsru   r   Ten)r2   r   rn   r9   stream_include_usagestream_continuous_usage_statsc                    s(   t  } j|| |dd |d |S )NWAV)formatr   )ioBytesIOwriteseek)ysrr   	soundfiler   r   to_bytes  s   
z,async_request_openai_audio.<locals>.to_bytesaudioz5multi_modal_content must be a dict containing 'audio'filez	audio/wav)rb   r   rB   )rq   r   r`   rr   r   r   r   rt   r   r|   rv   rw   Fr   )4r   r/   r\   r.   r3   r2   r1   r_   ri   ra   r7   rX   r=   r   rR   FormData	add_fielditemsr,   r?   r0   rx   ry   rJ   rz   r{   r
   r|   r}   r   r%   r   r   r   r   rg   rE   rG   r   rD   r@   rA   rC   r~   rI   r   r   r   r   r   r   r^   )rM   rN   rO   r/   r|   r]   r`   r   mm_audiofformkeyvaluer   r@   rE   r   r   r   r   r   r"   r#   r   r   r   rt   rv   r   r   r   r   async_request_openai_audio{  s   




!*-A
r   c              
      s:  t  }t }||_zn| j|||d4 I d H U}|jdkrZt |  |_|_|dddkr?t	
|jd }|di }	n|	 I d H }
|
di }	d|_d	|_|	d
d|_n	d|_|jpad	|_W d   I d H  n1 I d H ssw   Y  W n ty } zd|_t||_W Y d }~nd }~ww |r|d |S )N)rq   r`   r   rr   encoding_formatrK   r*   metadatarv   Tr   prompt_tokensr   Fr   )r?   rx   ry   rJ   rz   r{   rE   rC   rg   r   r   r`   rA   r@   r0   r~   rI   r   r,   r^   )rN   r/   r]   r`   rO   r   r   r   r   rv   r   er   r   r   _run_pooling_request  s8   
(
r   c                    sd   | j }t|dd | jr| jn| j| jdd}t||  td}t||  t|||||dI d H S )NOpenAI Embeddings API
embeddings)r2   inputtruncate_prompt_tokensr   r]   r`   rO   )	r/   r\   r3   r2   r.   r_   ri   ra   r   rM   rN   rO   r/   r]   r`   r   r   r   async_request_openai_embeddings  s&   
	
r   c                    s   | j }t|dd t| jtrt| jdksJ | jr| jn| j| jd | jdd  dd}td}t	||  t
|||||dI d H S )	NzvLLM score APIrerankr   r   r   )r2   query	documentsr   r   r   )r/   r\   rX   r.   r+   lenr3   r2   ri   ra   r   r   r   r   r   async_request_vllm_rerank+  s,   

r   c                    sv   | j }t|dd t| |d}| jr| jn| jd|dgdd}t||  td}t||  t|||||d	I d H S )
Nr   r   r   r   r   r   )r2   r"   r   r   r   )	r/   r\   r   r3   r2   r_   ri   ra   r   )rM   rN   rO   r   r/   r|   r]   r`   r   r   r   $async_request_openai_embeddings_chatO  s*   

r   c                 C   sB   | j rtd| j }|rzt|dW S  ty   Y d S w d S )Nz(\d+)$r   )r:   researchr<   groupr[   )rM   matchr   r   r   _try_extract_request_idxs  s   r   c                 C   s   | j rd| _d S d S r   )r7   r.   )rM   r   r   r   _preprocess_clip  s   
r   c                 C   sF   | j r!t| }|d u p|d dk}|rd| _d S d| j | _d S d S )N   r   zRepresent the given image.z7Represent the given image with the following question: )r7   r   r.   )rM   request_idxuse_image_only_promptr   r   r   _preprocess_vlm2vec  s   
r   c                       t |  t| ||dI d H S N)rO   )r   r   rM   rN   rO   r   r   r   $async_request_openai_embeddings_clip     r   c                    s    t |  t| ||ddI d H S )Nr   )rO   r   )r   r   r   r   r   r   'async_request_openai_embeddings_vlm2vec  s   r   c                    s   | j }t|dd d| jr| jn| ji}| jr| j|d< n | j}t|ts(J |d }|| d |d< |ddd	 |d
< t	||  t
d}t||  t|||||dI d H S )NzInfinity Embeddings APIr   r2   r   r   rq   _r   r   modalityr   r   )r/   r\   r3   r2   r.   r7   rX   r=   r   r_   ri   ra   r   )rM   rN   rO   r/   r]   r   mm_typer`   r   r   r   !async_request_infinity_embeddings  s2   

r   c                    r   r   )r   r   r   r   r   r   &async_request_infinity_embeddings_clip  r   r   )vllmopenaizopenai-chatzopenai-audiozopenai-embeddingszopenai-embeddings-chatzopenai-embeddings-clipzopenai-embeddings-vlm2veczinfinity-embeddingszinfinity-embeddings-clipzvllm-rerankASYNC_REQUEST_FUNCSc                 C   s    g | ]\}}|t tfv r|qS r   )r   r   ).0kvr   r   r   
<listcomp>  s
    r   rP   )r   )Nr   )7r)   r   r   re   r   rx   r   collections.abcr   dataclassesr   r   typingr   r   r   rR   regexr   tqdm.asyncior   ClientTimeoutAIOHTTP_TIMEOUTr
   r-   r?   rL   r,   setr\   r=   r_   ra   ri   rS   r   r+   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r;   r   OPENAI_COMPATIBLE_BACKENDSr   r   r   r   <module>   s|  )	






 

q

`
q


(
"
'
$


)

