o
    -if                     @   s|  U d Z ddlZddlZddlZddlZddlZddlZddlmZ ddl	m
Z
mZ ddlmZmZmZ ddlZddlZddlmZ ejddZG d	d
 d
Ze
G dd dZe
G dd dZG dd deZdededeee B ddfddZdeeef deddfddZdeeef deddfddZ 	dEdedej!dedB defd d!Z"	"dFded#ed$ de#eeef  fd%d&Z$		"dGdedej!dedB d#ed$ def
d'd(Z%	dEdedej!dedB defd)d*Z&	dEdej!dedeeef deeef dedB defd+d,Z'	dEdedej!dedB defd-d.Z(	dEdedej!dedB defd/d0Z)		"dGdedej!dedB d#ed$ def
d1d2Z*defd3d4Z+defd5d6Z,defd7d8Z-	dEdedej!dedB defd9d:Z.	dEdedej!dedB defd;d<Z/	dEdedej!dedB defd=d>Z0	dEdedej!dedB defd?d@Z1e"e"e%e&e(e*e.e/e0e1e)dAZ2eeef e3dB< dCdD e24 D Z5dS )Hz'The request function for API endpoints.    N)	Awaitable)	dataclassfield)AnyLiteralProtocol)tqdmi`T  )totalc                   @   s.   e Zd ZdZdd Zdedee fddZdS )	StreamedResponseHandlerzbHandles streaming HTTP responses by accumulating chunks until complete
    messages are available.c                 C   s
   d| _ d S N )buffer)self r   f/home/ubuntu/veenaModal/venv/lib/python3.10/site-packages/vllm/benchmarks/lib/endpoint_request_func.py__init__   s   
z StreamedResponseHandler.__init__chunk_bytesreturnc                 C   s   | d}|  j|7  _g }d| jv r-| jdd\}| _| }|r(|| d| jv s| jdrm| jd }|dkrL|| j  d| _|S |rmzt| || j  d| _W |S  tj	yl   Y |S w |S )zLAdd a chunk of bytes to the buffer and return any complete
        messages.utf-8z

   data: [DONE]r   )
decoder   splitstripappend
startswithremoveprefixjsonloadsJSONDecodeError)r   r   	chunk_strmessagesmessagemessage_contentr   r   r   	add_chunk   s4   





z!StreamedResponseHandler.add_chunkN)	__name__
__module____qualname____doc__r   bytesliststrr%   r   r   r   r   r
      s    r
   c                   @   s   e Zd ZU dZeee B ed< eed< eed< eed< eed< dZedB ed< dZ	edB ed	< dZ
edB ed
< dZedB ed< dZeee B dB ed< dZeed< dZedB ed< dZedB ed< dS )RequestFuncInputz#The input for the request function.promptapi_url
prompt_len
output_lenmodelN
model_namelogprobsextra_headers
extra_bodymulti_modal_contentF
ignore_eoslanguage
request_id)r&   r'   r(   r)   r,   r+   __annotations__intr3   r4   r5   dictr6   r7   r8   boolr9   r:   r   r   r   r   r-   ?   s   
 r-   c                   @   s   e Zd ZU dZdZeed< dZeed< dZ	e
ed< dZeed	< dZe
ed
< eedZee
 ed< dZe
ed< dZeed< dZeed< dZe
ed< dS )RequestFuncOutputz5The output of the request function including metrics.r   generated_textFsuccess        latencyr   output_tokensttft)default_factoryitltpotr0   error
start_timeN)r&   r'   r(   r)   r@   r,   r;   rA   r>   rC   floatrD   r<   rE   r   r+   rG   rH   r0   rI   rJ   r   r   r   r   r?   R   s   
 r?   c                
   @   s4   e Zd Z	ddedejdedB dee fddZ	dS )	RequestFuncNrequest_func_inputsessionpbarr   c                 C   s   d S Nr   )r   rM   rN   rO   r   r   r   __call__c   s   zRequestFunc.__call__rP   )
r&   r'   r(   r-   aiohttpClientSessionr   r   r?   rQ   r   r   r   r   rL   b   s    rL   r/   api_nameexpected_suffixesr   c                 C   sB   t |tr|h}h |d}| t|st| d| dd S )Nprofilez URL must end with one of: .)
isinstancer,   endswithtuple
ValueError)r/   rT   rU   r   r   r   _validate_api_urlk   s   
r\   payloadrM   c                 C   s*   |j r|j | d< |jr| |j d S d S )Nr8   )r8   r6   update)r]   rM   r   r   r   _update_payload_commony   s
   
r_   headersc                 C   s(   |j r| |j O } |jr|j| d< d S d S )Nzx-request-id)r5   r:   )r`   rM   r   r   r   _update_headers_common   s
   
ra   rN   rO   c              	      sP  | j }t|dd | jr| jn| j| jd| j| jdddid}t||  ddtj	
d	 i}t||  t }| j|_d
}t }||_|}	z|j|||d4 I dH }
|
jdkrd}t }|
j 2 zi3 dH W }| }|sqqd||}|D ]T}|drqx|d}|dkrt|}|
d }r|d 
d}t }|sd}t | }||_n|j||	  |}	||pd
7 }qx|
d }r|
d|_qxqd6 |rd|_ nd|_ d|_!||_"|	| |_#n	|
j$pd
|_!d|_ W d  I dH  n1 I dH sw   Y  W n t%y   d|_ t&' }d
(t)j*| |_!Y nw |r&|+d |S )zThe async request function for the OpenAI Completions API.

    Args:
        request_func_input: The input for the request function.
        pbar: The progress bar to display the progress.

    Returns:
        The output of the request function.
    zOpenAI Completions APIcompletionsg      ?Tinclude_usage)r2   r.   repetition_penalty
max_tokensr4   streamstream_optionsAuthorizationBearer OPENAI_API_KEYr   urlr   r`   N   F:r   r   choicesr   textusagecompletion_tokenszVNever received a valid chunk to calculate TTFT.This response will be marked as failed!r   ),r/   r\   r3   r2   r.   r1   r4   r_   osenvirongetra   r?   r0   timeperf_counterrJ   poststatusr
   contentiter_anyr   r%   r   r   r   r   rE   rG   r   rD   rA   rI   r@   rC   reason	Exceptionsysexc_infojoin	tracebackformat_exceptionr^   )rM   rN   rO   r/   r]   r`   outputr@   stmost_recent_timestampresponsefirst_chunk_receivedhandlerr   r"   r#   chunkdataro   rp   	timestamprE   rq   r   r   r   r    async_request_openai_completions   s   






((:
r   lastmm_position)firstr   c                 C   sn   d| j dg}g }| jr+| j}t|tr|| j nt|tr'|| j ntd|dkr3|| S || S )Nrp   typerp   z@multi_modal_content must be a dict or list[dict] for openai-chatr   )r.   r7   rX   r+   extendr=   r   	TypeError)rM   r   text_contentsmm_contents
mm_contentr   r   r   _get_chat_content   s   

r   c              	      sL  | j }t|dd t| |d}| jr| jn| jd|dg| jdddid}t||  d	d
tj	d d}t
||  t }| j|_d}	d}
t }||_|}z|j|||d4 I d H }|jdkrt }|j 2 zi3 d H W }| }|sxqk||}|D ]T}|drq|d}|dkrt }t|}|	d }r|d d 	d}|
dkr|| }
|
|_n|j||  |	|pd7 }	n|	d }r|	d|_|}qqk6 |	|_d|_ || |_!n	|j"pd|_#d|_ W d   I d H  n1 I d H sw   Y  W n t$y   d|_ t%& }d't(j)| |_#Y nw |r$|*d |S )NzOpenAI Chat Completions APIzchat/completionsr   userrolerz   Trc   )r2   r"   max_completion_tokensrf   rg   application/jsonri   rj   zContent-Typerh   r   rB   rk   rm   rn   r   r   ro   r   deltarz   rq   rr   Fr   )+r/   r\   r   r3   r2   r1   r_   rs   rt   ru   ra   r?   r0   rv   rw   rJ   rx   ry   r
   rz   r{   r   r%   r   r   r   r   rE   rG   r   rD   r@   rA   rC   r|   rI   r}   r~   r   r   r   r   r^   )rM   rN   rO   r   r/   rz   r]   r`   r   r@   rE   r   r   r   r   r   r"   r#   r   r   r   ro   rq   r   r   r   r   %async_request_openai_chat_completions  s   






$(-
r   c              
      s  dd l  | j}t|dddh d| jdg}| jr| jn| j| jddddd	}t||  d
dtj	
d i}t||   fdd}| j}t|trOd|vrStd||d  }	t }
|
jd|	dd | D ]\}}|
|t| qkt }| j|_d}d}t }||_|}z|j||
|d4 I d H }|jdkrt }|j 2 zh3 d H W }| }|sq| |}|D ]S}|!d"d}|dkrt }t#$|}|
d }r|d d 
d}|dkr|| }||_%n|j&'||  ||pd7 }n|
d }r|
d|_(|}qq6 ||_)d|_*|| |_+n
|j,p#d|_-d |_*W d   I d H  n1 I d H s9w   Y  W n t.yY   d |_*t/0 }d1t2j3| |_-Y nw W d    n	1 sew   Y  |rr|4d! |S )"Nr   zOpenAI Audio APItranscriptionstranslationsrp   r   Ten)r2   r   rf   r9   stream_include_usagestream_continuous_usage_statsrh   ri   rj   c                    s(   t  } j|| |dd |d |S )NWAV)formatr   )ioBytesIOwriteseek)ysrr   	soundfiler   r   to_bytes  s   
z,async_request_openai_audio.<locals>.to_bytesaudioz5multi_modal_content must be a dict containing 'audio'filez	audio/wav)content_typer   rB   )rl   r   r`   rm   r   r   r   ro   r   rz   rq   rr   Fr   )5r   r/   r\   r.   r3   r2   r1   r_   rs   rt   ru   ra   r7   rX   r=   r   rR   FormData	add_fielditemsr,   r?   r0   rv   rw   rJ   rx   ry   r
   rz   r{   r   r%   r   r   r   r   rE   rG   r   rD   r@   rA   rC   r|   rI   r}   r~   r   r   r   r   r^   )rM   rN   rO   r/   rz   r]   r`   r   mm_audiofformkeyvaluer   r@   rE   r   r   r   r   r   r"   r#   r   r   r   ro   rq   r   r   r   r   async_request_openai_audiov  s   




!*-A
r   c              
      s:  t  }t }||_zn| j|||d4 I d H U}|jdkrZt |  |_|_|dddkr?t	
|jd }|di }	n|	 I d H }
|
di }	d|_d	|_|	d
d|_n	d|_|jpad	|_W d   I d H  n1 I d H ssw   Y  W n ty } zd|_t||_W Y d }~nd }~ww |r|d |S )N)rl   r`   r   rm   encoding_formatrK   r*   metadatarq   Tr   prompt_tokensr   Fr   )r?   rv   rw   rJ   rx   ry   rE   rC   ru   r   r   r`   rA   r@   r0   r|   rI   r}   r,   r^   )rN   r/   r]   r`   rO   r   r   r   r   rq   r   er   r   r   _run_pooling_request  s8   
(
r   c                    st   | j }t|dd | jr| jn| j| jdd}t||  ddtjd d}t	||  t
|||||d	I d H S )
NOpenAI Embeddings API
embeddings)r2   inputtruncate_prompt_tokensr   ri   rj   r   r]   r`   rO   )r/   r\   r3   r2   r.   r_   rs   rt   ru   ra   r   rM   rN   rO   r/   r]   r`   r   r   r   async_request_openai_embeddings	  s*   
	
r   c                    s   | j }t|dd t| jtrt| jdksJ | jr| jn| j| jd | jdd  dd}ddtj	
d	 d
}t||  t|||||dI d H S )NzvLLM score APIrerankr   r   r   )r2   query	documentsr   r   ri   rj   r   r   )r/   r\   rX   r.   r+   lenr3   r2   rs   rt   ru   ra   r   r   r   r   r   async_request_vllm_rerank+  s0   

r   c                    s   | j }t|dd t| |d}| jr| jn| jd|dgdd}t||  dd	tjd
 d}t	||  t
|||||dI d H S )Nr   r   r   r   r   r   )r2   r"   r   r   ri   rj   r   r   )r/   r\   r   r3   r2   r_   rs   rt   ru   ra   r   )rM   rN   rO   r   r/   rz   r]   r`   r   r   r   $async_request_openai_embeddings_chatR  s.   

r   c                 C   sB   | j rtd| j }|rzt|dW S  ty   Y d S w d S )Nz(\d+)$r   )r:   researchr<   groupr[   )rM   matchr   r   r   _try_extract_request_idxy  s   r   c                 C   s   | j rd| _d S d S r   )r7   r.   )rM   r   r   r   _preprocess_clip  s   
r   c                 C   sF   | j r!t| }|d u p|d dk}|rd| _d S d| j | _d S d S )N   r   zRepresent the given image.z7Represent the given image with the following question: )r7   r   r.   )rM   request_idxuse_image_only_promptr   r   r   _preprocess_vlm2vec  s   
r   c                       t |  t| ||dI d H S N)rO   )r   r   rM   rN   rO   r   r   r   $async_request_openai_embeddings_clip     r   c                    s    t |  t| ||ddI d H S )Nr   )rO   r   )r   r   r   r   r   r   'async_request_openai_embeddings_vlm2vec  s   r   c                    s   | j }t|dd d| jr| jn| ji}| jr| j|d< n | j}t|ts(J |d }|| d |d< |ddd	 |d
< t	||  ddt
jd d}t||  t|||||dI d H S )NzInfinity Embeddings APIr   r2   r   r   rl   _r   r   modalityr   ri   rj   r   r   )r/   r\   r3   r2   r.   r7   rX   r=   r   r_   rs   rt   ru   ra   r   )rM   rN   rO   r/   r]   r   mm_typer`   r   r   r   !async_request_infinity_embeddings  s6   

r   c                    r   r   )r   r   r   r   r   r   &async_request_infinity_embeddings_clip  r   r   )vllmopenaizopenai-chatzopenai-audiozopenai-embeddingszopenai-embeddings-chatzopenai-embeddings-clipzopenai-embeddings-vlm2veczinfinity-embeddingszinfinity-embeddings-clipzvllm-rerankASYNC_REQUEST_FUNCSc                 C   s    g | ]\}}|t tfv r|qS r   )r   r   ).0kvr   r   r   
<listcomp>  s
    r   rP   )r   )Nr   )6r)   r   r   rs   r~   rv   r   collections.abcr   dataclassesr   r   typingr   r   r   rR   regexr   tqdm.asyncior   ClientTimeoutAIOHTTP_TIMEOUTr
   r-   r?   rL   r,   setr\   r=   r_   ra   rS   r   r+   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r;   r   OPENAI_COMPATIBLE_BACKENDSr   r   r   r   <module>   sz  )	







s

c
s


(
%
*
'


,

