o
    -ºiÕT  ã                   @   s<  U d dl Z d dlZd dlmZ d dlmZmZ d dlmZ d dl	m
Z
 d dlmZmZ d dlZd dlZd dlmZ d dlmZmZ d d	lmZ d d
lmZ d dlmZmZ d dlmZ d dlmZ d dlm Z m!Z! d dl"m#Z# d dl$m%Z%m&Z& d dl'm(Z( d dl)m*Z* d dl+m,Z,m-Z- d dl.m/Z/ d dl0m1Z1m2Z2m3Z3m4Z4 d dl5m6Z6 d dl7m8Z8 d dl9m:Z: d dl;m<Z< d dl=m>Z> d dl?m@ZA e8eBƒZCe e,B e3B e1B ZDeeEd< G dd„ de&ƒZFG dd „ d e&ƒZGG d!d"„ d"e&ƒZHd#e>fd$d%„ZId&d'„ ZJd(ZKG d)d*„ d*ƒZLd+eMd,eMfd-d.„ZNd/eMd0eOeH d,dfd1d2„ZPd3eMd4eMd5eQd,dfd6d7„ZRd+eMd0eOeH d8eMd,dfd9d:„ZSd;eFd<eMd,eHfd=d>„ZTd;eFd<eMd,eHfd?d@„ZUdAed;eFdBeLd,eHfdCdD„ZVdEdF„ ZWdGedHed,dfdIdJ„ZXdHefdKdL„ZYeBdMkrœeJƒ ZZeC [dNeA¡ eC [dOeZ¡ eZj\rŽeC [dP¡ eeZj]eZj^dQ neC [dR¡ e  _eYeZƒ¡ dS dS )Sé    N)Ú	Namespace)Ú	AwaitableÚCallable)Ú
HTTPStatus)ÚStringIO)ÚAnyÚ	TypeAlias)Ústart_http_server)ÚTypeAdapterÚfield_validator)ÚValidationInfo)Útqdm)ÚAsyncEngineArgsÚoptional_type)ÚEngineClient)ÚRequestLogger)ÚChatCompletionRequestÚChatCompletionResponse)ÚOpenAIServingChat)ÚErrorResponseÚOpenAIBaseModel)ÚBaseModelPath)ÚOpenAIServingModels)ÚEmbeddingRequestÚEmbeddingResponse)ÚOpenAIServingEmbedding)ÚRerankRequestÚRerankResponseÚScoreRequestÚScoreResponse)ÚServingScores)Úinit_logger)ÚReasoningParserManager)Úrandom_uuid)ÚFlexibleArgumentParser)Ú__version__ÚBatchRequestInputBodyc                   @   sT   e Zd ZU dZeed< eed< eed< eed< edddede	d	e
fd
d„ƒƒZdS )ÚBatchRequestInputz‚
    The per-line object of the batch input file.

    NOTE: Currently only the `/v1/chat/completions` endpoint is supported.
    Ú	custom_idÚmethodÚurlÚbodyÚplain)ÚmodeÚvalueÚinfoc                 C   sl   |j d }|dkrt |¡S |dkrttƒ |¡S | d¡r%ttƒ |¡S | d¡r/t |¡S tt	ƒ |¡S )Nr*   ú/v1/chat/completionsú/v1/embeddingsú/scoreú/rerank)
Údatar   Úmodel_validater
   r   Úvalidate_pythonÚendswithr   r   r&   )Úclsr.   r/   r*   © r9   ú^/home/ubuntu/veenaModal/venv/lib/python3.10/site-packages/vllm/entrypoints/openai/run_batch.pyÚcheck_type_for_urlN   s   




z$BatchRequestInput.check_type_for_urlN)Ú__name__Ú
__module__Ú__qualname__Ú__doc__ÚstrÚ__annotations__r&   r   Úclassmethodr   r   r;   r9   r9   r9   r:   r'   8   s   
 
r'   c                   @   s>   e Zd ZU dZeed< eed< dZee	B e
B eB dB ed< dS )ÚBatchResponseDataéÈ   Ústatus_codeÚ
request_idNr+   )r<   r=   r>   rE   ÚintrA   r@   r+   r   r   r   r   r9   r9   r9   r:   rC   ^   s   
 	úÿþýü
ÿrC   c                   @   s:   e Zd ZU dZeed< eed< edB ed< edB ed< dS )ÚBatchRequestOutputzA
    The per-line object of the batch output and error files
    Úidr(   NÚresponseÚerror)r<   r=   r>   r?   r@   rA   rC   r   r9   r9   r9   r:   rH   o   s   
 rH   Úparserc                 C   sÈ   | j dddtdd | j dddtdd | j d	td d
d | j dttƒddd t | ¡} | j dtd dd | j dddd | j dtddd | j dtddd | j ddddd | j dddd d | S )!Nz-iz--input-fileTz´The path or url to a single input file. Currently supports local file paths, or the http protocol (http or https). If a URL is specified, the file should be available via HTTP GET.)ÚrequiredÚtypeÚhelpz-oz--output-filez¬The path or url to a single output file. Currently supports local file paths, or web (http or https) urls. If a URL is specified, the file should be available via HTTP PUT.z--output-tmp-dirzMThe directory to store the output file before uploading it to the output URL.)rN   ÚdefaultrO   z--response-roleÚ	assistantz@The role name to return if `request.add_generation_prompt=True`.z--max-log-lenz^Max number of prompt characters or prompt ID numbers being printed in log.

Default: Unlimitedz--enable-metricsÚ
store_truezEnable Prometheus metrics)ÚactionrO   z--urlz0.0.0.0zLURL to the Prometheus metrics server (only needed if enable-metrics is set).z--porti@  zUPort number for the Prometheus metrics server (only needed if enable-metrics is set).z--enable-prompt-tokens-detailsFz6If set to True, enable prompt_tokens_details in usage.)rS   rP   rO   z--enable-force-include-usagezZIf set to True, include usage on every request (even when stream_options is not specified))Úadd_argumentr@   r   r   Úadd_cli_argsrG   ©rL   r9   r9   r:   Úmake_arg_parser   sz   û	û	üü
ü	ÿüüüürW   c                  C   s   t dd} t| ƒ ¡ S )Nz$vLLM OpenAI-Compatible batch runner.)Údescription)r$   rW   Ú
parse_argsrV   r9   r9   r:   rY   Ï   s   
rY   z_{desc}: {percentage:3.0f}% Completed | {n_fmt}/{total_fmt} [{elapsed}<{remaining}, {rate_fmt}]
c                   @   s2   e Zd Zdd„ Zdd„ Zdd„ Zdefdd	„Zd
S )ÚBatchProgressTrackerc                 C   s   d| _ d | _d S )Nr   )Ú_totalÚ_pbar©Úselfr9   r9   r:   Ú__init__Ü   s   
zBatchProgressTracker.__init__c                 C   s   |  j d7  _ d S )Né   )r[   r]   r9   r9   r:   Ú	submittedà   s   zBatchProgressTracker.submittedc                 C   s   | j r
| j  ¡  d S d S ©N)r\   Úupdater]   r9   r9   r:   Ú	completedã   s   ÿzBatchProgressTracker.completedÚreturnc                 C   s:   t j ¡  pt j ¡ dk}t| jddd| td| _| jS )Nr   ÚreqzRunning batché   )ÚtotalÚunitÚdescÚminintervalÚdisableÚ
bar_format)ÚtorchÚdistributedÚis_initializedÚget_rankr   r[   Ú_BAR_FORMATr\   )r^   Úenable_tqdmr9   r9   r:   Úpbarç   s   ÿúzBatchProgressTracker.pbarN)r<   r=   r>   r_   ra   rd   r   rt   r9   r9   r9   r:   rZ   Û   s
    rZ   Úpath_or_urlre   c              
   Ã   sð   |   d¡s|   d¡r\t ¡ 4 I d H š<}| | ¡4 I d H š}| ¡ I d H W  d   ƒI d H  W  d   ƒI d H  S 1 I d H s?w   Y  W d   ƒI d H  d S 1 I d H sUw   Y  d S t| dd}| ¡ W  d   ƒ S 1 sqw   Y  d S )Núhttp://úhttps://úutf-8©Úencoding)Ú
startswithÚaiohttpÚClientSessionÚgetÚtextÚopenÚread)ru   ÚsessionÚrespÚfr9   r9   r:   Ú	read_fileö   s   €&pÿ$ÿr…   Úoutput_pathÚbatch_outputsc                 Ã   sP   t | ddd}|D ]
}t| ¡ |d qW d  ƒ dS 1 s!w   Y  dS )zš
    Write the responses to a local file.
    output_path: The path to write the responses to.
    batch_outputs: The list of batch outputs to write.
    Úwrx   ry   ©ÚfileN)r€   ÚprintÚmodel_dump_json)r†   r‡   r„   Úor9   r9   r:   Úwrite_local_fileÿ   s   €
ÿ"ÿrŽ   Ú
output_urlÚdata_or_fileÚ	from_filec           
      Ã   sÔ  d}d}t d|d ƒD ]Û}z¤tjtjddd4 I dH š‰}|rit|dƒ;}|j| |d4 I dH š}|jd	krDtd
|j› d| ¡ › ƒ‚W d  ƒI dH  n1 I dH sTw   Y  W d  ƒ n1 scw   Y  n3|j| |d4 I dH š}|jd	kr‡td|j› d| ¡ › ƒ‚W d  ƒI dH  n1 I dH s—w   Y  W d  ƒI dH  n1 I dH s¬w   Y  W q tyç }	 z)||k rÏt	 
d||	|¡ t |¡I dH  ntd|› dt|	ƒ› dƒ|	‚W Y d}	~	qd}	~	ww dS )zí
    Upload a local file to a URL.
    output_url: The URL to upload the file to.
    data_or_file: Either the data to upload or the path to the file to upload.
    from_file: If True, data_or_file is the path to the file to upload.
    rg   r`   iè  )rh   )ÚtimeoutNÚrb)r4   rD   zFailed to upload file.
Status: z
Response: zFailed to upload data.
Status: zPFailed to upload data (attempt %d). Error message: %s.
Retrying in %d seconds...zFailed to upload data (attempt z). Error message: Ú.)Úranger|   r}   ÚClientTimeoutr€   ÚputÚstatusÚ	Exceptionr   ÚloggerrK   ÚasyncioÚsleepr@   )
r   r   r‘   Úmax_retriesÚdelayÚattemptr‚   rŠ   rJ   Úer9   r9   r:   Úupload_data  sl   €	
ÿ
ÿþÿ(þ€ÿ€	
ÿþÿ*þ(ó€üÿþþ€øçr¡   Úoutput_tmp_dirc                 Ã   s   |   d¡s|   d¡r€|du rBt d¡ tƒ }|D ]
}t| ¡ |d q| d¡ t d| ¡ t| | ¡  	¡  
d¡d	d
I dH  dS tjdd|ddd*}t d|j¡ t|j|ƒI dH  t d| ¡ t| |jdd
I dH  W d  ƒ dS 1 syw   Y  dS t d| ¡ t| |ƒI dH  dS )a  
    Write batch_outputs to a file or upload to a URL.
    path_or_url: The path or URL to write batch_outputs to.
    batch_outputs: The list of batch outputs to write.
    output_tmp_dir: The directory to store the output file before uploading it
    to the output URL.
    rv   rw   Nz Writing outputs to memory bufferr‰   r   zUploading outputs to %srx   F)r‘   rˆ   Útmp_batch_output_z.jsonl)r-   rz   ÚdirÚprefixÚsuffixz*Writing outputs to temporary local file %sTz Writing outputs to local file %s)r{   rš   r/   r   r‹   rŒ   Úseekr¡   r   ÚstripÚencodeÚtempfileÚNamedTemporaryFileÚnamerŽ   )ru   r‡   r¢   Úoutput_bufferr   r„   r9   r9   r:   Ú
write_fileC  s:   €


ýû"ör®   ÚrequestÚ	error_msgc                 C   s0   t dtƒ › | jttjdtƒ › d|d}|S )Núvllm-úvllm-batch-©rE   rF   ©rI   r(   rJ   rK   )rH   r#   r(   rC   r   ÚBAD_REQUEST)r¯   r°   Úbatch_outputr9   r9   r:   Úmake_error_request_outputl  s   

þù	r·   c                 Ã   s   t | |ƒS rb   )r·   )r¯   r°   r9   r9   r:   Úmake_async_error_request_output{  s   €
r¸   Úserving_engine_funcÚtrackerc                 Ã   s¢   | |j ƒI d H }t|ttttfƒr(tdtƒ › |jt	|dtƒ › dd d}n#t|t
ƒrEtdtƒ › |jt	|jjdtƒ › d|d}nt|dd}| ¡  |S )Nr±   r²   )r+   rF   r´   r³   z'Request must not be sent in stream mode©r°   )r+   Ú
isinstancer   r   r   r   rH   r#   r(   rC   r   rK   Úcoder·   rd   )r¹   r¯   rº   rJ   r¶   r9   r9   r:   Úrun_request  s:   €
þ
ÿú


þù
ÿr¾   c                 C   s@   t  ¡ }| jj }r||vrtd|› dd |¡› dƒ‚d S d S )Nzinvalid reasoning parser: z (chose from { ú,z }))r"   Úlist_registeredÚstructured_outputs_configÚreasoning_parserÚKeyErrorÚjoin)ÚargsÚvalid_reasoning_parsersrÂ   r9   r9   r:   Úvalidate_run_batch_args§  s   
ÿÿÿýrÇ   Úengine_clientrÅ   c                 ƒ   s&  ˆ j d ur
ˆ j }nˆ jg}ˆ jrtˆ jd}nd }‡ fdd„|D ƒ}| j}|  ¡ I d H }t d|¡ t	| |d d}d|v rUt
| |ˆ j|d dˆ jjˆ jˆ jtˆ dd ƒd	
nd }d
|v rdt| ||d ddnd }	d|v ort|jddƒdk}
d
|v sy|
rt| ||d dnd }tƒ }t dˆ j¡ g }tˆ jƒI d H  ¡  d¡D ]Ì}| ¡ }|s¥qœt |¡}|jdkrÔ|d ur¶|jnd }|d u rÆ| t|dd¡ qœ| t|||ƒ¡ |  ¡  qœ|jdkrþ|	d urà|	j!nd }|d u rð| t|dd¡ qœ| t|||ƒ¡ |  ¡  qœ|j "d¡r,|d ur|j#nd }|d u r| t|dd¡ qœ| t|||ƒ¡ |  ¡  qœ|j "d¡rZ|d ur;|j$nd }|d u rL| t|dd¡ qœ| t|||ƒ¡ |  ¡  qœ| t|d|j› dd¡ qœ| %¡  t&j'|Ž I d H }W d   ƒ n	1 sw   Y  t(ˆ j)|ˆ j*ƒI d H  d S )N)Úmax_log_lenc                    s   g | ]	}t |ˆ jd ‘qS ))r¬   Ú
model_path)r   Úmodel)Ú.0r¬   ©rÅ   r9   r:   Ú
<listcomp>À  s    ÿzrun_batch.<locals>.<listcomp>zSupported tasks: %s)rÈ   Úbase_model_pathsÚlora_modulesÚgenerateÚautoÚdefault_chat_template_kwargs)Úrequest_loggerÚchat_templateÚchat_template_content_formatrÂ   Úenable_prompt_tokens_detailsÚenable_force_include_usagerÓ   Úembed)rÔ   rÕ   rÖ   ÚclassifyÚ
num_labelsr   r`   )rÔ   Úscore_templatezReading batch from %s...Ú
r0   z/The model does not support Chat Completions APIr»   r1   z)The model does not support Embeddings APIr2   z%The model does not support Scores APIr3   z%The model does not support Rerank APIzURL z¥ was used. Supported endpoints: /v1/chat/completions, /v1/embeddings, /score, /rerank .See vllm/entrypoints/openai/api_server.py for supported score/rerank versions.)+Úserved_model_namerË   Úenable_log_requestsr   rÉ   Úmodel_configÚget_supported_tasksrš   r/   r   r   Úresponse_rolerÁ   rÂ   r×   rØ   Úgetattrr   Ú	hf_configr    rZ   Ú
input_filer…   r¨   Úsplitr'   Úmodel_validate_jsonr*   Úcreate_chat_completionÚappendr¸   r¾   ra   Úcreate_embeddingr7   Úcreate_scoreÚ	do_rerankrt   r›   Úgatherr®   Úoutput_filer¢   )rÈ   rÅ   Úserved_model_namesrÔ   rÏ   rà   Úsupported_tasksÚopenai_serving_modelsÚopenai_serving_chatÚopenai_serving_embeddingÚenable_serving_rerankingÚopenai_serving_scoresrº   Úresponse_futuresÚrequest_jsonr¯   Úchat_handler_fnÚembed_handler_fnÚscore_handler_fnÚrerank_handler_fnÚ	responsesr9   rÍ   r:   Ú	run_batch²  s  €

ÿýòÿöðùû÷þúüø

ÿýþÿ

ÿýþÿ

ÿý
þÿ

ÿý
þÿ
þÿ
ÿrý   c              	   Ã   s|   ddl m} ddlm} t| ƒ || |jdd4 I d H š}t|| ƒI d H  W d   ƒI d H  d S 1 I d H s7w   Y  d S )Nr   )Úbuild_async_engine_client)ÚUsageContextF)Úusage_contextÚ disable_frontend_multiprocessing)Ú"vllm.entrypoints.openai.api_serverrþ   Úvllm.usage.usage_librÿ   rÇ   ÚOPENAI_BATCH_RUNNERrý   )rÅ   rþ   rÿ   rÈ   r9   r9   r:   Úmainb  s   €ý.ûr  Ú__main__z$vLLM batch processing API version %szargs: %szPrometheus metrics enabled)ÚportÚaddrzPrometheus metrics disabled)`r›   rª   Úargparser   Úcollections.abcr   r   Úhttpr   Úior   Útypingr   r   r|   rn   Úprometheus_clientr	   Úpydanticr
   r   Úpydantic_core.core_schemar   r   Úvllm.engine.arg_utilsr   r   Úvllm.engine.protocolr   Úvllm.entrypoints.loggerr   Ú0vllm.entrypoints.openai.chat_completion.protocolr   r   Ú/vllm.entrypoints.openai.chat_completion.servingr   Ú'vllm.entrypoints.openai.engine.protocolr   r   Ú'vllm.entrypoints.openai.models.protocolr   Ú&vllm.entrypoints.openai.models.servingr   Ú'vllm.entrypoints.pooling.embed.protocolr   r   Ú&vllm.entrypoints.pooling.embed.servingr   Ú'vllm.entrypoints.pooling.score.protocolr   r   r   r   Ú&vllm.entrypoints.pooling.score.servingr    Úvllm.loggerr!   Úvllm.reasoningr"   Ú
vllm.utilsr#   Úvllm.utils.argparse_utilsr$   Úvllm.versionr%   ÚVLLM_VERSIONr<   rš   r&   rA   r'   rC   rH   rW   rY   rr   rZ   r@   r…   ÚlistrŽ   Úboolr¡   r®   r·   r¸   r¾   rÇ   rý   r  rÅ   r/   Úenable_metricsr  r*   Úrunr9   r9   r9   r:   Ú<module>   sÊ   

ÿ&N		ÿÿ
þ5ÿÿÿ
þ)ÿÿ
þÿÿ
þÿþý
ü&ÿþ
ý 1


ò