o
    -iT                     @   s<  U d dl Z d dlZd dlmZ d dlmZmZ d dlmZ d dl	m
Z
 d dlmZmZ d dlZd dlZd dlmZ d dlmZmZ d d	lmZ d d
lmZ d dlmZmZ d dlmZ d dlmZ d dlm Z m!Z! d dl"m#Z# d dl$m%Z%m&Z& d dl'm(Z( d dl)m*Z* d dl+m,Z,m-Z- d dl.m/Z/ d dl0m1Z1m2Z2m3Z3m4Z4 d dl5m6Z6 d dl7m8Z8 d dl9m:Z: d dl;m<Z< d dl=m>Z> d dl?m@ZA e8eBZCe e,B e3B e1B ZDeeEd< G dd de&ZFG dd  d e&ZGG d!d" d"e&ZHd#e>fd$d%ZId&d' ZJd(ZKG d)d* d*ZLd+eMd,eMfd-d.ZNd/eMd0eOeH d,dfd1d2ZPd3eMd4eMd5eQd,dfd6d7ZRd+eMd0eOeH d8eMd,dfd9d:ZSd;eFd<eMd,eHfd=d>ZTd;eFd<eMd,eHfd?d@ZUdAed;eFdBeLd,eHfdCdDZVdEdF ZWdGedHed,dfdIdJZXdHefdKdLZYeBdMkreJ ZZeC[dNeA eC[dOeZ eZj\reC[dP eeZj]eZj^dQ neC[dR e _eYeZ dS dS )S    N)	Namespace)	AwaitableCallable)
HTTPStatus)StringIO)Any	TypeAlias)start_http_server)TypeAdapterfield_validator)ValidationInfo)tqdm)AsyncEngineArgsoptional_type)EngineClient)RequestLogger)ChatCompletionRequestChatCompletionResponse)OpenAIServingChat)ErrorResponseOpenAIBaseModel)BaseModelPath)OpenAIServingModels)EmbeddingRequestEmbeddingResponse)OpenAIServingEmbedding)RerankRequestRerankResponseScoreRequestScoreResponse)ServingScores)init_logger)ReasoningParserManager)random_uuid)FlexibleArgumentParser)__version__BatchRequestInputBodyc                   @   sT   e Zd ZU dZeed< eed< eed< eed< edddede	d	e
fd
dZdS )BatchRequestInputz
    The per-line object of the batch input file.

    NOTE: Currently only the `/v1/chat/completions` endpoint is supported.
    	custom_idmethodurlbodyplain)modevalueinfoc                 C   sl   |j d }|dkrt|S |dkrtt|S |dr%tt|S |dr/t|S tt	|S )Nr*   /v1/chat/completions/v1/embeddings/score/rerank)
datar   model_validater
   r   validate_pythonendswithr   r   r&   )clsr.   r/   r*    r9   ^/home/ubuntu/veenaModal/venv/lib/python3.10/site-packages/vllm/entrypoints/openai/run_batch.pycheck_type_for_urlN   s   




z$BatchRequestInput.check_type_for_urlN)__name__
__module____qualname____doc__str__annotations__r&   r   classmethodr   r   r;   r9   r9   r9   r:   r'   8   s   
 
r'   c                   @   s>   e Zd ZU dZeed< eed< dZee	B e
B eB dB ed< dS )BatchResponseData   status_code
request_idNr+   )r<   r=   r>   rE   intrA   r@   r+   r   r   r   r   r9   r9   r9   r:   rC   ^   s   
 	
rC   c                   @   s:   e Zd ZU dZeed< eed< edB ed< edB ed< dS )BatchRequestOutputzA
    The per-line object of the batch output and error files
    idr(   Nresponseerror)r<   r=   r>   r?   r@   rA   rC   r   r9   r9   r9   r:   rH   o   s   
 rH   parserc                 C   s   | j dddtdd | j dddtdd | j d	td d
d | j dttddd t| } | j dtd dd | j dddd | j dtddd | j dtddd | j ddddd | j dddd d | S )!Nz-iz--input-fileTzThe path or url to a single input file. Currently supports local file paths, or the http protocol (http or https). If a URL is specified, the file should be available via HTTP GET.)requiredtypehelpz-oz--output-filezThe path or url to a single output file. Currently supports local file paths, or web (http or https) urls. If a URL is specified, the file should be available via HTTP PUT.z--output-tmp-dirzMThe directory to store the output file before uploading it to the output URL.)rN   defaultrO   z--response-role	assistantz@The role name to return if `request.add_generation_prompt=True`.z--max-log-lenz^Max number of prompt characters or prompt ID numbers being printed in log.

Default: Unlimitedz--enable-metrics
store_truezEnable Prometheus metrics)actionrO   z--urlz0.0.0.0zLURL to the Prometheus metrics server (only needed if enable-metrics is set).z--porti@  zUPort number for the Prometheus metrics server (only needed if enable-metrics is set).z--enable-prompt-tokens-detailsFz6If set to True, enable prompt_tokens_details in usage.)rS   rP   rO   z--enable-force-include-usagezZIf set to True, include usage on every request (even when stream_options is not specified))add_argumentr@   r   r   add_cli_argsrG   rL   r9   r9   r:   make_arg_parser   sz   		
	rW   c                  C   s   t dd} t|  S )Nz$vLLM OpenAI-Compatible batch runner.)description)r$   rW   
parse_argsrV   r9   r9   r:   rY      s   
rY   z_{desc}: {percentage:3.0f}% Completed | {n_fmt}/{total_fmt} [{elapsed}<{remaining}, {rate_fmt}]
c                   @   s2   e Zd Zdd Zdd Zdd Zdefdd	Zd
S )BatchProgressTrackerc                 C   s   d| _ d | _d S )Nr   )_total_pbarselfr9   r9   r:   __init__   s   
zBatchProgressTracker.__init__c                 C   s   |  j d7  _ d S )N   )r[   r]   r9   r9   r:   	submitted   s   zBatchProgressTracker.submittedc                 C   s   | j r
| j   d S d S N)r\   updater]   r9   r9   r:   	completed   s   zBatchProgressTracker.completedreturnc                 C   s:   t j  pt j dk}t| jddd| td| _| jS )Nr   reqzRunning batch   )totalunitdescminintervaldisable
bar_format)torchdistributedis_initializedget_rankr   r[   _BAR_FORMATr\   )r^   enable_tqdmr9   r9   r:   pbar   s   zBatchProgressTracker.pbarN)r<   r=   r>   r_   ra   rd   r   rt   r9   r9   r9   r:   rZ      s
    rZ   path_or_urlre   c              
      s   |  ds|  dr\t 4 I d H <}|| 4 I d H }| I d H W  d   I d H  W  d   I d H  S 1 I d H s?w   Y  W d   I d H  d S 1 I d H sUw   Y  d S t| dd}| W  d    S 1 sqw   Y  d S )Nhttp://https://utf-8encoding)
startswithaiohttpClientSessiongettextopenread)ru   sessionrespfr9   r9   r:   	read_file   s   &p$r   output_pathbatch_outputsc                    sP   t | ddd}|D ]
}t| |d qW d   dS 1 s!w   Y  dS )z
    Write the responses to a local file.
    output_path: The path to write the responses to.
    batch_outputs: The list of batch outputs to write.
    wrx   ry   fileN)r   printmodel_dump_json)r   r   r   or9   r9   r:   write_local_file   s   
"r   
output_urldata_or_file	from_filec           
         s  d}d}t d|d D ]}ztjtjddd4 I dH }|rit|d;}|j| |d4 I dH }|jd	krDtd
|j d|  W d  I dH  n1 I dH sTw   Y  W d   n1 scw   Y  n3|j| |d4 I dH }|jd	krtd|j d|  W d  I dH  n1 I dH sw   Y  W d  I dH  n1 I dH sw   Y  W q ty }	 z)||k rt	
d||	| t|I dH  ntd| dt|	 d|	W Y d}	~	qd}	~	ww dS )z
    Upload a local file to a URL.
    output_url: The URL to upload the file to.
    data_or_file: Either the data to upload or the path to the file to upload.
    from_file: If True, data_or_file is the path to the file to upload.
    rg   r`   i  )rh   )timeoutNrb)r4   rD   zFailed to upload file.
Status: z
Response: zFailed to upload data.
Status: zPFailed to upload data (attempt %d). Error message: %s.
Retrying in %d seconds...zFailed to upload data (attempt z). Error message: .)ranger|   r}   ClientTimeoutr   putstatus	Exceptionr   loggerrK   asynciosleepr@   )
r   r   r   max_retriesdelayattemptr   r   rJ   er9   r9   r:   upload_data  sl   	

(	
*(r   output_tmp_dirc                    s   |  ds|  dr|du rBtd t }|D ]
}t| |d q|d td|  t| | 	 
dd	d
I dH  dS tjdd|ddd*}td|j t|j|I dH  td|  t| |jdd
I dH  W d   dS 1 syw   Y  dS td|  t| |I dH  dS )a  
    Write batch_outputs to a file or upload to a URL.
    path_or_url: The path or URL to write batch_outputs to.
    batch_outputs: The list of batch outputs to write.
    output_tmp_dir: The directory to store the output file before uploading it
    to the output URL.
    rv   rw   Nz Writing outputs to memory bufferr   r   zUploading outputs to %srx   F)r   r   tmp_batch_output_z.jsonl)r-   rz   dirprefixsuffixz*Writing outputs to temporary local file %sTz Writing outputs to local file %s)r{   r   r/   r   r   r   seekr   r   stripencodetempfileNamedTemporaryFilenamer   )ru   r   r   output_bufferr   r   r9   r9   r:   
write_fileC  s:   


"r   request	error_msgc                 C   s0   t dt  | jttjdt  d|d}|S )Nvllm-vllm-batch-rE   rF   rI   r(   rJ   rK   )rH   r#   r(   rC   r   BAD_REQUEST)r   r   batch_outputr9   r9   r:   make_error_request_outputl  s   

	r   c                    s   t | |S rb   )r   )r   r   r9   r9   r:   make_async_error_request_output{  s   
r   serving_engine_functrackerc                    s   | |j I d H }t|ttttfr(tdt  |jt	|dt  dd d}n#t|t
rEtdt  |jt	|jjdt  d|d}nt|dd}|  |S )Nr   r   )r+   rF   r   r   z'Request must not be sent in stream moder   )r+   
isinstancer   r   r   r   rH   r#   r(   rC   r   rK   coder   rd   )r   r   r   rJ   r   r9   r9   r:   run_request  s:   





r   c                 C   s@   t  }| jj }r||vrtd| dd| dd S d S )Nzinvalid reasoning parser: z (chose from { ,z }))r"   list_registeredstructured_outputs_configreasoning_parserKeyErrorjoin)argsvalid_reasoning_parsersr   r9   r9   r:   validate_run_batch_args  s   
r   engine_clientr   c                    s&   j d ur
 j }n jg} jrt jd}nd } fdd|D }| j}|  I d H }td| t	| |d d}d|v rUt
| | j|d d jj j jt dd d	
nd }d
|v rdt| ||d ddnd }	d|v ort|jdddk}
d
|v sy|
rt| ||d dnd }t }td j g }t jI d H  dD ]}| }|sqt|}|jdkr|d ur|jnd }|d u r|t|dd q|t||| |   q|jdkr|	d ur|	j!nd }|d u r|t|dd q|t||| |   q|j"dr,|d ur|j#nd }|d u r|t|dd q|t||| |   q|j"drZ|d ur;|j$nd }|d u rL|t|dd q|t||| |   q|t|d|j dd q|%  t&j'| I d H }W d    n	1 sw   Y  t( j)| j*I d H  d S )N)max_log_lenc                    s   g | ]	}t | jd qS ))r   
model_path)r   model).0r   r   r9   r:   
<listcomp>  s    zrun_batch.<locals>.<listcomp>zSupported tasks: %s)r   base_model_pathslora_modulesgenerateautodefault_chat_template_kwargs)request_loggerchat_templatechat_template_content_formatr   enable_prompt_tokens_detailsenable_force_include_usager   embed)r   r   r   classify
num_labelsr   r`   )r   score_templatezReading batch from %s...
r0   z/The model does not support Chat Completions APIr   r1   z)The model does not support Embeddings APIr2   z%The model does not support Scores APIr3   z%The model does not support Rerank APIzURL z was used. Supported endpoints: /v1/chat/completions, /v1/embeddings, /score, /rerank .See vllm/entrypoints/openai/api_server.py for supported score/rerank versions.)+served_model_namer   enable_log_requestsr   r   model_configget_supported_tasksr   r/   r   r   response_roler   r   r   r   getattrr   	hf_configr    rZ   
input_filer   r   splitr'   model_validate_jsonr*   create_chat_completionappendr   r   ra   create_embeddingr7   create_score	do_rerankrt   r   gatherr   output_filer   )r   r   served_model_namesr   r   r   supported_tasksopenai_serving_modelsopenai_serving_chatopenai_serving_embeddingenable_serving_rerankingopenai_serving_scoresr   response_futuresrequest_jsonr   chat_handler_fnembed_handler_fnscore_handler_fnrerank_handler_fn	responsesr9   r   r:   	run_batch  s  













r   c              	      s|   ddl m} ddlm} t|  || |jdd4 I d H }t|| I d H  W d   I d H  d S 1 I d H s7w   Y  d S )Nr   )build_async_engine_client)UsageContextF)usage_context disable_frontend_multiprocessing)"vllm.entrypoints.openai.api_serverr   vllm.usage.usage_libr   r   OPENAI_BATCH_RUNNERr   )r   r   r   r   r9   r9   r:   mainb  s   .r  __main__z$vLLM batch processing API version %szargs: %szPrometheus metrics enabled)portaddrzPrometheus metrics disabled)`r   r   argparser   collections.abcr   r   httpr   ior   typingr   r   r|   rn   prometheus_clientr	   pydanticr
   r   pydantic_core.core_schemar   r   vllm.engine.arg_utilsr   r   vllm.engine.protocolr   vllm.entrypoints.loggerr   0vllm.entrypoints.openai.chat_completion.protocolr   r   /vllm.entrypoints.openai.chat_completion.servingr   'vllm.entrypoints.openai.engine.protocolr   r   'vllm.entrypoints.openai.models.protocolr   &vllm.entrypoints.openai.models.servingr   'vllm.entrypoints.pooling.embed.protocolr   r   &vllm.entrypoints.pooling.embed.servingr   'vllm.entrypoints.pooling.score.protocolr   r   r   r   &vllm.entrypoints.pooling.score.servingr    vllm.loggerr!   vllm.reasoningr"   
vllm.utilsr#   vllm.utils.argparse_utilsr$   vllm.versionr%   VLLM_VERSIONr<   r   r&   rA   r'   rC   rH   rW   rY   rr   rZ   r@   r   listr   boolr   r   r   r   r   r   r   r  r   r/   enable_metricsr  r*   runr9   r9   r9   r:   <module>   s   

&N		
5
)


&
 1


