o
    i}                     @   s  U d dl Z d dlZd dlZd dlmZ d dlmZmZ d dlm	Z	 d dl
mZmZ d dlmZmZ d dlmZ d dlZd dlZd dlmZ d d	lmZ d d
lmZmZmZmZ d dlmZ d dlmZ d dl m!Z!m"Z" d dl#m$Z$ d dl%m&Z& d dl'm(Z(m)Z) d dl*m+Z+ d dl,m-Z-m.Z.m/Z/ d dl0m1Z1 d dl2m3Z3 d dl4m5Z5m6Z6m7Z7m8Z8m9Z9m:Z: d dl;m<Z<m=Z= d dl>m?Z?m@Z@ d dlAmBZB d dlCmDZDmEZEmFZFmGZG d dlHmIZI d dlJmKZK d dlLmMZM d dlNmOZO d dlPmQZQ d dlRmSZS d d lTmUZV eKeWZXG d!d" d"e5ZYG d#d$ d$e8ZZe(e?B eFB eDB eYB eZB Z[ee\d%< G d&d' d'e/Z]G d(d) d)e/Z^G d*d+ d+e/Z_d,eSfd-d.Z`d/d0 Zad1ZbG d2d3 d3Zcd4edd5edfd6d7Zed8edd9efe_ d5dfd:d;Zgd<edd=edd>ehd5dfd?d@Zid4edd9efe_ dAedd5dfdBdCZjdDedd5ekfdEdFZldGe]dHedd5e_fdIdJZmdGe]dHedd5e_fdKdLZndMedGe]dNecd5e_fdOdPZoeegef Zpee\dQ< 	dndGe]dNecdReedgehf dSeg edB f dTepdB d5ee_ dB fdUdVZqdWehd5epfdXdYZrdZe$d[ed\efe1 d]e&dB d^eseOd_f d5etedetedef f fd`daZudbdc ZvdZe$d[ed5dfdddeZwd[efdfdgZxeWdhkrHea ZyeXzdieV eXzdjey eyj{r:eXzdk eeyj|eyj}dl neXzdm e ~exey dS dS )o    N)	Namespace)	AwaitableCallable)
HTTPStatus)BytesIOStringIO)Any	TypeAlias)urlparse)
UploadFile)start_http_server)FieldTypeAdapterfield_validatormodel_validator)ValidationInfo)tqdm)AsyncEngineArgsoptional_type)EngineClient)RequestLogger)ChatCompletionRequestChatCompletionResponse)OpenAIServingChat)	ErrorInfoErrorResponseOpenAIBaseModel)BaseModelPath)OpenAIServingModels)TranscriptionRequestTranscriptionResponseTranscriptionResponseVerboseTranslationRequestTranslationResponseTranslationResponseVerbose)OpenAIServingTranscriptionOpenAIServingTranslation)EmbeddingRequestEmbeddingResponse)OpenAIServingEmbedding)RerankRequestRerankResponseScoreRequestScoreResponse)ServingScores)init_logger)ReasoningParserManager)SupportedTask)random_uuid)FlexibleArgumentParser)__version__c                   @   Z   e Zd ZU dZedddZeed< edddZe	dB ed	< e
d
dedefddZdS )BatchTranscriptionRequestz
    Batch transcription request that uses file_url instead of file.

    This class extends TranscriptionRequest but replaces the file field
    with file_url to support batch processing from audio files written in JSON format.
    .HEither a URL of the audio or a data URL with base64 encoded audio data. descriptionfile_urlNTdefaultexcludefilebeforemodedatac                 C      t |trd|v rtd|S z4Ensure file field is not provided in batch requests.r>   zLThe 'file' field is not supported in batch requests. Use 'file_url' instead.
isinstancedict
ValueErrorclsrB    rK   W/home/ubuntu/vllm_env/lib/python3.10/site-packages/vllm/entrypoints/openai/run_batch.pyvalidate_no_fileY   
   z*BatchTranscriptionRequest.validate_no_file__name__
__module____qualname____doc__r   r:   str__annotations__r>   r   r   classmethodr   rM   rK   rK   rK   rL   r6   G      
 r6   c                   @   r5   )BatchTranslationRequestz
    Batch translation request that uses file_url instead of file.

    This class extends TranslationRequest but replaces the file field
    with file_url to support batch processing from audio files written in JSON format.
    .r7   r8   r:   NTr;   r>   r?   r@   rB   c                 C   rC   rD   rE   rI   rK   rK   rL   rM   w   rN   z(BatchTranslationRequest.validate_no_filerO   rK   rK   rK   rL   rX   e   rW   rX   BatchRequestInputBodyc                   @   sT   e Zd ZU dZeed< eed< eed< eed< edddede	d	e
fd
dZdS )BatchRequestInputz
    The per-line object of the batch input file.

    NOTE: Currently only the `/v1/chat/completions` endpoint is supported.
    	custom_idmethodurlbodyplainr@   valueinfoc                 C   s   |j d }|dkrt|S |dkrtt|S |dr%tt|S |dr/t|S |dkr8t	|S |dkrAt
|S tt|S )Nr]   /v1/chat/completions/v1/embeddings/score/rerank/v1/audio/transcriptions/v1/audio/translations)rB   r   model_validater   r'   validate_pythonendswithr,   r*   r6   rX   rY   )rJ   r`   ra   r]   rK   rK   rL   check_type_for_url   s   






z$BatchRequestInput.check_type_for_urlN)rP   rQ   rR   rS   rT   rU   rY   r   rV   r   r   rk   rK   rK   rK   rL   rZ      s   
 
rZ   c                   @   sN   e Zd ZU dZeed< eed< dZee	B e
B eB eB eB eB eB dB ed< dS )BatchResponseData   status_code
request_idNr^   )rP   rQ   rR   rn   intrU   rT   r^   r   r(   r-   r+   r    r!   r#   r$   rK   rK   rK   rL   rl      s.   
 
rl   c                   @   s:   e Zd ZU dZeed< eed< edB ed< edB ed< dS )BatchRequestOutputzA
    The per-line object of the batch output and error files
    idr[   Nresponseerror)rP   rQ   rR   rS   rT   rU   rl   r   rK   rK   rK   rL   rq      s   
 rq   parserc                 C   s   | j dddtdd | j dddtdd | j d	td d
d | j dttddd t| } | j dtd dd | j dddd | j dtddd | j dtddd | j ddddd | j dddd d | S )!Nz-iz--input-fileTzThe path or url to a single input file. Currently supports local file paths, or the http protocol (http or https). If a URL is specified, the file should be available via HTTP GET.)requiredtypehelpz-oz--output-filezThe path or url to a single output file. Currently supports local file paths, or web (http or https) urls. If a URL is specified, the file should be available via HTTP PUT.z--output-tmp-dirzMThe directory to store the output file before uploading it to the output URL.)rw   r<   rx   z--response-role	assistantz@The role name to return if `request.add_generation_prompt=True`.z--max-log-lenz^Max number of prompt characters or prompt ID numbers being printed in log.

Default: Unlimitedz--enable-metrics
store_truezEnable Prometheus metrics)actionrx   z--urlz0.0.0.0zLURL to the Prometheus metrics server (only needed if enable-metrics is set).z--porti@  zUPort number for the Prometheus metrics server (only needed if enable-metrics is set).z--enable-prompt-tokens-detailsFz6If set to True, enable prompt_tokens_details in usage.)r{   r<   rx   z--enable-force-include-usagezZIf set to True, include usage on every request (even when stream_options is not specified))add_argumentrT   r   r   add_cli_argsrp   ru   rK   rK   rL   make_arg_parser   sz   		
	r   c                  C   s   t dd} t|  S )Nz$vLLM OpenAI-Compatible batch runner.r8   )r3   r   
parse_argsr~   rK   rK   rL   r   ,  s   
r   z_{desc}: {percentage:3.0f}% Completed | {n_fmt}/{total_fmt} [{elapsed}<{remaining}, {rate_fmt}]
c                   @   s2   e Zd Zdd Zdd Zdd Zdefdd	Zd
S )BatchProgressTrackerc                 C   s   d| _ d | _d S )Nr   )_total_pbarselfrK   rK   rL   __init__9  s   
zBatchProgressTracker.__init__c                 C   s   |  j d7  _ d S )N   )r   r   rK   rK   rL   	submitted=  s   zBatchProgressTracker.submittedc                 C   s   | j r
| j   d S d S N)r   updater   rK   rK   rL   	completed@  s   zBatchProgressTracker.completedreturnc                 C   s:   t j  pt j dk}t| jddd| td| _| jS )Nr   reqzRunning batch   )totalunitdescminintervaldisable
bar_format)torchdistributedis_initializedget_rankr   r   _BAR_FORMATr   )r   enable_tqdmrK   rK   rL   pbarD  s   zBatchProgressTracker.pbarN)rP   rQ   rR   r   r   r   r   r   rK   rK   rK   rL   r   8  s
    r   path_or_urlr   c              
      s   |  ds|  dr\t 4 I d H <}|| 4 I d H }| I d H W  d   I d H  W  d   I d H  S 1 I d H s?w   Y  W d   I d H  d S 1 I d H sUw   Y  d S t| dd}| W  d    S 1 sqw   Y  d S )Nhttp://https://utf-8encoding)
startswithaiohttpClientSessiongettextopenread)r   sessionrespfrK   rK   rL   	read_fileS  s   &p$r   output_pathbatch_outputsc                    sP   t | ddd}|D ]
}t| |d qW d   dS 1 s!w   Y  dS )z
    Write the responses to a local file.
    output_path: The path to write the responses to.
    batch_outputs: The list of batch outputs to write.
    wr   r   r>   N)r   printmodel_dump_json)r   r   r   orK   rK   rL   write_local_file\  s   
"r   
output_urldata_or_file	from_filec           
         s  d}d}t d|d D ]}ztjtjddd4 I dH }|rit|d;}|j| |d4 I dH }|jd	krDtd
|j d|  W d  I dH  n1 I dH sTw   Y  W d   n1 scw   Y  n3|j| |d4 I dH }|jd	krtd|j d|  W d  I dH  n1 I dH sw   Y  W d  I dH  n1 I dH sw   Y  W q ty }	 z)||k rt	
d||	| t|I dH  ntd| dt|	 d|	W Y d}	~	qd}	~	ww dS )z
    Upload a local file to a URL.
    output_url: The URL to upload the file to.
    data_or_file: Either the data to upload or the path to the file to upload.
    from_file: If True, data_or_file is the path to the file to upload.
    r   r   i  )r   )timeoutNrb)rB   rm   zFailed to upload file.
Status: z
Response: zFailed to upload data.
Status: zPFailed to upload data (attempt %d). Error message: %s.
Retrying in %d seconds...zFailed to upload data (attempt z). Error message: .)ranger   r   ClientTimeoutr   putstatus	Exceptionr   loggerrt   asynciosleeprT   )
r   r   r   max_retriesdelayattemptr   r>   rs   erK   rK   rL   upload_datak  sl   	

(	
*(r   output_tmp_dirc                    s   |  ds|  dr|du rBtd t }|D ]
}t| |d q|d td|  t| | 	 
dd	d
I dH  dS tjdd|ddd*}td|j t|j|I dH  td|  t| |jdd
I dH  W d   dS 1 syw   Y  dS td|  t| |I dH  dS )a  
    Write batch_outputs to a file or upload to a URL.
    path_or_url: The path or URL to write batch_outputs to.
    batch_outputs: The list of batch outputs to write.
    output_tmp_dir: The directory to store the output file before uploading it
    to the output URL.
    r   r   Nz Writing outputs to memory bufferr   r   zUploading outputs to %sr   F)r   r   tmp_batch_output_z.jsonl)rA   r   dirprefixsuffixz*Writing outputs to temporary local file %sTz Writing outputs to local file %s)r   r   ra   r   r   r   seekr   r   stripencodetempfileNamedTemporaryFilenamer   )r   r   r   output_bufferr   r   rK   rK   rL   
write_file  s:   


"r   r]   c              
      s8  t | }|jdkr-d| v r&| dd\}}d|v rt|S td| td|  |jdv rt 4 I dH L}|| 4 I dH ,}|j	d	krUt
d
|  d|j	 | I dH W  d  I dH  W  d  I dH  S 1 I dH svw   Y  W d  I dH  dS 1 I dH sw   Y  dS td|j d)z
    Download data from a URL or decode from a data URL.

    Args:
        url: Either an HTTP/HTTPS URL or a data URL (data:...;base64,...)

    Returns:
        Data as bytes
    rB   ,r   base64zUnsupported data URL encoding: zInvalid data URL format: )httphttpsNrm   z"Failed to download data from URL: z
. Status: zUnsupported URL scheme: z&. Supported schemes: http, https, data)r
   schemesplitr   	b64decoderH   r   r   r   r   r   r   )r]   parsedheaderrB   r   r   rK   rK   rL   download_bytes_from_url  s2   






pr   request	error_msgc                 C   s0   t dt  | jttjdt  d|d}|S )Nvllm-vllm-batch-rn   ro   rr   r[   rs   rt   )rq   r2   r[   rl   r   BAD_REQUEST)r   r   batch_outputrK   rK   rL   make_error_request_output  s   

	r   c                    s   t | |S r   )r   )r   r   rK   rK   rL   make_async_error_request_output  s   
r   serving_engine_functrackerc              
      s   | |j I d H }t|tttttttt	fr,t
dt  |jt|dt  dd d}n#t|trIt
dt  |jt|jjdt  d|d}nt|dd}|  |S )Nr   r   )r^   ro   r   r   z'Request must not be sent in stream moder   )r^   rF   r   r(   r-   r+   r    r!   r#   r$   rq   r2   r[   rl   r   rt   coder   r   )r   r   r   rs   r   rK   rK   rL   run_request	  sJ   




r   	WrapperFnurl_matcherhandler_getter
wrapper_fnc                 C   sX   || j sdS | }|du rd| j  }t| |dS |dur"||}|  t|| |S )a  
    Generic handler for endpoint requests.

    Args:
        request: The batch request input
        tracker: Progress tracker for the batch
        url_matcher: Function that takes a URL and returns True if it matches
        handler_getter: Function that returns the handler function or None
        wrapper_fn: Optional function to wrap the handler (e.g., for transcriptions)

    Returns:
        Awaitable[BatchRequestOutput] if the request was handled,
        None if URL didn't match
    Nz!Model does not support endpoint: r   )r]   r   r   r   )r   r   r   r   r   
handler_fnr   rK   rK   rL   handle_endpoint_request;  s   
r   is_translationc                    s   dt f fdd}|S )a  
    Factory function to create a wrapper for transcription/translation handlers.
    The wrapper converts BatchTranscriptionRequest or BatchTranslationRequest
    to TranscriptionRequest or TranslationRequest and calls the appropriate handler.

    Args:
        is_translation: If True, process as translation; otherwise process
            as transcription

    Returns:
        A function that takes a handler and returns a wrapped handler
    r   c                    s0   dt tB dttB tB tB tB f fdd}|S )Nbatch_request_bodyr   c              
      s   z9t | jI d H }tt|dd}| jdhd}||d< r-t|} ||I d H W S t|} ||I d H W S  tyf } z rFdnd}t	t
d| d	t| d
tjjddW  Y d }~S d }~ww )Nz	audio.bin)r>   filenamer:   )r=   r>   translationtranscriptionzFailed to process z: BadRequestError)messagerw   r   )rt   )r   r:   r   r   
model_dumpr"   rh   r   r   r   r   rT   r   r   r`   )r   
audio_data	mock_filerequest_dicttranslation_requesttranscription_requestr   	operation)r   r   rK   rL   transcription_wrappero  s:   	zJmake_transcription_wrapper.<locals>.wrapper.<locals>.transcription_wrapper)r6   rX   r    r!   r#   r$   r   )r   r  r   )r   rL   wrappern  s   .z+make_transcription_wrapper.<locals>.wrapper)r   )r   r  rK   r  rL   make_transcription_wrapper`  s   1r  engine_clientargsbase_model_pathsrequest_loggersupported_tasks.c           	         s|  | j }t| |dd}d|v r%t| ||j|dd|jj|j|jt|ddd
nd d|v r4t	| ||dddndd	|v oBt|j
d
ddk}d|v sI|rQt| ||ddndd|v r`t| |||jdndd|v rot| |||jdnddd  fdddddd fdddddd fdddddd fdddddd fddtddddd fddtd ddd!}|S )"a  
    Build the endpoint registry with all serving objects and handler configurations.

    Args:
        engine_client: The engine client
        args: Command line arguments
        base_model_paths: List of base model paths
        request_logger: Optional request logger
        supported_tasks: Tuple of supported tasks

    Returns:
        Dictionary mapping endpoint keys to their configurations
    N)r  r  lora_modulesgenerateautodefault_chat_template_kwargs)r	  chat_templatechat_template_content_formatreasoning_parserenable_prompt_tokens_detailsenable_force_include_usager  embed)r	  r  r  classify
num_labelsr   r   )r	  score_templater   )r	  r  c                 S      | dkS )Nrb   rK   r]   rK   rK   rL   <lambda>      z)build_endpoint_registry.<locals>.<lambda>c                          d ur j S d S r   )create_chat_completionrK   )openai_serving_chatrK   rL   r       r   r   r   c                 S   r  )Nrc   rK   r  rK   rK   rL   r    r  c                      r  r   )create_embeddingrK   )openai_serving_embeddingrK   rL   r    r  c                 S   
   |  dS )Nrd   rj   r  rK   rK   rL   r       
 c                      r  r   )create_scorerK   openai_serving_scoresrK   rL   r    r  c                 S   r#  )Nre   r$  r  rK   rK   rL   r  "  r%  c                      r  r   )	do_rerankrK   r'  rK   rL   r  #  r  c                 S   r  )Nrf   rK   r  rK   rK   rL   r  +  r  c                      r  r   )create_transcriptionrK   )openai_serving_transcriptionrK   rL   r  ,  r  Fr  c                 S   r  )Nrg   rK   r  rK   rK   rL   r  4  r  c                      r  r   )create_translationrK   )openai_serving_translationrK   rL   r  5  r  T)completions
embeddingsscorereranktranscriptionstranslations)model_configr   r   response_rolestructured_outputs_configr  r  r  getattrr)   	hf_configr.   r%   r&   r  )	r  r  r  r	  r
  r4  openai_serving_modelsenable_serving_rerankingendpoint_registryrK   )r  r"  r(  r+  r-  rL   build_endpoint_registry  s   










9r<  c                 C   s@   t  }| jj }r||vrtd| dd| dd S d S )Nzinvalid reasoning parser: z (chose from { r   z }))r0   list_registeredr6  r  KeyErrorjoin)r  valid_reasoning_parsersr  rK   rK   rL   validate_run_batch_argsA  s   
rA  c                    s   j d ur
 j }n jg} jrt jd}nd } fdd|D }|  I d H }td| t|  |||d}t	 }td j
 g }t j
I d H  dD ]G}	|	 }	|	s[qRt|	}
|
jdd	 }d }||v r|| }t|
||d
 |d |d d}|d ur|| qR|t|
d|
j dd qR|  tj| I d H }W d    n1 sw   Y  t j| jI d H  d S )N)max_log_lenc                    s   g | ]	}t | jd qS ))r   
model_path)r   model).0r   r  rK   rL   
<listcomp>Z  s    zrun_batch.<locals>.<listcomp>zSupported tasks: %s)r  r  r  r	  r
  zReading batch from %s...
/r   r   r   r   zURL z was used. Supported endpoints: /v1/chat/completions, /v1/embeddings, /v1/audio/transcriptions, /v1/audio/translations, /score,  /rerank. See vllm/entrypoints/openai/api_server.py for supported score/rerank versions.r   )served_model_namerD  enable_log_requestsr   rB  get_supported_tasksr   ra   r<  r   
input_filer   r   r   rZ   model_validate_jsonr]   r   appendr   r   r   gatherr   output_filer   )r  r  served_model_namesr	  r  r
  r;  r   response_futuresrequest_jsonr   endpoint_keyresultendpoint_config	responsesrK   rF  rL   	run_batchL  sd   



rZ  c              	      s|   ddl m} ddlm} t|  || |jdd4 I d H }t|| I d H  W d   I d H  d S 1 I d H s7w   Y  d S )Nr   )build_async_engine_client)UsageContextF)usage_context disable_frontend_multiprocessing)"vllm.entrypoints.openai.api_serverr[  vllm.usage.usage_libr\  rA  OPENAI_BATCH_RUNNERrZ  )r  r[  r\  r  rK   rK   rL   main  s   .rb  __main__z$vLLM batch processing API version %szargs: %szPrometheus metrics enabled)portaddrzPrometheus metrics disabledr   )r   r   r   argparser   collections.abcr   r   r   r   ior   r   typingr   r	   urllib.parser
   r   r   fastapir   prometheus_clientr   pydanticr   r   r   r   pydantic_core.core_schemar   r   vllm.engine.arg_utilsr   r   vllm.engine.protocolr   vllm.entrypoints.loggerr   0vllm.entrypoints.openai.chat_completion.protocolr   r   /vllm.entrypoints.openai.chat_completion.servingr   'vllm.entrypoints.openai.engine.protocolr   r   r   'vllm.entrypoints.openai.models.protocolr   &vllm.entrypoints.openai.models.servingr   /vllm.entrypoints.openai.speech_to_text.protocolr   r    r!   r"   r#   r$   .vllm.entrypoints.openai.speech_to_text.servingr%   r&   'vllm.entrypoints.pooling.embed.protocolr'   r(   &vllm.entrypoints.pooling.embed.servingr)   'vllm.entrypoints.pooling.score.protocolr*   r+   r,   r-   &vllm.entrypoints.pooling.score.servingr.   vllm.loggerr/   vllm.reasoningr0   
vllm.tasksr1   
vllm.utilsr2   vllm.utils.argparse_utilsr3   vllm.versionr4   VLLM_VERSIONrP   r   r6   rX   rY   rU   rZ   rl   rq   r   r   r   r   rT   r   listr   boolr   r   bytesr   r   r   r   r   r   r  tuplerG   r<  rA  rZ  rb  r  ra   enable_metricsrd  r]   runrK   rK   rK   rL   <module>   s*  
 

*N		
5
)+


/

%B

  
M


