o
    پiL                  	   @   s  U d dl Z d dlZd dlmZ d dlZd dlmZmZ d dlm	Z	m
Z
mZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZmZ dZdZe ZdZdZedde
fddZe ddede
fddZ!e"dkre # Z$e$j%de&dd e$j%de'dd e$j%de&dd e$j%de&dd e(e$Z$e$) Z*e+e*Z,ee-d< e.e,ZeejjjdZej/ee*j0e*j1dee*j2e*j3d dS dS )     N)AsyncGenerator)FastAPIRequest)JSONResponseResponseStreamingResponse)AsyncEngineArgs)AsyncLLMEngine)SamplingParams)random_uuid)adapt_tokenizer)JSONLogitsProcessorRegexLogitsProcessor      z/healthreturnc                      s   t ddS )zHealth check.   status_code)r    r   r   H/home/ubuntu/.local/lib/python3.10/site-packages/outlines/serve/serve.pyhealth0   s   
r   z	/generaterequestc                    sJ  t dusJ |  I dH }|d |dd}|dd}|dd}|dur0t|tg}n|dur;t|tg}ng }tdi |d|i}t }t  ||dt	t
df ffd	d
}|rft| S d}	2 z3 dH W }
|  I dH rt |I dH  tdd  S |
}	qj6 |	dusJ |	j  fdd|	jD }d|i}t|S )a  Generate completion for the request.

    The request should be a JSON object with the following fields:
    - prompt: the prompt to use for the generation.
    - schema: the JSON schema to use for the generation (if regex is not provided).
    - regex: the regex to use for the generation (if schema is not provided).
    - stream: whether to stream the results or not.
    - other fields: the sampling parameters (See `SamplingParams` for details).
    NpromptstreamFschemaregexlogits_processorsr   c                    sR   2 z"3 d H W } | j   fdd| jD }d|i}t|d dV  q6 d S )Nc                       g | ]} |j  qS r   text.0outputr   r   r   
<listcomp>[       z4generate.<locals>.stream_results.<locals>.<listcomp>r     zutf-8)r   outputsjsondumpsencode)request_outputtext_outputsret)results_generatorr$   r   stream_resultsX   s   z generate.<locals>.stream_resultsi  r   c                    r   r   r   r!   r$   r   r   r%   m   r&   zgenerate.<locals>.<listcomp>r    r   )enginer)   popr   	tokenizerr   r
   r   generater   bytesr   is_disconnectedabortr   r   r(   r   )r   request_dictr   json_schemaregex_stringr   sampling_params
request_idr0   final_outputr,   r-   r.   r   )r   r/   r   r4   6   sD   

r4   __main__z--host)typedefaultz--porti@  z--ssl-keyfilez--ssl-certfileengine_args)r3   debug)hostport	log_leveltimeout_keep_alivessl_keyfilessl_certfile)4argparser)   typingr   uvicornfastapir   r   fastapi.responsesr   r   r   vllm.engine.arg_utilsr   vllm.engine.async_llm_enginer	   vllm.sampling_paramsr
   
vllm.utilsr   outlines.models.vllmr   outlines.processorsr   r   TIMEOUT_KEEP_ALIVETIMEOUT_TO_PREVENT_DEADLOCKappr1   r3   getr   postr4   __name__ArgumentParserparseradd_argumentstrintadd_cli_args
parse_argsargsfrom_cli_argsrA   __annotations__from_engine_argsrunrC   rD   rG   rH   r   r   r   r   <module>   sT   
;


