o
    پiG                     @   s   d dl Z d dlZd dlZd dlZd dlZd dlmZmZmZm	Z	 d dl
Z
d dlZd dlmZ d dlmZ d dlmZmZ d dlmZmZ d dlmZ d dlmZmZmZmZmZ d d	lmZ G d
d deZ dd Z!G dd dZ"dS )    N)DictListOptionalUnion)global_config)BaseBackend)get_chat_templateget_chat_template_by_model_path)ChoicesDecisionChoicesSamplingMethod)StreamExecutor)
REGEX_BOOLREGEX_FLOAT	REGEX_INT	REGEX_STRSglSamplingParams)http_requestc                
       s*  e Zd Z			d3dedee dee dee f fddZdd	 Zd
d Zdd Zdd Z	defddZ
dd Zdd ZdefddZdefddZdefddZdedefdd Zdedefd!d"Zded#ee d$ed%ed&ef
d'd(Zd)ee d*efd+d,Zdefd-d.Zdefd/d0Zd1d2 Z  ZS )4RuntimeEndpointNbase_urlapi_keyverifychat_template_namec                    st   t    d| _|| _|| _|| _t| jd | j| jd}| | | | _	|r0t
|| _d S t| j	d | _d S )NTz/get_model_infor   r   
model_path)super__init__support_concate_and_appendr   r   r   r   _assert_successjson
model_infor   chat_templater	   )selfr   r   r   r   res	__class__ X/home/ubuntu/.local/lib/python3.10/site-packages/sglang/lang/backend/runtime_endpoint.pyr      s"   



zRuntimeEndpoint.__init__c                 C   s
   | j d S )Nr   )r   r!   r%   r%   r&   get_model_name8   s   
zRuntimeEndpoint.get_model_namec                 C   s(   t | jd | j| jdd}| | d S )Nz/flush_cachePOST)r   r   methodr   r   r   r   r   r!   r"   r%   r%   r&   flush_cache;   s   zRuntimeEndpoint.flush_cachec                 C   s*   t | jd | j| jd}| | | S )N/get_server_infor   )r   r   r   r   r   r   r,   r%   r%   r&   get_server_infoD   s   
zRuntimeEndpoint.get_server_infoc                 C   s   | j S N)r    r'   r%   r%   r&   r   M   s   z!RuntimeEndpoint.get_chat_template
prefix_strc                 C   s2   t | jd |ddid| j| jd}| | d S )N	/generatemax_new_tokensr   textsampling_paramsr   r   r   r+   )r!   r1   r"   r%   r%   r&   cache_prefixP   s   zRuntimeEndpoint.cache_prefixc                 C   &   t | jd | j| jd}| | d S )Nz/start_profiler   r+   r,   r%   r%   r&   start_profileY      zRuntimeEndpoint.start_profilec                 C   r9   )Nz/stop_profiler   r+   r,   r%   r%   r&   stop_profilea   r;   zRuntimeEndpoint.stop_profilesc                 C   D   |j ddid}| || t| jd || j| jd}| | d S Nr3   r   r4   r2   r7   text__add_imagesr   r   r   r   r   r!   r=   datar"   r%   r%   r&   commit_lazy_operationsi      z&RuntimeEndpoint.commit_lazy_operationsc                 C   r>   r?   r@   rC   r%   r%   r&   
fill_imaget   rF   zRuntimeEndpoint.fill_imager6   c                 C   s   |j d u rd S |jdkrg |_d }|j dtfv r#t}|jddg n.|j dtfv r5t}|jddg n|j dtfv r?t}n|j dt	fv rIt
}ntd|j  |d urg|jd urgtd	|j  d
|j  ||_d S )Nr%   int 
floatstrboolzInvalid dtype: z>Both dtype and regex are set. Only dtype will be used. dtype: z	, regex: )dtypestoprH   r   extendrK   r   rL   r   rM   r   RuntimeErrorregexwarningswarn)r!   r6   dtype_regexr%   r%   r&   _handle_dtype_to_regex   s*   


z&RuntimeEndpoint._handle_dtype_to_regexc           	      C   s   |  | |jtjtjd| d}dD ]}t||d }|d ur&|||< q| || t| j	d || j
| jd}| | | }|d }||d fS )Nskip_special_tokensspaces_between_special_tokensr4   return_logproblogprob_start_lentop_logprobs_numreturn_text_in_logprobsr2   r7   r5   	meta_info)rV   rA   r   skip_special_tokens_in_output$spaces_between_special_tokens_in_outto_srt_kwargsgetattrrB   r   r   r   r   r   r   )	r!   r=   r6   rD   itemvaluer"   objcompr%   r%   r&   generate   s0   
	
zRuntimeEndpoint.generatec                 c   s   |  | |jtjtjd| d}dD ]}t||d }|d ur'|||< qd|d< | || t| j	d |d| j
| jd}| | d}|jd	d
D ]8}|d}|r|dr|dkrb d S t|dd  d}|d |d  }	|d }
|t|	7 }|	|
fV  qMd S )NrW   r4   rZ   Tstreamr2   )r   ri   r   r   r   F)decode_unicodeutf-8data:zdata: [DONE]   rJ   r5   r_   )rV   rA   r   r`   ra   rb   rc   rB   r   r   r   r   r   
iter_linesdecode
startswithr   loadsstriplen)r!   r=   r6   rD   rd   re   r"   poschunk
chunk_textr_   r%   r%   r&   generate_stream   sJ   
	


zRuntimeEndpoint.generate_streamchoicestemperaturechoices_methodreturnc                    sp  |dksJ  j ddid}|  |}|d d }t|d d} fdd	|D ddd
dd|d}|  |}dd	 |D }	dd	 |D }
dd	 |D }tt|	D ]8}|	| d d } j |r|	| d d }|| t|	|  | t|	| d  ||< |	| dd  |	|< qT|jrdd	 |	D }|ddidd}|  |}dd	 |D }nd }||||	|
|dS )Ngh㈵>r3   r   r4   r_   prompt_tokens   c                    s   g | ]} j | qS r%   )rA   ).0cr=   r%   r&   
<listcomp>	  s    z*RuntimeEndpoint.select.<locals>.<listcomp>)r3   ry   T)r5   r6   r[   r^   r\   c                 S      g | ]}|d  d qS r_   input_token_logprobsr%   r~   rr%   r%   r&   r         c                 S   r   )r_   output_token_logprobsr%   r   r%   r%   r&   r     r   c                 S   s   g | ]
}t |d  d qS r   )"compute_normalized_prompt_logprobsr   r%   r%   r&   r     s       c                 S   s   g | ]	}d d |D qS )c                 S   s   g | ]}|d  qS )r   r%   )r~   elr%   r%   r&   r   (  s    z5RuntimeEndpoint.select.<locals>.<listcomp>.<listcomp>r%   )r~   sublr%   r%   r&   r   (  s    )	input_idsr6   r[   c                 S   r   r   r%   r   r%   r%   r&   r   /  s    )rx   normalized_prompt_logprobsr   r   unconditional_token_logprobs)rA   _generate_http_requestmaxrangers   endswithrequires_unconditional_logprobs)r!   r=   rx   ry   rz   rD   rf   
prompt_lenr\   r   r   r   ihealed_token_strhealed_token_logprobr   r   r%   r   r&   select   s`   
zRuntimeEndpoint.selectsrc_ridsdst_ridc                 C   s.   t | jd ||d| j| jd}| | d S )Nz/concate_and_append_request)r   r   r7   r+   )r!   r   r   r"   r%   r%   r&   concatenate_and_append=  s   z&RuntimeEndpoint.concatenate_and_appendc                 C   s8   |  || t| jd || j| jd}| | | S )Nr2   r7   )rB   r   r   r   r   r   r   rC   r%   r%   r&   r   F  s   
z&RuntimeEndpoint._generate_http_requestc                 C   s6   |j rt|j dksJ d|j d d |d< d S d S )Nr   zOnly support one image.r   
image_data)images_rs   )r!   r=   rD   r%   r%   r&   rB   Q  s   zRuntimeEndpoint._add_imagesc                 C   sB   |j dkrz	| }W t| tjy   |j}Y t|w d S )N   )status_coder   JSONDecodeErrorr5   rQ   )r!   r"   contentr%   r%   r&   r   V  s   

zRuntimeEndpoint._assert_success)NNN)__name__
__module____qualname__rL   r   r   r(   r-   r/   r   r8   r:   r<   r   rE   rG   r   rV   rh   rw   r   rK   r   r
   r   r   r   rB   r   __classcell__r%   r%   r#   r&   r      s`    			 
'
2
E	r   c                 C   s   dd | D }t |t| S )Nc                 S   s   g | ]
}|d  r|d  qS )r   r%   )r~   xr%   r%   r&   r   `  s    z6compute_normalized_prompt_logprobs.<locals>.<listcomp>)sumrs   )input_logprobsvaluesr%   r%   r&   r   _  s   r   c                   @   s(  e Zd ZdZ		d%dedefddZdd	 Zd
d Zdd Z	defddZ
dd Z	d&dedee fddZeZ					d'deeee f dee deeee ef  deeee ef  deeee ef  deeee   fddZdeeee ee eee  f fdd Zd!d" Zd#d$ ZdS )(Runtimea5  
    A wrapper for the HTTP server.
    This is used for launching the server in a python program without
    using the command line interface.

    It is mainly used for the frontend language.
    You should use the Engine class if you want to do normal offline processing without the frontend language.
    error     r@	log_levellaunch_timeoutc              	   O   s  ddl m} ddlm} ddlm} ||d|i|| _t| jjdD ]}||r+ nq#|| j_| j	 | _	| j	d | _
d| _td	}	|	j|| jfd
}
|
  |
j| _t| j t }t N}t | |k rz|| j	 d}|jdkr}W n-W n
 tjy   Y nw |
 s|   tdtd t | |k sl|   tdW d   n1 sw   Y  t| j	| _dS )aL  See the arguments in server_args.py::ServerArgs

        Args:
            log_level: Log level for the server.
            timeout: Timeout in seconds for waiting for the server to start.
            *args: Additional arguments passed to ServerArgs.
            **kwargs: Additional keyword arguments passed to ServerArgs.
        r   )launch_server)
ServerArgs)is_port_availabler   i@  r2   Nspawn)targetargsz/health_generater   z;Initialization failed. Please see the error messages above.r}   z1Server failed to start within the timeout period.)"sglang.srt.entrypoints.http_serverr   sglang.srt.server_argsr   sglang.srt.utilsr   server_argsr   porturlgenerate_urlpidmultiprocessingget_contextProcessstartatexitregistershutdowntimerequestsSessiongetr   RequestExceptionis_aliverQ   sleepTimeoutErrorr   endpoint)r!   r   r   r   kwargsr   r   r   r   ctxproc
start_timesessionresponser%   r%   r&   r   n  sV   



zRuntime.__init__c                 C   s.   ddl m} | jd ur|| j d | _d S d S )Nr   )kill_process_tree)r   r   r   )r!   r   r%   r%   r&   r     s
   


zRuntime.shutdownc                 C      | j   d S r0   )r   r:   r'   r%   r%   r&   r:        zRuntime.start_profilec                 C   r   r0   )r   r<   r'   r%   r%   r&   r<     r   zRuntime.stop_profileprefixc                 C   s   | j | d S r0   )r   r8   )r!   r   r%   r%   r&   r8     s   zRuntime.cache_prefixc                 C   s,   ddl m} || jj| jj| jj| jjdS )Nr   )get_tokenizer)tokenizer_modetrust_remote_coderevision)&sglang.srt.utils.hf_transformers_utilsr   r   tokenizer_pathr   r   r   )r!   r   r%   r%   r&   r     s   zRuntime.get_tokenizerNpromptr6   c              
   C  sP  | j jr||dd}n||dd}d}tjdd}tj|dd4 I d H v}|j| j|d4 I d H S}|j 2 zA3 d H W \}}	|	d	}|ry|
d
ry|dkrR n)t|dd  d}
d|
v rv|
d |d  }|ro|V  |t|7 }q8|
V  q86 W d   I d H  n1 I d H sw   Y  W d   I d H  d S 1 I d H sw   Y  d S )NT)r   r6   ri   )r5   r6   ri   r   i0*  )total)timeout	trust_envr   rk   rl   zdata: [DONE]

rm   rJ   r5   )r   skip_tokenizer_initaiohttpClientTimeoutClientSessionpostr   r   iter_chunksro   rp   r   rq   rr   rs   )r!   r   r6   	json_datart   r   r   r   ru   _rD   curr%   r%   r&   async_generate  s<   
*.zRuntime.async_generateFr[   r\   r]   	lora_pathc           	      C   sR   ||||||d}t |trt|t|ksJ tj| jd |d}t| S )N)r5   r6   r[   r\   r]   r   r2   r   )
isinstancelistrs   r   r   r   r   dumps)	r!   r   r6   r[   r\   r]   r   r   r   r%   r%   r&   rh     s   
zRuntime.generatec                 C   s*   d|i}t j| jd |d}t| S )Nr5   z/encoder   )r   r   r   r   r   )r!   r   r   r   r%   r%   r&   encode  s   zRuntime.encodec              
      s   t  4 I d H W}|| j d4 I d H 3}|jdkr8| I d H W  d   I d H  W  d   I d H  S | I d H }td|d d  1 I d H sPw   Y  W d   I d H  d S 1 I d H sfw   Y  d S )Nr.   r   zFailed to get server info. r   message)r   r   r   r   statusr   rQ   )r!   r   r   
error_datar%   r%   r&   r/     s   
.zRuntime.get_server_infoc                 C   s   |    d S r0   )r   r'   r%   r%   r&   __del__  s   zRuntime.__del__)r   r   r0   )NFNNN)r   r   r   __doc__rL   rK   r   r   r:   r<   r8   r   r   r   r   add_requestr   r   rM   rH   rh   r   r/   r   r%   r%   r%   r&   r   d  sX    
F
$

r   )#r   r   r   r   rS   typingr   r   r   r   r   r   sglang.global_configr    sglang.lang.backend.base_backendr   sglang.lang.chat_templater   r	   sglang.lang.choicesr
   r   sglang.lang.interpreterr   sglang.lang.irr   r   r   r   r   sglang.utilsr   r   r   r   r%   r%   r%   r&   <module>   s(      G