o
    پi                     @   sD  d Z ddlZddlZddlZddlZddlZddlZddlZddlZddl	m
Z
 ddlmZ ddlmZmZmZmZmZ ddlZddlmZ ddlmZmZmZmZmZmZmZmZmZm Z m!Z!m"Z"m#Z#m$Z$m%Z% ddl&m'Z'm(Z(m)Z) d	d
 Z*		dddZ+	dddZ,dd Z-dd Z.G dd dZ/G dd dZ0G dd dZ1dS )z*The interpreter that executes SGL programs    N)ThreadPoolExecutor)contextmanager)AnyCallableDictListOptional)global_config)SglCommitLazySglConcateAndAppendSglConstantTextSglExprSglExprListSglGenSglImageSglRoleBegin
SglRoleEnd	SglSelectSglSeparateReasoningSglVariableSglVarScopeBeginSglVarScopeEndSglVideo)encode_image_base64encode_video_base64get_exception_tracebackc              
   C   s   z%z|j | g|R i || _W n ty } z|d }~ww W | j  n| j  w |r3| j  tjdkr@t| 	  d S d S )N   )
func	ret_value	Exceptionstream_executorendsyncr	   	verbosityprinttext)stateprogram	func_argsfunc_kwargsr"   e r+   K/home/ubuntu/.local/lib/python3.10/site-packages/sglang/lang/interpreter.pyrun_internal*   s   

r-   FTc              	   C   s   t |dr|j}|d usJ d|| j t|||d || j|d}t|}	|r:tjt	|	| |||fd}
|

  |	S t	|	| ||| |	S )NendpointzPlease specify a backend)chat_templatestreamnum_api_spec_tokens
use_threadtargetargs)hasattrr.   updatebind_argumentsStreamExecutorr1   ProgramState	threadingThreadr-   start)r'   backendr(   r)   default_sampling_parar0   r"   r2   r    r&   tr+   r+   r,   run_program9   s,   

	rA   c                    s|  t |dr|j}tjrt|dkrt| | |dkr#tdt d }t	|t|}|r5t
| |||||S |dkrhg }|rTt|D ]}|t| |d||dd qB|S |D ]}|t| |d||dd qV|S |rrtjt|d	 t|2}	g }
|D ]}|
|	t| |d||dd |r|
d
  fdd q{dd |
D }W d    n1 sw   Y  |d
   |r   |S )Nr.      auto`      r+   FTtotalc                          S Nr7   _pbarr+   r,   <lambda>       z#run_program_batch.<locals>.<lambda>c                 S   s   g | ]}|  qS r+   )result).0fr+   r+   r,   
<listcomp>       z%run_program_batch.<locals>.<listcomp>)r6   r.   r	   enable_precache_with_tracinglencache_programmaxmultiprocessing	cpu_countmin_run_program_batch_generatortqdmappendrA   r   submitadd_done_callbackr"   close)r'   r>   batch_argumentsr?   num_threadsprogress_bargenerator_stylerets	argumentsexecutorfuturesr+   rN   r,   run_program_batch]   s   
	

4'
rl   c                 #   s$   |dkr |rt  |n|}|D ]}t| |d||ddV  qdS |r*t j t|dnd d}t|N}	tdt||D ]=}
t|
| t|}g }t|
|D ] }|	t| |d|| |dd} rh| fd	d
 || qM|D ]}|	 V  qpq;W d   n1 sw   Y   r 
  dS dS )zgHelper function that yields results one by one using chunking to avoid overwhelming ThreadPoolExecutor.rB   r+   FTrF   N   r   c                    rI   rJ   rK   rL   rN   r+   r,   rP      rQ   z._run_program_batch_generator.<locals>.<lambda>)r_   rA   rX   r   ranger]   ra   rb   r`   rR   rc   )r'   r>   rd   r?   re   rf   iteratorri   
chunk_sizerj   chunk_start	chunk_endchunk_futuresifuturer+   rN   r,   r^      sT   	

r^   c                 C   s<   ddl m} || |}|rt|dkr|| d S d S d S )Nr   )extract_prefix_by_tracing@   )sglang.lang.tracerrv   rX   cache_prefix)r'   r>   rv   prefixr+   r+   r,   rY      s
   
rY   c                   @   s  e Zd ZdZ		dHddZdefddZd	d
 Zdd Zdd Z	dIddZ
		dJdedeee  fddZdd Zdd Zdd Zdd Zdd Zd d! ZdKd#efd$d%Zdefd&d'Zdefd(d)Zd*d+ Zdefd,d-Zdefd.d/Zde fd0d1Z!de"fd2d3Z#de$fd4d5Z%de&fd6d7Z'de(fd8d9Z)de*fd:d;Z+de,fd<d=Z-de,fd>d?Z.de/fd@dAZ0dBdC Z1dDdE Z2dFdG Z3dS )Lr9   zGA stream executor that executes SGL expressions in a background thread.NTc           
         s   ddl m} t j _| _| _| _| _	i  _
i  _i  _d _d  _d _g  _|p3 j  _d  _d  _g  _g  _d  _| _d _| _ jrlt  _ fdd}	tjt  j!|	fd _" j"#  |rxt$  _%i  _&d S d  _%d  _&d S )Nr   )BaseBackendF c                      s       d S rJ   )_thread_worker_funcr+   selfr+   r,   _run_worker_in_context.     z7StreamExecutor.__init__.<locals>._run_worker_in_contextr3   )' sglang.lang.backend.base_backendr{   uuiduuid4hexsidr>   ri   r?   r0   	variablesvariable_event	meta_infois_finishederror_text_	messages_get_chat_templater/   cur_rolecur_role_begin_posimages_
cur_imagesfork_start_text_posr1   speculated_textr2   queueQueuer;   r<   contextvarscopy_contextrunworkerr=   Eventstream_text_eventstream_var_event)
r   r>   ri   r?   r/   r0   r1   r2   r{   r   r+   r~   r,   __init__   sD   





zStreamExecutor.__init__exprc                 C   s.   |  | | jr| j| d S | | d S rJ   )_init_var_eventr2   r   put_executer   r   r+   r+   r,   ra   >  s   
zStreamExecutor.submitc                 C   s   | j r
| j  d S d S rJ   )r2   r   joinr~   r+   r+   r,   r"   F  s   zStreamExecutor.syncc                 C   s"   || j v r| j |   | j| S rJ   )r   waitr   r   namer+   r+   r,   get_varJ  s   

zStreamExecutor.get_varc                 C   s   || j |< d S rJ   )r   r   r   valuer+   r+   r,   set_varO     zStreamExecutor.set_varc                 C   s@   || j v r| j | |}|std| d| j|d }|S )Nz!Timeout while waiting for event '')r   r   TimeoutErrorr   get)r   r   timeoutgotretr+   r+   r,   get_meta_infoR  s   
zStreamExecutor.get_meta_inforB   sizeposition_ids_offsetc                    s   |dkrt  jr t     t|} fddt|D }t|D ]6}t j|| _t  j|| _t	 j
|| _
 j|| _ j|| _t j|| _t	 j|| _q&|S )NrB   c                    s&   g | ]}t  j j j j jqS r+   )r9   r>   ri   r?   r/   r0   )rS   rM   r~   r+   r,   rU   e  s    z'StreamExecutor.fork.<locals>.<listcomp>)strr   ra   r
   r"   intrn   dictr   listr   r   r   rX   r   r   )r   r   r   exesrt   r+   r~   r,   forkZ  s    

zStreamExecutor.forkc                 C      |    | jS rJ   )r"   r   r~   r+   r+   r,   r%   |     zStreamExecutor.textc                 C   r   rJ   )r"   r   r~   r+   r+   r,   messages  r   zStreamExecutor.messagesc                 C   r   rJ   )r"   r   r~   r+   r+   r,   error  r   zStreamExecutor.errorc                 C   s,   | j r| j r| jd  | j|  d S rJ   )r2   r   is_aliver   r   r>   end_programr~   r+   r+   r,   r!     s   
zStreamExecutor.endc              
   C   s  d }	 | j  }|d u r| j   n3z| | W n ty6 } ztdt   |}W Y d }~nd }~ww | j   | jrD| j	  q|d urz	 | j   | j 
  qK t jy_   Y nw | jD ]	}| j| 	  qc| jr}| jD ]	}| j| 	  qs|| _| jr| j	  d| _d S )NTzError in stream_executor: )r   r   	task_doner   r   warningswarnr   r   set
get_nowaitEmptyr   r   r   r   )r   r   r   r*   r   r+   r+   r,   r}     sH   









z"StreamExecutor._thread_worker_funcc                 C   s  t |tr	t|}t |tsJ | t |tr | |j d S t |tr,| | d S t |tr8| 	| d S t |t
rJ|jD ]}| | q@d S t |trV| | d S t |trb| | d S t |trn| | d S t |trz| | d S t |tr| | d S t |tr| | d S t |tr| | d S t |tr| | d S t |trtjr| j j!r| "| d S | #| d S t |t$r| %| d S t&dt'| )NzUnknown type: )(
isinstancer   r   r   _execute_fillr   r   _execute_genr   _execute_selectr   	expr_listr   r   _execute_role_beginr   _execute_role_endr   _execute_imager   _execute_videor   _execute_variabler   _execute_var_scope_beginr   _execute_var_scope_endr
   _execute_commit_lazy_operationsr   r	   enable_parallel_encodingr>   support_concate_and_append(_execute_concatenate_and_append_kv_cache$_execute_concatenate_and_append_textr   _execute_separate_reasoning
ValueErrortype)r   otherxr+   r+   r,   r     sN   















zStreamExecutor._executeFr   c                 C   sr   t |}| jdkr| jd ur| jjr|s| j| d S | j|r-| jt|d  | _nd| _|  j	|7  _	d S )N	assistantr|   )
r   r   r1   r>   is_chat_model	spec_fillr   
startswithrX   r   )r   r   rz   r+   r+   r,   r     s   

zStreamExecutor._execute_fillc                 C   sD   |j }t|}| j||f | j||f |  j| jj7  _d S rJ   )pathr   r   r`   r   r   r/   image_token)r   r   r   base64_datar+   r+   r,   r     s
   zStreamExecutor._execute_imagec                 C   sL   |j }|j}t||}| j||f | j||f |  j| jj7  _d S rJ   )	r   
num_framesr   r   r`   r   r   r/   r   )r   r   r   r   r   r+   r+   r,   r     s   
zStreamExecutor._execute_videoc                    s   j j}i   fdd}fdd}d u r8tj|k r%|  jd | }j|d  _| fS ttttfrkjdkrH|  | }|dkrXtjtj}jd | }j|d  _| fS t	d)Nc                      s0   t jj_d _jjd\_ d S )Nsampling_params)rZ   max_new_tokensr1   stopr>   generater   r+   )r   r   r   r+   r,   regen  s   z'StreamExecutor._spec_gen.<locals>.regenc                     sf   t tr jS t ttfr/d} D ]} j|}|dkr,| dks*|| k r,|} q| S td)NrH   *Wrong type of stop in sampling parameters.)r   r   r   findtupler   r   )posstop_strstop_pos)r   r   r+   r,   	find_stop  s   
z+StreamExecutor._spec_gen.<locals>.find_stopr|   rH   r   )
r   r   rX   r   r   r   r   r   r]   r   )r   r   r   r   r   compr   r+   )r   r   r   r   r,   	_spec_gen  s0   
zStreamExecutor._spec_genc                 C   sr  |  |j}|j}| jsd| jd u r| jj| |d\}}n| jjr-| jj| ||d\}}d S | |\}}t	|t
rC|  j|d 7  _nt	|tsJJ |  j|7  _|| j|< || j|< | j|   d S | jd u smJ d| jj| |d}d| j|< | j|   |D ]%\}}|  j|7  _| j|  |7  < || j|< | j|   | j  q| j|   | j|   d S )Nr   )r   spec_var_namer   z6stream is not supported with api speculative executionr|   )_resolve_sampling_paramsr   r   r0   r1   r>   r   r   r   r   r   r   r   r   r   r   r   generate_streamr   r   )r   r   r   r   r   r   	generatorr+   r+   r,   r   9  sN   






zStreamExecutor._execute_genc                 C   sv   | j | |j|j|j}|jd ur1|j}|j| j|< |j| j|< | j	| 
  | jr1| j| 
  |  j|j7  _d S rJ   )r>   selectchoicestemperaturechoices_methodr   decisionr   r   r   r   r   r   )r   r   choices_decisionr   r+   r+   r,   r   o  s   
zStreamExecutor._execute_selectc                 C   s    |j }||j}| | d S rJ   )source_stream_executorr   r   r   )r   r   src_executorr   r+   r+   r,   r   |  s   z StreamExecutor._execute_variablec                 C   s   | j d u s	J dt| jdkr.|jdkr.| jj}|r.| td | | | 	t
d |j| _ | j|j| j\}}| j|dd t| j| _d S )NzNested roles are not allowed.r   systemT)rz   )r   rX   r   roler/   default_system_promptr   r   r   r   r   get_prefix_and_suffixr   r   )r   r   default_systemrz   rM   r+   r+   r,   r     s   
z"StreamExecutor._execute_role_beginc                 C   s   | j dkr| jd ur| jjr| j|  d | _ | j| jd   }| j	|j
| j\}}| | | jr`|j
d|dgd}| jD ]\}}|d ddd| id	 qA| j| g | _d S | j|j
|d d S )
Nr   r%   )r   r%   )r  contentr  	image_urlurlzdata:image/jpeg;base64,)r   r  )r   r1   r>   r   role_end_generater   r   lstripr/   r  r  r   r   r   r`   )r   r   new_textrM   suffixlast_msg
image_pathimage_base64_datar+   r+   r,   r     s.   





z StreamExecutor._execute_role_endc                 C   s   t t| j| j|j< d S rJ   )r   rX   r   r   r   r   r+   r+   r,   r     s   z'StreamExecutor._execute_var_scope_beginc                 C   s2   | j | j|j d  | j|j< | j|j   d S rJ   )r   r   r   r   r   r   r+   r+   r,   r     s   z%StreamExecutor._execute_var_scope_endc                 C   s   | j |  d S rJ   )r>   commit_lazy_operationsr   r+   r+   r,   r        z.StreamExecutor._execute_commit_lazy_operationsc                 C   s@   d}|j D ]}|j}|  ||j|jd  7 }q| | d S )Nr|   )statesr    r"   r   r   r   )r   r   r  sexer+   r+   r,   r     s   
z3StreamExecutor._execute_concatenate_and_append_textc                 C   s   t | j}t|jD ]\}}|j}|t  q
t|jD ]\}}|j}|  |j|ks/J |  j|j|jd  7  _qdd |jD }| j	
|| j d S )Nc                 S   s   g | ]}|j jqS r+   )r    r   )rS   r&   r+   r+   r,   rU     rV   zKStreamExecutor._execute_concatenate_and_append_kv_cache.<locals>.<listcomp>)rX   r   	enumerater  r    ra   r
   r"   r   r>   concatenate_and_appendr   )r   r   self_lenrt   r  r  src_ridsr+   r+   r,   r     s   
z7StreamExecutor._execute_concatenate_and_append_kv_cachec           
      C   s  | j rd S | jdkr| jd ur| jjr| j|  ddlm} ||j}|j	}|s+d S t
|ts5t
|tri| |j}||\}}||j}| |j| | || | j|   | jd | j | | _d S t
|tr}|jD ]}	| t|j|	 qqd S d S )Nr   r   )ReasoningParser)r0   r   r1   r>   r   r	  "sglang.srt.parser.reasoning_parserr  
model_typer   r   r   r   r   r   parse_non_streamprocess_name_for_reasoningr   r   r   r   r   r   r   r   r   )
r   r   r  reasoning_parserr   cur_text	reasoningnormal_textreasoning_namer   r+   r+   r,   r     s6   





z*StreamExecutor._execute_separate_reasoningc                 C   sh   t |ttttfr t | j|j< | j	rt | j
|j< d S d S t |tr0|jD ]	}| | q(d S d S rJ   )r   r   r   r   r   r;   r   r   r   r0   r   r   r   r   )r   r   r*   r+   r+   r,   r     s   

zStreamExecutor._init_var_eventc                 C   s|   t | j}dD ]}t||d}|durt||| q| jjr<|jdkr(g |_nt|jt	r3|jg|_| j| jj7  _|S )a  
        Construct sampling param based on default + override values

        The default values of sampling are populated in `default_sampling_para` via sgl.function.run(...sampling_args)
        , and `sampling_params` contains the override values from sgl.gen().

        Here we use default_sampling_para as the base and override the values if they exist in `sampling_params`.
        It also extends the stop tokens based on the chat template.
        )r   min_new_tokensnr   stop_token_ids
stop_regexr   top_ptop_kmin_pfrequency_penaltypresence_penalty
ignore_eosreturn_logproblogprob_start_lentop_logprobs_numreturn_text_in_logprobsdtyperegexjson_schemaNr+   )
copydeepcopyr?   getattrsetattrr/   r   r   r   r   )r   r   cloneitemr   r+   r+   r,   r     s   

z'StreamExecutor._resolve_sampling_paramsc                 C   s   |    d S rJ   )r!   r~   r+   r+   r,   __del__8  r   zStreamExecutor.__del__)NTrJ   rB   NF)4__name__
__module____qualname____doc__r   r   ra   r"   r   r   r   r   r   r   r   r%   r   r   r!   r}   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r
   r   r   r   r   r   r   r   r   r:  r+   r+   r+   r,   r9      sR    	
A



"',	
26$	"1r9   c                   @   s\  e Zd ZdZdefddZd;dedee fdd	Z	d;dee fd
dZ
d;dee fddZd;dee fddZedefddZ		d<dedeee  fddZed;deee  fddZdd Zdd Zdd Zdd  Zd;d!ee fd"d#Z	$d=d!ee d%efd&d'Zd(d) Zd*d+ Zd,d- Zd.d/ Zd0d1 Zd2d3 Zd4d5 Z d6d7 Z!d8efd9d:Z"dS )>r:   zThe state of an SGL program.r    c                 C   s
   || _ d S rJ   )r    )r   r    r+   r+   r,   r   ?     
zProgramState.__init__Nr   r   c                    sF   |d urt t |t g}j| |S t fdd}| S )Nc                   3   s,    j t  d V  j t  d S rJ   )r    ra   r   r   r+   r   r   r+   r,   
role_scopeI     z-ProgramState._role_common.<locals>.role_scope)r   r   r   r    ra   r   )r   r   r   	role_exprrC  r+   rB  r,   _role_commonB  s   zProgramState._role_commonc                 C      |  d|S )Nr  rF  r   r+   r+   r,   r  Q  r   zProgramState.systemc                 C   rG  )NuserrH  r   r+   r+   r,   rI  T  r   zProgramState.userc                 C   rG  )Nr   rH  r   r+   r+   r,   r   W  r   zProgramState.assistantc                 c   s,    | j t| d V  | j t| d S rJ   )r    ra   r   r   r   r+   r+   r,   	var_scopeZ  rD  zProgramState.var_scoperB   r   r   c                 C   s*   | j ||}dd |D }t|| }|S )Nc                 S   s   g | ]}t |qS r+   )r:   )rS   r   r+   r+   r,   rU   f  rV   z%ProgramState.fork.<locals>.<listcomp>)r    r   ProgramStateGroup)r   r   r   stream_executorsr  state_groupr+   r+   r,   r   `  s   
zProgramState.forkc                 c   s2    |  d|}z|d V  W |  d S |  w )NrB   r   )r   r   )r   r   rM  r+   r+   r,   r4  j  s
   zProgramState.copyc                 C   
   | j  S rJ   )r    r%   r~   r+   r+   r,   r%   r  rA  zProgramState.textc                 C   rN  rJ   )r    r   r~   r+   r+   r,   r   u  rA  zProgramState.messagesc                 C   rN  rJ   )r    r"   r~   r+   r+   r,   r"   x  rA  zProgramState.syncc                 C   rN  rJ   )r    r   r~   r+   r+   r,   r   {  rA  zProgramState.errorvar_namec                 c   s   | j jr{d}|d u r5| j j}	 |  |  t| j j|d  }|t|7 }|r-|V  | j jr2nqd S d }|sP|| j j	v rE| j j	| }| j jrNdV  d S |r9	 |  |  t| j j
| |d  }|t|7 }|rp|V  | j j|  rzd S qQ|d u r|  V  d S | |V  d S Nr   Tr|   )r    r0   r   r   clearr   r   rX   r   r   r   r   is_setr%   r   )r   rO  preveventoutr+   r+   r,   	text_iter~  sJ   
zProgramState.text_iterFreturn_meta_datac                 C  sT  t  }| jjrd}|d u r?| jj}	 |d |jI d H  |  t| jj	|d  }|t
|7 }|r7|V  | jjr<nqd S d }|sZ|| jjv rO| jj| }| jjrXdV  d S |rC	 |d |jI d H  |  t| jj| |d  }|t
|7 }|r|r|| jj| fV  n|V  | jj|  rd S q[|d u r|  V  d S | |V  d S rP  )asyncioget_running_loopr    r0   r   run_in_executorr   rQ  r   r   rX   r   r   r   r   r   rR  r%   r   )r   rO  rW  looprS  rT  rU  r+   r+   r,   text_async_iter  sP   zProgramState.text_async_iterc                 C      | j |S rJ   )r    r   r   r+   r+   r,   r     r   zProgramState.get_varc                 C   s   | j ||S rJ   )r    r   r   r+   r+   r,   r     r   zProgramState.set_varc                 C   r]  rJ   )r    r   r   r+   r+   r,   r     r   zProgramState.get_meta_infoc                 C   s    |d u rt d| j| | S )NzTried to append None to state.)r   r    ra   )r   r   r+   r+   r,   __iadd__  s   zProgramState.__iadd__c                 C   s
   |  |S rJ   )r   r   r+   r+   r,   __getitem__  rA  zProgramState.__getitem__c                 C   s   |  || d S rJ   )r   r   r+   r+   r,   __setitem__  r  zProgramState.__setitem__c                 C   s   || j jv S rJ   )r    r   r   r+   r+   r,   __contains__  r   zProgramState.__contains__c                 C   s   | j   d S rJ   )r    r!   r~   r+   r+   r,   r:    r   zProgramState.__del__returnc                 C   s   d|    dS )NzProgramState())r%   r~   r+   r+   r,   __repr__  r  zProgramState.__repr__rJ   r;  )NF)#r=  r>  r?  r@  r9   r   r   r   r   rF  r  rI  r   r   rJ  r   r   r   r4  r%   r   r"   r   rV  boolr\  r   r   r   r^  r_  r`  ra  r:  rd  r+   r+   r+   r,   r:   <  sL    


'
-r:   c                   @   s^   e Zd Z	ddee dee fddZddefdd	Zd
e	fddZ
d
e	fddZdd ZdS )rK  Nr  	src_statec                 C   s   || _ || _d S rJ   )r  rf  )r   r  rf  r+   r+   r,   r     s   
zProgramStateGroup.__init__gather_variablemodec           	      C   s   |dkrA| j jj}t| }| jD ]-}|j  |jj}t| | }|D ]}||v r7|| ||  q'|| g||< q'qn|dkrV|  j t| j7  _ | j j  nt	d| | jD ]}|j
  q`d S )Nrg  concate_and_appendzInvalid join mode: )rf  r    r   r   keysr  r"   r`   r   r   r!   )	r   rh  src_varssrc_var_setchild_state
child_varsnew_varskr  r+   r+   r,   r     s(   




zProgramStateGroup.joinrt   c                 C   s
   | j | S rJ   r  )r   rt   r+   r+   r,   r_    rA  zProgramStateGroup.__getitem__c                 C   s   | j | |ks	J d S rJ   rq  )r   rt   r   r+   r+   r,   r`    s   zProgramStateGroup.__setitem__c                 C   s   t |trtt| jD ]}| j|  ||7  < q| S t |tr6tt| jD ]}| j|  |7  < q(| S t |ttfrTtt| jD ]}| j|  || 7  < qD| S td| )NzInvalid value: )	r   r   rn   rX   r  r   r   r   r   )r   r   rt   r+   r+   r,   r^    s   


zProgramStateGroup.__iadd__rJ   )rg  )r=  r>  r?  r   r:   r   r   r   r   r   r_  r`  r^  r+   r+   r+   r,   rK    s    
rK  )FTr<  )2r@  rX  r   r4  r[   r   r;   r   r   concurrent.futuresr   
contextlibr   typingr   r   r   r   r   r_   sglang.global_configr	   sglang.lang.irr
   r   r   r   r   r   r   r   r   r   r   r   r   r   r   sglang.utilsr   r   r   r-   rA   rl   r^   rY   r9   r:   rK  r+   r+   r+   r,   <module>   s@    D
+
[:    F 5