o
    پi0                     @   s  d dl Z d dlmZ d dlmZmZ d dlZd dlmZ d dl	m
Z
mZmZmZ eG dd dZeG dd	 d	Zd
ddddddddddddgdddZd
dddddddidgdddZd
i ed
 ddidZdPd d!Zd"d# Zd$d% Zd&d' Zd(d) Zd*d+ Zd,d- Zd.d/ Zd0d1 Zd2d3 Zd4d5 Zd6d7 Zd8d9 Zd:ed;fd<ed=fded>fd?ed@fdAedBfdedCfdDedEfdFedGfdHedIfg	Z 	dQdJedKedLee! dMefdNdOZ"dS )R    N)	dataclass)ListOptional)kill_process_tree)!DEFAULT_TIMEOUT_FOR_SERVER_LAUNCHDEFAULT_URL_FOR_TESTModelLaunchSettingspopen_launch_serverc                   @   s   e Zd ZU dZeed< dZeed< dZeed< dZeed< dZ	eed< dZ
eed< dZeed< dZeed	< d
Zeed< d
Zeed< dZeed< dZeed< dS )ToolCallTestParamsT
test_basic	test_autotest_streamingtest_required	test_nonetest_specifictest_stricttest_multiturnFtest_thinkingtest_reasoning_usagetest_paralleltest_streaming_parallelN)__name__
__module____qualname__r   bool__annotations__r   r   r   r   r   r   r   r   r   r   r    r   r   U/home/ubuntu/.local/lib/python3.10/site-packages/sglang/test/tool_call_test_runner.pyr
      s   
 r
   c                   @   sJ   e Zd ZU eed< eed< eed< eed< ee ed< dZe	e ed< dS )ToolCallTestResultmodelpassed
num_passed	num_totalfailuresNvariant)
r   r   r   strr   r   intr   r$   r   r   r   r   r   r       s   
 r   functionaddzCompute the sum of two integersobjectintegerzFirst integer)typedescriptionzSecond integer)abr-   r.   )r+   
propertiesrequired)namer,   
parametersr+   r'   get_weatherz"Get the current weather for a citycitystringz	City namestrictTr0   c                 K   s.   | j jjd|d|dg|ptg|dd|S )zFSingle-turn tool call request. Defaults to ADD_TOOL_STRICT + required.userrolecontent皙?r   messagestoolstool_choicetemperatureNr   )chatcompletionscreateADD_TOOL_STRICT)clientr   r;   r?   r@   kwargsr   r   r   _callQ   s   

rH   c                 C   s   t | |d}|jd j}|jrt|jdksJ |jr#J d|j |jd }|jjdks8J d|jj dtt	
|jjtsDJ |jd jdksNJ dS )	zMFormat + field placement: tool_calls present, content empty, valid JSON args.Compute 3 + 5r   content should be empty, got: r(   expected 'add', got ''
tool_callsN)rH   choicesmessagerM   lenr;   r'   r1   
isinstancejsonloads	argumentsdictfinish_reason)rF   r   responsemsgtcr   r   r   _test_basic_format`   s   
 rZ   c                 C   sb   t | |ddd}|jd j}|jrt|jdksJ |jr%J d|j |jd jdks/J dS )zBtool_choice=auto should populate tool_calls, not content (#17942).rI   autor@   r   rJ   rM   N)rH   rN   rO   rM   rP   r;   rV   )rF   r   rW   rX   r   r   r   
_test_autol   s
   r]   c           	      C   s   t | |ddd}t|}t|dksJ g }d}|D ]#}|jd jjr=|jd jjd }|jjp1|}|jjr=|	|jj q|dksJJ d| dt
d	|}d
|v rZd|v s\J |d jd jdkshJ dS )z2Streaming chunks should concatenate to valid JSON.zCompute 5 + 7T)streamr   Nr(   rK   rL    r-   r.   rM   )rH   listrP   rN   deltarM   r'   r1   rT   appendrR   rS   joinrV   )	rF   r   rW   chunksarg_fragmentsr1   chunkrY   argsr   r   r   _test_streamingu   s    ri   c                 C   s*   t | |dttgd}|jd jjsJ dS )zJtool_choice='required' must return a tool call even for unrelated queries.What is the capital of France?r?   r   N)rH   ADD_TOOLWEATHER_TOOLrN   rO   rM   rF   r   rW   r   r   r   _test_required   s   ro   c                 C   s>   t | |ddd}|jd jjdu sJ |jd jdksJ dS )z1tool_choice='none' must not return any tool call.zWhat is 1+1?noner\   r   Nstop)rH   rN   rO   rM   rV   rn   r   r   r   
_test_none   s   rr   c                 C   sJ   t | |dttgdddidd}|jd jj}|r!|d jjdks#J dS )	z7Specifying a function name should return that function.rj   r'   r1   r4   r3   )r?   r@   r   N)rH   rl   rm   rN   rO   rM   r'   r1   rF   r   rW   rY   r   r   r   _test_specific   s   rt   c                 C   s@   t | |d}t|jd jjd jj}d|v rd|v sJ dS )z0strict: true should enforce schema on arguments.zCompute 5 - 7r   r-   r.   N)rH   rR   rS   rN   rO   rM   r'   rT   )rF   r   rW   rh   r   r   r   _test_strict   s   ru   c                 C   s   dddg}| j jj||tgddd}|jd jjd }||jd j |d|jd	|j	j
d
 | j jj||tgdd}d	|jd jjpGdv sKJ dS )z6Pass tool result back, model should reply based on it.r8   What is 3 + 5?r9   r0   r<   r=   r   tool8r:   tool_call_idr;   r1   )r   r>   r?   rA   r_   NrB   rC   rD   rE   rN   rO   rM   rc   idr'   r1   rl   r;   )rF   r   r>   r1rY   r2r   r   r   _test_multiturn   s0   r   c                 C   s   ddddi}dddg}| j jj||tgdd	|d
}|jd jjd }||jd j |d|jd|j	j
d | j jj||tgd	|d}|jd jjpOd}d|v s[J d| dS )z[After tool result with thinking enabled, output should be in content not reasoning_content.thinkingenabled   r+   budget_tokensr8   rv   r9   r0   r<   r   r>   r?   r@   rA   
extra_bodyr   rw   rx   ry   )r   r>   r?   rA   r   r_   zexpected '8' in content, got: Nr{   )rF   r   thinking_bodyr>   r}   rY   r~   r;   r   r   r   _test_thinking   s8   r   c                 C   s   ddddi}| j jj|dddgtgdd	|d
}|j}|dus$J d|jr,|jdks4J d|j |jrI|jdd}|dksKJ d| dS dS )zHWith thinking enabled, usage.reasoning_tokens should be reported as > 0.r   r   r   r   r8   rv   r9   r0   r<   r   Nzusage should not be Noner   z#expected reasoning_tokens > 0, got reasoning_tokensz=expected completion_tokens_details.reasoning_tokens > 0, got )rB   rC   rD   rE   usager   completion_tokens_detailsget)rF   r   r   rW   r   detail_reasoningr   r   r   _test_reasoning_usage   s.   


r   c                 C   sV   t | |dttgd}|jd jj}|rt|dks)J d|r$t| d dS )z1Single request should return multiple tool calls.-What is 3+5 and what is the weather in Tokyo?rk   r      expected >= 2 tool calls, got N)rH   rl   rm   rN   rO   rM   rP   rs   r   r   r   _test_parallel  s   4r   c                 C   s  t | |dttgddd}i }|D ]>}|jd jjsq|jd jjD ],}|j}||vr1ddd||< |jjr=|jj|| d< |jj	rM|| d	  |jj	7  < q!qt
|d
ks^J dt
| | D ]$\}}|d srJ d| dt|d	 }t|tsJ d| dqbdS )zBStreaming with tool_choice=auto should return multiple tool calls.r   r[   T)r?   r@   r^   r   r_   )r1   rT   r1   rT   r   r   z
tool call z missing function namez arguments not a dictN)rH   rl   rm   rN   rb   rM   indexr'   r1   rT   rP   itemsrR   rS   rQ   rU   )rF   r   rW   rM   rg   rY   idxrh   r   r   r   _test_streaming_parallel  s8   	r   basic_formatr   	streamingr   r   rp   r   specificr   r   	multiturnr   r   r   parallelr   r   paramsbase_urlreturnc                 C   s  |pt }tdd  td| j  | jrtd| j  td d d}zzt| j|| jt| jd}tj	d|d d	}g }g }t
D ]E\}}}	t||	sOqDz||| j || td
|  W qD ty }
 z|| d|
  td| d|
  W Y d}
~
qDd}
~
ww t|t| }tdt| d| d t| jt|dkt|||| jdW W |rt|j S S  ty }
 z&td|
  t| jdddd|
 g| jdW  Y d}
~
W |rt|j S S d}
~
ww |rt|j w w )z6Launch server, run enabled test cases, return results.
z<============================================================zRunning TOOL CALL test for z  Variant: N)
other_argstimeoutenvzsk-testz/v1)api_keyr   z  PASS: z: z  FAIL: z
  Result: /z passedr   )r   r    r!   r"   r#   r$   z  Server launch failed: FzServer launch failed: )r   print
model_pathr$   r	   
extra_argsr   r   openaiClient_TESTSgetattrrc   	ExceptionrP   r   r   pid)r   r   r   processrF   passed_listfailed_listr1   fnflagetotalr   r   r   run_tool_call_testD  sz   

 


r   )Nr0   )N)#rR   dataclassesr   typingr   r   r   sglang.srt.utilsr   sglang.test.test_utilsr   r   r   r	   r
   r   rl   rm   rE   rH   rZ   r]   ri   ro   rr   rt   ru   r   r   r   r   r   r   r%   r   r   r   r   r   <module>   s    

	#