o
    پi                     @   s   d dl Z d dlZd dlmZ d dlmZ d dlmZmZm	Z	m
Z
 d dlZd dlmZmZ d dlmZ d dlmZmZmZmZmZmZ eZe ZG dd	 d	eZdS )
    N)ThreadPoolExecutor)SimpleNamespace)DictListLiteralOptional)is_hipkill_process_tree)run_eval)!DEFAULT_TIMEOUT_FOR_SERVER_LAUNCHDEFAULT_URL_FOR_TESTCustomTestCaseis_in_cipopen_launch_serverwrite_github_step_summaryc                   @   st   e Zd Z	dded ded deeef deee  fdd	Z	d
edeeef dee fddZ
dd Zdd ZdS )BaseTestGptOssNmodel_variant)20b120bquantization)mxfp4bf16"expected_score_of_reasoning_effort
other_argsc                 C   sb   |d u rg }ddddd||f }|dkr|ddg7 }t r'd	tjvr'd
tjd	< | j|||d d S )Nzlmsys/gpt-oss-20b-bf16zlmsys/gpt-oss-120b-bf16zopenai/gpt-oss-20bzopenai/gpt-oss-120b))r   r   )r   r   )r   r   )r   r   r   z--cuda-graph-max-bs600SGLANG_USE_AITER0)modelr   r   )_is_hiposenviron_run_test_raw)selfr   r   r   r   r    r#   N/home/ubuntu/.local/lib/python3.10/site-packages/sglang/test/gpt_oss_common.pyrun_test   s$   

zBaseTestGptOss.run_testr   c                    s   t  tt|d}z?  tdd}t|fdd fdd| D  W d    n1 s3w   Y  W t|j	 d S W t|j	 d S t|j	 w )N)timeoutr      )max_workersc                    s    j di | S )Nr#   )_run_one_eval)d)r"   r#   r$   <lambda>J   s    z.BaseTestGptOss._run_test_raw.<locals>.<lambda>c                    s   g | ]\}}t  ||d qS ))r   reasoning_effortexpected_score)dict).0r,   r-   )r   r#   r$   
<listcomp>K   s    z0BaseTestGptOss._run_test_raw.<locals>.<listcomp>)
r   	_base_urlr   &_check_streaming_responses_api_requestr   listmapitemsr	   pid)r"   r   r   r   processexecutorr#   )r   r"   r$   r!   5   s*   


zBaseTestGptOss._run_test_rawc              	   C   s  t  d}|dddd}tj||dd}|jdkr!td|j  |  d	}| D ]?}|rj|d
}|	drj|dd  }|
 dkrG n$zt|}	|	ddkr^|	dd	}
||
7 }W q+ tjyi   Y q+w q+td|  | t|dk | d| d S )Nz/v1/responseszWhat is 1 + 1?Tr   )r   inputstreamtemperature)jsonr:      zResponse API failed:  zutf-8zdata:    z[DONE]typezresponse.output_text.deltadeltazStreaming check response: 2)r1   requestspoststatus_codeprinttextraise_for_status
iter_linesdecode
startswithstripr<   loadsgetJSONDecodeError
assertTruelenassertIn)r"   r   urlpayloadresponsecontentlinedecoded_linedata_strdatarA   r#   r#   r$   r2   X   s>   




z5BaseTestGptOss._check_streaming_responses_api_requestc              
   C   s   t t|ddddd|d}d| d| d| }td	|  t|}td
| d| | |d | t rGtd| d|d dd d S d S )Ngpqa   i   g?)base_urlr   	eval_namenum_examplesnum_threads
max_tokensr;   r,   zmodel=z reasoning_effort=z expected_score=zEvaluation start: zEvaluation end: z	 metrics=scorez### test_gpt_oss_common
Setup: z
Score: z.2f
)r   r1   rF   r
   assertGreaterEqualr   r   )r"   r   r,   r-   argssetupmetricsr#   r#   r$   r)   |   s0   zBaseTestGptOss._run_one_eval)N)__name__
__module____qualname__r   r   strfloatr   r   r%   r!   r2   r)   r#   r#   r#   r$   r      s(    




#$r   )r<   r   concurrent.futuresr   typesr   typingr   r   r   r   rC   sglang.srt.utilsr   r	   sglang.test.run_evalr
   sglang.test.test_utilsr   r   r   r   r   r   r1   r   r   r#   r#   r#   r$   <module>   s     	