o
    پiC7                    @   sN	  d Z ddlZddlZddlZddlZddlZddlZddlZddlZddl	Z	ddl
Z
ddlZddlZddlZddlZddlZddlZddlmZ ddlmZ ddlmZmZ ddlmZ ddlmZ ddlmZmZ dd	lmZm Z m!Z!m"Z"m#Z#m$Z$ ddl%Z%ddl&Z'ddl(Z(ddl)Z)ddl*m+  m,Z- dd
l.m/Z/ ddl0m1Z1 ddl2m3Z3 ddl4m5Z5 ddl6m7Z7m8Z8m9Z9m:Z:m;Z;m<Z<m=Z= ddl>m?Z? ddl@mAZA dZBdZCdZDdZEdZFdZGdZHdZIdZJdZKdZLdZMdZNdZOdZPd ZQd!ZRd"ZSd#ZTd#ZUd$ZVd%ZWd&ZXd'ZYd(ZZd)Z[d*Z\d+Z]d,Z^d-Z_dZ`d.Zad/Zbd0ZcdZddZed1Zfd2Zgd3ZhdZId4Zid5Zjd6Zkd7Zld/Zmd8Znd/Zod9Zpd:Zqd;Zrd<Zsd=Ztd>Zud?Zvd@ZwdAZxdBZyd)dDezdEe{dFe/j/fdGdHZ|dIdJ Z}dKdL Z~dMdN ZdOdP ZdQezfdRdSZe} rtdTe{ejdUdVd dW  ZndXe{ejdUdVd dY  ZdZedY  Ze~ rd[Zye rd\Zye rd[Zyd*d]d^Zd_e{fd`daZd+dcddZ	d,dedfZd*dgdhZ	d-didjZd.dkdlZd.dmdnZd.dodpZdqejfdrdsZdFezfdtduZdqejfdvdwZdxejfdydzZdxejfd{d|Zdxejfd}d~ZdxejfddZdxejfddZdd ZdQezfddZd/deez defddZ	d.dezdede#eez  dFe#ez fddZdedFefddZde"ez dede#e dezdFejf
ddZdejdezde#ez dedFe$ee#ez f f
ddZ							d0dezdezdede#ez de#eez  de#e de#e dezdede#e{ fddZ			d1dezdezdede#ez deez de#e fddZdd Zddddddddddedddddddddddddddbdfde{defddZ													d2de{de#e!ezejge d f  de#ez fddZdezdezde"e de{de!egef f
ddZ					d3ddńZ		b				d4ddǄZ		d5ddɄZdd˄ Zdd̈́ Z	d.ddτZddф Zddӄ ZdZdZefde"ez dezfddلZ	d.de#ez fddۄZd.de#e fdd݄Z				d6dd߄Z				d6ddZdd Zdejde$fddZdezde{dFe"ez fddZdezde{dFe"ez fddZdezde"eezef  dFe$e{ef fddZd7ddZdd ZG dd dejZdezfddZdezdezfddZG dd dZG d d dZȐdezdFezfddZɐdezfddZ		d*dd	Zːd
ezdezfddZdezdFe"ez fddZ͐d8ddZΐd*ddZϐdd ZАdd Zѐdd ZҐde!def eB fddZӐd.dezd ed!e#e fd"d#ZdFeezezf fd$d%ZՐd&ezdFezfd'd(ZdS (9  z-Common utilities for testing and benchmarking    N)ThreadPoolExecutor)datetime)partialwraps)BytesIO)Path)
ModuleTypeSimpleNamespace)Any	AwaitableCallableListOptionalTuple)Imagerun_benchmark)global_config)envs)get_bool_env_var
get_deviceis_cudais_port_availableis_xpukill_process_treeretry)run_eval)get_exception_tracebackz meta-llama/Llama-3.1-8B-Instructz meta-llama/Llama-3.2-1B-Instructzmeta-llama/Llama-3.2-1BzQwen/Qwen3-Reranker-0.6Bz$mistralai/Mixtral-8x7B-Instruct-v0.1zQwen/Qwen1.5-MoE-A2.7BzQwen/Qwen1.5-MoE-A2.7B-Chatz#Alibaba-NLP/gte-Qwen2-1.5B-instructz#cross-encoder/ms-marco-MiniLM-L6-v2z+deepseek-ai/DeepSeek-Coder-V2-Lite-Instructz/neuralmagic/DeepSeek-Coder-V2-Lite-Instruct-FP8zlmsys/sglang-ci-dsv3-testzlmsys/sglang-ci-dsv3-test-NextNz Qwen/Qwen3-Next-80B-A3B-InstructzQwen/Qwen3-VL-2B-Thinkingzzai-org/GLM-4.1V-9B-Thinkingznvidia/DeepSeek-V3-0324-FP4znvidia/Qwen3-30B-A3B-FP4z*neuralmagic/Meta-Llama-3.1-8B-Instruct-FP8z2neuralmagic/Meta-Llama-3.1-8B-Instruct-FP8-dynamicz nvidia/Llama-3.1-8B-Instruct-FP8zQwen/Qwen3-1.7B-FP8z#gaunernst/DeepSeek-V2-Lite-Chat-FP8zopenai/gpt-oss-20bz$RedHatAI/Llama-3.2-3B-quantized.w8a8znytopop/Qwen3-30B-A3B.w8a8z2hugging-quants/Meta-Llama-3.1-8B-Instruct-AWQ-INT4meta-llama/Llama-2-7b-chat-hfz!lmsys/sglang-EAGLE-llama2-chat-7Bz(lmsys/sglang-EAGLE3-LLaMA3.1-Instruct-8BzQwen/Qwen3-30B-A3BzTengyunw/qwen3_30b_moe_eagle3zQwen/Qwen2.5-Coder-7B-Instruct)z'OPEA/Qwen2.5-0.5B-Instruct-int4-sym-incz,Intel/Qwen2-0.5B-Instruct-int4-sym-AutoRoundz)meta-llama/Llama-4-Scout-17B-16E-Instructz'deepseek-ai/DeepSeek-R1-Distill-Qwen-7Bzdeepseek-ai/DeepSeek-V3-0324zlmsys/DeepSeek-V3-NextNz2hugging-quants/Mixtral-8x7B-Instruct-v0.1-AWQ-INT4zBarrrrry/DeepSeek-R1-W4AFP8zmeta-llama/Llama-3.1-8B-Instruct,mistralai/Mistral-7B-Instruct-v0.3,deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct,google/gemma-2-27b-itzcmeta-llama/Llama-3.1-70B-Instruct,mistralai/Mixtral-8x7B-Instruct-v0.1,Qwen/Qwen2-57B-A14B-Instructzneuralmagic/Meta-Llama-3.1-8B-Instruct-FP8,neuralmagic/Mistral-7B-Instruct-v0.3-FP8,neuralmagic/DeepSeek-Coder-V2-Lite-Instruct-FP8,neuralmagic/gemma-2-2b-it-FP8zneuralmagic/Meta-Llama-3.1-70B-Instruct-FP8,neuralmagic/Mixtral-8x7B-Instruct-v0.1-FP8,neuralmagic/Qwen2-72B-Instruct-FP8,neuralmagic/Qwen2-57B-A14B-Instruct-FP8,neuralmagic/DeepSeek-Coder-V2-Lite-Instruct-FP8,zai-org/GLM-4.5-Air-FP8zhugging-quants/Meta-Llama-3.1-8B-Instruct-AWQ-INT4,hugging-quants/Meta-Llama-3.1-8B-Instruct-GPTQ-INT4,hugging-quants/Mixtral-8x7B-Instruct-v0.1-AWQ-INT4zQwen/Qwen2.5-1.5B-InstructzQwen/Qwen2.5-VL-3B-Instructz[https://raw.githubusercontent.com/sgl-project/sglang/main/examples/assets/example_image.pngz]https://raw.githubusercontent.com/EvolvingLMMs-Lab/sglang/dev/onevision_local/assets/jobs.mp4iX     	image_urlmax_retriesreturnc                 C   s   t |D ]H}ztj| dd}|  tt|j}|  |W   S  t	yL } z||d kr;t
d| d|  |td|  W Y d }~qd }~ww d S )N   timeout   zFailed to download image after z
 retries:    )rangerequestsgetraise_for_statusr   openr   contentload	ExceptionRuntimeErrortimesleep)r    r!   iresponseimagee r7   J/home/ubuntu/.local/lib/python3.10/site-packages/sglang/test/test_utils.pydownload_image_with_retry   s$   
r9   c                   C      t dS )z"Return whether it is in CI runner.SGLANG_IS_IN_CIr   r7   r7   r7   r8   is_in_ci      r=   c                   C   r:   )z)Return whether it is in an AMD CI runner.SGLANG_IS_IN_CI_AMDr<   r7   r7   r7   r8   is_in_amd_ci   r>   r@   c                   C   
   t j S )z:Return whether it is running on a Blackwell (B200) system.)r   IS_BLACKWELLr*   r7   r7   r7   r8   is_blackwell_system      
rC   c                   C   rA   )z/Return whether it is running on an H200 system.)r   IS_H200r*   r7   r7   r7   r8   is_h200_system   rD   rF   
model_repoc                 C   s<   t d}|r| rt j|| }t j|rt j|S dS )NDEFAULT_MODEL_CACHE_DIR )osgetenvpathjoinisdirabspath)rG   	cache_dir
model_pathr7   r7   r8   _use_cached_default_models   s   
rR   i'  CUDA_VISIBLE_DEVICES0i  i N    http://127.0.0.1:i  i  c                 C   sN   |d usJ | |||dd}t j||d}|jdksJ | d d }|S )N)temperaturemax_new_tokensstop_sequencesinputs
parametersjson   generated_textr   r)   poststatus_coder^   )promptrW   
max_tokensstopurldatarespredr7   r7   r8   call_generate_lightllm   s   rk   	base_portc                 C   s:   | t dd }	 t|r|S |dk r|d7 }n|d8 }q	)Nd   rU   Ti`  *   +   )randomrandintr   )rl   portr7   r7   r8   find_available_port   s   
rs   r&   c           	         s~   |d usJ  ||||d}t j||d}|jdksJ |dkr0| d d t d  }|S  fdd| d D }|S )	N)rd   rW   re   rf   nr]   r_   r&   textr   c                       g | ]
}|t  d  qS Nlen.0xrd   r7   r8   
<listcomp>      z&call_generate_vllm.<locals>.<listcomp>r)   rb   rc   r^   ry   )	rd   rW   re   rf   rt   rg   rh   ri   rj   r7   r}   r8   call_generate_vllm   s   r   c           
         s   |d usJ  |||||d}t j||d}|jdksJ |dkr1| d d t d  }	|	S  fdd| d D }	|	S )	N)rd   rW   re   rf   regexrt   r]   r_   r&   ru   r   c                    rv   rw   rx   rz   r}   r7   r8   r~     r   z*call_generate_outlines.<locals>.<listcomp>r   )
rd   rW   re   rf   r   rt   rg   rh   ri   rj   r7   r}   r8   call_generate_outlines
  s   r   c           	      C   sN   |d usJ | |||dd}t j||d}|jdksJ | }|d }|S )N)rW   rX   rf   ru   sampling_paramsr]   r_   ru   ra   )	rd   rW   re   rf   rg   rh   ri   objrj   r7   r7   r8   call_generate_srt_raw   s   r   c              	   C   sf   |d usJ ddl m} g }t|D ]}	||  |d||||d }
||
d  q|dkr/|S |d S )Nr   )genanswer)namere   rW   rf   r   r&   )guidancer   r(   append)rd   rW   re   rf   rt   r   modelr   rets_outr7   r7   r8   call_generate_guidance2  s$   r   c                 C   sh   |d usJ g }t t|D ] }| ||  ddid}tj||d}|jdks)J |d qt|S )NrX   r&   rZ   r]   r_   r   )r(   ry   r)   rb   rc   r   npargmaxcontextchoicesrg   scoresr3   rh   ri   r7   r7   r8   call_select_lightllmI  s   

r   c                 C   sr   |d usJ g }t t|D ]%}| ||  ddd}tj||d}|jdks(J || dd qt	|S )Nr&   )rd   re   prompt_logprobsr]   r_   prompt_scorer   )
r(   ry   r)   rb   rc   r   r^   r*   r   r   r   r7   r7   r8   call_select_vllmZ  s   

r   c                 C   s:   |d usJ ddl m} ||  ||dd }||d S )Nr   )selectr   )r   )r   r   index)r   r   r   r   r   r7   r7   r8   call_select_guidancer  s   r   parserc                 C   s   | j dtdd | j dtdd | j dtd d | j dtdg d	d
 | j dtdd | j dtdd | j dtdd |  }|jd u rTdddddd}||jd |_|S )N
--parallel@   typedefault--hosthttp://127.0.0.1--port	--backendT)vllmoutlineslightllmgserverr   srt-rawz	llama.cpp)r   requiredr   z--n-ctx   --model-pathr   --result-fileresult.jsonliR  iU  0u  i'  )r   r   r   r   r   )add_argumentintstr
parse_argsrr   r*   backend)r   argsdefault_portr7   r7   r8   add_common_other_args_and_parsez  s0   
r   c               
   C   sL   zt  } W | S  ttfy% } ztd| d d} W Y d}~| S d}~ww )z%Auto-config available device platformz	Warning: z - Falling back to CPUcpuN)r   r0   ImportErrorprint)devicer6   r7   r7   r8   auto_config_device  s   r   c                 C   s   | j dtdd | j dtdd | j dtdd | j dtd	d | j d
tdg ddd | j dtdd | j dtd |  }|S )Nr   r   r   r   r   r   r   r   srt--deviceauto)r   cudarocmr   zFDevice type (auto/cuda/rocm/cpu). Auto will detect available platforms)r   r   r   helpr   r   z--raw-result-file)r   )r   r   r   r   )r   r   r7   r7   r8    add_common_sglang_args_and_parse  s   r   r   c                 C   sx   ddl m} ddlm} | jdr'| jdkrdt_|| j d| j	 }|S | jdr4|| j}|S t
d	| j )
Nr   )OpenAI)RuntimeEndpointr   zsrt-no-parallelF:zgpt-Invalid backend: )sglang.lang.backend.openair   $sglang.lang.backend.runtime_endpointr   r   
startswithr   enable_parallel_encodinghostrr   
ValueError)r   r   r   r   r7   r7   r8   select_sglang_backend  s   

r   c                 C   s   | j dkrtt| j d| j ddS | j dkr&tt| j d| j ddS | j dkr9tt| j d| j ddS | j dkrLtt| j d| j ddS | j dkrpd	d
lm	} |j
| jd| jd}tt|d}|dddd |S td| j  )Nr   r   	/generaterg   r   r   r   r   r   modelsn_gpu_layersn_ctxr   Hello,g      ?   .r   )r   r   rk   r   rr   r   r   r   r   r   LlamaCpprQ   r   r   r   )r   r   r   call_generater7   r7   r8   _get_call_generate  s   




r   c                 C   s   | j dkrtt| j d| j ddS | j dkr&tt| j d| j ddS | j dkrJddlm} |j| j	d	| j
d
}tt|d}|dddg |S td| j  )Nr   r   r   r   r   r   r   r   r   r   r   r   worldearthr   )r   r   r   r   rr   r   r   r   r   rQ   r   r   r   )r   r   r   call_selectr7   r7   r8   _get_call_select  s   


r   c                       t |   fdd}|S )Nc                     0   z | i |W S  t y   tdt    w )NzException in call_generate:
r/   r   r   r   kwargsr   r7   r8   func     zget_call_generate.<locals>.func)r   r   r   r7   r   r8   get_call_generate     r   c                    r   )Nc                     r   )NzException in call_select:
r   r   r   r7   r8   r     r   zget_call_select.<locals>.func)r   r   r7   r   r8   get_call_select  r   r   c                  C   s   dd l } | t}t }|j D ]1\}}t|trBd|v rBd|v rBt|trBd|v r;dd |dD }|	| q|
|  qtt|S )Nr   DEFAULT_MODEL_,c                 S   s   g | ]}|  qS r7   strip)r{   partr7   r7   r8   r~         z'_get_default_models.<locals>.<listcomp>)inspect	getmodule_get_default_modelsset__dict__items
isinstancer   splitupdateaddr   r^   dumpslist)r   current_moduledefault_modelsr   valuepartsr7   r7   r8   r     s    
r   c                 C   s   t | }|r|S | S rw   )rR   )rG   	model_dirr7   r7   r8   try_cached_model   s   r  Fcommand
allow_exitc                    s<   t jt jt jd fdd}tj|d}|  S )Nstdoutstderrc                     s`     \} } d u rtd  d u s rjdkr.t dj d| d|d S )N   r   z exited with code z
stdout=z
stderr=)communicatepollr1   r2   
returncoder/   r  r  r  processr7   r8   _run_and_check(  s   
z.popen_with_error_check.<locals>._run_and_check)target)
subprocessPopenPIPE	threadingThreadstart)r  r  r  tr7   r  r8   popen_with_error_check%  s
   r   model_name_or_pathenv
other_argsc                 C   s  ddl m}m}m}m} ddlm} |pg }d|v pd|v }|r)td|   dS |dd	kr9td
|   dS t	j
| rAdS z|| dd}	|	rPt	j
|	sSW dS W n
 ty^   Y dS w d}
t|D ] \}}|dkr|d t|k r||d   }|dv rd}
 nqe||	}|dur||	|
}|std|
 d|   dS d	|d< ||	}td|
 d|   |S ||	|
}|std|
 d|   dS d	|d< ||	|  ||	}t	j
|	}td|  d| d|
 d |S )a>  
    CI helper: Check if model cache is complete and enable offline mode.

    Uses per-run validation markers that are NOT shared across runners.
    Each runner independently validates its cache using lightweight checks
    before enabling offline mode.

    IMPORTANT: Even if a per-run marker exists, this function ALWAYS validates
    the current launch's requirements (e.g., hf_quant_config.json for modelopt).
    The marker is only a hint that this snapshot was validated earlier in the run.

    Args:
        model_name_or_path: Model identifier or path
        env: Environment dict to modify (will add HF_HUB_OFFLINE=1 if validation passes)
        other_args: Launch command arguments (used to detect quantization requirement)

    Returns:
        Per-run marker path if offline mode was enabled, None otherwise
    r   )_get_per_run_marker_path_read_per_run_marker_write_per_run_markervalidate_cache_lightweight)find_local_repo_dirz--enable-loraz--lora-pathsz.CI_OFFLINE: LoRA enabled, skip offline mode - NHF_HUB_OFFLINE1z@CI_OFFLINE: Subprocess env already has HF_HUB_OFFLINE=1, skip - )revisionFz--quantizationr&   )modelopt_fp4modelopt_fp8modeloptTzYCI_OFFLINE: Per-run marker found but current validation failed (requires_hf_quant_config=z), will use online mode - zYCI_OFFLINE: Per-run marker found and current validation passed (requires_hf_quant_config=z), enabling offline mode - z>CI_OFFLINE: Cache validation failed (requires_hf_quant_config=zLCI_OFFLINE: Enabled HF_HUB_OFFLINE=1 for subprocess - validation passed for z (snapshot=z, requires_hf_quant_config=)),sglang.srt.model_loader.ci_weight_validationr$  r%  r&  r'  sglang.srt.utilsr(  r   r*   rJ   rL   rN   r/   	enumeratery   lowerbasename)r!  r"  r#  r$  r%  r&  r'  r(  is_lora_enabledsnapshot_dirrequires_hf_quant_configr3   argquant_valueper_run_markeris_validmarker_pathsnapshot_basenamer7   r7   r8   *_try_enable_offline_mode_if_cache_complete8  s   


r>  c                 C   s*   |   }dd |D }|D ]}||= q|S )a4  Create a clean subprocess environment without internal CI keys.

    Removes all keys starting with '_CI_OFFLINE_' or 'CI_OFFLINE' to prevent
    leaking implementation details to the server subprocess.

    Args:
        env: Source environment dict

    Returns:
        Clean copy of environment dict
    c                 S   s   g | ]	}| d r|qS ))_CI_OFFLINE_CI_OFFLINE_)r   )r{   kr7   r7   r8   r~     s
    
z0_create_clean_subprocess_env.<locals>.<listcomp>)copy)r"  	child_envkeys_to_removerA  r7   r7   r8   _create_clean_subprocess_env  s   rE  return_stdout_stderrr   c                 C   s   t |}|dd}td| d|  |rMtj| tjtj|ddd}dd	 }tj||j|d
 t	jgfdd
  tj||j|d t	jgfdd
  |S tj| dd|d}|S )ab  Launch server subprocess with clean environment.

    Args:
        command: Command list for subprocess
        env: Environment dict (will be cleaned before use)
        return_stdout_stderr: Optional tuple of (stdout_file, stderr_file) for output capture
        model: Model name for logging

    Returns:
        Started subprocess.Popen object
    r)  rT   z,CI_OFFLINE: Launching server HF_HUB_OFFLINE=z model=Tr&   )r  r  r"  ru   bufsizec                 S   s:   t | jdD ]}|D ]}|| |  q
q|   d S )NrI   )iterreadlinewriteflushclose)srcsinkslinesinkr7   r7   r8   _dump  s   

z%_launch_server_process.<locals>._dumpr   )r  r   daemonNr  r  r"  )rE  r*   r   r  r  r  r  r  r  sysr  r  )r  r"  rF  r   rC  hf_hub_offlineprocrQ  r7   r7   r8   _launch_server_process  s8   	rW  rV  base_urlapi_keytimeout_durationc           	   	   C   s&  t  }t }t  | |k r|  }|dur'dd| fW  d   S z#dd| d}|j| d|dd	}|jd
krIW W d   dS W n
 tjyT   Y nw |  }|durldd| dfW  d   S t d t  | |k sW d   dS W d   dS 1 sw   Y  dS )a$  Wait for server health check to pass.

    Args:
        proc: Server subprocess
        base_url: Base URL for health check
        api_key: Optional API key for authorization
        timeout_duration: Maximum wait time in seconds

    Returns:
        Tuple of (success, error_message)
    NFz Server process exited with code zapplication/json; charset=utf-8zBearer )zContent-TypeAuthorizationz/health_generater  )headersr%   r_   )TNz(Server unexpectedly exited (return_code=r/  
   )Fz0Server failed to start within the timeout period)	r1   perf_counterr)   Sessionr  r*   rc   RequestExceptionr2   )	rV  rX  rY  rZ  
start_timesessionreturn_coder\  r4   r7   r7   r8   _wait_for_server_health  sH   




rd  r   r%   r   pd_separatednum_replicasc
              
   C   s  |pg }|dkrt  }t|}|dt|g7 }|du r!tj }ntj }
|
| |
}d}zddlm} | r?t	| ||}W n t
yY } ztd|  W Y d}~nd}~ww |d\}}}|dd }| on|	du}|ss|rvd	}nd
}dd|d| gdd |D }|s|r|d|d|g n	|d|d|g |r|ddt|	g |r|d|g7 }tdt|  |ddk}t|||| }t||||\}}|s|rtd| d z| du rt|j n|jdd W n t
y } ztd|  W Y d}~nd}~ww |r?tj|r?zt| td W n t
y> } ztd |  W Y d}~nd}~ww d!|d< t|||| }t||||\}}|r\td" |S zt|j W n t
y} } ztd#|  W Y d}~nd}~ww d$|v rt
|d% t||r|S zt|j W n t
y } ztd&|  W Y d}~nd}~ww d$|v rt
|d% t|)'a  Launch a server process with automatic device detection and offline/online retry.

    Args:
        model: Model path or identifier
        base_url: Base URL for the server
        timeout: Timeout for server startup
        api_key: Optional API key for authentication
        other_args: Additional command line arguments
        env: Environment dict for subprocess
        return_stdout_stderr: Optional tuple for output capture
        device: Device type ("auto", "cuda", "rocm" or "cpu")
        pd_separated: Whether to use PD separated mode
        num_replicas: Number of replicas for mixed PD mode

    Returns:
        Started subprocess.Popen object
    r   r   Nr   )r=   z(CI cache validation failed (non-fatal): r   r'   zsglang.launch_pd_serversglang.launch_serverpython3-mr   c                 S      g | ]}t |qS r7   r   rz   r7   r7   r8   r~   z  r   z'popen_launch_server.<locals>.<listcomp>z	--lb-hostz	--lb-portr   r   z--mixedz--num-replicas	--api-keycommand=r)  r*  z#CI_OFFLINE: Offline launch failed (z), retrying with online mode...r  r$   z6CI_OFFLINE: Error cleaning up failed offline process: z=CI_OFFLINE: Invalidated per-run marker due to offline failurez-CI_OFFLINE: Failed to remove per-run marker: rT   z"CI_OFFLINE: Online retry succeededz>CI_OFFLINE: Error killing process after online retry failure: exitedz. Check server logs for errors.z?CI_OFFLINE: Error killing process after first attempt failure: )r   r  r   rJ   environrB  r  sglang.utilsr=   r>  r/   r   r  extendshlexrM   r*   rW  rd  r  r   pidwaitrL   existsremoveTimeoutError)r   rX  r%   rY  r#  r"  rF  r   re  rf  mergedper_run_marker_pathr=   r6   r   r   rr   use_mixed_pd_enginer  offline_enabledr  success	error_msgr7   r7   r8   popen_launch_server1  s   

	



r~  r7   c                 C   s   | d\}}}|dd  }d}	dd|	d| gdd |D }	|	d	|d
|g |r/|	d|g7 }	tdd|	  tj|	d d |d}
|
S )Nr   r'   rg  rh  ri  r   c                 S   rj  r7   rk  rz   r7   r7   r8   r~     r   z*popen_launch_pd_server.<locals>.<listcomp>r   r   rl  rm   rS  )r  rq  r   rM   r  r  )r   rX  r%   rY  r#  r"  r   r   rr   r  r  r7   r7   r8   popen_launch_pd_server  s.   		r  c                 C   s   t jt| t|ddS )Nr   )dim)Fcosine_similaritytorchtensor)vec1vec2r7   r7   r8   get_similarities  s   r  rI   sglang  r      infuniformg      ?          seedc                 C   s   t d+i d|d| dd dd d|d|dd d|d	|d
|d|	d|d|ddd|
dd dd ddd|ddddd|d|dd dddd d|d|d|d d!d"|d#|d$|d%|d&|d'|d(|d)|d*|S ),Nr   rX  r   rr   dataset_namedataset_pathr   	tokenizernum_promptssharegpt_output_lensharegpt_context_lenrandom_input_lenrandom_output_lenrandom_range_ratio        request_ratemultioutput_filedisable_tqdmFdisable_streamreturn_logprobreturn_routed_expertsr  disable_ignore_eosextra_request_bodyapply_chat_templateprofile	lora_namelora_request_distributionlora_zipf_alphaprompt_suffixrI   r   re  gsp_num_groupsgsp_prompts_per_groupgsp_system_prompt_lengsp_question_lengsp_output_lengsp_num_turnsheaderr7   )r	   )rX  r   r  r  r  r  r  r  r  r  r  r  r  r  r   re  r  r  r  r  r  r  r  r  r  r  r7   r7   r8   get_benchmark_args  s   	
 !"#$%&'r  rp   background_taskr  c                    s   |dkrt  }tt| t|d}t|||||||	||
||||d  fdd}zt| }W t|j nt|j w |d |ksHJ |S )Nr   r%   r#  )rX  r  r  r  r  r  r  r  r  r  r  r  r   r  c                     s   rt  } d| _tt| I d H  t }t }r't||nd }z|  tt I d H }W |rC|  |I d H  |S |rQ|  |I d H  w w )N   )	rB  deepcopyr  asyncio	to_threadr   Eventcreate_taskr   )warmup_argsstart_event
stop_eventtask_handleresultr   r  rX  need_warmupr7   r8   _runp  s,   

zrun_bench_serving.<locals>._run	completed)	r   DEFAULT_URL_FOR_TESTr~  !DEFAULT_TIMEOUT_FOR_SERVER_LAUNCHr  r  runr   rs  )r   r  r  other_server_argsr  r  r  r  r  r  r  r  r  r  r   r  r  r  r  ri   r7   r  r8   run_bench_serving@  s>   r  endpointtest_requestsnum_requestsresponse_validatorc                    s|  t  }d}d}g }t 4 I dH p}	|D ]b}
zWt  }|	j|  | |
tjddd4 I dH 2}|jdkrY| I dH }t  }||rY|| d }|| ||7 }|d7 }W d  I dH  n1 I dH siw   Y  W q t	yx   Y qw W d  I dH  n1 I dH sw   Y  t  }|| }|dkr|| }|| }|rt
|d	nd}||||||d
S d|ddddd
S )a  
    Helper function to run API benchmark requests and collect metrics.

    Args:
        base_url: The base URL of the server
        endpoint: The API endpoint to test (e.g., "/v1/score", "/v1/embeddings")
        test_requests: List of request payloads to send
        num_requests: Total number of requests expected
        response_validator: Function to validate if response contains expected data

    Returns:
        Dictionary with benchmark metrics
    r   Nr#   totalr^   r%   r_   rU   r&   _   )r  total_requests
throughputavg_latency_msp95_latency_mssuccessful_requests)r1   	monotonicaiohttpClientSessionrb   ClientTimeoutstatusr^   r   r/   r   
percentile)rX  r  r  r  r  ra  r  total_latency	latenciesrb  request_datarequest_startr4   response_datarequest_end
latency_msend_time
total_timer  avg_latencyp95_latencyr7   r7   r8   _run_api_benchmark_requests  sd   




((
r  rm   r  c           	         s   |du rg }|dkrt  }t t t|d} fdd}zt| }W t|j nt|j w |d |d ks?J |S )zFScore API benchmark function compatible with run_bench_serving patternNr   r  c            
   	      s:  ddl m}  | d}dddg}dfdd	 ri | fd
dtdD |dd}t 4 I d H $}z|j d|tjdddI d H  W n   Y W d   I d H  n1 I d H sdw   Y  g }tD ]} |} fddtD }|||dd}	||	 qotd|dd dI d H S )Nr   get_tokenizerx      i$  i
  <|im_start|>c                    s@    |  }t j|dd}|| kr | t j dd  }|S )z>Generate text with precise token count using replicated token.Fadd_special_tokens)ry   encode)
num_tokensru   actual_tokens)special_tokenr  r7   r8   generate_text_with_token_count  s   zSrun_score_benchmark.<locals>._run_benchmark.<locals>.generate_text_with_token_countc                       g | ]} qS r7   r7   r{   r   r  score_item_tokensr7   r8   r~     s    z?run_score_benchmark.<locals>._run_benchmark.<locals>.<listcomp>r   T)queryr   label_token_idsr   apply_softmaxz	/v1/scorer#   r  r  c                    r  r7   r7   r  r  r7   r8   r~   !  s    c                 S   s   d| v pd| v S )Nr   logprobsr7   respr7   r7   r8   <lambda>5      z=run_score_benchmark.<locals>._run_benchmark.<locals>.<lambda>rX  r  r  r  r  )	&sglang.srt.utils.hf_transformers_utilsr  r(   r  r  rb   r  r   r  )
r  score_query_tokensscore_label_token_idswarmup_datarb  r  r3   r  r   
score_datarX  
batch_sizer   r  r  )r  r  r  r  r8   _run_benchmark  s`   

(
z+run_score_benchmark.<locals>._run_benchmarkr  r  r   r  r~  r  r  r  r   rs  )	r   r  r  r  r  r   r  r  ri   r7   r  r8   run_score_benchmark  s"   	Fr
  c                    s   |du rg }|dkrt  }dg| }t t t|d} fdd}	zt|	 }
W t|j nt|j w |
d |
d ksEJ |
S )	zKEmbeddings API benchmark function compatible with run_bench_serving patternNr   z--is-embeddingr  c               	      s  ddl m}  | fdd}| rS d}t 4 I d H $}z|j d|tjddd	I d H  W n   Y W d   I d H  n1 I d H sNw   Y  g }tD ]}d
krb }n fddtD }|d}|| qYtd|dd dI d H S )Nr   r  c                    s   d} j |dd}||  }|S )z<Generate text with precise token count using special tokens.r  Fr  )r  )r  r  test_tokensru   )r  r7   r8   r  d  s   zXrun_embeddings_benchmark.<locals>._run_benchmark.<locals>.generate_text_with_token_count)inputr   z/v1/embeddingsr#   r  r  r&   c                    s   g | ]} qS r7   r7   r  )
input_textr7   r8   r~     r  zDrun_embeddings_benchmark.<locals>._run_benchmark.<locals>.<listcomp>c                 S   s   d| v S )Nrh   r7   r  r7   r7   r8   r    s    zBrun_embeddings_benchmark.<locals>._run_benchmark.<locals>.<lambda>r   )	r  r  r  r  rb   r  r(   r   r  )r  r  r  rb  r  r3   
input_dataembeddings_datarX  r  input_tokensr   r  r  )r  r  r8   r  ]  sH   

(
z0run_embeddings_benchmark.<locals>._run_benchmarkr  r  r	  )r   r  r  r  r  r  r   server_argsr  r  ri   r7   r  r8   run_embeddings_benchmarkA  s$   

:r  c                 C   sp   t | |t||d}g }z&|D ]}|rt|}	d|	_t|	 t|}
|||
f qW t|j |S t|j w )N)r%   r#  re  r  )	r~  r  rB  r  r  r   r   r   rs  )r   rX  r  benchmark_argsr  re  r  res_lr   r  ri   r7   r7   r8   run_bench_serving_multi  s(   		

	r  c              	   C   s  t  }td| dd |dt|g7 }ddddd	d
dddg	dd |D }| dur0|d| g7 }tj|tjtjd}d}d}d}zf| \}}	|jdd}
|	jdd}td|
 dd td| dd d}t	||
}|rs|d nd}d}t	d| |}|rt
|d}t	d| |}|rt
|d}t
|d}W t|j nt|j w |du s|du s|du rtd| d | d!| |||fS )"zLaunch a offline process with automatic device detection.

    Args:
        device: Device type ("auto", "cuda", "rocm" or "cpu").
                If "auto", will detect available platforms automatically.
    zAuto-configed device: TrK  r   rh  ri  zsglang.bench_one_batchz--batch-sizer*  z--input128z--output8c                 S   rj  r7   rk  rz   r7   r7   r8   r~     r   z'run_bench_one_batch.<locals>.<listcomp>Nr   r  backslashreplaceerrorsOutput: Error: zBenchmark[\s\S]*Totalr   rI   zJ.*?latency: (?P<latency>\d+\.\d+).*?throughput:\s*(?P<throughput>\d+\.\d+)zPrefill.latencyzDecode.r  z2Failed to parse benchmark output. prefill_latency=z, decode_throughput=z, decode_latency=)r   r   r   r  r  r  r  decoderesearchfloatgroupr   rs  r0   )r   r#  r   r  r  prefill_latencydecode_throughputdecode_latencyr  r  outputerrorpatternmatchbench_outputr7   r7   r8   run_bench_one_batch  sf   	

r-  c           
      C   s   ddddddddd	d
d	d| gdd |D }t dd|  tj|tjtjd}zC| \}}|jdd}|jdd}t d| dd t d| dd d}|dD ]}	d|	v ret|	dd }qVW t	|j
 |S t	|j
 w )Nrh  ri  zsglang.bench_offline_throughputz--num-promptsr*  z--dataset-namerp   z--random-input-len256z--random-output-lenr   c                 S   rj  r7   rk  rz   r7   r7   r8   r~     r   z0run_bench_offline_throughput.<locals>.<listcomp>rm  r  r  r  r  r  Tr  r  r   
z#Last generation throughput (tok/s):r   )r   rM   r  r  r  r  r   r  r#  r   rs  )
r   r#  r  r  r  r  r(  r)  output_throughputrO  r7   r7   r8   run_bench_offline_throughput  sB   
r1  c           	      C   sl   ddl m} |d uri tjdt|i}nd }t| |t||d}z|||d W t|j d S t|j w )Nr   r   SIMULATE_ACC_LEN)r%   r#  r"  )r  
bench_args)	sglang.bench_one_batch_serverr   rJ   ro  r   r~  r  r   rs  )	r   rX  r  r3  r  simulate_spec_acc_lensr   r"  r  r7   r7   r8   run_bench_one_batch_server(  s   r6  c                    s   t | }t |  fddt|d D }t|d D ]L}t d D ]C}|dks-|dkr4d|| |< q#| |d  ||d  krQ||d  |d  d || |< q#t||d  | || |d  || |< q#q||   S )Nc                    s   g | ]	}d g d  qS )r   r&   r7   r  rt   r7   r8   r~   G  s    zlcs.<locals>.<listcomp>r&   r   )ry   r(   max)XYmLr3   jr7   r7  r8   lcsD  s   ",r>  c           	      C   s   g }t | |D ]=\}}t||}t|dkr|t| nd}t|dkr*|t| nd}|| dkr=d| | ||  }nd}|| q|S )zcalculate the ROUGE-L scorer   r'   r  )zipr>  ry   r   )	output_strs_list1output_strs_list2rouge_l_scoress1s2lcs_len	precisionrecallfmeasurer7   r7   r8   calculate_rouge_lU  s   
rI  z/tmp/stderr.txtz/tmp/stdout.txtoutput_linesfilenamec              
   C   s   t j|std t j|rd}|dkrf|dkr#t j|s#dS zt| }W n tyA   td|dt j|  w ||d D ]}t|ddd | 	| |d	7 }qHtd
 |dksdS dS )z2Print the output in real time with another thread.g{Gz?r   zpt=z, os.path.exists(filename)=NrI   T)endrK  r&   g?)
rJ   rL   ru  r1   r2   r,   	readlinesFileNotFoundErrorr   r   )rJ  rK  ptlinesrO  r7   r7   r8   read_outputj  s&   



rQ  c                 C   s\  dt |ddg}|r|dg7 }|r|dg7 }|r|dg7 }t}tdd}	d	|	 }
ttd
}ttd
}t||
t|||f|d}g }t	j
t|fd}|  | |
| t|j |  |  tjtrktt tjtrvtt t|j |  d}d}d}|D ]}d|v rd}d|v rd}d|v rd}q|sJ |rJ |r|sJ d S d S )Nz--chunked-prefill-sizez--log-leveldebugz--disable-radix-cachez--enable-mixed-chunkz--disable-overlap-schedulei  i  rV   w)r%   r#  rF  rY  r  r   FzUvicorn runningTleakAbort)r   DEFAULT_MODEL_NAME_FOR_TESTrp   rq   r,   STDOUT_FILENAMESTDERR_FILENAMEr~  r  r  r  rQ  r  r   rs  rL  rJ   rL   ru  rv  rM   )workload_funcdisable_radix_cacheenable_mixed_chunkdisable_overlapchunked_prefill_sizeassert_has_abortrY  r#  r   rr   rX  r  r  r  rJ  r  has_new_serverhas_leak	has_abortrO  r7   r7   r8   run_and_check_memory_leak  sl   












rc  c                 C   s   t td}t td}tj| |||dd}g }tjt|tfd}|  |	  |
  |
  tjtr:tt tjtrEtt t|j |  |S )NrS  T)r  r  r"  ru   rT  )r,   rX  rY  r  r  r  r  rQ  r  rt  rL  rJ   rL   ru  rv  r   rs  rM   )r  r"  r  r  r  rJ  r  r7   r7   r8   run_command_and_capture_output  s$   





rd  c                 C       dd }t || |||dd d S )Nc                 S   s>   t | |dddd}zt|}|d dksJ d|W d S w )Nmmlur  )rX  r   	eval_namenum_examplesnum_threadsscoreg?zmetrics=)r	   r   )rX  r   r   metricsr7   r7   r8   rZ    s   z$run_mmlu_test.<locals>.workload_funcFr_  rc  )r[  r\  r]  r^  rZ  r7   r7   r8   run_mmlu_test  s   
rn  c                 C   re  )Nc                    sR    fdd}t d}t||ttd W d    d S 1 s"w   Y  d S )Nc                    s0   d}t j  d|ddddd}| }d S )N
            System: You are a helpful assistant.
            User: What is the capital of France?
            Assistant: The capital of France is
            r   r   r   rW   rX   r   r]   )r)   rb   r^   )r   rd   r4   retrX  r7   r8   run_one  s   
z>run_mulit_request_test.<locals>.workload_func.<locals>.run_oner'   r  )r   r  mapr(   )rX  r   rs  executorr7   rr  r8   rZ    s   
"z-run_mulit_request_test.<locals>.workload_funcFrl  rm  )r[  r\  enable_overlapr^  rZ  r7   r7   r8   run_mulit_request_test  s   
rw  c                 C   sZ   t jdstd d S tt jd d}||  W d    d S 1 s&w   Y  d S )NGITHUB_STEP_SUMMARYz0GITHUB_STEP_SUMMARY environment variable not seta)rJ   ro  r*   loggingwarningr,   rJ  )r-   fr7   r7   r8   write_github_step_summary,  s   
"r}  selfr8  c              	   C   s  |\}}}}}}t t|}tj| jd |||dd|||dd}	|	 }
|
}| |d d | | |d d | |r| t|d d	 | |d d  | t|d d
 | |r| t|d d | |d d  | t|d d | t|D ]r}| t|d d | | |dkrd}|t|d d | k rz| |d d
 | |d d | |  W n5 t	y   |d d | | d |d d | |d  d kr|d7 }n Y nw |t|d d | k sqd S d S d S )Nr   T)rW   rX   
ignore_eos)	input_idsr   r  logprob_start_lentop_logprobs_numr]   	meta_infoprompt_tokenscompletion_tokensinput_token_logprobsoutput_token_logprobsinput_top_logprobsoutput_top_logprobsr   r&   )
r  r(   r)   rb   rX  r^   assertEqualry   assertListEqualAssertionError)r~  r8  	input_len
output_lenrW   r  r  r  r  r4   response_jsonri   r3   rankr7   r7   r8   run_logprob_check5  s   


r  c                    s"    fddfddt |D S )zOSends generate request serially and returns status codes. Max concurrency is 1.c                     s*   d} t j  d| ddddd}|jS )Nz
        System: You are a helpful assistant.
        User: What is the capital of France?
        Assistant: The capital of France is
        r   r   r  rp  r   r]   )r)   rb   rc   )rd   r4   rr  r7   r8   generate  s   
z(send_generate_requests.<locals>.generatec                    s   g | ]}  qS r7   r7   r  )r  r7   r8   r~     s    z*send_generate_requests.<locals>.<listcomp>)r(   )rX  r  r7   )rX  r  r8   send_generate_requests  s   r  c                    s4   fdd  fddt |D }tj| I dH S )z^Sends generate request concurrently and returns status codes. Max concurrency is num_requests.c               
      s   t  4 I d H E} d}| j  d|ddddd4 I d H }|jW  d   I d H  W  d   I d H  S 1 I d H s>w   Y  W d   I d H  d S 1 I d H sTw   Y  d S )Nro  r   r   r  rp  r   r]   )r  r  rb   r  )rb  rd   r4   rr  r7   r8   async_generate  s"   	.z9send_concurrent_generate_requests.<locals>.async_generatec                    s   g | ]}t   qS r7   )r  r  r  )r  r7   r8   r~         z5send_concurrent_generate_requests.<locals>.<listcomp>N)r(   r  gather)rX  r  tasksr7   )r  rX  r8   !send_concurrent_generate_requests  s   r  custom_paramsc                    sb   ddddd} fdd}g }|D ]}|  }|| |t|| qtj| I dH S )	zSends generate request concurrently with custom parameters and returns status code and response json tuple. Max concurrency is num_requests.z
                System: You are a helpful assistant.
                User: What is the capital of France?
                Assistant: The capital of France is
                r   r  rp  r   c              
      s   t  4 I d H F}|j  d| d4 I d H !}| I d H }|j|fW  d   I d H  W  d   I d H  S 1 I d H s?w   Y  W d   I d H  d S 1 I d H sUw   Y  d S )Nr   r]   )r  r  rb   r^   r  )reqrb  r4   	resp_jsonrr  r7   r8   async_generate_with_priority  s   .zZsend_concurrent_generate_requests_with_custom_params.<locals>.async_generate_with_priorityN)rB  r  r   r  r  r  )rX  r  base_payloadr  r  cr  r7   rr  r8   4send_concurrent_generate_requests_with_custom_params  s   	
r  r'   ncclc              
      s   ddl m} |d}|  td}g }t|D ]}|jt|||||  |fd}	|	  |	|	 q|D ]}	|	
  q5 fddt|D }
dd |
D }
|
rWtd	
|
dS )
zSpawn ``world_size`` processes, initialise torch.distributed in each,
    run *func(rank, **kwargs)*, and propagate any worker exception to the caller.
    r   Nspawni<s  rT  c                    s   g | ]}   qS r7   )r*   r  result_queuer7   r8   r~     r   z(run_distributed_test.<locals>.<listcomp>c                 S   s   g | ]}|r|qS r7   r7   )r{   r6   r7   r7   r8   r~     r   r/  )torch.multiprocessingmultiprocessingget_contextQueuers   r(   Process_distributed_workerr  r   rM   r  )r   
world_sizer   r   mpctxrr   	processesr  pr  r7   r  r8   run_distributed_test  s&   

r  c           
      C   s   dd l }dd lm} |dkrtj|  |j|d| || d z@z|| fi | |d  W n" tyR }	 z|d|  d|	 d|	   W Y d }	~	nd }	~	ww W |
  d S W |
  d S |
  w )Nr   r  ztcp://127.0.0.1:)r   init_methodr  r  zRank : r/  )	tracebacktorch.distributeddistributedr  r   
set_deviceinit_process_groupputr/   
format_excdestroy_process_group)
r  r  r   rr   r   r  r   r  distr6   r7   r7   r8   r    s*   ,r  c                       s$   e Zd Z fddZdd Z  ZS )CustomTestCasec                    s<   t j }|d u rt rdnd}t fdd|d d S )Nr&   r   c                      s   t tS rw   )superr  _callTestMethodr7   )	__class__methodr~  r7   r8   r    r  z0CustomTestCase._callTestMethod.<locals>.<lambda>)	max_retry)r   SGLANG_TEST_MAX_RETRYr*   r=   r   )r~  r  r  r  )r  r~  r8   r  
  s   

zCustomTestCase._callTestMethodc                 C   s"   t d| jj d| j dd d S )Nz[CI Test Method] r   Tr  )r   r  __name___testMethodName)r~  r7   r7   r8   setUp  s   
zCustomTestCase.setUp)r  
__module____qualname__r  r  __classcell__r7   r7   r  r8   r  	  s    	r  rL   c           	      C   s   | sd S g }t t|D ]$}|| }|d }t| |}|t|||t|| || kd qtd|   t| 	d
dd |D  d S )Nr   )	prompt_idrd   r(  correctz%BenchRawResultDumper save results to r/  c                 s   s    | ]}t |V  qd S rw   )r^   r  )r{   rowr7   r7   r8   	<genexpr>2  s    z(dump_bench_raw_result.<locals>.<genexpr>)r(   ry   _ensure_remove_suffixru   r   dictboolr   r   
write_textrM   )	rL   statespredslabelsrowsr3   stater(  rd   r7   r7   r8   dump_bench_raw_result  s"   	"r  ru   suffixc                 C   s   |  |sJ | |S rw   )endswithremovesuffix)ru   r  r7   r7   r8   r  5  s   
r  c                   @   sD   e Zd Z				d
dededeee  dee dee f
dd	ZdS )ModelLaunchSettingsr&   NrQ   tp_size
extra_argsr"  variantc                 C   s   || _ || _|rt|ng | _|| _|| _| jdkr*d| jvr*| jdt| jg ddg}|D ]}|| jvr=| j| q0d S )Nr&   z--tpz--enable-multimodal--trust-remote-code)	rQ   r  r  r  r"  r  rq  r   r   )r~  rQ   r  r  r"  r  
fixed_args	fixed_argr7   r7   r8   __init__;  s   
zModelLaunchSettings.__init__)r&   NNN)	r  r  r  r   r   r   r   r  r  r7   r7   r7   r8   r  :  s     
r  c                   @   s   e Zd ZdedefddZdS )ModelEvalMetricsaccuracy	eval_timec                 C   s   || _ || _d S rw   )r  r  )r~  r  r  r7   r7   r8   r  S  s   
zModelEvalMetrics.__init__N)r  r  r  r#  r  r7   r7   r7   r8   r  R  s    r  r(  c                 C   s"   t d| }|r|d}|S d S )Nz\[Profile\]\((.*?)\)r&   )r!  r"  r$  )r(  r+  
trace_linkr7   r7   r8   5extract_trace_link_from_bench_one_batch_server_outputX  s
   
r  model_stringc                 C   s   dd |  dD S )Nc                 S   s   g | ]
}|  r|  qS r7   r   )r{   r   r7   r7   r8   r~   a  r   z parse_models.<locals>.<listcomp>r   )r  )r  r7   r7   r8   parse_models`  s   r  c                 C   s&  g }|durd}|d7 }nd}|d7 }dd | D }t | D ]\}}	|dur-||nd}
||v r;||d	d
 nd}||v r|du r|| \}}}||	koS||
k}|rXdnd}|s||	k rt|d| d| d|dd|	dd	 ||
kr|d| d| d|dd|
dd	 |durd| d| d| d|	 d| d|
 d}nFd| d| d| d|	 d	}n6d}|r|nd}|d|  |durd| d| d|	 d|
 d| d}nd| d| d|	 d| d	}||7 }q t| t rtd| d|  |rtd td|dS ) zm
    results: list of tuple of (model_path, accuracy, latency) or (model_path, accuracy, latency, error)
    NzU | model | status | score | score_threshold | latency | latency_threshold | error | 
zT| ----- | ------ | ----- | --------------- | ------- | ----------------- | ----- | 
z7 | model | status | score | score_threshold | error | 
z6| ----- | ------ | ----- | --------------- | ----- | 
c                 S   s8   i | ]}|d  |d |d t |dkr|d ndfqS )r   r&   r'   r  r   Nrx   )r{   ri   r7   r7   r8   
<dictcomp>v  s    ,z1check_evaluation_test_results.<locals>.<dictcomp>g    eA)NNNr'   u   ✅u   ❌z
Score Check Failed: z
Model z score (z.4fz) is below threshold (r/  z
Latency Check Failed: z
 latency (z) is above threshold (z| z | z | - |
zModel not evaluatedz(Model failed to launch or be evaluated: z	 | N/A | z |
z## r/  z"Some models failed the evaluation.)	sortedr   r*   r   r   r=   r}  r  rM   )results	test_namemodel_accuracy_thresholdsmodel_latency_thresholdsmodel_countfailed_modelssummaryresults_dictr   accuracy_thresholdlatency_thresholdr)  r  r  r   
is_successstatus_emojirO  error_displayr7   r7   r8   check_evaluation_test_resultsd  sx   


	, &
r  r   default_valc                 C   s"   t j| |}dd |dD S )Nc                 S   s   g | ]}|rt |qS r7   )r   rz   r7   r7   r8   r~     r  z'_parse_int_list_env.<locals>.<listcomp>r   )rJ   ro  r*   r  )r   r  valr7   r7   r8   _parse_int_list_env  s   r  c                 C   s>   g }t | D ]\}}}|D ]}|dr||  qq|S )Nz.trace.json.gz)rJ   walkr  r   )rL   r  r   dirsfilesfiler7   r7   r8   find_traces_under_path  s   
r   ry  c                 C   s   t   | ||d d}d|v r|df|d< g }|dkrNtjdrNztdd}t	|}W d    n1 s;w   Y  W n tj
yM   g }Y nw t|trY|| n|g}tdd}tj||dd	 W d    d S 1 suw   Y  d S )
Nrj  )	timestampr   rk  rj  r  ry  zresults.jsonrrS  r'   )indent)r   now	isoformatr*   rJ   rL   ru  r,   r^   r.   JSONDecodeErrorr   r  r   dump)r   rk  moder  existing_resultsr|  r7   r7   r8   write_results_to_json  s.   

"r
  c                    s    fdd}|S )Nc                    s   t   fdd}|S )Nc                    s   g d}| pg  }| }t ||\}}}td| td| td| td| t rAd urC| | d S d S d S )N)z--attention-backend	intel_amxz--disable-radixr  zmodel=zprefill_latency=zdecode_throughput=zdecode_latency=)r-  r   r=   assertGreater)r~  common_args	full_argsr   r%  r&  r'  )r  min_throughput	test_funcr7   r8   wrapper  s   
z7intel_amx_benchmark.<locals>.decorator.<locals>.wrapper)r   )r  r  r  r  )r  r8   	decorator  s   z&intel_amx_benchmark.<locals>.decoratorr7   )r  r  r  r7   r  r8   intel_amx_benchmark  s   r  c                  C   s    t  dkr	d} | S tj } | S )Nr   r   )r   r  acceleratordevice_count)	gpu_countr7   r7   r8   get_gpu_count  s
   

r  c                   C   sh   t tdrt tjdrtj S t tdr!tj r!tj  dS t tdr2tj r2tj  dS dS )z
    Unified empty_cache for PyTorch 2.8 (no torch.accelerator)
    and PyTorch 2.9+ (where torch.accelerator.empty_cache() exists).
    r  empty_cacher   Nxpu)hasattrr  r  r  r   is_availabler  r7   r7   r7   r8   empty_gpu_cache	  s   


r  c                   C   s,   t  r
tj d S t rtj d S dS )Ni   @r   )r   r  r   device_memory_usedr   r  memory_allocatedr7   r7   r7   r8   get_gpu_memory_gb	  s
   r   r   .c                 C   s   t | }t|j}t }tjdd}|j| | j|d}t	|dks+J d| j |D ]}|
|}|jdksBJ d|j dq-d S )	NT)verbose)globsr&   zNo tests found for r   zTest z failed)r   r   r  r   doctestDocTestFinderDocTestRunnerfindr  ry   r  failedr   )r   modglobalsfinderrunnerteststestr  r7   r7   r8   run_doctests"	  s   


r.  metric_namer  r  c              
   C   s  zt  \}}t|dr| }t|trt|}n!t|tttfr$|}nzt|}W n tt	fy9   t|}Y nw ||| |t

 d}d}|rmztj|dd ||d< |}W n t	tfyl   t|}||d< |}Y nw td}	|	rz/|	 dt  d	}
t|
d
dd}|tj|ddd  W d   n1 sw   Y  W n ty } ztd|
 d|  W Y d}~nd}~ww |rt|trd| d}ndtj|dd }nd}td|  d| |  W dS  ty } ztjd|  d| dd W Y d}~dS d}~ww )a  
    Output test metric to JSONL and stdout for CI performance tracking.

    Schema (v1):
      - Required: filename, test_case, metric_name, value
      - Optional fields supported: ts, labels
        - ts is emitted by default for convenience
        - labels preferred as dict; if not JSON-serializable, stored as string

    Value types (v1 contract):
      - Supported: int, float, str
      - Input may be bool (will be coerced to int: True=1, False=0)
      - Others: best-effort conversion to float, fallback to str

    Output channels:
      - JSONL: ${SGLANG_TEST_METRICS_OUTPUT}.${pid}.jsonl (if env var set)
      - stdout: [METRIC] metric_name=value [labels=...]

    This function never fails tests - all errors are silently caught.

    Args:
        metric_name: Metric name (e.g., "gsm8k_accuracy", "cache_hit_rate")
        value: Metric value
        labels: Optional label dict (e.g., {"backend": "fa3"})
    item)rK  	test_caser/  r  tsNF)ensure_asciir  SGLANG_TEST_METRICS_OUTPUTr   z.jsonlry  zutf-8)encodingr/  z,sglang.test.dump_metric: failed to write to r  z	 labels=''z labels=rI   z	[METRIC] =z0sglang.test.dump_metric: failed to dump metric 'z': T)exc_info)_get_test_contextr  r0  r   r  r   r#  r   r   	TypeErrorr1   r^   r  rJ   rK   getpidr,   rJ  r/   rz  r{  r   )r/  r  r  rK  r1  converted_valuerecordlabels_for_outputstringified	base_path
jsonl_pathr|  r6   
labels_strr7   r7   r8   dump_metric.	  st   



	

rC  c                  C   s   t d} | r*| dd dd}t|dkr*t|d }|d dd}||fS ddl}| }|ri|jri|jjri|jj}t|j	j
}|jd	}|rat|d
ra|jj d|j	j }||fS |j	j}||fS dS )zy
    Get current test's filename and test_case.

    Tries PYTEST_CURRENT_TEST first, falls back to inspect.stack().
    PYTEST_CURRENT_TESTr  r   z::r&   r'   r   Nr~  r  )z
unknown.pyunknown_test)rJ   rK   r  ry   _repo_relative_pathreplacer   currentframef_backf_codeco_filenamef_localsr*   r  r  r  co_name)pytest_currentr	  rK  r1  r   framecaller	test_selfr7   r7   r8   r9  	  s&   
r9  filepathc                 C   s   z=t |  }td}|r%zt|t | W W S  ty$   Y nw zt|t  W W S  ty=   |j Y W S w  t	yK   t | j Y S w )zCConvert absolute path to repo-relative, preferring GITHUB_WORKSPACEGITHUB_WORKSPACE)
r   resolverJ   rK   r   relative_tor   cwdr   r/   )rR  abs_path	workspacer7   r7   r8   rF  	  s"   
rF  )r   )NN)Nr&   N)NNr&   N)Nr&   NNrw   )F)NNNNr   FN)Nr7   N)rp   rI   Nr   r  NFFFr   r   NN)rm   r  NFr   )rm   r&   r  NFr   )FF)FFFr  )r'   r  )ry  )__doc__argparser  rB  r#  r   r^   rz  rJ   rp   r!  rr  r  rT  r  r1   unittestconcurrent.futuresr   r   	functoolsr   r   ior   pathlibr   typesr   r	   typingr
   r   r   r   r   r   r  numpyr   r)   r  torch.nn.functionalnn
functionalr  PILr   sglang.bench_servingr   sglang.global_configr   sglang.srt.environr   r1  r   r   r   r   r   r   r   sglang.test.run_evalr   rp  r   rW  !DEFAULT_SMALL_MODEL_NAME_FOR_TEST&DEFAULT_SMALL_MODEL_NAME_FOR_TEST_BASE'DEFAULT_SMALL_MODEL_NAME_FOR_TEST_SCOREDEFAULT_MOE_MODEL_NAME_FOR_TEST*DEFAULT_SMALL_MOE_MODEL_NAME_FOR_TEST_BASE*DEFAULT_SMALL_MOE_MODEL_NAME_FOR_TEST_CHAT+DEFAULT_SMALL_EMBEDDING_MODEL_NAME_FOR_TEST/DEFAULT_SMALL_CROSS_ENCODER_MODEL_NAME_FOR_TESTDEFAULT_MLA_MODEL_NAME_FOR_TEST#DEFAULT_MLA_FP8_MODEL_NAME_FOR_TESTDEFAULT_MODEL_NAME_FOR_TEST_MLA%DEFAULT_MODEL_NAME_FOR_TEST_MLA_NEXTN(DEFAULT_HYBRID_MAMBA_MODEL_NAME_FOR_TEST!DEFAULT_MODEL_NAME_FOR_TEST_VL_PP&DEFAULT_MODEL_NAME_FOR_TEST_GLM_41V_PP%DEFAULT_DEEPSEEK_NVFP4_MODEL_FOR_TEST%DEFAULT_MODEL_NAME_FOR_TEST_MOE_NVFP4DEFAULT_MODEL_NAME_FOR_TEST_FP8(DEFAULT_MODEL_NAME_FOR_ACCURACY_TEST_FP86DEFAULT_MODEL_NAME_FOR_DYNAMIC_QUANT_ACCURACY_TEST_FP87DEFAULT_MODEL_NAME_FOR_MODELOPT_QUANT_ACCURACY_TEST_FP8$DEFAULT_MODEL_NAME_FOR_TEST_QWEN_FP8(DEFAULT_MODEL_NAME_FOR_TEST_FP8_WITH_MOE*DEFAULT_MODEL_NAME_FOR_TEST_MXFP4_WITH_MOE DEFAULT_MODEL_NAME_FOR_TEST_W8A8)DEFAULT_MODEL_NAME_FOR_TEST_W8A8_WITH_MOE$DEFAULT_MODEL_NAME_FOR_TEST_AWQ_INT4DEFAULT_TARGET_MODEL_EAGLEDEFAULT_DRAFT_MODEL_EAGLEDEFAULT_TARGET_MODEL_EAGLE3DEFAULT_DRAFT_MODEL_EAGLE3"DEFAULT_TARGET_MODEL_EAGLE_DP_ATTN!DEFAULT_DRAFT_MODEL_EAGLE_DP_ATTNDEFAULT_TARGET_MODEL_STANDALONEDEFAULT_DRAFT_MODEL_STANDALONEDEFAULT_TARGET_MODEL_NGRAM%DEFAULT_AUTOROUND_MODEL_NAME_FOR_TEST+DEFAULT_MODEL_NAME_FOR_TEST_LOCAL_ATTENTION%DEFAULT_REASONING_MODEL_NAME_FOR_TEST"DEFAULT_DEEPEP_MODEL_NAME_FOR_TEST(DEFAULT_DEEPEP_MODEL_NAME_FOR_TEST_NEXTN#DEFAULT_AWQ_MOE_MODEL_NAME_FOR_TEST+DEFAULT_ENABLE_THINKING_MODEL_NAME_FOR_TEST&DEFAULT_DEEPSEEK_W4AFP8_MODEL_FOR_TEST1DEFAULT_ENABLE_ROUTED_EXPERTS_MODEL_NAME_FOR_TEST'DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_TP1'DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_TP2+DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_FP8_TP1+DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_FP8_TP2-DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_QUANT_TP1&DEFAULT_SMALL_MODEL_NAME_FOR_TEST_QWEN%DEFAULT_SMALL_VLM_MODEL_NAME_FOR_TESTDEFAULT_IMAGE_URLDEFAULT_VIDEO_URLr  r   r   r9   r=   r@   rC   rF   rR   ro  r*    DEFAULT_PORT_FOR_SRT_TEST_RUNNERr  rk   rs   r   r   r   r   r   r   r   ArgumentParserr   r   r   	Namespacer   r   r   r   r   r   r  r  r  r   r  r>  rE  tupler  rW  r#  rd  r~  r  r  r  r  r  r  r
  r  r  r-  r1  r6  r>  rI  rY  rX  rQ  rc  rd  rn  rw  r}  TestCaser  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r   r
  r  r  r  r   r.  rC  r9  rF  r7   r7   r7   r8   <module>   s    $		%

}
7
4
	

  
)
L
Q
Kid"@,
K!'	K


#
O	"	(d&