o
    پi                     @   sX   d dl Z d dlZd dlZd dlZd dlmZ d dlmZmZm	Z	m
Z
 G dd de	ZdS )    N)kill_process_tree)!DEFAULT_TIMEOUT_FOR_SERVER_LAUNCHDEFAULT_URL_FOR_TESTCustomTestCasepopen_launch_serverc                
   @   sl   e Zd ZdZdZg dZeZedd Z	ddde
d	e
d
e
dedB fddZ					dddZdd ZdS )TestVLMModels g        )z--trust-remote-codez--cuda-graph-max-bs32z--enable-multimodalz--mem-fraction-staticgffffff?z--log-levelinfoz--attention-backendascendz--disable-cuda-graphz	--tp-size   c                 C   s.   t | _d| _| jtjd< | j dtjd< d S )Nz	sk-123456OPENAI_API_KEYz/v1OPENAI_API_BASE)r   base_urlapi_keyosenviron)cls r   P/home/ubuntu/.local/lib/python3.10/site-packages/sglang/test/ascend/vlm_utils.py
setUpClass#   s   zTestVLMModels.setUpClassN)envmodel_versionoutput_pathlimitr   c                C   sz   d}d}d}d}d}	t j|dd d| d| }
d	d
dd|d|
d|dt|dd|	dt|d|ddg}tj|ddd dS )u   
        Evaluate a VLM on the MMMU validation set with lmms‑eval.
        Only `model_version` (checkpoint) and `chat_template` vary;
        We are focusing only on the validation set due to resource constraints.
        openai_compatible   mmmu_val   T)exist_okzmodel_version="z",tp=python3z-m	lmms_evalz--modelz--model_argsz--tasksz--batch_sizez--log_samplesz--log_samples_suffixz--output_pathz--limitz--configzB/__w/sglang/sglang/test/registered/ascend/vlm_models/mmmu-val.yamli  )checktimeoutN)r   makedirsstr
subprocessrun)selfr   r   r   r   modeltptasks
batch_size
log_suffix
model_argscmdr   r   r   run_mmmu_eval-   sB   
zTestVLMModels.run_mmmu_eval./logsF50c                 C   s  t d| j |  d}d}zĐztj }|r|| d}	d}
|r.tdd}	tdd}
t| j| j| j	| j
| j||r@|	|
fndd}| | j|| t| dd	 }t|d
}t|}t d| d|  W d   n1 suw   Y  |d d d }t d| j d| d|d |r|r|  }| || jd| j d|dd| jdd|  |W W |dur| du rt d|j  zt|j W n ty } zt d|  W Y d}~nd}~ww |r&|	r|	  |
r|
  dD ].}ztj|rt| W q ty% } zt d| d|  W Y d}~qd}~ww S S  tyU } z!t d| j | d|  | d| j | d|  W Y d}~nd}~ww W |dur| du rt d|j  zt|j W n ty } zt d|  W Y d}~nd}~ww |r|	r|	  |
r|
  dD ]1}ztj|rt| W q ty } zt d| d|  W Y d}~qd}~ww dS dS |dur
| du r
t d|j  zt|j W n ty	 } zt d|  W Y d}~nd}~ww |rM|	r|	  |
r|
  dD ]0}ztj|r,t| W q tyL } zt d| d|  W Y d}~qd}~ww w w )aT  
        Common method to run VLM MMMU benchmark test.
        Args:
            model: Model to test
            output_path: Path for output logs
            test_name: Optional test name for logging
            custom_env: Optional custom environment variables
            capture_output: Whether to capture server stdout/stderr
        z
Testing model: Nr   /tmp/server_stdout.logw/tmp/server_stderr.log)r   r#   r   
other_argsr   return_stdout_stderrz/*.jsonr   rResultz
: resultsr   zmmmu_acc,nonezModel z achieved accuracyz: z.4fz accuracy (z) below expected threshold ()zCleaning up process zError killing process: )r3   r5   zError removing zError testing zTest failed for )printr)   r   r   copyupdateopenr   r   timeout_for_server_launchr   r6   r0   globjsonload_read_output_from_filesassertGreaterEqualmmmu_accuracypollpidr   	Exceptionclosepathexistsremovefail)r(   r   	test_name
custom_envcapture_outputr   processserver_outputprocess_envstdout_filestderr_fileresult_file_pathfresultrF   efilenamer   r   r   _run_vlm_mmmu_testc   s   




"
 (
"
"z TestVLMModels._run_vlm_mmmu_testc                 C   s   g }ddg}|D ]P\}}z.t j|r9t|d}|D ]}|| d|   qW d    n1 s4w   Y  W q tyX } ztd|  d|  W Y d }~qd }~ww d	|S )N)r3   z[STDOUT])r5   z[STDERR]r8    zError reading z file: 
)
r   rK   rL   r?   appendrstriprI   r<   lowerjoin)r(   output_lines	log_filesr[   tagrX   linerZ   r   r   r   rD      s$   $
z%TestVLMModels._read_output_from_files)r1   r   NFr2   )__name__
__module____qualname__r)   rF   r6   r   r@   classmethodr   r%   dictr0   r\   rD   r   r   r   r   r      s0    

8
fr   )rA   rB   r   r&   sglang.srt.utilsr   sglang.test.test_utilsr   r   r   r   r   r   r   r   r   <module>   s    