o
    پi                     @   s"  d Z ddlZddlZddlZddlZddlZddlmZ ddl	m
Z
 ddlmZmZmZ dZdd Zd	d
 Zdd Zdd Zedkre Zejdedd ejded ejdedd ejdedd ejdedd ejdedd ejdedd ejded d e Zee dS dS )!zc
Run few-shot GSM-8K evaluation.

Usage:
python3 -m sglang.test.few_shot_gsm8k --num-questions 200
    N)set_default_backend)RuntimeEndpoint)download_and_cache_filedump_state_text
read_jsonliigc                 C   s0   d| | d  d }|r|d| | d  7 }|S )Nz
Question: questionz
Answer: answer )linesiinclude_answerretr
   r
   N/home/ubuntu/.local/lib/python3.10/site-packages/sglang/test/few_shot_gsm8k.pyget_one_example   s   r   c                 C   s*   d}t |D ]}|t| |dd 7 }q|S )N Tz

)ranger   )r   kr   r   r
   r
   r   get_few_shot_examples   s   r   c                 C   sP   |  dd} td| }t|dk rtS zt|d W S  ty'   t Y S w )N,r   z\d+   )replacerefindalllenINVALIDastliteral_evalSyntaxError)
answer_strnumbersr
   r
   r   get_answer_value$   s   r"   c              
      s  t t j d j   jd u rd}t|}n j}tt|} j} j	}t
||g }g }tt|d | D ]}|t||d |t|| d  q;tdd |D s]J dd |D }	d	d lj fd
d}
t }|
j|	t dr jnd	 jdt dd t dd d}t | }g }tt|D ]}|t|| d  qtt|t|k}tt|tk}tdd |D }|| }td|d td|d td|dd td|dd t d| ||||dS )N:zchttps://raw.githubusercontent.com/openai/grade-school-math/master/grade_school_math/data/test.jsonlFr	   c                 s   s    | ]}|t kV  qd S )N)r   ).0lr
   r
   r   	<genexpr>F   s    zrun_eval.<locals>.<genexpr>c                 S   s   g | ]}d |iqS )r   r
   )r$   qr
   r
   r   
<listcomp>G   s    zrun_eval.<locals>.<listcomp>r   c                    s*   | | 7 } | j d jg dd7 } d S )Nr	   )Questionz
Assistant:z<|separator|>)
max_tokensstop)genmax_new_tokens)sr   argsfew_shot_examplessglr
   r   few_shot_gsm8kO   s   z run_eval.<locals>.few_shot_gsm8ktemperatureTreturn_logproblogprob_start_len)r4   num_threadsprogress_barr5   r6   c                 s   s    | ]
}| d d V  qdS )r	   completion_tokensN)get_meta_info)r$   r.   r
   r
   r   r&   t   s    
z
Accuracy: z.3fz	Invalid: z	Latency: z szOutput throughput: z token/sztmp_output_gsm8k.txt)accuracyinvalidlatencyoutput_throughput)!r   r   hostport	data_pathr   listr   num_questions	num_shotsr   r   r   appendr   r"   allsglangfunctiontimeperf_counter	run_batchhasattrr4   parallelgetattrnpmeanarrayr   sumprintr   )r0   urlfilenamer   rC   rD   	questionslabelsr   	argumentsr3   ticstatesr=   predsaccr<   num_output_tokensr>   r
   r/   r   run_eval/   s`   





r^   __main__z--num-shots   )typedefaultz--data-path)ra   z--num-questions   z--max-new-tokensi   z
--parallel   z--hostzhttp://127.0.0.1z--porti0u  z--temperatureg        )__doc__argparser   r   rI   numpyrO   sglang.lang.apir   $sglang.lang.backend.runtime_endpointr   sglang.utilsr   r   r   r   r   r   r"   r^   __name__ArgumentParserparseradd_argumentintstrfloat
parse_argsr0   r
   r
   r
   r   <module>   s6    [