o
    }oiX                     @   s   d dl Z d dlZd dlmZ d dlmZmZmZmZ d dl	m
Z
 d dlmZ dd Zdd	 Zed
kr}e ZeeZed e
ddZerned eejdZeedZeejdZeejedZeeed ed ned e  e  dS dS )    N)evaluate)ApiEndpointConfigParamsEvaluationConfigEvaluationTarget)wait_for_server_ready)loggingc                  C   sd   t jdd} | jdtdd | jdtdd | jdtd	d | jd
tdd | jdtdd |  S )NzHTest evaluation with lm-eval-harness on nemo2 model deployed on PyTriton)description--nemo2_ckpt_pathzNeMo 2.0 ckpt path)typehelp--max_batch_sizez#Max BS for the model for deployment--trtllm_dirzjFolder for the trt-llm conversion, trt-llm engine gets saved                         in this specified dirz--eval_typez0Evaluation benchmark to run from lm-eval-harnessz--limitz*Limit evaluation to `limit` num of samples)argparseArgumentParseradd_argumentstrint
parse_args)parser r   [/home/ubuntu/.local/lib/python3.10/site-packages/tests/evaluation/test_evaluation_legacy.pyget_args   s   r   c              
   C   s$   t ddd| jdt| jd| jgS )Npythonz!tests/evaluation/deploy_script.pyr
   r   r   )
subprocessPopennemo2_ckpt_pathr   max_batch_size
trtllm_dir)argsr   r   r   
run_deploy*   s   r    __main__zWaiting for server readiness...   )max_retrieszStarting evaluation...)nemo_checkpoint_path)api_endpoint)limit_samples)r   params)
target_cfgeval_cfgzEvaluation completed.zServer is not ready.)r   r   nemo.collections.llmr   #nemo.collections.llm.evaluation.apir   r   r   r   $nemo.collections.llm.evaluation.baser   
nemo.utilsr   r   r    __name__r   deploy_procinfoserver_readyr   r%   eval_targetlimiteval_params	eval_typeeval_configerror	terminatewaitr   r   r   r   <module>   s2   




