o
    }oiS                     @   sN  d dl Z d dlZd dlZd dlZd dlZd dlZd dlm  m	Z
 d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZmZ d dlmZ d d	lmZ G d
d deZ				d(dedededefddZ					d)dededededef
ddZdd Z de!ee"e f fdd Z#d!edefd"d#Z$d$ed%efd&d'Z%dS )*    N)Instance)LM)tqdm)AutoTokenizer)SentencePieceTokenizer)EvaluationConfigEvaluationTarget)NemoQueryLLM)loggingc                       sr   e Zd ZdZ fddZddedefddZd	d
 Zdee	 fddZ
dee	 fddZdee	 fddZ  ZS )NeMoFWLMEvala]  
    NeMoFWLMEval is a wrapper class subclassing lm_eval.api.model.LM class, that defines how lm_eval interfaces with
    NeMo model deployed on PyTriton server.
    Created based on: https://github.com/EleutherAI/lm-evaluation-harness/blob/v0.4.4/docs/model_guide.md
    This class is deprecated and not used for evaluation with nvidia-lm-eval
    c
           
         sT   t jdtdd || _|| _|| _|| _|| _|| _|| _	|| _
|	| _t   d S )NzNeMoFWLMEval is deprecated and will be removed in 25.06. Please refer to https://github.com/NVIDIA/NeMo/blob/main/docs/source/evaluation/evaluation_doc.rst and update your code accordingly   )
stacklevel)warningswarnDeprecationWarning
model_nameapi_url	tokenizer
batch_sizemax_tokens_to_generatetemperaturetop_ptop_kadd_bossuper__init__)
selfr   r   r   r   r   r   r   r   r   	__class__ X/home/ubuntu/.local/lib/python3.10/site-packages/nemo/collections/llm/evaluation/base.pyr   ,   s   zNeMoFWLMEval.__init__Fsingle_prediction_tokenreturn_logitsc              
   C   s   t | j|d d}d}d}|r|rd}nd}|jt|d tr#|d n|d g|d |d |d |d	 ||dd
}|rM|rE|d d d S |d d d S t|d d d S )z
        A private method that sends post request to the model on PyTriton server and returns either generated text or
        logits.
        model)urlr   FTprompt
max_tokensr   r   r   )promptsmax_output_lenr   r   r   output_context_logitsoutput_generation_logitsopenai_format_responsechoicesr   context_logitsgeneration_logitstext)r	   r   	query_llm
isinstanceliststr)r   payloadr!   r"   nqr)   r*   responser   r   r    _generate_tokens_logitsA   s,   z$NeMoFWLMEval._generate_tokens_logitsc                 C   s$   t |trdS t |trdS td)z4
        Returns the type of the tokenizer.
        r   r   zTokenizer type is not one of SentencePieceTokenizer or HF's AutoTokenizer. Please check how to handle special tokens for this tokenizer)r1   r   r   
ValueError)r   r   r   r   r    tokenizer_typed   s   

zNeMoFWLMEval.tokenizer_typerequestsc               	   C   s  i }|  | j}|dkr| j|d< n	|dkr| j|d< d}d}t||d jr*d}d	| _g }ttdt	|| j
D ]	}|||| j
  }g }	g }
g }g }g }|D ]f}|jd }|jd	 }| jjj|fi |}| jjj|fi |}|  | jdkr|d	d
 }|d	d
 }t	|}t	|}||| jj|d d }|	| |
| || || || qR| j|	| j| j| j| jd}| j||dd}t|D ]p\}}|| }|| }|| }|s|d
d
tj|dkddd
d
f }|d
d
| d
d
d
f }tjt|dd}tj|tjdd}|jdd}||k }t |d|d!d}t"|# t$|f}|| qq:|S )a9  
        Defines the loglikelihood request. Takes input requests of type list[Instance] where Instance is a dataclass
        defined in lm_eval.api.instance. Each Instance conists of the input prompt, output prompt, request type(here
        loglikelihood) and other relevant args like few shot samples.
        r   r   r   add_special_tokensFz^mmlu_r   T   N r#   r%   r&   r   r   r   )r"   )r   r   )axis)dim)dtyper   )%r9   r   r   rematch	task_namer   r   rangelenr   	argumentsencodereplacedecodeappendr   r   r   r   r7   	enumeratenpanyFlog_softmaxtorchtensorlong	unsqueezeargmaxallgathersqueezefloatsumbool) r   r:   special_tokens_kwargsr9   r!   mmlu_regex_patternresultsibatchr'   continuationscontinuation_encsnum_ctx_tokens_listnum_cont_tokens_listrequestcontextcontinuationcontext_enccontinuation_encnum_ctx_tokensnum_cont_tokensr%   r4   logits_batchjlogitslogProbs	cont_toksgreedy_tokens	is_greedylogProbs_actualresultr   r   r    loglikelihoodr   sv   







$zNeMoFWLMEval.loglikelihoodc                 C   s   dS )zX
        Defines the loglikelihood_rolling request type. Yet to be implemented.
        Nr   )r   r:   r   r   r    loglikelihood_rolling   s   z"NeMoFWLMEval.loglikelihood_rollinginputsc                 C   sP   g }t |D ]}|jd }| j|| j| j| j| jd}| |}|| q|S )a?  
        Defines the generate_until request type. Takes input requests of type list[Instance] where Instance is a
        dataclass defined in lm_eval.api.instance. Each Instance conists of the input prompt, output prompt, request
        type(here loglikelihood) and other relevant args like few shot samples.
        r   r?   )	r   rH   r   r   r   r   r   r7   rL   )r   rx   r_   instancer%   r4   generated_textr   r   r    generate_until   s   

	zNeMoFWLMEval.generate_until)FF)__name__
__module____qualname____doc__r   r\   r7   r9   r2   r   rv   rw   r{   __classcell__r   r   r   r    r   $   s    #hr   http://0.0.0.0:8080triton_modelX  r   base_urlr   max_retriesretry_intervalc           
   	   C   s$  ddl }ddl}|  d}|  d}t|D ]m}td zL||}	|	jdkr8td|	j  || W qtd |j||d	d
dd}	|	jdkr]td|	j  || W qtd| d W  dS  |j	j
y}   td| d Y nw || qtd| d| d dS )a  
    Wait for FastAPI server and model to be ready.

    Args:
        base_url (str): The URL to the FastAPI server (e.g., "http://0.0.0.0:8080").
        model_name (str): The name of the deployed model.
        max_retries (int): Maximum number of retries before giving up.
        retry_interval (int): Time in seconds to wait between retries.

    Returns:
        bool: True if both the server and model are ready within the retries, False otherwise.
    r   Nz/v1/completions/z/v1/triton_health&Checking server and model readiness...   'Server is not ready. HTTP status code: Server is ready.hellor<   )r#   r%   r&   )jsonz&Model is not ready. HTTP status code: Model '' is ready.T+Pytriton server not ready yet. Retrying in  seconds...Server or model '' not ready after 
 attempts.F)timer:   rF   r
   infogetstatus_codesleeppost
exceptionsRequestExceptionerror)
r   r   r   r   r   r:   completions_url
health_url_r6   r   r   r    wait_for_fastapi_server   s4   








r   http://0.0.0.0:8000@  r$   triton_http_portc              	   C   s  ddl }ddl}ddlm} ddlm}m}	 | dr8d}
t	|
| }|
d}| ddd	| d	| } |  d
}t|D ]}td zE||}|jdkrctd|j  || W qAtd || ||d td| d 	 W d   W  dS 1 sw   Y  W n6 |	y   td| d Y n% |y   td| d Y n |jjy   td| d Y nw || qAtd| d| d dS )a  
    Wait for PyTriton server and model to be ready.

    Args:
        url (str): The URL of the Triton server (e.g., "grpc://0.0.0.0:8001").
        triton_http_port (int): http port of the triton server.
        model_name (str): The name of the deployed model.
        max_retries (int): Maximum number of retries before giving up.
        retry_interval (int): Time in seconds to wait between retries.

    Returns:
        bool: True if both the server and model are ready within the retries, False otherwise.
    r   N)ModelClient)#PyTritonClientModelUnavailableErrorPyTritonClientTimeoutErrorzgrpc://z:(\d+)r<   zhttp://:z/v2/health/readyr   r   r   r   )r   init_timeout_sr   r   TzTimeout: Server or model 'z' not ready yet.z' is unavailable on the server.r   r   r   r   r   F)r   r:   pytriton.clientr   pytriton.client.exceptionsr   r   
startswithrC   searchgrouprJ   rF   r
   r   r   r   r   r   r   r   )r$   r   r   r   r   r   r:   r   r   r   patternrD   	grpc_portr   r   r6   r   r   r    wait_for_server_ready2  sB   

 





(r   c                 C   s   t | j| jd S )N.)pkgutiliter_modules__path__r|   )ns_pkgr   r   r    _iter_namespacev  s   r   returnc                  C   s   zddl } W n ty   tdw dd t| D }i }| D ] \}}|ds2td| d| ^}}}t| ||< q!|S )	a  
    Finds all pre-defined evaluation configs across all installed evaluation frameworks.

    Returns:
        dict[str, list[str]]: Dictionary of available evaluations, where key is evaluation
            framework and value is list of available tasks.
    r   Nz[Please ensure that core_evals is installed in your env as it is required to run evaluationsc                 S   s"   i | ]\}}}|t jd |dqS )z.input)package)	importlibimport_module).0findernameispkgr   r   r    
<dictcomp>  s    z.list_available_evaluations.<locals>.<dictcomp>core_evals.z
Framework z! is not a submodule of core_evals)	
core_evalsImportErrorr   itemsr   RuntimeErrorget_available_evaluationsr2   keys)r   discovered_modulesevalsframework_nameinput_moduler   task_name_mappingr   r   r    list_available_evaluations~  s"   

r   	eval_taskc                    sp   t  } fdd| D }t|dkrtd  dt|dkr4dd |D }td  d	| d
|d S )z
    Find framework for executing the evaluation eval_task.

    This function serches for framework (module) that defines a task with given name and returns the framework name.
    c                    s   g | ]
\}} |v r|qS r   r   )r   ftasksr   r   r    
<listcomp>  s    z"find_framework.<locals>.<listcomp>r   zFramework for task z not found!r<   c                 S   s$   g | ]}|t d d ddqS )r   Nr   -)rG   rJ   )r   r   r   r   r    r     s   $ z#Multiple frameworks found for task z: zL. Please indicate which version should be used by passing <framework>.<task>)r   r   rG   r8   )r   r   
frameworksframeworks_namesr   r   r    find_framework  s   r   
target_cfgeval_cfgc                 C   s   | j jdu r
tdzddlm} W n ty   tdw ddlm} | j }|j|jd dd	}t	|j
|j|jd
}|sAtd|j}t|j|j
||j|j|j|j|j|jd	}|j}	|j||	|j|j|jd}
td|
d |	   dS )a  
    Evaluates nemo model deployed on PyTriton server (via trtllm) using lm-evaluation-harness
    (https://github.com/EleutherAI/lm-evaluation-harness/tree/main).

    Args:
        target_cfg (EvaluationTarget): target of the evaluation. Providing nemo_checkpoint_path, model_id, and
            url in EvaluationTarget.api_endpoint is required to run evaluations.
        eval_cfg (EvaluationConfig): configuration for evaluations
    Nz7Please provide nemo_checkpoint_path in your target_cfg.r   )	evaluatorzfPlease ensure that lm-evaluation-harness is installed in your env as it is required to run evaluations)ioz/contextzmodel.tokenizer)subpath)r$   r   r   zServer not ready for evaluation)	r   r   r   r   r   r   r   r   r   )r#   r   limitnum_fewshotbootstrap_iterszscore: r_   )api_endpointnemo_checkpoint_pathr8   lm_evalr   r   nemo.lightningr   load_contextr   r$   nemo_triton_http_portmodel_idr   paramsr   r   max_new_tokensr   r   r   r   typesimple_evaluatelimit_samplesr   r   r
   r   )r   r   r   r   endpointr   server_readyr   r#   r   r_   r   r   r    _legacy_evaluate  sL   r   )r   r   r   r   )r   r   r   r   r   )&r   r   rC   r   numpyrN   rR   torch.nn.functionalnn
functionalrP   lm_eval.api.instancer   lm_eval.api.modelr   r   =nemo.collections.common.tokenizers.huggingface.auto_tokenizerr   :nemo.collections.common.tokenizers.sentencepiece_tokenizerr   #nemo.collections.llm.evaluation.apir   r   nemo.deploy.nlpr	   
nemo.utilsr
   r   r3   intr   r   r   dictr2   r   r   r   r   r   r   r    <module>   sl    Y
8
D