o
    5ti4;                     @   s   d dl Z d dlZd dlZd dlmZ d dlmZmZmZm	Z	 d dl
mZ d dlmZmZmZ d dlZd dlmZ er@d dlmZ eeZg dZed	d
G dd dZdS )    N)	Namespace)asdict	dataclassfieldfields)Path)TYPE_CHECKINGAnycast)simple_parse_args_stringTaskManager)
wandb_argswandb_config_argshf_hub_log_argsmetadata
model_args
gen_kwargsT)slotsc                   @   s  e Zd ZU dZedddidZedB ed< edddidZeed	< ee	dd
idZ
e	ed< eeddidZeee B ed< edddidZedB ed< edddidZeed< edddidZedB ed< edddidZedB ed< edddidZedB ed< edddidZee	B dB ed< edddidZedB ed< ee	ddidZe	ed < ed!dd"idZeed#< ed!dd$idZeed%< ed!dd&idZeed'< eddd(idZedB ed)< ed!dd*idZeed+< eddd,idZedB ed-< ed!dd.idZeeB ed/< eddd0idZedB ed1< ed!dd2idZ eed3< eddd4idZ!edB ed5< ee	dd6idZ"e	ed7< eddd8idZ#edB ed9< ee	dd:idZ$e	ed;< ee	dd<idZ%e	ed=< ee	dd>idZ&e	ed?< ed@dA ddBidZ'eedC< ed!ddDidZ(eedE< ed!ddFidZ)eedG< ee	ddHidZ*e	edI< e+dJe,dKd fdLdMZ-e+dNee.B dKd fdOdPZ/e0dNee.B dKe	ee1f fdQdRZ2dSdT Z3dUdV Z4dWdX Z5dYdZ Z6d`dIe	dB dKd[fd\d]Z7d^d_ Z8dS )aEvaluatorConfiga  Configuration for language model evaluation runs.

    This dataclass contains all parameters for configuring model evaluations via
    `simple_evaluate()` or the CLI. It supports initialization from:
    - CLI arguments (via `from_cli()`)
    - YAML configuration files (via `from_config()`)
    - Direct instantiation with keyword arguments

    The configuration handles argument parsing, validation, and preprocessing
    to ensure properly structured and validated.

    Example:
        # From CLI arguments
        config = EvaluatorConfig.from_cli(args)

        # From YAML file
        config = EvaluatorConfig.from_config("eval_config.yaml")

        # Direct instantiation
        config = EvaluatorConfig(
            model="hf",
            model_args={"pretrained": "gpt2"},
            tasks=["hellaswag", "arc_easy"],
            num_fewshot=5
        )

      See individual field documentation for detailed parameter descriptions.
    NhelpzPath to YAML config file)defaultr   confighfzName of model e.g. 'hf'modelz"Arguments for model initialization)default_factoryr   r   z.Comma-separated list of task names to evaluatetasksz&Number of examples in few-shot contextnum_fewshot   zBatch size for evaluation
batch_sizez$Maximum batch size for auto batchingmax_batch_sizezcuda:0z&Device to use (e.g. cuda, cuda:0, cpu)devicez!Limit number of examples per tasklimitz7dict, JSON string or path to JSON file with doc indicessamplesz0Path to sqlite db file for caching model outputs	use_cachez+Cache dataset requests: true/refresh/deletecache_requestsFzRun test suite for taskscheck_integrityz%Print prompts for first few documents	write_outzSave model outputs and inputslog_samplesz+Dir path where result metrics will be savedoutput_pathzFOnly save model outputs, don't evaluate metrics. Use with log_samples.predict_onlyz Custom System instruction to addsystem_instructionz[Apply chat template to prompt. Either True, or a string identifying the tokenizer template.apply_chat_templatezYUse fewshot as multi-turn conversation. Defaults to True when apply_chat_template is set.fewshot_as_multiturnz%Show full config at end of evaluationshow_configz&Additional dir path for external tasksinclude_pathz9Arguments for model generation. Will update Task defaultsr   zLogging verbosity level	verbosityzArguments for wandb.initr   z!Arguments for wandb.config.updater   zArguments for HF Hub loggingr   c                   C   s   g dS )N)r     r1   r1    r2   r2   r2   R/home/ubuntu/.local/lib/python3.10/site-packages/lm_eval/config/evaluate_config.py<lambda>   s    zEvaluatorConfig.<lambda>z0Seeds for random, numpy, torch, fewshot (random)seedz!Trust remote code for HF datasetstrust_remote_codezZConfirm understanding of unsafe code risks (for code tasks that executes arbitrary Python)confirm_run_unsafe_codez-Additional metadata for tasks that require itr   	namespacereturnc                    s   t |  }t|dd }r|| td| ddh  fddt| D }|| | d
i | }|  |rX|	dd |rNt
d| d	nd tt|  |S )z
        Build an EvaluationConfig by merging with simple precedence:
        CLI args > YAML config > built-in defaults
        r   Nstrcommandfuncc                    s*   i | ]\}}|s|d kr| vr||qS )r   r2   ).0kvexcluded_argsr2   r3   
<dictcomp>   s    z,EvaluatorConfig.from_cli.<locals>.<dictcomp>z	CLI args z will override yamlr2   )r   getattrupdateload_yaml_configr
   varsitems_parse_dict_args
_configurepopeval_loggerinfoprinttextwrapdedent)clsr8   r   used_configcli_argsinstancer2   r@   r3   from_cli   s(   




zEvaluatorConfig.from_cliconfig_pathc                 C   s   |  |}| di | S )zy
        Build an EvaluationConfig from a YAML config file.
        Merges with built-in defaults and validates.
        Nr2   )rE   rI   )rP   rU   yaml_configr2   r2   r3   from_config   s   
zEvaluatorConfig.from_configc              
   C   s   t | }| std|  z	t| }W n0 tjy2 } ztd| d| |d}~w t	t
fyJ } ztd| d| |d}~ww t|ts_td|  dt|j |S )z#Load and validate YAML config file.zConfig file not found: zInvalid YAML in z: NzCould not read config file zYAML root must be a mapping in z, got )r   is_fileFileNotFoundErrorresolveyaml	safe_load	read_text	YAMLError
ValueErrorOSErrorUnicodeDecodeError
isinstancedicttype__name__)rU   _config_path	yaml_dataer2   r2   r3   rE      s"   
z EvaluatorConfig.load_yaml_configc                 C   sH   t | D ]}|jtu r!tt| |jtr!t| |jtt| |j q| S N)	r   rd   rc   rb   rC   namer:   setattrr   )selffr2   r2   r3   rH     s
   z EvaluatorConfig._parse_dict_argsc                 C   s   |      | S )z<Validate configuration and preprocess fields after creation.)_validate_arguments_process_arguments_set_trust_remote_coderl   r2   r2   r3   rI     s   zEvaluatorConfig._configurec                 C   s   | j du r	td| jrtd | jrd| _| js| jr$| js$td| jdu r8| j	r8t
d t| j	| _n| jdu rD| j	sDtd| jrP| jdurPtd| S )	z=Validate configuration arguments and cross-field constraints.Nz!Need to specify task to evaluate.zY--limit SHOULD ONLY BE USED FOR TESTING. REAL METRICS SHOULD NOT BE COMPUTED USING LIMIT.TzBSpecify --output_path if providing --log_samples or --predict_onlyz(Using default fewshot_as_multiturn=True.zGWhen `fewshot_as_multiturn` is True, `apply_chat_template` must be set.z4If --samples is not None, then --limit must be None.)r   r_   r"   rK   warningr*   r(   r)   r-   r,   rL   boolr#   rq   r2   r2   r3   rn     s,   

z#EvaluatorConfig._validate_argumentsc                 C   s   | j r<t| j tr| j | _ n.t| j tr<z	t| j | _ W n tjy;   ttd| j  }	 r9t|
 | _ Y nw | jdu rDi | _| jdu rLi | _| j| jB | _| S )z6Process samples argument - load from a file if needed.r:   N)r#   rb   rc   r:   jsonloadsJSONDecodeErrorr   r
   rX   r]   r   r   )rl   samples_pathr2   r2   r3   ro   :  s"   


z"EvaluatorConfig._process_argumentsr   c                 C   s  ddl }ddl}ddlm} ddlm} |r|n| j| _|| j| jr%| jni d}t| j	t
r5| j	dndd | j	D }t|d	krnt|d  rng }t|d d
 }	| t
|	D ]}
||
}|| q\|| _	|S dd |D }t|}| D ]%}|ds||}ng }| |D ]}
||
}|| q|||< q~g }|j| D ]}||vr|| qdd | D }|rd|}td| || _	|S )a  Process and validate tasks, return resolved task names.

        Handles:
        - Task names (e.g., "hellaswag", "arc_easy")
        - Custom YAML config files (e.g., "/path/to/task.yaml")
        - Glob patterns (e.g., "/path/to/*.yaml")
        - Directories of YAML files
        r   N)utilsr   )r/   r   ,c                 S   s    g | ]}| d D ]}|q	qS )ry   )split)r=   tasktr2   r2   r3   
<listcomp>m  s     z1EvaluatorConfig.process_tasks.<locals>.<listcomp>r   z*.yamlc                 S   s*   g | ]}| d rtt| n|qS ).yaml)endswithr:   r   absolute)r=   r{   r2   r2   r3   r}   {  s    r~   c                 S   s   g | ]\}}|s|qS r2   r2   )r=   r{   matchesr2   r2   r3   r}     s    z, zTasks not found: )glob	itertoolslm_evalrx   lm_eval.tasksr   r   r/   rb   r   r:   rz   lenr   is_dirrE   appendrc   fromkeyskeysr   match_taskschainfrom_iterablevaluesrG   joinr_   )rl   r   r   r   rx   r   task_manager	task_list
task_names	yaml_path	yaml_filer   
match_dictr{   r   task_missingmissingr2   r2   r3   process_tasksP  sX   	
	






zEvaluatorConfig.process_tasksc                 C   s4   | j rddl}d|j_| jdu ri | _d| jd< | S )z/Apply the trust_remote_code setting if enabled.r   NTr6   )r6   datasetsr   HF_DATASETS_TRUST_REMOTE_CODEr   )rl   r   r2   r2   r3   rp     s   

z&EvaluatorConfig._set_trust_remote_coderi   )9re   
__module____qualname____doc__r   r   r:   __annotations__r   rc   r   listr   r   intr   r    r!   r"   floatr#   r$   r%   r&   rs   r'   r(   r)   r*   r+   r,   r-   r.   r/   r   r0   r   r   r   r5   r6   r7   r   classmethodr   rT   r   rW   staticmethodr	   rE   rH   rI   rn   ro   r   rp   r2   r2   r2   r3   r      s   
 "	 (Nr   )rt   loggingrN   argparser   dataclassesr   r   r   r   pathlibr   typingr   r	   r
   r[   lm_eval.utilsr   r   r   	getLoggerre   rK   	DICT_KEYSr   r2   r2   r2   r3   <module>   s    

