o
    5ti6                     @   s   d dl Z d dlZd dlZd dlmZmZmZmZmZ d dl	Z
d dlZd dlmZ d dlmZmZ eeZded fddZG d	d
 d
ZdS )    N)AnyDictListLiteralTuple)Version)_handle_non_serializableremove_none_patternreturnPrinterc                  C   s   ddl m}  |  }|S )z3Returns a wandb printer instance for pretty stdout.r   )new_printer)wandb.sdk.lib.printerr   )r   printer r   P/home/ubuntu/.local/lib/python3.10/site-packages/lm_eval/loggers/wandb_logger.pyget_wandb_printer   s   r   c                   @   s   e Zd ZddddZdeeef ddfddZdeeef fdd	Zde	eeef eeef f fd
dZ
dddZdddZdddZdeeeef  deeef dejfddZdeeeef  deddfddZdeeeeeef  f ddfddZdS )WandbLoggerNr
   c              
   C   s   zddl }t|jtdksJ t|jtdk r|d W n ty9 } ztd|  W Y d}~nd}~ww |p=i | _|pBi | _| j	dd| _
|jdu rg|jdi | j| _| jrf| jj| j n|j| _t | _dS )a
  Attaches to wandb logger if already initialized. Otherwise, passes init_args to wandb.init() and config_args to wandb.config.update()

        Args:
            init_args Optional[Dict]: Arguments for init configuration.
            config_args Optional[Dict]: Arguments for config

        Parse and log the results returned from evaluator.simple_evaluate() with:
            wandb_logger.post_init(results)
            wandb_logger.log_eval_result()
            wandb_logger.log_eval_samples(results["samples"])
        r   Nz0.13.6zreport-editing:v0zTo use the wandb reporting functionality please install wandb>=0.13.6.
To install the latest version of wandb run `pip install wandb --upgrade`
stepr   )wandbr   __version__require	Exceptionloggerwarning
wandb_argswandb_config_argspopr   runinitconfigupdater   r   )self	init_argsconfig_argsr   er   r   r   __init__   s0   



zWandbLogger.__init__resultsc                 C   s<   t || _t|di  | _t|di  | _d S )Nr&   groups)copydeepcopyr&   listgetkeys
task_namesgroup_names)r!   r&   r   r   r   	post_initB   s   zWandbLogger.post_initc                 C   s.   | j di | _| j di }| j|d}|S )zGet configuration parameters.configsr   )task_configscli_configs)r&   r+   r1   )r!   r2   r0   r   r   r   _get_configG   s   zWandbLogger._get_configc                 C   s\  t | jdt }t |}| jD ]'}||t }| D ]\}}t|\}}|r9||| |< || | q qi }	| jD ] }
||
t }| D ]\}}t	|t
r_||	|
 d| < qMq@|	 D ]\}}|d\}}|| | qet |}| D ]\}}| D ]\}}||| d| < || | qq| jD ]}
||
 q|	|fS )z Sanitize the results dictionary.r&   /)r(   r)   r&   r+   dictr-   itemsr	   r   
isinstancestrsplit)r!   _resultstmp_results	task_nametask_resultmetric_namemetric_value_metric_nameremovedwandb_summarytasksummary_metricsummary_value_task_summary_metrictask_resultsr   r   r   _sanitize_results_dictR   s>   





z"WandbLogger._sanitize_results_dictc                    s   g d}ddt t dtf fdd}|dg| d} jjd|i jd	 d
 j v rA|dg| d
} jjd|i jd	 dS dS )z6Generate and log evaluation results as a table to W&B.)r   Filternum_fewshotMetricValueStderrr&   columnskeyc                    s*  dd l }|j| d}t j}|| D ]{\}}| jv r%|dks%q|d|}|dkr3d }|d|}| D ]R\}	}
|	d\}}}|	drQq?|d	krVq?|d d | |v r||d d |  }|dkrrd
| }|j
|||||t|
t|g  q?|j
|||||t|
dg  q?q|S )Nr   )rO   r'   versionszN/Azn-shot,_stderraliasz%.4f )r   Tabler(   r)   r&   r+   r6   r.   	partitionendswithadd_datar8   )rO   rP   r   tabler&   kdicversionnmfvm_fser!   r   r   
make_table   s0   
"z5WandbLogger._log_results_as_table.<locals>.make_tableTaskszevaluation/eval_resultsr   r'   Groupszevaluation/group_eval_resultsN)r&   )r   r8   r   logr   r&   r,   )r!   rO   rf   rZ   r   re   r   _log_results_as_tablev   s   	 z!WandbLogger._log_results_as_tablec                 C   st   ddl }tj| jdtdd}|jddd}|jd	d
dd}|| W d   n1 s-w   Y  | j	| dS )z$Log results as JSON artifact to W&B.r   N   Findentdefaultensure_asciir&   eval_resultstypezresults.jsonwutf-8modeencoding)
r   jsondumpsr&   r   Artifactnew_filewriter   log_artifact)r!   r   dumpedartifactrc   r   r   r   _log_results_as_artifact   s   
z$WandbLogger._log_results_as_artifactc                 C   sd   |   }| jjj|| jdud |  \}| _| jj| | jj| j| jd | 	  | 
  dS )zLog evaluation results to W&B.N)allow_val_changerh   )r3   r   r   r    r   rI   wandb_resultssummaryrj   rk   r   )r!   r0   rB   r   r   r   log_eval_result   s   zWandbLogger.log_eval_resultdatar   c                    sT  dd |D }dd |D }dgt | }dgt | }dgt | }i }|d }	i }
|	D ]F  d  dv rh fdd|D |
  d	<  d
v rY fdd|D |
  d< q- fdd|D |
  d< q- fdd|D |
 < q-|d dkrdd |D }dd |D }dd |D }dd |D }nZ|d dkrdd |D }dd |D }dd |D }dd |D }n7|d dkrdd |D }dd |D }dd |D }n|d dkrd d |D }d!d |D }d"d |D }||d#< ||d$< ||d%}|d dkr	||d&< d'd |D ||d d(}|| || ||
 t|S ))a.  Generate a dataset from evaluation data.

        Args:
            data (List[Dict[str, Any]]): The data to generate a dataset for.
            config (Dict[str, Any]): The configuration of the task.

        Returns:
            pd.DataFrame: A dataframe that is ready to be uploaded to W&B.
        c                 S      g | ]}|d  qS )doc_idr   .0xr   r   r   
<listcomp>       z1WandbLogger._generate_dataset.<locals>.<listcomp>c                 S   r   )targetr   r   r   r   r   r      r   rU   metric_listmetric)word_perplexitybyte_perplexitybits_per_bytec                       g | ]}|  d  qS r   r   r   r   r   r   r          _loglikelihood)r   r   c                    r      r   r   r   r   r   r      r   _bytesc                    r   r   r   r   r   r   r   r      r   _wordsc                    s   g | ]}|  qS r   r   r   r   r   r   r      r   output_typeloglikelihoodc                 S      g | ]
}|d  d d qS 	argumentsr   r   r   r   r   r   r          c                 S   s   g | ]
}|d  d d qS )r   r   r   r   r   r   r   r   r      r   c                 S   sN   g | ]#}d |d d d d  dd d |d d d d s!dnd qS )	#log probability of continuation is respsr    

3continuation will {} generated with greedy samplingr   not bebeformatr   r   r   r   r      s    c                 S   sF   g | ]}d |d d d  dd d |d d d sdnd qS )	r   filtered_respsr   r   r   r   r   r   r   r   r   r   r   r   r      s    multiple_choicec                 S   r   r   r   r   r   r   r   r      r   c                 S   s(   g | ]}d  dd t|d D qS )
c                 S   s"   g | ]\}}| d |d  qS )z. r   r   )r   idxyr   r   r   r      s   " <WandbLogger._generate_dataset.<locals>.<listcomp>.<listcomp>r   )join	enumerater   r   r   r   r      s    c                 S   $   g | ]}t d d |d D qS )c                 S   s   g | ]}|d  d  qS r   r   r   r^   r   r   r   r      r   r   r   npargmaxr   r   r   r   r      s   $ c                 S   r   )c                 S   r   r   r   r   r   r   r   r      r   r   r   r   r   r   r   r   r      s    loglikelihood_rollingc                 S   r   r   r   r   r   r   r   r     r   c                 S   r   r   r   r   r   r   r   r   r     r   c                 S      g | ]}|d  d qS r   r   r   r   r   r   r   r     r   generate_untilc                 S   r   r   r   r   r   r   r   r     r   c                 S   r   r   r   r   r   r   r   r     r   c                 S   r   r   r   r   r   r   r   r     r   raw_predictionsfiltered_predictions)idr   choicesc                 S   s   g | ]}t |qS r   )lenr   r   r   r   r     r   )	input_lenlabelsr   )r   r+   r    pd	DataFrame)r!   r   r   idsr   instancer   r   model_outputsmetrics_listmetricsr   df_datatmp_datar   r   r   _generate_dataset   sr   




zWandbLogger._generate_datasetr<   c                 C   sz   dd l }tj|dtdd}|j| dd}|j| ddd	d
}|| W d    n1 s0w   Y  | j| d S )Nr   rl   Frm   samples_by_taskrr   z_eval_samples.jsonrt   ru   rv   )	r   ry   rz   r   r{   r|   r}   r   r~   )r!   r   r<   r   r   r   rc   r   r   r   _log_samples_as_artifact  s   z$WandbLogger._log_samples_as_artifactsamplesc                    sb   fdd j D }g }i }|D ]2} j| dd}|r=t|tr%|g}|D ]}||s4|g||< q'|| | q'q|| q|D ]$}|| } | j|}	 jj| d|	i j	d  
|| qE| D ]@\}
}t }|D ]'}|| } | j|}	|
|	d< ||	d< tj||	gdd	} 
|| qx jj|
 d|i j	d qndS )
zLog evaluation samples to W&B.

        Args:
            samples (Dict[str, List[Dict[str, Any]]]): Evaluation samples for each task.
        c                    s   g | ]	}| j vr|qS r   )r.   r   re   r   r   r   9  s    z0WandbLogger.log_eval_samples.<locals>.<listcomp>groupN_eval_resultsrh   rC   T)ignore_index)r-   r1   r+   r7   r8   appendr   r   rj   r   r   r6   r   r   concat)r!   r   r-   ungrouped_taskstasks_by_groupsr<   r.   
group_name
eval_predsdfr   grouped_tasks
grouped_dfr   re   r   log_eval_samples3  sD   


zWandbLogger.log_eval_samples)NN)r
   N)__name__
__module____qualname__r%   r   r8   r   r/   r3   r   rI   rk   r   r   r   r   r   r   r   r   r   r   r   r   r      s.    )&
$
2


[
*r   )r(   ry   loggingtypingr   r   r   r   r   numpyr   pandasr   packaging.versionr   lm_eval.loggers.utilsr   r	   	getLoggerr   r   r   r   r   r   r   r   <module>   s    
