o
    6ti                     @   s   d dl Z d dlZd dlmZ d dlmZmZ d dlZd dlm	Z	 er&d dlZe 
eZe	dded fddZd	ee dee fd
dZdd Zdedee deeef fddZdS )    N)cache)TYPE_CHECKINGUnion)AutoTokenizerreturn)z transformers.PreTrainedTokenizerz$transformers.PreTrainedTokenizerFastc                 K   s4   | p|}|s
J dt d| d tj|ddS )Nz$No tokenizer or pretrained provided.zUsing tokenizer z for babilong tasks.T)trust_remote_code)eval_loggerinfor   from_pretrained)	tokenizer
pretrainedkwargs r   W/home/ubuntu/.local/lib/python3.10/site-packages/lm_eval/tasks/babilong/common_utils.pyget_tokenizer   s   r   
predictionc                 C   s>   g }| D ]}|  }td}|d|  }|| q|S )Nz[\x00-\x1f]
)striprecompilesubappend)r   respredict_str
np_patternr   r   r   postprocess_pred   s   
r   c                  K   sD   |  dd}|  d}td| d|  tjd||d}||iS )Nmax_seq_lengths0kqa_splitz*Loading babilong dataset: max_seq_lengths=z, split=zRMT-team/babilong-1k-samples)namesplit)getr   r	   datasetsload_dataset)r   config_namer   datasetr   r   r   r#   (   s   
r#   docresultsc                 C   s<   t |}| dd }| |d  v rdnd}d|iS )Ntarget r   g      ?g        acc)r   r!   r   lower)r&   r'   predr(   scorer   r   r   process_results7   s   r.   )NN)loggingr   	functoolsr   typingr   r   r"   transformersr   	getLogger__name__r   r   liststrr   r#   dictfloatr.   r   r   r   r   <module>   s"    
	&