o
    7ti	                     @   s*  d dl Z d dlZd dlmZ d dlmZmZ d dlmZ er"d dlZe 	e
ZdgZe	dded fdd	Zd
ee dee fddZdee deee  defddZdee deee  defddZdedee deeef fddZdedee deeef fddZdee defddZdS )    N)cache)TYPE_CHECKINGUnion)AutoTokenizeri   return)z transformers.PreTrainedTokenizerz$transformers.PreTrainedTokenizerFastc                 K   s4   | p|}|s
J dt d| d tj|ddS )Nz$No tokenizer or pretrained provided.zUsing tokenizer z for synthetic tasks.T)trust_remote_code)eval_loggerinfor   from_pretrained)	tokenizer
pretrainedkwargs r   T/home/ubuntu/.local/lib/python3.10/site-packages/lm_eval/tasks/ruler/common_utils.pyget_tokenizer   s   r   
predictionc                 C   s>   g }| D ]}|  }td}|d|  }|| q|S )Nz[\x00-\x1f]
)striprecompilesubappend)r   respredict_str
np_patternr   r   r   postprocess_pred   s   
r   predsrefsc                 C   $   t dd t| |D t|  }|S )Nc                    .   g | ]\ }t  fd d|D t| qS )c                    $   g | ]}|     v rd ndqS g      ?g        lower.0rpredr   r   
<listcomp>.      $ z/string_match_all.<locals>.<listcomp>.<listcomp>sumlenr%   refr   r'   r   r)   -       z$string_match_all.<locals>.<listcomp>)r,   zipr-   r   r   scorer   r   r   string_match_all+      r4   c                 C   r   )Nc                    r   )c                    r    r!   r"   r$   r'   r   r   r)   8   r*   z0string_match_part.<locals>.<listcomp>.<listcomp>r+   r.   r   r'   r   r)   7   r0   z%string_match_part.<locals>.<listcomp>)maxr1   r-   r2   r   r   r   string_match_part5   r5   r7   docresultsc                 C   >   dd t D }| d }t|}t|| d g}||t|< |S )Nc                 S      i | ]}t |d qS g      strr%   lengthr   r   r   
<dictcomp>A       z#process_results.<locals>.<dictcomp>
max_lengthoutputs)DEFAULT_SEQ_LENGTHSr   r4   r>   r8   r9   metrics	input_lenr(   r3   r   r   r   process_results?      rI   c                 C   r:   )Nc                 S   r;   r<   r=   r?   r   r   r   rA   K   rB   z(process_results_part.<locals>.<dictcomp>rC   rD   )rE   r   r7   r>   rF   r   r   r   process_results_partI   rJ   rK   rG   c                 C   s&   dd | D }|sdS t |t| S )Nc                 S   s   g | ]}|d kr|qS )r   )r%   xr   r   r   r)   T   s    z%aggregate_metrics.<locals>.<listcomp>rL   r+   )rG   r   r   r   r   aggregate_metricsS   s   rN   )NN)loggingr   	functoolsr   typingr   r   transformersr   	getLogger__name__r   rE   r   listr>   r   floatr4   r7   dictrI   rK   rN   r   r   r   r   <module>   s*    
	""
"
"

