o
    7ti/                     @   s  d dl Z d dlZd dlmZ d dlmZ zd dlZd dlmZ d dl	m
Z
 W n ey1   edw dedefd	d
ZdedefddZdedefddZdedee fddZdedefddZdedee fddZdedefddZdedee fddZdedefddZdedee fdd Zdedefd!d"Zdedee defd#d$Zd%ededefd&d'Zdedee fd(d)Zdedefd*d+Zd,d- Zdeeef deeef fd.d/Z dedee fd0d1Z!dedefd2d3Z"dedefd4d5Z#dedee fd6d7Z$dedee fd8d9Z%dedee fd:d;Z&dedee fd<d=Z'dedee fd>d?Z(dedee fd@dAZ)dedee fdBdCZ*dedee fdDdEZ+dedee fdFdGZ,dedee fdHdIZ-dedee fdJdKZ.dS )L    N)Counter)Union)fuzz)RougezPlease install the required dependencies for this task with `pip install lm_eval["longbench"] or `pip install jieba fuzzywuzzy rouge`sreturnc                 C   s4   dd }dd }dd }dd }||||| S )	zALower text and remove punctuation, articles and extra whitespace.c                 S   s   t dd| S )Nz\b(a|an|the)\b )resubtext r   S/home/ubuntu/.local/lib/python3.10/site-packages/lm_eval/tasks/longbench/metrics.pyremove_articles+      z)normalize_answer.<locals>.remove_articlesc                 S      d |  S )Nr   joinsplitr   r   r   r   white_space_fix.   r   z)normalize_answer.<locals>.white_space_fixc                    s"   t tj d fdd| D S )N c                 3       | ]	}| vr|V  qd S Nr   .0chexcluder   r   	<genexpr>3       z8normalize_answer.<locals>.remove_punc.<locals>.<genexpr>setstringpunctuationr   r   r   r   r   remove_punc1   s   
z%normalize_answer.<locals>.remove_puncc                 S      |   S r   lowerr   r   r   r   r'   5      znormalize_answer.<locals>.lowerr   )r   r   r   r$   r'   r   r   r   normalize_answer(   s
   r)   c                 C   s(   dd }dd }dd }|||| S )z4Lower text and remove punctuation, extra whitespace.c                 S   r   )Nr   r   r   r   r   r   r   >   r   z,normalize_zh_answer.<locals>.white_space_fixc                    s*   d}t tj|  d fdd| D S )Nu   ！？｡。＂＃＄％＆＇（）＊＋，－／：；＜＝＞＠［＼］＾＿｀｛｜｝～｟｠｢｣､、〃》「」『』【】〔〕〖〗〘〙〚〛〜〝〞〟〰〾〿–—‘’‛“”„‟…‧﹏.r   c                 3   r   r   r   r   all_punctuationr   r   r   D   r   z;normalize_zh_answer.<locals>.remove_punc.<locals>.<genexpr>r    )r   cn_punctuationr   r*   r   r$   A   s   z(normalize_zh_answer.<locals>.remove_puncc                 S   r%   r   r&   r   r   r   r   r'   F   r(   z"normalize_zh_answer.<locals>.lowerr   )r   r   r$   r'   r   r   r   normalize_zh_answer;   s   r-   
predictionground_truthc                 K   sV   t d| }d}|D ]}t|t|kr|d7 }q
t|dkr!dn|t| }t|S )N\d+r              r	   findallstrlenfloat)r.   r/   kwargsnumbers	right_numnumberfinal_scorer   r   r   count_scoreL   s   r=   docresultsc                 K   :   d}|d   }| d D ]}t||}t||}qd|iS )Nr2   r   answersr=   )stripr=   maxr>   r?   r8   outputr.   r/   scorer   r   r   get_count_scoreV      
rG   c           
      K   n   d}t ||}|d }t d| }d}|D ]}t|t|kr$|d7 }qt|dkr-dn|t| }	t|	S )NzParagraph (\d+)r   r0   r1   r2   r3   
r.   r/   r8   patternmatchesground_truth_idr9   r:   r;   r<   r   r   r   retrieval_score_      rN   c                 K   r@   )Nr2   r   rA   rN   )rB   rN   rC   rD   r   r   r   get_retrieval_scorel   rH   rP   c           
      K   rI   )Nu   段落(\d+)r   r0   r1   r2   r3   rJ   r   r   r   retrieval_zh_scoreu   rO   rQ   c                 K   r@   )Nr2   r   rA   rQ   )rB   rQ   rC   rD   r   r   r   get_retrieval_zh_score   rH   rR   c                 K   sN   |  dd}d} |D ]}d|vrd|vrd|vr|}  nqt| |d S )N
r   `#z//d   )lstripr   r   ratio)r.   r/   r8   	all_linesliner   r   r   code_sim_score   s   r[   c                 K   s6   d}|d }| d D ]}t ||}t||}q
d|iS )Nr2   r   rA   r[   )r[   rC   rD   r   r   r   get_code_sim_score   s   
r\   c                 K   sl   g }|d }|D ]}|| v r| | q|D ]}||v r%||kr%|| q||v r2dt| }|S d}|S )Nall_classes      ?r2   )appendremover6   )r.   r/   r8   em_match_listr]   
class_name
match_termrF   r   r   r   classification_score   s   

rd   c                 C   sB   d}|d   }| d D ]}t||| d d}t||}qd|iS )Nr2   r   rA   r]   )r]   rd   )rB   rd   rC   )r>   r?   rE   r.   r/   rF   r   r   r   get_classification_score   s   
re   predictionsc                 K   sB   dt  vrt aztj| g|gdd}W n   Y dS |d d S )NrougeT)avgr2   zrouge-lf)globalsr   rg   
get_scores)rf   r/   r8   scoresr   r   r   rouge_score   s   
rm   c                 K   r@   )Nr2   r   rA   rm   )rB   rm   rC   rD   r   r   r   get_rouge_score   rH   rn   c                 K   s>   d ttj| dd} d ttj|dd}t| |}|S )Nr   Fcut_all)r   listjiebacutrm   )r.   r/   r8   rF   r   r   r   rouge_zh_score   s   
rt   c                 K   r@   )Nr2   r   rA   rt   )rB   rt   rC   rD   r   r   r   get_rouge_zh_score   rH   ru   c                 K   s`   t | t |@ }t| }|dkrdS d| t|  }d| t| }d| | ||  }|S )Nr   r^      )r   sumvaluesr6   )r.   r/   r8   commonnum_same	precisionrecallf1r   r   r   f1_score   s   r~   c                 K   r@   )Nr2   r   rA   r~   )rB   r~   rC   rD   r   r   r   get_f1_score   rH   r   c                 K   s*   t | }t |}| }| }t||S r   )r)   r   r~   )r.   r/   r8   normalized_predictionnormalized_ground_truthprediction_tokensground_truth_tokensr   r   r   qa_f1_score   s
   
r   c                 K   sf   t tj| dd}t tj|dd}dd |D }dd |D }dd |D }dd |D }t||S )NFro   c                 S      g | ]}t |qS r   r-   r   tokenr   r   r   
<listcomp>       z"qa_f1_zh_score.<locals>.<listcomp>c                 S   r   r   r   r   r   r   r   r      r   c                 S      g | ]
}t |d kr|qS r   r6   r   r   r   r   r          c                 S   r   r   r   r   r   r   r   r     r   )rq   rr   rs   r~   )r.   r/   r8   r   r   r   r   r   qa_f1_zh_score   s   
r   c                 K   r@   )Nr2   r   rA   r   )rB   r   rC   rD   r   r   r   get_qa_f1_score  rH   r   c                 K   r@   )Nr2   r   rA   r   )rB   r   rC   rD   r   r   r   get_qa_f1_zh_score  rH   r   c                 K   $   t | |fi |}|d }||dS )Nr   )rF   r   )r   )r>   r?   r8   resultqa_f1r   r   r   get_qa_f1_with_score     
r   c                 K   r   )Nr   )rF   r   )r   )r>   r?   r8   r   qa_f1_zhr   r   r   get_qa_f1_zh_with_score"  r   r   c                 K   r   )Nrm   )rF   rm   )rn   )r>   r?   r8   r   rg   r   r   r   get_rouge_with_score(  r   r   c                 K   r   )Nrt   )rF   rt   )ru   )r>   r?   r8   r   rouge_zhr   r   r   get_rouge_zh_with_score.  r   r   c                 K   r   )Nrd   )rF   rd   )re   )r>   r?   r8   r   classificationr   r   r   get_classification_with_score4  r   r   c                 K   r   )Nr=   )rF   r=   )rG   )r>   r?   r8   r   countr   r   r   get_count_with_score:  r   r   c                 K   r   )NrN   )rF   rN   )rP   )r>   r?   r8   r   	retrievalr   r   r   get_retrieval_with_score@  r   r   c                 K   r   )NrQ   )rF   rQ   )rR   )r>   r?   r8   r   retrieval_zhr   r   r   get_retrieval_zh_with_scoreF  r   r   c                 K   r   )Nr[   )rF   r[   )r\   )r>   r?   r8   r   code_simr   r   r   get_code_sim_with_scoreL  r   r   )/r	   r"   collectionsr   typingr   rr   
fuzzywuzzyr   rg   r   ImportErrorr5   r)   r-   r=   dictrq   rG   rN   rP   rQ   rR   r[   r\   rd   re   r7   rm   rn   rt   ru   r~   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   <module>   sX   
			
		"			
	