o
    6ti
                     @   s   d dl Z d dlZd dlZd dlZd dlZdd Zdd Zdd Zdd	 Zd
d Z	dd Z
dd Zdd ZdejdejfddZdS )    Nc                 C   s4   dd }dd }dd }dd }||||| S )	zALower text and remove punctuation, articles and extra whitespace.c                 S   s   t dt j}t |d| S )Nz\b(un|une|des|le|la|les)\b )recompileUNICODEsub)textregex r	   T/home/ubuntu/.local/lib/python3.10/site-packages/lm_eval/tasks/french_bench/utils.pyremove_articles   s   z)normalize_answer.<locals>.remove_articlesc                 S   s   d |  S )Nr   )joinsplitr   r	   r	   r
   white_space_fix   s   z)normalize_answer.<locals>.white_space_fixc                    s"   t tj d fdd| D S )N c                 3   s    | ]	}| vr|V  qd S Nr	   ).0chexcluder	   r
   	<genexpr>   s    z8normalize_answer.<locals>.remove_punc.<locals>.<genexpr>)setstringpunctuationr   r   r	   r   r
   remove_punc   s   
z%normalize_answer.<locals>.remove_puncc                 S   s   |   S r   )lowerr   r	   r	   r
   r      s   znormalize_answer.<locals>.lowerr	   )sr   r   r   r   r	   r	   r
   normalize_answer	   s
   r   c                 C   s   | sg S t |  S r   )r   r   )r   r	   r	   r
   
get_tokens   s   r   c                 C   s   t t|d t| d kS )Nr   )intr   predictions
referencesr	   r	   r
   exact$   s   r#   c           	      C   s   t |d }t | d }t|t|@ }t| }t|dks(t|dkr.t||kS |dkr4dS d| t| }d| t| }d| | ||  }|S )Nr   g      ?   )r   collectionsCountersumvalueslenr   )	r!   r"   	gold_toks	pred_tokscommonnum_same	precisionrecallf1r	   r	   r
   r0   )   s   r0   c                 C   s   | S )&
    # passthrough for efficiency
    r	   itemsr	   r	   r
   rouge19   s   r4   c                 C   s<   t t|  d }t t|  d }td}|j||dd S )z
    Higher is better
    r      rouger    r4   )listzipevaluateloadcompute)r3   refspredsrouge_scorerr	   r	   r
   
rouge1_agg@   s   
r?   c                 C   s   | d | d v r
dS dS )r1   r   r5   TFr	   r2   r	   r	   r
   is_includedJ   s   r@   c                 C   s2   |   } | dd} tdd| } | dd} | S )Nz [title]z. z\[.*?\]r   z  r   )stripreplacer   r   r   r	   r	   r
   
preprocessS   s
   rC   datasetreturnc                 C   s   dd }|  |S )Nc                 S   sN   | d d | d    }t| d d | dd | d D t| d	 d
}|S )Nctx_ar   ctx_bactivity_labelz: c                 S   s   g | ]}t |qS r	   )rC   )r   endingr	   r	   r
   
<listcomp>a   s    z6process_docs.<locals>._process_doc.<locals>.<listcomp>endingslabel)querychoicesgold)
capitalizerC   r   )docctxout_docr	   r	   r
   _process_doc]   s   
z"process_docs.<locals>._process_doc)map)rD   rT   r	   r	   r
   process_docs\   s   
	rV   )r%   r   r   datasetsr9   r   r   r#   r0   r4   r?   r@   rC   DatasetrV   r	   r	   r	   r
   <module>   s    
		