o
    6tih                     @   s   d dl Z d dlZd dlZe de jZdd Zdd Zdd Z	d	d
 Z
dd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd  Zd!d" ZdS )#    Nz\b(a|an|the)\bc                 C   s   dd }|  |S )Nc                 S   s   | d | d | d t | dS )Nquery_idpassagequestion)idr   r   answers)get_answers)doc r	   L/home/ubuntu/.local/lib/python3.10/site-packages/lm_eval/tasks/drop/utils.py_process   s
   zprocess_docs.<locals>._process)map)datasetr   r	   r	   r
   process_docs
   s   
r   c                 C   s\   dd }g }t  }| d g|| d  }|D ]}t|}||v r!q|| || q|S )Nc                 S   sH   g }t t| d D ]}|| d | | d | | d | d q
|S )zFlattens a dict of lists of validated answers.
        {"number": ['1', '8'], ...}
        -> [{"number": ['1'], ...}, {"number": ['8'], ...}]
        numberdatespans)r   r   r   )rangelenappend)validated_answersvalid_answersir	   r	   r
   _flatten_validated_answers   s   


z/get_answers.<locals>._flatten_validated_answersanswerr   )setparse_answeraddr   )r   r   r   answers_set
candidates	candidater   r	   r	   r
   r      s   
r   c                 C   s`   | d dkrt | d fS | d g krt| d S d| d d | d d | d d g fS )	Nr    r    r   daymonthyear)strtuplejoinstrip)r   r	   r	   r
   r   3   s    r   c           	      C   sX   || d }}d}d}|D ]}t ||\}}|d  r&t||}t||}q||dS )Nr   r   )emf1)get_metricsr(   max)	r   resultspredsgoldsmax_emmax_f1gold_answerexact_matchf1_scorer	   r	   r
   process_results@   s   


r5   c                 C   sx   t | }t |}t|d t|d kr#t|d t|d kr#d}nd}t|d |d }t|}t|d}||fS )a  
    Takes a predicted answer and a gold answer (that are both either a string or a list of
    strings), and returns exact match and the DROP F1 metric for the prediction.  If you are
    writing a script for evaluating objects in memory (say, the output of predictions during
    validation, or while training), this is the function you want to call, after using
    :func:`answer_json_to_strings` when reading the gold answer from the released data file.
    r         ?              )_answer_to_bagsr   r   _align_bagsnpmeanround)	predictedgoldpredicted_bags	gold_bagsr3   
f1_per_bagr*   r	   r	   r
   r+   L   s   $

r+   c                 C   sX   t | ttfr
| }n| g}g }g }|D ]}t|}|| |t|  q||fS N)
isinstancelistr&   
_normalizer   r   split)r   	raw_spansnormalized_spans
token_bagsraw_spannormalized_spanr	   r	   r
   r:   d   s   
r:   c                 C   s   ddl m} tt|t| g}t|D ]\}}t| D ]\}}t||r/t|||||f< qq|| \}}	ttt|t| g}
t	||	D ]\}}t|
| |||f |
|< qJ|
S )z
    Takes gold and predicted answer sets and first finds the optimal 1-1 alignment
    between them and gets maximum metric values over all the answers.
    r   )linear_sum_assignment)
scipy.optimizerN   r<   zerosr   	enumerate_match_numbers_if_present_compute_f1r,   zip)r?   r@   rN   scores
gold_index	gold_item
pred_index	pred_itemrow_indcol_ind
max_scoresrowcolumnr	   r	   r
   r;   r   s   
r;   c                 C   sr   t || }| sd}n|tt |  }|sd}n|tt | }|dkr)|dks5d| | ||  }|S d}|S )Nr6   r7   r9   )r   intersectionfloat)predicted_baggold_bagr_   	precisionrecallr*   r	   r	   r
   rS      s   rS   c                 C   sZ   t  }t  }| D ]}t|r|| q|D ]}t|r!|| q|r)||r+dS dS NTF)r   
_is_numberr   r_   )rb   ra   gold_numberspredicted_numberswordr	   r	   r
   rR      s   

rR   c                 C   s$   zt |  W dS  ty   Y dS w re   )r`   
ValueErrortextr	   r	   r
   rf      s   rf   c                 C      t d| S Nr!   )	_ARTICLESsubrk   r	   r	   r
   _remove_articles      rq   c                 C   s   d |  S rn   )r'   rH   rk   r	   r	   r
   _white_space_fix   s   rs   c                    s.   t tj t| sd fdd| D S | S )Nr    c                 3   s    | ]	}| vr|V  qd S rD   r	   ).0chexcluder	   r
   	<genexpr>   s    z_remove_punc.<locals>.<genexpr>)r   stringpunctuationrf   r'   rk   r	   rv   r
   _remove_punc   s   
r{   c                 C   s   t | r
tt| S | S rD   )rf   r%   r`   rk   r	   r	   r
   _fix_number   s   r|   c                 C   rm   )Nz |-)rerH   rk   r	   r	   r
   	_tokenize   rr   r~   c                 C   s2   dd t | D }dd |D }d| }|S )Nc                 S   s$   g | ]}t ttt| qS r	   )rs   rq   r|   r{   lowerrt   tokenr	   r	   r
   
<listcomp>   s    z_normalize.<locals>.<listcomp>c                 S   s   g | ]}|  r|qS r	   )r(   r   r	   r	   r
   r      s    r!   )r~   r'   r(   )r   tokens
normalizedr	   r	   r
   rG      s   rG   )r}   ry   numpyr<   compileUNICODEro   r   r   r   r5   r+   r:   r;   rS   rR   rf   rq   rs   r{   r|   r~   rG   r	   r	   r	   r
   <module>   s(    