o
    6tir(                     @   st   d dl Z d dlZdd ZdedefddZG dd	 d	ZG d
d deZG dd deZdd Zdd Z	dd Z
dS )    Nc                 C   s8   |  dr
| ds|  dr| dr| dd S | S )N"'   )
startswithendswith)s r	   O/home/ubuntu/.local/lib/python3.10/site-packages/lm_eval/tasks/chartqa/utils.py_normalize_string   s   r   unnormalized_stringreturnc                 C   sd   | r0| d t jv s| d  r0| d dkr0| d d } | r0| d t jv s*| d  r0| d dks| S )Nr   %)stringpunctuationisspace)r   r	   r	   r
   _remove_end_punctuation   s   

r   c                	   @   sP   e Zd ZdZ	ddedee dedefddZd	ed
eee B defddZdS )RelaxedCorrectnessa%  Relaxed correctness metrics.

    The correctness tolerates certain error ratio defined by max_relative_change.
    See https://arxiv.org/pdf/2203.10244.pdf, end of section 5.1:
    "Following Methani et al. (2020), we use a relaxed accuracy measure for the
    numeric answers to allow a minor inaccuracy that may result from the automatic
    data extraction process. We consider an answer to be correct if it is within
    5% of the gold answer. For non-numeric answers, we still need an exact match
    to consider an answer to be correct."
    皙?
predictiontargetsmax_relative_changer   c                    sp  dt dttd B tf fdd}dt dtfdd}dt dt fdd}d	td
tdtfddd	td
tdtdtffdd d	t d
t dtfdd}dtdtdtfddd	tdtd
tdtdtdtf fdd}||}||\}	}
g }|D ]@}||}||\}}|	d ur|d ur||	|
|||}n||rt|dkr|d  | krdnd}n|||}|| qst|S )Ntextr   c                 S   s@   |   } | d}zt| d}||fW S  ty   Y dS w )Nr   )NF)stripr   floatrstrip
ValueError)r   
is_percentvaluer	   r	   r
   	_to_float*   s   

z:RelaxedCorrectness._relaxed_correctness.<locals>._to_floatc                 S   s   |   o	t| dkS )Nr   )isalphalenr   r	   r	   r
   
_is_letter3   s   z;RelaxedCorrectness._relaxed_correctness.<locals>._is_letterc                 S   s2   t dd | D st| S t| ddddS )Nc                 s   s    | ]}|  V  qd S N)isdigit).0charr	   r	   r
   	<genexpr>7   s    zTRelaxedCorrectness._relaxed_correctness.<locals>._preprocess_text.<locals>.<genexpr>, $)anyr   r   replacer"   r	   r	   r
   _preprocess_text6   s   zARelaxedCorrectness._relaxed_correctness.<locals>._preprocess_textr   targetc                 S   s   t | | tt |d S )Ng|=)absmaxr   r/   r	   r	   r
   calculate_relative_change<   s   zJRelaxedCorrectness._relaxed_correctness.<locals>.calculate_relative_changer   c                    s    | |}||krdS dS )N      ?        r	   )r   r/   r   relative_change)r3   r	   r
   _compare_numeric_values?   s   
zHRelaxedCorrectness._relaxed_correctness.<locals>._compare_numeric_valuesc                 S   sH   | r| d t jv r| d d } | r| d t jv s	|  | kr"dS dS )Nr   r4   r5   )r   r   lowerr2   r	   r	   r
   _compare_text_valuesE   s   zERelaxedCorrectness._relaxed_correctness.<locals>._compare_text_valuesr   r   c                 S   s   |r| d S | S )Nd   r	   )r   r   r	   r	   r
   _to_decimalJ   s   z<RelaxedCorrectness._relaxed_correctness.<locals>._to_decimalprediction_is_percenttarget_is_percentc                    sH    | ||}|dkr"|s|r"t | | ||| | |||}|S )Nr4   )r1   )r   r<   r/   r=   r   r   )r7   r;   r	   r
   _compare_numeric_with_percentM   s    zNRelaxedCorrectness._relaxed_correctness.<locals>._compare_numeric_with_percentr   r4   r5   )strtupler   boolr!   r8   appendr1   )selfr   r   r   r   r#   r.   r9   r>   prediction_floatr<   
value_listr/   target_floatr=   r   r	   )r7   r;   r3   r
   _relaxed_correctness'   s^   	
z'RelaxedCorrectness._relaxed_correctnessmodel_answerreference_answerc                 C   s    t |tr|n|g}| ||S r$   )
isinstancelistrG   )rC   rH   rI   r	   r	   r
   score   s
   zRelaxedCorrectness.scoreN)r   )	__name__
__module____qualname____doc__r?   rK   r   rG   rL   r	   r	   r	   r
   r      s    
"]r   c                       sZ   e Zd ZdZedefddZdedefddZded	eee B de	f fd
dZ
  ZS ) ExplicitPromptRelaxedCorrectnessz(Relaxed correctness for explicit prompt.r   c                 C      dS )N#explicit_prompt_relaxed_correctnessr	   rC   r	   r	   r
   name      z%ExplicitPromptRelaxedCorrectness.name
generationc                 C   s~   dt dt fdd}tdd|}|d| }|dkr=|td }||d  d	}td
d |D d}tdd|}|S dS )Npatternr   c                 S   s
   | | S r$   )rfind)rX   r   r	   r	   r
   _find_last_occurrence   s   
zQExplicitPromptRelaxedCorrectness._get_final_answer.<locals>._find_last_occurrencez([aA]nswer)\**:\**z\1:zanswer:r   
c                 s   s     | ]}|  r|  V  qd S r$   )r   )r&   liner	   r	   r
   r(      s    zEExplicitPromptRelaxedCorrectness._get_final_answer.<locals>.<genexpr>r*   z[*_\[\]\(\)])r?   resubr8   r!   splitnext)rC   rW   rZ   final_answer_indexstart_indexlinesfinal_answerr	   r	   r
   _get_final_answer   s   z2ExplicitPromptRelaxedCorrectness._get_final_answerrH   rI   c                    s    |  |}|s	dS t ||S )Nr5   )re   superrL   )rC   rH   rI   parsed_model_answer	__class__r	   r
   rL      s   
z&ExplicitPromptRelaxedCorrectness.score)rM   rN   rO   rP   propertyr?   rU   re   rK   r   rL   __classcell__r	   r	   rh   r
   rQ      s    *rQ   c                   @   s@   e Zd ZdZedefddZdedeee B defddZ	d	S )
"AnywhereInAnswerRelaxedCorrectnesszFalls back to handle cases where reference answer appears anywhere in generation.

    NOTE: This is an overly generous metric and is likely to falsely inflate scores.
    r   c                 C   rR   )N&anywhere_in_answer_relaxed_correctnessr	   rT   r	   r	   r
   rU      rV   z'AnywhereInAnswerRelaxedCorrectness.namerH   rI   c                    s  t |tr|n|g}|  }|r| ||S |D ]l}z1t|}t||kr*t|}t|d v r5W  dS t| v r?W  dS t|d  v rKW  dS W q ty   g }|D ]}|	d| d| d| dd| d	| g qWt
 fd
d|D rY  dS Y qw dS )Nr)   r4   r   zis zwas  .zare z

c                    s   g | ]}|   v qS r	   )r8   )r&   crH   r	   r
   
<listcomp>   s    z<AnywhereInAnswerRelaxedCorrectness.score.<locals>.<listcomp>r   )rJ   rK   re   rG   r   intformatr?   r   extendr,   )rC   rH   rI   rg   refnumber
candidatesr	   rq   r
   rL      sF   

	z(AnywhereInAnswerRelaxedCorrectness.scoreN)
rM   rN   rO   rP   rj   r?   rU   rK   r   rL   r	   r	   r	   r
   rl      s
    "rl   c                 C   sd   |d }| d }t d|t j}|r.|d }| d|  kr*ddiS ddiS ddiS )Nr   z$(?:Final Answer|FINAL ANSWER): (.+)$r   ro   exact_matchr4   r5   )r]   search
IGNORECASEgroupr   r8   removesuffix)
referencespredictionspredrv   matchextracted_predr	   r	   r
   ry      s   ry   c                 C   >   |d }| d }t  ||}|r|dkrddiS d S ddiS )Nr   r4   relaxed_accuracyr5   )rQ   rL   r~   r   r   rv   rL   r	   r	   r
   r        r   c                 C   r   )Nr   r4   anywhere_accuracyr5   )rl   rL   r   r	   r	   r
   r     r   r   )r]   r   r   r?   r   r   rQ   rl   ry   r   r   r	   r	   r	   r
   <module>   s    r)<