o
    }oi                     @   s`   d dl Z d dlZd dlZd dlZd dlZd dlmZ d dlmZ d dlm	Z	 G dd de
ZdS )    N)tqdm)tensor2list)loggingc                   @   s   e Zd Zedd Zedd Zedd Zedefdd	Zedefd
dZ	ededefddZ
ededefddZedd Zeg fddZed"ddZedd Zedd Zedd Zeg fdd Zd!S )#	QAMetricsc                 C   s   t dd| S )Nz\b(a|an|the)\b )resubtext r   [/home/ubuntu/.local/lib/python3.10/site-packages/nemo/collections/nlp/metrics/qa_metrics.pyremove_articles      zQAMetrics.remove_articlesc                 C   s   d |  S )Nr   )joinsplitr	   r   r   r   white_space_fix    r   zQAMetrics.white_space_fixc                    s"   t tj d fdd| D S )N c                 3   s    | ]	}| vr|V  qd S Nr   ).0chexcluder   r   	<genexpr>'       z(QAMetrics.remove_punc.<locals>.<genexpr>)setstringpunctuationr   r	   r   r   r   remove_punc$   s   
zQAMetrics.remove_puncsc                 C   s   t t t |  S )zB Lower text and remove punctuation, articles and extra whitespace )r   r   r   r   lowerr   r   r   r   normalize_answer)   s   zQAMetrics.normalize_answerc                 C   s   | sg S t |  S )z Get normalized tokens )r   r!   r   r    r   r   r   _get_normalized_tokens/   s   z QAMetrics._get_normalized_tokens
predictionground_truthc           	      C   s   t | }t |}t|t|@ }t| }t|dks&t|dkr,t||kS |dkr2dS d| t| }d| t| }d| | ||  }|S )z7 Computes f1 score between prediction and ground truth r   g      ?   )r   r"   collectionsCountersumvalueslenint)	r#   r$   prediction_tokensground_truth_tokenscommonnum_same	precisionrecallf1r   r   r   
get_one_f16   s   

zQAMetrics.get_one_f1c                 C   s   t t| t|kS )z: Computes exact match between prediction and ground truth )r+   r   r!   )r#   r$   r   r   r   get_one_exact_matchK   s   zQAMetrics.get_one_exact_matchc                 C   sp   dd t t|D }| D ](}t|D ]!\}}t|| tjr+|| t||  q|| ||  qq|S )Nc                 S   s   g | ]}g qS r   r   )r   _r   r   r   
<listcomp>S   s    z;QAMetrics.convert_dict_outputs_to_lists.<locals>.<listcomp>)ranger*   	enumerate
isinstancetorchTensorextendr   )outputskeysoutput_listsoutputikeyr   r   r   convert_dict_outputs_to_listsQ   s   z'QAMetrics.convert_dict_outputs_to_listsc                    s   i }i }| D ]5}|j }||vrqdd |jD }|sdg}||  t fdd|D ||< t fdd|D ||< q||fS )z
        Returns a dictionary of question id: exact match/f1 score
        Questions with ids *not* present in `question_id_filter` are excluded
        c                 S   s"   g | ]}t |d  r|d  qS r	   )r   r!   )r   answerr   r   r   r6   k   s   " z4QAMetrics.get_exact_match_and_f1.<locals>.<listcomp>r   c                 3       | ]	}t  |V  qd S r   )r   r4   r   apredr   r   r   r   r   z3QAMetrics.get_exact_match_and_f1.<locals>.<genexpr>c                 3   rE   r   )r   r3   rF   rH   r   r   r   s   r   )qas_idanswersmax)examplespredsquestion_id_filterexact_scores	f1_scoresexamplequestion_idgold_answersr   rH   r   get_exact_match_and_f1]   s   z QAMetrics.get_exact_match_and_f1r   c                 C   sr   t | }t| d|dkrdt|   | ndf| d|dkr,dt|  | ndf| dt|fgS )z5 Returns dictionary with formatted evaluation scores exactr   g      Y@g        r2   total)r*   r&   OrderedDictr(   r)   float)rP   rQ   prefixrW   r   r   r   make_eval_dictw   s   ((zQAMetrics.make_eval_dictc                 C   s,   t  }| D ]}|D ]}|| ||< q
q|S )z
        Combines multiple evaluation dict outputs into one dict
        Ex: combines eval dicts for HasAns F1, NoAnsF1, and Total F1
        )r&   rX   )
eval_dictsmerged_dict	eval_dictrB   r   r   r   merge_eval_dicts   s   zQAMetrics.merge_eval_dictsc                 C   s   dd | dt | D }dd | D }dd | D }t|df|df|d	fg}g }|D ]\}}t| ||\}	}
tj|	|
|d
}|| q0t|}|S )z 
        Calculates exact match and f1 scores for all predictions, 
            questions with answers, and no answer questions
        c                 S   s   i | ]	}|j t|jqS r   )rJ   boolrK   )r   rR   r   r   r   
<dictcomp>   s    z2QAMetrics.evaluate_predictions.<locals>.<dictcomp>Nc                 S   s   g | ]\}}|r|qS r   r   r   rJ   
has_answerr   r   r   r6          z2QAMetrics.evaluate_predictions.<locals>.<listcomp>c                 S   s   g | ]\}}|s|qS r   r   rb   r   r   r   r6      rd   r   HasAns_NoAns_)rZ   )r*   itemslistr   rU   r[   appendr_   )rM   all_predictionsqas_id_to_has_answerhas_answer_qidsno_answer_qidsfilters_and_prefixesr\   qas_id_filterrZ   
curr_exactcurr_f1curr_eval_dictmerged_eval_dictr   r   r   evaluate_predictions   s   

zQAMetrics.evaluate_predictionsc                 C   s|   t d|   t| d'}t|D ]}|j|j|j||j d}|t	|d  qW d    d S 1 s7w   Y  d S )NzWriting predictions to w)idcontextquestionpredicted_answer
)
r   infoopenr   rJ   context_textquestion_textwritejsondumps)output_filenamerM   predictionswriterexoutput_itemr   r   r   dump_predicted_answers_to_file   s   "z(QAMetrics.dump_predicted_answers_to_filec                    s   t d|   t| d:}t|D ],}|j|j|jg d}||j D ] |d  fdd|D  q#|t	
|d  qW d    d S 1 sJw   Y  d S )NzWriting nbest predictions to ru   )rv   rw   rx   nbest_predictionsr   c                    s   i | ]}| | qS r   r   )r   rB   rH   r   r   ra      s    z<QAMetrics.dump_nbest_predictions_to_file.<locals>.<dictcomp>rz   )r   r{   r|   r   rJ   r}   r~   ri   r   r   r   )r   rM   r   keys_to_dumpr   r   r   r   rH   r   dump_nbest_predictions_to_file   s   "z(QAMetrics.dump_nbest_predictions_to_fileN)r   )__name__
__module____qualname__staticmethodr   r   r   strr!   r"   r3   r4   rC   rU   r[   r_   rt   r   r   r   r   r   r   r      s:    






r   )r&   r   r   r   r:   r   &nemo.collections.nlp.parts.utils_funcsr   
nemo.utilsr   objectr   r   r   r   r   <module>   s   