o
    Ni                     @   sz   d Z ddlmZ ddlmZ ddlmZ ddlZddlZddlm  m	Z
 ddlmZ dZdZdZG d	d
 d
ejjZdS )zDPassage, query, answers and answer classification with explanations.    )absolute_import)division)print_functionNa  
@unpublished{eraser2019,
    title = {ERASER: A Benchmark to Evaluate Rationalized NLP Models},
    author = {Jay DeYoung and Sarthak Jain and Nazneen Fatema Rajani and Eric Lehman and Caiming Xiong and Richard Socher and Byron C. Wallace}
}
@inproceedings{MultiRC2018,
    author = {Daniel Khashabi and Snigdha Chaturvedi and Michael Roth and Shyam Upadhyay and Dan Roth},
    title = {Looking Beyond the Surface:A Challenge Set for Reading Comprehension over Multiple Sentences},
    booktitle = {NAACL},
    year = {2018}
}
a^  
Eraser Multi RC is a dataset for queries over multi-line passages, along with
answers and a rationalte. Each example in this dataset has the following 5 parts
1. A Mutli-line Passage
2. A Query about the passage
3. An Answer to the query
4. A Classification as to whether the answer is right or wrong
5. An Explanation justifying the classification
z4http://www.eraserbenchmark.com/zipped/multirc.tar.gzc                   @   s4   e Zd ZdZejdZdd Zdd Z	dd Z
d	S )
EraserMultiRcz>Multi Sentence Reasoning with Explanations (Eraser Benchmark).z0.1.1c                 C   sR   t jj| tt jt j t j t jjddgdt jt j dd dt	dS )NFalseTrue)namespassagequery_and_answerlabel	evidencesz'https://cogcomp.seas.upenn.edu/multirc/)builderdescriptionfeaturessupervised_keyshomepagecitation)
tfdscoreDatasetInfo_DESCRIPTIONr   FeaturesDictText
ClassLabelSequence	_CITATION)self r   \/home/ubuntu/.local/lib/python3.10/site-packages/tensorflow_datasets/text/eraser_multi_rc.py_info;   s   zEraserMultiRc._infoc              	   C   s   | t}tj|d}tjjtjj	|tj|dddtjjtjj
|tj|dddtjjtjj|tj|dddgS )zReturns SplitGenerators.multircztrain.jsonl)data_dirfilepath)name
gen_kwargsz	val.jsonlz
test.jsonl)download_and_extract_DOWNLOAD_URLospathjoinr   r   SplitGeneratorSplitTRAIN
VALIDATIONTEST)r   
dl_managerdl_dirr"   r   r   r   _split_generatorsJ   s*   
zEraserMultiRc._split_generatorsc              
   c   s    t j|d}tjj|[}|D ]O}t|}g }|d d D ]}|d }	|	|d  q!t j||	}
tjj|
}|
 }W d   n1 sLw   Y  |d ||d |d	 |d
fV  qW d   dS 1 smw   Y  dS )zYields examples.docsr   r   docidtextNannotation_idqueryclassificationr	   )r(   r)   r*   tfiogfileGFilejsonloadsappendread)r   r"   r#   multirc_dirflinerowr   evidencer4   passage_filef1passage_textr   r   r   _generate_examplesd   s*   

"z EraserMultiRc._generate_examplesN)__name__
__module____qualname____doc__r   r   VersionVERSIONr    r2   rI   r   r   r   r   r   6   s    r   )rM   
__future__r   r   r   r=   r(   tensorflow.compat.v2compatv2r9   tensorflow_datasets.public_api
public_apir   r   r   r'   r   GeneratorBasedBuilderr   r   r   r   r   <module>   s   
