o
    Ni                     @   s   d Z ddlmZ ddlmZ ddlmZ ddlZddlZddlZddl	m
Z dZdZdZd	ZG d
d dejjZG dd dejjZdS )zai2_arc dataset.    )absolute_import)division)print_functionNae  
@article{allenai:arc,
      author    = {Peter Clark  and Isaac Cowhey and Oren Etzioni and Tushar Khot and
                    Ashish Sabharwal and Carissa Schoenick and Oyvind Tafjord},
      title     = {Think you have Solved Question Answering? Try ARC, the AI2 Reasoning Challenge},
      journal   = {arXiv:1803.05457v1},
      year      = {2018},
}
a8  
A new dataset of 7,787 genuine grade-school level, multiple-choice science 
 questions, assembled to encourage research in advanced question-answering. 
 The dataset is partitioned into a Challenge Set and an Easy Set, where the 
 former contains only questions answered incorrectly by both a retrieval-based
 algorithm and a word co-occurrence algorithm. We are also including a corpus
 of over 14 million science sentences relevant to the task, and an 
 implementation of three neural baseline models for this dataset.
 We pose ARC as a challenge to the community.
zhttps://allenai.org/data/arczFhttps://ai2-datasets.s3-us-west-2.amazonaws.com/arc/ARC-V1-Feb2018.zipc                       s    e Zd ZdZ fddZ  ZS )Ai2ArcConfigzBuilderConfig for Ai2ARC.c                    s(   t t| jddtjddi| dS )z_BuilderConfig for Ai2Arc.

    Args:
      **kwargs: keyword arguments forwarded to super.
    version1.0.0zNew split APIN )superr   __init__tfdscoreVersion)selfkwargs	__class__r   b/home/ubuntu/.local/lib/python3.10/site-packages/tensorflow_datasets/question_answering/ai2_arc.pyr
   8   s
   
zAi2ArcConfig.__init__)__name__
__module____qualname____doc__r
   __classcell__r   r   r   r   r   5   s    r   c                   @   sR   e Zd ZdZejdZedddedddgZ	dd	 Z
d
d ZdefddZdS )Ai2ArczThe AI2 ARC dataset.r   zARC-Challengez          Challenge Set of 2590 "hard" questions (those that both a retrieval and a co-occurrence method fail to answer correctly)
          )namedescriptionzARC-EasyzG          Easy Set of 5197 questions for the ARC Challenge. 
          c                 C   sf   g d}t jj| tt jt j t j t jt j t jj|ddt jj|ddd t	t
dS )N)ABCDE)namestextlabel)idquestionchoices	answerKey)builderr   featuressupervised_keyshomepagecitation)r   r   DatasetInfo_DESCRIPTIONr)   FeaturesDictTextSequence
ClassLabel	_HOMEPAGE	_CITATION)r   optionsr   r   r   _infoU   s"   zAi2Arc._infoc              
   C   s   | t}tj|d}tj|| jj}tjj	tj
jdtj|| jjd idtjj	tj
jdtj|| jjd idtjj	tj
jdtj|| jjd idgS )zReturns SplitGenerators.zARC-V1-Feb2018-2filepathz-Train.jsonl)r   
gen_kwargsz
-Dev.jsonlz-Test.jsonl)download_and_extract_URLospathjoinbuilder_configr   r   r   SplitGeneratorSplitTRAIN
VALIDATIONTEST)r   
dl_managerdl_dirdata_dir	base_pathr   r   r   _split_generatorsn   s8   



zAi2Arc._split_generatorsr7   c              	   #   s    t td d  tjj|J}|D ]>}t|} 	|d |d }|d }|d d }|d d }dd	 |D }	 fd
d	|D }
|||||	|
ddfV  qW d   dS 1 s`w   Y  dS )z<Yields examples. Compatible with huggingface's `nlp` format.z	1 2 3 4 5z	A B C D Er'   r$   r%   stemr&   c                 S   s   g | ]}|d  qS )r"   r   .0choicer   r   r   
<listcomp>   s    z-Ai2Arc._generate_examples.<locals>.<listcomp>c                    s    g | ]}  |d  |d  qS )r#   )getrJ   n_to_lr   r   rM      s    r!   )r$   r'   r%   r&   N)
dictzipsplittfiogfileGFilejsonloadsrN   )r   r7   frowdata	answerkeyid_r%   r&   text_choiceslabel_choicesr   rO   r   _generate_examples   s.   

"zAi2Arc._generate_examplesN)r   r   r   r   r   r   r   VERSIONr   BUILDER_CONFIGSr6   rH   strra   r   r   r   r   r   B   s     r   )r   
__future__r   r   r   rX   r;   
tensorflowrT   tensorflow_datasets.public_api
public_apir   r4   r.   r3   r:   r   BuilderConfigr   GeneratorBasedBuilderr   r   r   r   r   <module>   s   
