o
    Ni_                     @   sl   d Z ddlmZ ddlmZ ddlmZ ddlZddlZddlZddl	m
Z dZdZG dd	 d	ejjZdS )
zopenbookQA dataset.    )absolute_import)division)print_functionNa  
@article{mihaylov2018can,
  title={Can a suit of armor conduct electricity? a new dataset for open book question answering},
  author={Mihaylov, Todor and Clark, Peter and Khot, Tushar and Sabharwal, Ashish},
  journal={arXiv preprint arXiv:1809.02789},
  year={2018}
}
aJ  
The dataset contains 5,957 4-way multiple choice questions. Additionally, they
provide 5,167 crowd-sourced common knowledge facts, and an expanded version of
the train/dev/test questions where each question is associated with its
originating core fact, a human accuracy score, a clarity score, and an
anonymized crowd-worker ID.
c                   @   s4   e Zd ZdZejdZdd Zdd Z	dd Z
d	S )

Openbookqaz'QA dataset with common knowledge facts.z0.1.0c                 C   s   t jj| tt jt j t j t j t j t j dt j t jjdtj	dt jjdtj	dt j t jj
g dddddtd	S )
N)stemchoice_Achoice_Bchoice_Cchoice_D )shapedtype)ABCD)namesquestionfact1
humanScoreclarityturkIdAnonymized	answerKey)r   r   zDhttps://leaderboard.allenai.org/open_book_qa/submissions/get-started)builderdescriptionfeaturessupervised_keyshomepagecitation)tfdscoreDatasetInfo_DESCRIPTIONr   FeaturesDictTextTensortffloat32
ClassLabel	_CITATION)selfr   r   W/home/ubuntu/.local/lib/python3.10/site-packages/tensorflow_datasets/text/openbookqa.py_info2   s(   zOpenbookqa._infoc              	   C   s   d}| |}tj|d}tjjtjj|tj|dddtjjtjj	|tj|dddtjjtjj
|tj|dddgS )zReturns SplitGenerators.zMhttps://s3-us-west-2.amazonaws.com/ai2-website/data/OpenBookQA-V1-Sep2018.zipz%OpenBookQA-V1-Sep2018/Data/Additionalztrain_complete.jsonl)data_dirfilepath)name
gen_kwargszdev_complete.jsonlztest_complete.jsonl)download_and_extractospathjoinr    r!   SplitGeneratorSplitTRAIN
VALIDATIONTEST)r+   
dl_managerdownload_urldl_dirr.   r   r   r,   _split_generatorsN   s,   
zOpenbookqa._split_generatorsc              
   c   s    t jj|[}|D ]O}t|}i }|d d |d< |d d }|d d |d< |d d |d< |d	 d |d
< |d d |d< |d ||d |d |d |d |d dfV  qW d   dS 1 sfw   Y  dS )zYields examples.r   r   choicesr   textr      r      r	      r
   idr   r   r   r   r   r   N)r'   iogfileGFilejsonloads)r+   r.   r/   flinerowr   r?   r   r   r,   _generate_exampleso   s*   
"zOpenbookqa._generate_examplesN)__name__
__module____qualname____doc__r    r!   VersionVERSIONr-   r>   rM   r   r   r   r,   r   -   s    !r   )rQ   
__future__r   r   r   rH   r3   
tensorflowr'   tensorflow_datasets.public_api
public_apir    r*   r#   r!   GeneratorBasedBuilderr   r   r   r   r,   <module>   s   		