o
    Ni                     @   sv   d Z ddlmZ ddlmZ ddlmZ ddlZddlZddlZddl	m
Z dZddd	Zd
ZG dd dejjZdS ).WebQuestions Benchmark for Question Answering.    )absolute_import)division)print_functionNa%  
@inproceedings{berant-etal-2013-semantic,
    title = "Semantic Parsing on {F}reebase from Question-Answer Pairs",
    author = "Berant, Jonathan  and
      Chou, Andrew  and
      Frostig, Roy  and
      Liang, Percy",
    booktitle = "Proceedings of the 2013 Conference on Empirical Methods in Natural Language Processing",
    month = oct,
    year = "2013",
    address = "Seattle, Washington, USA",
    publisher = "Association for Computational Linguistics",
    url = "https://www.aclweb.org/anthology/D13-1160",
    pages = "1533--1544",
}
z]https://worksheets.codalab.org/rest/bundles/0x4a763f8cde224c2da592b75f29e2f5c2/contents/blob/z]https://worksheets.codalab.org/rest/bundles/0xe7bac352fce7448c9ef238fb0a297ec2/contents/blob/)traintesta  This dataset consists of 6,642 question/answer pairs.
The questions are supposed to be answerable by Freebase, a large knowledge graph.
The questions are mostly centered around a single named entity.
The questions are popular ones asked on the web (at least in 2013).
c                   @   s4   e Zd ZdZejdZdd Zdd Z	dd Z
d	S )
WebQuestionsr   z1.0.0c                 C   sB   t jj| tt jt j t j t jt j dd dtdS )NurlquestionanswerszLhttps://worksheets.codalab.org/worksheets/0xba659fe363cb46e7a505c5b6a774dc8a)builderdescriptionfeaturessupervised_keyshomepagecitation)	tfdscoreDatasetInfo_DESCRIPTIONr   FeaturesDictTextSequence	_CITATION)self r   h/home/ubuntu/.local/lib/python3.10/site-packages/tensorflow_datasets/question_answering/web_questions.py_infoB   s   zWebQuestions._infoc                 C   s   | t}dd | D S )zReturns SplitGenerators.c                 S   s$   g | ]\}}t jj|d |idqS )	file_path)name
gen_kwargs)r   r   SplitGenerator).0splitr   r   r   r   
<listcomp>W   s    z2WebQuestions._split_generators.<locals>.<listcomp>)download_SPLIT_DOWNLOAD_URLitems)r   
dl_manager
file_pathsr   r   r   _split_generatorsS   s   
zWebQuestions._split_generatorsc                 c   s|    dd }t jj|(}t|}t|D ]\}}||d |d ||d dfV  qW d   dS 1 s7w   Y  dS )z&Parses split file and yields examples.c                 S   s$   t dd| } dd t d| D S )Nz^\(list |\)$ c                 S   s   g | ]}d  |qS )r,   )join)r#   ansr   r   r   r%   b   s    
zOWebQuestions._generate_examples.<locals>._target_to_answers.<locals>.<listcomp>z*\(description (?:"([^"]+?)"|([^)]+?))\)\w*)resubfindall)targetr   r   r   _target_to_answers`   s   
z;WebQuestions._generate_examples.<locals>._target_to_answersr
   	utterancetargetValuer	   N)tfiogfileGFilejsonload	enumerate)r   r   r3   fexamplesiexr   r   r   _generate_examples]   s   

"zWebQuestions._generate_examplesN)__name__
__module____qualname____doc__r   r   VersionVERSIONr   r+   rA   r   r   r   r   r   =   s    
r   )rE   
__future__r   r   r   r:   r/   
tensorflowr6   tensorflow_datasets.public_api
public_apir   r   r'   r   r   GeneratorBasedBuilderr   r   r   r   r   <module>   s   