o
    Ni                     @   s   d Z ddlmZ ddlmZ ddlmZ ddlZddlZddlmZ ddl	m
  mZ ddlmZ dZdZG d	d
 d
ejjZG dd dejjZdS )z/SQUAD: The Stanford Question Answering Dataset.    )absolute_import)division)print_functionN)logginga  @article{2016arXiv160605250R,
       author = {{Rajpurkar}, Pranav and {Zhang}, Jian and {Lopyrev},
                 Konstantin and {Liang}, Percy},
        title = "{SQuAD: 100,000+ Questions for Machine Comprehension of Text}",
      journal = {arXiv e-prints},
         year = 2016,
          eid = {arXiv:1606.05250},
        pages = {arXiv:1606.05250},
archivePrefix = {arXiv},
       eprint = {1606.05250},
}
a/  Stanford Question Answering Dataset (SQuAD) is a reading comprehension dataset, consisting of questions posed by crowdworkers on a set of Wikipedia articles, where the answer to every question is a segment of text, or span, from the corresponding reading passage, or the question might be unanswerable.
c                       s(   e Zd ZdZejj fddZ  ZS )SquadConfigzBuilderConfig for SQUAD.c                    s   t t| jdi | dS )z^BuilderConfig for SQUAD.

    Args:
      **kwargs: keyword arguments forwarded to super.
    N )superr   __init__)selfkwargs	__class__r   `/home/ubuntu/.local/lib/python3.10/site-packages/tensorflow_datasets/question_answering/squad.pyr	   7   s   zSquadConfig.__init__)	__name__
__module____qualname____doc__tfdscoredisallow_positional_argsr	   __classcell__r   r   r   r   r   4   s    r   c                   @   sN   e Zd ZdZdZdZdZedej	
dddd	gZd
d Zdd Zdd ZdS )Squadz<SQUAD: The Stanford Question Answering Dataset. Version 1.1.z3https://rajpurkar.github.io/SQuAD-explorer/dataset/zdev-v1.1.jsonztrain-v1.1.json
plain_textz1.0.0z6New split API (https://tensorflow.org/datasets/splits)z
Plain text)nameversiondescriptionc                 C   sV   t jj| tt jtjt j t j t j t j	t j tj
ddd dtdS )N)textanswer_start)idtitlecontextquestionanswersz+https://rajpurkar.github.io/SQuAD-explorer/)builderr   featuressupervised_keyshomepagecitation)r   r   DatasetInfo_DESCRIPTIONr$   FeaturesDicttfstringTextSequenceint32	_CITATION)r
   r   r   r   _infoQ   s"   zSquad._infoc                 C   sh   t j| j| jt j| j| jd}||}tjj	tj
jd|d idtjj	tj
jd|d idgS )N)traindevfilepathr2   )r   
gen_kwargsr3   )ospathjoin_URL_TRAINING_FILE	_DEV_FILEdownload_and_extractr   r   SplitGeneratorSplitTRAIN
VALIDATION)r
   
dl_managerurls_to_downloaddownloaded_filesr   r   r   _split_generatorsk   s   


zSquad._split_generatorsc                 c   s    t d| tjj|[}t|}|d D ]H}|dd	 }|d D ]9}|d 	 }|d D ],}|d 	 }	|d	 }
d
d |d D }dd |d D }|
|||	|
||ddfV  q2q&qW d   dS 1 slw   Y  dS )z:This function returns the examples in the raw (text) form.zgenerating examples from = %sdatar    
paragraphsr    qasr!   r   c                 S   s   g | ]}|d  qS )r   r   .0answerr   r   r   
<listcomp>   s    z,Squad._generate_examples.<locals>.<listcomp>r"   c                 S   s   g | ]}|d    qS )r   )striprI   r   r   r   rL      s    )r   r   )r   r    r!   r   r"   N)
r   infor+   iogfileGFilejsonloadgetrM   )r
   r4   fsquadarticler   	paragraphr    qar!   id_answer_startsr"   r   r   r   _generate_examples{   s4   
"zSquad._generate_examplesN)r   r   r   r   r9   r;   r:   r   r   r   VersionBUILDER_CONFIGSr1   rD   r\   r   r   r   r   r   A   s"    
r   )r   
__future__r   r   r   rR   r6   abslr   tensorflow.compat.v2compatv2r+   tensorflow_datasets.public_api
public_apir   r0   r)   r   BuilderConfigr   GeneratorBasedBuilderr   r   r   r   r   <module>   s   