o
    Ni                     @   sz   d Z ddlmZ ddlmZ ddlmZ ddlZddlm  mZ	 ddl
mZ dZdZdZd	Zd
ZG dd dejjZdS )z'GAP is a gender-balanced text data set.    )absolute_import)division)print_functionNay  
@article{DBLP:journals/corr/abs-1810-05201,
  author    = {Kellie Webster and
               Marta Recasens and
               Vera Axelrod and
               Jason Baldridge},
  title     = {Mind the {GAP:} {A} Balanced Corpus of Gendered Ambiguous Pronouns},
  journal   = {CoRR},
  volume    = {abs/1810.05201},
  year      = {2018},
  url       = {http://arxiv.org/abs/1810.05201},
  archivePrefix = {arXiv},
  eprint    = {1810.05201},
  timestamp = {Tue, 30 Oct 2018 20:39:56 +0100},
  biburl    = {https://dblp.org/rec/bib/journals/corr/abs-1810-05201},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
z
GAP is a gender-balanced dataset containing 8,908 coreference-labeled pairs of 
(ambiguous pronoun, antecedent name), sampled from Wikipedia and released by 
Google AI Language for the evaluation of coreference resolution in practical 
applications.
zehttps://raw.githubusercontent.com/google-research-datasets/gap-coreference/master/gap-development.tsvzdhttps://raw.githubusercontent.com/google-research-datasets/gap-coreference/master/gap-validation.tsvz^https://raw.githubusercontent.com/google-research-datasets/gap-coreference/master/gap-test.tsvc                   @   s4   e Zd ZdZejdZdd Zdd Z	dd Z
d	S )
GapzGAP is a gender-balanced dataset.

  It contains 8,908 coreference-labeled pairs
  of (ambiguous pronoun, antecedent name), sampled from Wikipedia.
  z0.1.0c                 C   sf   t jj| tt jt j t j t j tjt j tjtj	t j tjtj	t j dd dt
dS )N)IDTextPronounzPronoun-offsetAzA-offsetzA-corefBzB-offsetzB-corefURLz;https://github.com/google-research-datasets/gap-coreference)builderdescriptionfeaturessupervised_keyshomepagecitation)tfdscoreDatasetInfo_DESCRIPTIONr   FeaturesDictr   tfint32bool	_CITATION)self r   P/home/ubuntu/.local/lib/python3.10/site-packages/tensorflow_datasets/text/gap.py_infoD   s(   z	Gap._infoc                 C   sd   | tttd}tjjtjjd|d idtjjtjj	d|d idtjjtjj
d|d idgS )zReturns SplitGenerators.)train
validationtestfilepathr   )name
gen_kwargsr    r!   )download_and_extract	_TRAINURL_VALIDATIONURL_TESTURLr   r   SplitGeneratorSplitTRAIN
VALIDATIONTEST)r   
dl_manager	directoryr   r   r   _split_generatorsZ   s$   


zGap._split_generatorsc                 c   s`    t jj|}tj|dd}t|D ]	\}}||fV  qW d   dS 1 s)w   Y  dS )zYields examples.z	excel-tab)dialectN)r   iogfileGFilecsv
DictReader	enumerate)r   r"   tsvfilereaderirowr   r   r   _generate_examplesp   s   "zGap._generate_examplesN)__name__
__module____qualname____doc__r   r   VersionVERSIONr   r0   r<   r   r   r   r   r   ;   s    r   )r@   
__future__r   r   r   r5   tensorflow.compat.v2compatv2r   tensorflow_datasets.public_api
public_apir   r   r   r&   r'   r(   r   GeneratorBasedBuilderr   r   r   r   r   <module>   s   