o
    NiY                     @   s   d Z ddlmZ ddlmZ ddlmZ ddlZddlZddlm  m	Z
 ddlmZ dZdZg dZG d	d
 d
ejjZdd ZdS )zThe scicite dataset.    )absolute_import)division)print_functionNz
@InProceedings{Cohan2019Structural,
  author={Arman Cohan and Waleed Ammar and Madeleine Van Zuylen and Field Cady},
  title={Structural Scaffolds for Citation Intent Classification in Scientific Publications},
  booktitle="NAACL",
  year="2019"
}
a  
This is a dataset for classifying citation intents in academic papers.
The main citation intent label for each Json object is specified with the label
key while the citation context is specified in with a context key. Example:
{
 'string': 'In chacma baboons, male-infant relationships can be linked to both
    formation of friendships and paternity success [30,31].'
 'sectionName': 'Introduction',
 'label': 'background',
 'citingPaperId': '7a6b2d4b405439',
 'citedPaperId': '9d1abadc55b5e0',
 ...
 }
You may obtain the full information about the paper using the provided paper ids
with the Semantic Scholar API (https://api.semanticscholar.org/).
The labels are:
Method, Background, Result
)
properNoun	andPhraseacronym
etAlPhraseexplicitacronymParennanc                   @   s6   e Zd ZdZejdZdd Zdd Z	d
dd	Z
dS )ScicitezFThis is a dataset for classifying citation intents in academic papers.z1.0.0c                 C   s   t jj| tt jt j t j t jjg ddt j t j tj	tj
t jjg ddtjtjt jjtdtjtjt j dddtdS )N)method
backgroundresult)names)
supportivenot_supportivecant_determinenone)stringsectionNamelabelcitingPaperIdcitedPaperIdexcerpt_indexisKeyCitationlabel2citeEnd	citeStartsourcelabel_confidencelabel2_confidenceid)r   r   z"https://github.com/allenai/scicite)builderdescriptionfeaturessupervised_keyshomepagecitation)tfdscoreDatasetInfo_DESCRIPTIONr%   FeaturesDictText
ClassLabeltfint32boolint64_SOURCE_NAMESfloat32	_CITATION)self r8   T/home/ubuntu/.local/lib/python3.10/site-packages/tensorflow_datasets/text/scicite.py_infoE   s2   $zScicite._infoc              	   C   s   | ddi}tj|d d}tjjtjjdtj|didtjjtjj	dtj|didtjjtjj
dtj|didgS )zReturns SplitGenerators.scicitezIhttps://s3-us-west-2.amazonaws.com/ai2-s2-research/scicite/scicite.tar.gzpathztrain.jsonl)name
gen_kwargsz	dev.jsonlz
test.jsonl)download_and_extractosr<   joinr)   r*   SplitGeneratorSplitTRAIN
VALIDATIONTEST)r7   
dl_managerdl_pathsr<   r8   r8   r9   _split_generatorsu   s$   zScicite._split_generatorsNc                 c   s   t jj|t}i }|D ]f}t|}t|d }||v rqd||< ||d t|d t|d t|d t|d t|d t|d	 t|	d
dt
|d t
|d t|d t|	ddt|	ddt|d dfV  qW d   dS 1 sw   Y  dS )zYields examples.	unique_idTr   r   r   r   r   r   r   r   r   r   r   r   r    g        r!   r"   )r   r   r   r   r   r   r   r   r   r   r   r    r!   r"   N)r0   iogfileGFilejsonloadsstrintr2   get	_safe_intfloat)r7   r<   f
unique_idslinedrJ   r8   r8   r9   _generate_examples   s6   










"zScicite._generate_examples)N)__name__
__module____qualname____doc__r)   r*   VersionVERSIONr:   rI   rY   r8   r8   r8   r9   r   @   s    0r   c                 C   s    zt | W S  ty   Y dS w )N)rQ   
ValueError)ar8   r8   r9   rS      s
   
rS   )r]   
__future__r   r   r   rN   r@   tensorflow.compat.v2compatv2r0   tensorflow_datasets.public_api
public_apir)   r6   r,   r4   r*   GeneratorBasedBuilderr   rS   r8   r8   r8   r9   <module>   s   	g