o
    Ni                     @   s   d Z ddlmZ ddlmZ ddlmZ ddlZddlZddlZddlZddl	m
  mZ ddlmZ dZdZdZd	ZG d
d dejjZdS )z#XNLI: The Cross-Lingual NLI Corpus.    )absolute_import)division)print_functionNaJ  @InProceedings{conneau2018xnli,
  author = "Conneau, Alexis
                 and Rinott, Ruty
                 and Lample, Guillaume
                 and Williams, Adina
                 and Bowman, Samuel R.
                 and Schwenk, Holger
                 and Stoyanov, Veselin",
  title = "XNLI: Evaluating Cross-lingual Sentence Representations",
  booktitle = "Proceedings of the 2018 Conference on Empirical Methods
               in Natural Language Processing",
  year = "2018",
  publisher = "Association for Computational Linguistics",
  location = "Brussels, Belgium",
}aF  XNLI is a subset of a few thousand examples from MNLI which has been translated
into a 14 different languages (some low-ish resource). As with MNLI, the goal is
to predict textual entailment (does sentence A imply/contradict/neither sentence
B) and is a classification task (given two sentences, predict one of three
labels).
z/https://cims.nyu.edu/~sbowman/xnli/XNLI-1.0.zip)arbgdeelenesfrhiruswthtrurvizhc                   @   sF   e Zd ZdZejjdejddddgZdd Z	d	d
 Z
dd ZdS )Xnliz0XNLI: The Cross-Lingual NLI Corpus. Version 1.0.
plain_textz1.0.0z6New split API (https://tensorflow.org/datasets/splits)zPlain text import of XNLI)nameversiondescriptionc              
   C   sJ   t jj| tt jt jjtdt jjtdt jj	g dddd dt
dS )N)	languages)
entailmentneutralcontradiction)namespremise
hypothesislabelz)https://www.nyu.edu/projects/bowman/xnli/)builderr   featuressupervised_keyshomepagecitation)tfdscoreDatasetInfo_DESCRIPTIONr#   FeaturesDictTranslation
_LANGUAGESTranslationVariableLanguages
ClassLabel	_CITATION)self r2   Q/home/ubuntu/.local/lib/python3.10/site-packages/tensorflow_datasets/text/xnli.py_infoJ   s$   z
Xnli._infoc                 C   s\   | t}tj|d}tjjtjj	dtj|didtjjtjj
dtj|didgS )NzXNLI-1.0filepathzxnli.test.tsv)r   
gen_kwargszxnli.dev.tsv)download_and_extract	_DATA_URLospathjoinr'   r(   SplitGeneratorSplitTEST
VALIDATION)r1   
dl_managerdl_dirdata_dirr2   r2   r3   _split_generators`   s   
zXnli._split_generatorsc           	      c   s    t t}tjj|}tj|dtj	d}|D ]}||d  
| qW d   n1 s/w   Y  t|D ]!}dd |D }dd |D }|d d |||d d	 d
fV  q9dS )z:This function returns the examples in the raw (text) form.	)	delimiterquotingpairIDNc                 S      i | ]	}|d  |d qS )language	sentence1r2   .0rowr2   r2   r3   
<dictcomp>v       z+Xnli._generate_examples.<locals>.<dictcomp>c                 S   rH   )rI   	sentence2r2   rK   r2   r2   r3   rN   w   rO   r   
gold_labelr   )collectionsdefaultdictlisttfiogfileGFilecsv
DictReader
QUOTE_NONEappendsix
itervalues)	r1   r5   rows_per_pair_idfreaderrM   rowsr   r    r2   r2   r3   _generate_examplesl   s"   


zXnli._generate_examplesN)__name__
__module____qualname____doc__r'   r(   BuilderConfigVersionBUILDER_CONFIGSr4   rC   rc   r2   r2   r2   r3   r   >   s    
r   )rg   
__future__r   r   r   rR   rY   r9   r]   tensorflow.compat.v2compatv2rU   tensorflow_datasets.public_api
public_apir'   r0   r*   r8   r-   r(   GeneratorBasedBuilderr   r2   r2   r2   r3   <module>   s   