o
    Ni                     @   s   d Z ddlmZ ddlmZ ddlmZ ddlZddlm  mZ	 ddl
mZ dZdZG dd	 d	ejjZG d
d dejjZdS )zThe Multi-Genre NLI Corpus.    )absolute_import)division)print_functionNa  @InProceedings{N18-1101,
  author = "Williams, Adina
            and Nangia, Nikita
            and Bowman, Samuel",
  title = "A Broad-Coverage Challenge Corpus for
           Sentence Understanding through Inference",
  booktitle = "Proceedings of the 2018 Conference of
               the North American Chapter of the
               Association for Computational Linguistics:
               Human Language Technologies, Volume 1 (Long
               Papers)",
  year = "2018",
  publisher = "Association for Computational Linguistics",
  pages = "1112--1122",
  location = "New Orleans, Louisiana",
  url = "http://aclweb.org/anthology/N18-1101"
}
a  The Multi-Genre Natural Language Inference (MultiNLI) corpus is a
crowd-sourced collection of 433k sentence pairs annotated with textual
entailment information. The corpus is modeled on the SNLI corpus, but differs in
that covers a range of genres of spoken and written text, and supports a
distinctive cross-genre generalization evaluation. The corpus served as the
basis for the shared task of the RepEval 2017 Workshop at EMNLP in Copenhagen.
c                       s*   e Zd ZdZejjd fdd	Z  ZS )MultiNLIConfigzBuilderConfig for MultiNLI.Nc                    s:   t t| jddtjddi| |ptjj | _	dS )a  BuilderConfig for MultiNLI.

    Args:
      text_encoder_config: `tfds.features.text.TextEncoderConfig`, configuration
        for the `tfds.features.text.TextEncoder` used for the features feature.
      **kwargs: keyword arguments forwarded to super.
    versionz1.0.0z6New split API (https://tensorflow.org/datasets/splits)N )
superr   __init__tfdscoreVersionfeaturestextTextEncoderConfigtext_encoder_config)selfr   kwargs	__class__r   V/home/ubuntu/.local/lib/python3.10/site-packages/tensorflow_datasets/text/multi_nli.pyr	   =   s   	zMultiNLIConfig.__init__)N)	__name__
__module____qualname____doc__r
   r   disallow_positional_argsr	   __classcell__r   r   r   r   r   :   s    r   c                   @   s>   e Zd ZdZedddgZdd Zdd Zd	d
 Zdd Z	dS )MultiNLIz?MultiNLI: The Stanford Question Answering Dataset. Version 1.1.
plain_textz
Plain text)namedescriptionc              
   C   sR   t jj| tt jt jj| jjdt jj| jjdt jj	g dddd dt
dS )N)encoder_config)
entailmentneutralcontradiction)namespremise
hypothesislabelz-https://www.nyu.edu/projects/bowman/multinli/)builderr   r   supervised_keyshomepagecitation)r
   r   DatasetInfo_DESCRIPTIONr   FeaturesDictTextbuilder_configr   
ClassLabel	_CITATION)r   r   r   r   _infoX   s$   zMultiNLI._infoc                 c   s2    |  |D ]\}}d|d |d gV  qd S )N r&   r'   )_generate_examplesjoin)r   filepath_exr   r   r   _vocab_text_genn   s   zMultiNLI._vocab_text_genc                 C   s   | d}tj|d}tj|d}tj|d}tj|d}| jjd | | | jjd j}| jjd 	| | jjd 	| t
jjt
jjd|id	t
jjd
d|id	t
jjdd|id	gS )Nz7https://cims.nyu.edu/~sbowman/multinli/multinli_1.0.zipzmultinli_1.0zmultinli_1.0_train.txtzmultinli_1.0_dev_matched.txtzmultinli_1.0_dev_mismatched.txtr&   r'   r8   )r   
gen_kwargsvalidation_matchedvalidation_mismatched)download_and_extractospathr7   infor   maybe_build_from_corpusr;   encodermaybe_set_encoderr
   r   SplitGeneratorSplitTRAIN)r   
dl_managerdownloaded_dir	mnli_path
train_pathmatched_validation_pathmismatched_validation_pathrD   r   r   r   _split_generatorsr   s<   zMultiNLI._split_generatorsc                 c   st    t tjj|dD ],\}}|dkrqtj| }|d}|d dkr(q||d |d |d dfV  qdS )	zGenerate mnli examples.

    Args:
      filepath: a string

    Yields:
      dictionaries containing "premise", "hypothesis" and "label" strings
    rbr   	-      r%   N)		enumeratetfiogfileGFilecompatas_textstripsplit)r   r8   idxline
split_liner   r   r   r6      s   	
zMultiNLI._generate_examplesN)
r   r   r   r   r   BUILDER_CONFIGSr4   r;   rO   r6   r   r   r   r   r   N   s     r   )r   
__future__r   r   r   r@   tensorflow.compat.v2rZ   v2rV   tensorflow_datasets.public_api
public_apir
   r3   r.   r   BuilderConfigr   GeneratorBasedBuilderr   r   r   r   r   <module>   s   
