o
    Ni                     @   s   d Z ddlmZ ddlmZ ddlmZ ddlZddlm  mZ	 ddl
mZ dZdZdZG d	d
 d
ejjZG dd dejjZdS )zThe Multi-Genre NLI Corpus.    )absolute_import)division)print_functionNa  @InProceedings{N18-1101,
  author = "Williams, Adina
            and Nangia, Nikita
            and Bowman, Samuel",
  title = "A Broad-Coverage Challenge Corpus for
           Sentence Understanding through Inference",
  booktitle = "Proceedings of the 2018 Conference of
               the North American Chapter of the
               Association for Computational Linguistics:
               Human Language Technologies, Volume 1 (Long
               Papers)",
  year = "2018",
  publisher = "Association for Computational Linguistics",
  pages = "1112--1122",
  location = "New Orleans, Louisiana",
  url = "http://aclweb.org/anthology/N18-1101"
}
a  The Multi-Genre Natural Language Inference (MultiNLI) corpus is a
crowd-sourced collection of 433k sentence pairs annotated with textual
entailment information. The corpus is modeled on the SNLI corpus, but differs in
that covers a range of genres of spoken and written text, and supports a
distinctive cross-genre generalization evaluation. The corpus served as the
basis for the shared task of the RepEval 2017 Workshop at EMNLP in Copenhagen.
z7https://cims.nyu.edu/~sbowman/multinli/multinli_1.0.zipc                       s*   e Zd ZdZejjd fdd	Z  ZS )MultiNLIMismatchConfigz$BuilderConfig for MultiNLI Mismatch.Nc                    s,   t t| jdi | |ptjj | _dS )a  BuilderConfig for MultiNLI Mismatch.

    Args:
      text_encoder_config: `tfds.features.text.TextEncoderConfig`, configuration
        for the `tfds.features.text.TextEncoder` used for the features feature.
      **kwargs: keyword arguments forwarded to super.
    N )superr   __init__tfdsfeaturestextTextEncoderConfigtext_encoder_config)selfr   kwargs	__class__r   _/home/ubuntu/.local/lib/python3.10/site-packages/tensorflow_datasets/text/multi_nli_mismatch.pyr   ?   s   	zMultiNLIMismatchConfig.__init__)N)	__name__
__module____qualname____doc__r	   coredisallow_positional_argsr   __classcell__r   r   r   r   r   <   s    r   c                   @   s@   e Zd ZdZeddddgZdd Zdd	 Zd
d Zdd Z	dS )MultiNLIMismatchz?MultiNLI: The Stanford Question Answering Dataset. Version 1.1.
plain_textz0.0.1z
Plain text)nameversiondescriptionc              
   C   sR   t jj| tt jt jj| jjdt jj| jjdt jj| jjddd dt	dS )N)encoder_configpremise
hypothesislabelz-https://www.nyu.edu/projects/bowman/multinli/)builderr   r
   supervised_keyshomepagecitation)
r	   r   DatasetInfo_DESCRIPTIONr
   FeaturesDictTextbuilder_configr   	_CITATION)r   r   r   r   _infoX   s$   zMultiNLIMismatch._infoc                 c   s8    |  |D ]\}}d|d |d |d gV  qd S )N r!   r"   r#   )_generate_examplesjoin)r   filepath_exr   r   r   _vocab_text_genn   s    z MultiNLIMismatch._vocab_text_genc                 C   s   | t}tj|d}tj|d}tj|d}| jjd | | | jjd j	}| jjd 
| | jjd 
| | jjd 
| tjjtjjd|idtjjtjjd|idgS )	Nzmultinli_1.0zmultinli_1.0_train.txtzmultinli_1.0_dev_mismatched.txtr!   r"   r#   r2   )r   
gen_kwargs)download_and_extractROOT_URLospathr1   infor
   maybe_build_from_corpusr5   encodermaybe_set_encoderr	   r   SplitGeneratorSplitTRAIN
VALIDATION)r   
dl_managerdownloaded_dir	mnli_path
train_pathvalidation_pathr=   r   r   r   _split_generatorsr   s(   
z"MultiNLIMismatch._split_generatorsc                 c   sf    t tjj|dD ]%\}}|dkrqtj| }|d}||d |d |d dfV  qdS )zGenerate mnli mismatch examples.

    Args:
      filepath: a string

    Yields:
      dictionaries containing "premise", "hypothesis" and "label" strings
    rbr   	      r    N)		enumeratetfiogfileGFilecompatas_textstripsplit)r   r2   idxline
split_liner   r   r   r0      s   	
z#MultiNLIMismatch._generate_examplesN)
r   r   r   r   r   BUILDER_CONFIGSr.   r5   rH   r0   r   r   r   r   r   M   s    r   )r   
__future__r   r   r   r9   tensorflow.compat.v2rR   v2rN   tensorflow_datasets.public_api
public_apir	   r-   r)   r8   r   BuilderConfigr   GeneratorBasedBuilderr   r   r   r   r   <module>   s   	