o
    Ni                     @   s   d Z ddlmZ ddlmZ ddlmZ ddlZddlm  mZ	 ddl
mZ dZdZdZd	ZG d
d dejjZG dd dejjZdS )zEBLiMP dataset with minimal pairs of grammatical phenomena in English.    )absolute_import)division)print_functionNa+  
@article{warstadt2019blimp,
  title={BLiMP: A Benchmark of Linguistic Minimal Pairs for English},
  author={Warstadt, Alex and Parrish, Alicia and Liu, Haokun and Mohananey, Anhad and Peng, Wei, and Wang, Sheng-Fu and Bowman, Samuel R},
  journal={arXiv preprint arXiv:1912.00582},
  year={2019}
}
aI  
BLiMP is a challenge set for evaluating what language models (LMs) know about
major grammatical phenomena in English. BLiMP consists of 67 sub-datasets, each
containing 1000 minimal pairs isolating specific contrasts in syntax,
morphology, or semantics. The data is automatically generated according to
expert-crafted grammars.
z2https://github.com/alexwarstadt/blimp/tree/master/z;https://raw.githubusercontent.com/alexwarstadt/blimp/masterc                       s(   e Zd ZdZejj fddZ  ZS )BlimpConfigzBuilderConfig for Blimp.c                    s@   |}t }|d|7 }tt| jd||tjdd| dS )zBuilderConfig for Blimp.

    Args:
      paradigm_uid: string, UID of the linguistic paradigm
      **kwargs: keyword arguments forwarded to super.
    z,This configuration includes the paradigm {}.z0.1.0)namedescriptionversionN )_DESCRIPTIONformatsuperr   __init__tfdscoreVersion)selfparadigm_uidkwargsr   r   	__class__r	   R/home/ubuntu/.local/lib/python3.10/site-packages/tensorflow_datasets/text/blimp.pyr   4   s   

zBlimpConfig.__init__)	__name__
__module____qualname____doc__r   r   disallow_positional_argsr   __classcell__r	   r	   r   r   r   1   s    r   c                   @   s>   e Zd ZdZg dZdd eD Zdd Zdd Zd	d
 ZdS )BlimpzGMinimal grammatical and ungrammatical pairs of 67 linguistic paradigms.)Cadjunct_islandanaphor_gender_agreementanaphor_number_agreementanimate_subject_passiveanimate_subject_trans	causativecomplex_NP_island3coordinate_structure_constraint_complex_left_branch1coordinate_structure_constraint_object_extractiondeterminer_noun_agreement_1determiner_noun_agreement_2%determiner_noun_agreement_irregular_1%determiner_noun_agreement_irregular_2$determiner_noun_agreement_with_adj_2.determiner_noun_agreement_with_adj_irregular_1.determiner_noun_agreement_with_adj_irregular_2*determiner_noun_agreement_with_adjective_1$distractor_agreement_relational_noun$distractor_agreement_relative_clausedrop_argumentellipsis_n_bar_1ellipsis_n_bar_2 existential_there_object_raisingexistential_there_quantifiers_1existential_there_quantifiers_2!existential_there_subject_raisingexpletive_it_object_raising
inchoativeintransitive$irregular_past_participle_adjectivesirregular_past_participle_verbs)irregular_plural_subject_verb_agreement_1)irregular_plural_subject_verb_agreement_2 left_branch_island_echo_question"left_branch_island_simple_question$matrix_question_npi_licensor_presentnpi_present_1npi_present_2only_npi_licensor_presentonly_npi_scope	passive_1	passive_2principle_A_c_commandprinciple_A_case_1principle_A_case_2principle_A_domain_1principle_A_domain_2principle_A_domain_3principle_A_reconstruction'regular_plural_subject_verb_agreement_1'regular_plural_subject_verb_agreement_2(sentential_negation_npi_licensor_presentsentential_negation_npi_scopesentential_subject_islandsuperlative_quantifiers_1superlative_quantifiers_2tough_vs_raising_1tough_vs_raising_2
transitive	wh_islandwh_questions_object_gapwh_questions_subject_gap&wh_questions_subject_gap_long_distancewh_vs_that_no_gapwh_vs_that_no_gap_long_distancewh_vs_that_with_gap!wh_vs_that_with_gap_long_distancec                 C   s   g | ]}t |d qS ))r   )r   ).0paradigmr	   r	   r   
<listcomp>   s    
zBlimp.<listcomp>c                 C   s^   t jj| tt jt j t j t j t j t j tjtjtjtjtj	d
d t
tdS )N
sentence_goodsentence_badfieldlinguistics_termUIDsimple_LM_methodone_prefix_methodtwo_prefix_methodlexically_identicalpair_id)builderr   featuressupervised_keyshomepagecitation)r   r   DatasetInfor
   rp   FeaturesDictTexttfboolint32_PROJECT_URL	_CITATION)r   r	   r	   r   _info   s&   zBlimp._infoc                 C   sL   | j }|jdtd|jd gi}||}tjjtjj	d||j idgS )zReturns SplitGenerators./dataz.jsonlfilepath)r   
gen_kwargs)
builder_configr   join_DOWNLOAD_URLdownload_and_extractr   r   SplitGeneratorSplitTRAIN)r   
dl_managercfgdownload_urlsdownloaded_filesr	   r	   r   _split_generators   s   
zBlimp._split_generatorsc                 c   s    t jj|dC}|D ]7}t|}|d d |d  }|d |d |d |d |d |d	 |d
 |d |d |d d
}||fV  qW d   dS 1 sOw   Y  dS )zYields examples.rbri   _pairIDre   rf   rg   rh   rj   rk   rl   rm   rd   N)rw   iogfileGFilejsonloads)r   r   fline	line_dictid_featsr	   r	   r   _generate_examples   s&   
"zBlimp._generate_examplesN)	r   r   r   r   all_paradigmsBUILDER_CONFIGSr|   r   r   r	   r	   r	   r   r   H   s    Fr   )r   
__future__r   r   r   r   tensorflow.compat.v2compatv2rw   tensorflow_datasets.public_api
public_apir   r{   r
   rz   r   r   BuilderConfigr   GeneratorBasedBuilderr   r	   r	   r	   r   <module>   s   	