o
    Ni                     @   sl   d Z ddlmZ ddlmZ ddlmZ ddlmZ ddlm	Z	 dZ
dZd	d
 dD ZG dd de	jZdS )zWMT15: Translate dataset.    )absolute_import)division)print_functionN)wmtz1http://www.statmt.org/wmt15/translation-task.htmla  
@InProceedings{bojar-EtAl:2015:WMT,
  author    = {Bojar, Ond{r}ej  and  Chatterjee, Rajen  and  Federmann, Christian  and  Haddow, Barry  and  Huck, Matthias  and  Hokamp, Chris  and  Koehn, Philipp  and  Logacheva, Varvara  and  Monz, Christof  and  Negri, Matteo  and  Post, Matt  and  Scarton, Carolina  and  Specia, Lucia  and  Turchi, Marco},
  title     = {Findings of the 2015 Workshop on Statistical Machine Translation},
  booktitle = {Proceedings of the Tenth Workshop on Statistical Machine Translation},
  month     = {September},
  year      = {2015},
  address   = {Lisbon, Portugal},
  publisher = {Association for Computational Linguistics},
  pages     = {1--46},
  url       = {http://aclweb.org/anthology/W15-3001}
}
c                 C   s   g | ]}|d fqS )en ).0langr   r   W/home/ubuntu/.local/lib/python3.10/site-packages/tensorflow_datasets/translate/wmt15.py
<listcomp>)   s    r   )csdefifrruc                   @   s8   e Zd ZdZdd eD dd eD  Zedd ZdS )Wmt15Translatez>WMT 15 translation datasets for all {xx, "en"} language pairs.c              
   C   s8   g | ]\}}t jd ||f tt||ftjddqS )z(WMT 2015 %s-%s translation task dataset.1.0.0)descriptionurlcitationlanguage_pairversion)r   	WmtConfig_URL	_CITATIONtfdscoreVersionr   l1l2r   r   r
   r   1   s    

zWmt15Translate.<listcomp>c                 C   sP   g | ]$\}}t jd ||f tt||ftjjjtjjjdddtj	
ddqS )z>WMT 2015 %s-%s translation task dataset with subword encoding.
subwords8ki    )encoder_clsname
vocab_sizer   )r   r   r   r   text_encoder_configr   )r   r   r   r   r   featurestextTextEncoderConfigSubwordTextEncoderr   r   r   r   r   r
   r   9   s"    
c                 C   s(   t jjg dt jjg dt jjddgiS )N)
europarl_v7europarl_v8_16commoncrawlmultiunnewscommentary_v10gigafrenczeng_10yandexcorpuswikiheadlines_fiwikiheadlines_ru)newsdev2015newsdiscussdev2015newstest2014newstest2015newsdiscusstest2015)r   SplitTRAIN
VALIDATIONTEST)selfr   r   r
   _subsetsJ   s   zWmt15Translate._subsetsN)__name__
__module____qualname____doc___LANGUAGE_PAIRSBUILDER_CONFIGSpropertyr>   r   r   r   r
   r   .   s    r   )rB   
__future__r   r   r   tensorflow_datasets.public_api
public_apir   tensorflow_datasets.translater   r   r   rC   WmtTranslater   r   r   r   r
   <module>   s   