o
    Ni"                     @   sz   d Z ddlmZ ddlmZ ddlmZ ddlZddlm  mZ	 ddl
mZ dZdZdZd	Zd
ZG dd dejjZdS )Multi-News dataset.    )absolute_import)division)print_functionNaJ  
@misc{alex2019multinews,
    title={Multi-News: a Large-Scale Multi-Document Summarization Dataset and Abstractive Hierarchical Model},
    author={Alexander R. Fabbri and Irene Li and Tianwei She and Suyi Li and Dragomir R. Radev},
    year={2019},
    eprint={1906.01749},
    archivePrefix={arXiv},
    primaryClass={cs.CL}
}
aP  
Multi-News, consists of news articles and human-written summaries
of these articles from the site newser.com.
Each summary is professionally written by editors and
includes links to the original articles cited.

There are two features:
  - document: text of news articles seperated by special token "|||||".
  - summary: news summary.
zPhttps://drive.google.com/uc?export=download&id=1vRY2wM6rlOZrf9exGTm5pXj5ExlVwJ0Cdocumentsummaryc                   @   s6   e Zd ZdZejdZdd Zdd Z	d
dd	Z
dS )	MultiNewsr   z1.0.0c              
   C   s8   t jj| tt jtt j tt j ittfdt	dS )Nz)https://github.com/Alex-Fabbri/Multi-News)builderdescriptionfeaturessupervised_keyshomepagecitation)
tfdscoreDatasetInfo_DESCRIPTIONr   FeaturesDict	_DOCUMENTText_SUMMARY	_CITATION)self r   `/home/ubuntu/.local/lib/python3.10/site-packages/tensorflow_datasets/summarization/multi_news.py_info=   s   

zMultiNews._infoc              	   C   sx   t j|td}tjjtjj	dt j|didtjjtjj
dt j|didtjjtjjdt j|didgS )zReturns SplitGenerators.zmulti-news-originalpathtrain)name
gen_kwargsvaltest)osr   joindownload_and_extract_URLr   r   SplitGeneratorSplitTRAIN
VALIDATIONTEST)r   
dl_managerextract_pathr   r   r   _split_generatorsJ   s    
zMultiNews._split_generatorsNc              
   c   s    t jjtj|d P}t jjtj|d *}tt||D ]\}\}}|t	|
 ddt|
 difV  q$W d   n1 sIw   Y  W d   dS W d   dS 1 saw   Y  dS )zYields examples.z.srcz.tgtNEWLINE_CHAR
z- N)tfiogfileGFiler"   r   r#   	enumeratezipr   stripreplacer   lstrip)r   r   src_ftgt_fisrc_linetgt_liner   r   r   _generate_examples]   s    

PzMultiNews._generate_examples)N)__name__
__module____qualname____doc__r   r   VersionVERSIONr   r-   r>   r   r   r   r   r   8   s    r   )rB   
__future__r   r   r   r"   tensorflow.compat.v2compatv2r0   tensorflow_datasets.public_api
public_apir   r   r   r%   r   r   r   GeneratorBasedBuilderr   r   r   r   r   <module>   s   