o
    Ni                     @   s   d Z ddlmZ ddlmZ ddlmZ ddlZddlZddlmZm	Z	m
Z
mZmZ ddlm  mZ ddlmZ dZdZd	Zd
ZdZG dd dejjZdS )zSAMSum dataset.    )absolute_import)division)print_functionN)DictIteratorListTextTuplea  
@article{gliwa2019samsum,
  title={SAMSum Corpus: A Human-annotated Dialogue Dataset for Abstractive Summarization},
  author={Gliwa, Bogdan and Mochol, Iwona and Biesek, Maciej and Wawer, Aleksander},
  journal={arXiv preprint arXiv:1911.12237},
  year={2019}
}
z
SAMSum Corpus contains over 16k chat dialogues with manually annotated
summaries.

There are two features:

  - dialogue: text of dialogue.
  - summary: human written summary of the dialogue.
  - id: id of a example.

dialoguesummaryidc                
   @   sz   e Zd ZdZejdZdZdejj	fddZ
dejjdeejj fdd	Z	
ddedeeeeeef f  fddZd
S )SamsumzSAMSum dataset builder.z1.0.0z  Download https://arxiv.org/src/1911.12237v2/anc/corpus.7z, decompress and
  place train.json, val.json and test.json in the manual follder.
  returnc                 C   sB   t jj| tt jtt j tt j t	t j ittfdt
dS )Nz&https://arxiv.org/src/1911.12237v2/anc)builderdescriptionfeaturessupervised_keyshomepagecitation)tfdscoreDatasetInfo_DESCRIPTIONr   FeaturesDict	_DOCUMENTr   _SUMMARY_ID	_CITATION)self r   \/home/ubuntu/.local/lib/python3.10/site-packages/tensorflow_datasets/summarization/samsum.py_infoA   s   


zSamsum._info
dl_managerc              	   C   sj   t jjt jjdtj|jdidt jjt jj	dtj|jdidt jjt jj
dtj|jdidgS )zReturns SplitGenerators.pathz
train.json)name
gen_kwargszval.jsonz	test.json)r   r   SplitGeneratorSplitTRAINosr#   join
manual_dir
VALIDATIONTEST)r   r"   r   r   r    _split_generatorsO   s    zSamsum._split_generatorsNr#   c                 c   sV    t jj|d}t|D ]	}|t |fV  qW d   dS 1 s$w   Y  dS )zYields examples.rbN)tfiogfileGFilejsonloadr   )r   r#   fexampler   r   r    _generate_examplesh   s   "zSamsum._generate_examples)N)__name__
__module____qualname____doc__r   r   VersionVERSIONMANUAL_DOWNLOAD_INSTRUCTIONSr   r!   downloadDownloadManagerr   r&   r.   r   r   r	   r   r8   r   r   r   r    r   8   s     

r   )r<   
__future__r   r   r   r4   r)   typingr   r   r   r   r	   tensorflow.compat.v2compatv2r0   tensorflow_datasets.public_api
public_apir   r   r   r   r   r   r   GeneratorBasedBuilderr   r   r   r   r    <module>   s   	