o
    Nif                     @   s   d Z ddlmZ ddlmZ ddlmZ ddlZddlm  mZ	 ddl
mZ dZdZdZd	Zd
ZdZg dZG dd dejjZG dd dejjZdS )z;Reddit TIFU dataset using tifu or tldr from subreddit tifu.    )absolute_import)division)print_functionNa  
@misc{kim2018abstractive,
    title={Abstractive Summarization of Reddit Posts with Multi-level Memory Networks},
    author={Byeongchang Kim and Hyunwoo Kim and Gunhee Kim},
    year={2018},
    eprint={1811.00783},
    archivePrefix={arXiv},
    primaryClass={cs.CL}
}
a  
Reddit dataset, where TIFU denotes the name of subbreddit /r/tifu.
As defined in the publication, styel "short" uses title as summary and
"long" uses tldr as summary.

Features includes:
  - document: post text without tldr.
  - tldr: tldr line.
  - title: trimmed title without tldr.
  - ups: upvotes.
  - score: score.
  - num_comments: number of comments.
  - upvote_ratio: upvote ratio.
zPhttps://drive.google.com/uc?export=download&id=1ffWfITKFMJeqjT8loC8aiCLRNJpc_XnF	documentstitletldr)upsnum_commentsscoreupvote_ratioc                       s*   e Zd ZdZejjd fdd	Z  ZS )RedditTifuConfigzBuilderConfig for RedditTifu.Nc                    s,   t t| jddtjdi| || _dS )zBuilderConfig for RedditTifu.

    Args:
      summary_key: key string of summary in downloaded json file.
      **kwargs: keyword arguments forwarded to super.
    versionz1.1.0N )superr   __init__tfdscoreVersionsummary_key)selfr   kwargs	__class__r   a/home/ubuntu/.local/lib/python3.10/site-packages/tensorflow_datasets/summarization/reddit_tifu.pyr   A   s   	

zRedditTifuConfig.__init__N)	__name__
__module____qualname____doc__r   r   disallow_positional_argsr   __classcell__r   r   r   r   r   >   s    r   c                   @   sF   e Zd ZdZededdededdgZdd Zd	d
 Z	dddZ
dS )
RedditTifuzReddit TIFU Dataset.shortzUsing title as summary.)namer   descriptionlongzUsing TLDR as summary.c                 C   sP   dd t D }|dd tttfD  tjj| ttj	
|t| jjfdtdS )Nc                 S   s    i | ]}|t jjg tjd qS ))shapedtype)r   featuresTensortffloat32.0kr   r   r   
<dictcomp>`   s    z$RedditTifu._info.<locals>.<dictcomp>c                 S   s   i | ]}|t j qS r   )r   r(   Textr,   r   r   r   r/   e   s    zhttps://github.com/ctr4si/MMN)builderr$   r(   supervised_keyshomepagecitation)_ADDITIONAL_FEATURESupdate	_DOCUMENT_TLDR_TITLEr   r   DatasetInfo_DESCRIPTIONr(   FeaturesDictbuilder_configr   	_CITATION)r   r(   r   r   r   _info_   s   

zRedditTifu._infoc                 C   s$   | t}tjjtjjd|idgS )zReturns SplitGenerators.path)r#   
gen_kwargs)download_and_extract_URLr   r   SplitGeneratorSplitTRAIN)r   
dl_managerdl_pathr   r   r   _split_generatorso   s   
zRedditTifu._split_generatorsNc              	   #   s    t jj|dH}t|D ]:\}}t| t d  t	 d  t
 d p)d i}| fddtD  |t rH|| jj rH||fV  qW d   dS 1 sTw   Y  dS )	zYields examples.rbselftext_without_tldrtrimmed_titler    c                    s   i | ]}| | qS r   r   r,   dr   r   r/      s    z1RedditTifu._generate_examples.<locals>.<dictcomp>N)r*   iogfileGFile	enumeratejsonloadsr7   stripr9   r8   r6   r5   r=   r   )r   r@   filinerr   rN   r   _generate_examplesy   s   

"zRedditTifu._generate_examplesr   )r   r   r   r   r   r9   r8   BUILDER_CONFIGSr?   rI   r[   r   r   r   r   r!   O   s     
r!   )r   
__future__r   r   r   rT   tensorflow.compat.v2compatv2r*   tensorflow_datasets.public_api
public_apir   r>   r;   rC   r7   r9   r8   r5   r   BuilderConfigr   GeneratorBasedBuilderr!   r   r   r   r   <module>   s    