o
    Ni                     @   sz   d Z ddlmZ ddlmZ ddlmZ ddlZddlm  mZ	 ddl
mZ dZdZdZd	Zd
ZG dd dejjZdS )Opinosis Opinion Dataset.    )absolute_import)division)print_functionNa  
@inproceedings{ganesan2010opinosis,
  title={Opinosis: a graph-based approach to abstractive summarization of highly redundant opinions},
  author={Ganesan, Kavita and Zhai, ChengXiang and Han, Jiawei},
  booktitle={Proceedings of the 23rd International Conference on Computational Linguistics},
  pages={340--348},
  year={2010},
  organization={Association for Computational Linguistics}
}
z
The Opinosis Opinion Dataset consists of sentences extracted from reviews for 51 topics.
Topics and opinions are obtained from Tripadvisor, Edmunds.com and Amazon.com.
zThttps://github.com/kavgan/opinosis-summarization/raw/master/OpinosisDataset1.0_0.zipreview_sents	summariesc                   @   s6   e Zd ZdZejdZdd Zdd Z	d
dd	Z
dS )Opinosisr   z1.0.0c                 C   s@   t jj| tt jtt j tt j	t j ittfdt
dS )Nz#http://kavita-ganesan.com/opinosis/)builderdescriptionfeaturessupervised_keyshomepagecitation)tfdscoreDatasetInfo_DESCRIPTIONr   FeaturesDict_REVIEW_SENTSText
_SUMMARIESSequence	_CITATION)self r   ^/home/ubuntu/.local/lib/python3.10/site-packages/tensorflow_datasets/summarization/opinosis.py_info7   s   
zOpinosis._infoc                 C   s$   | t}tjjtjjd|idgS )zReturns SplitGenerators.path)name
gen_kwargs)download_and_extract_URLr   r   SplitGeneratorSplitTRAIN)r   
dl_managerextract_pathr   r   r   _split_generatorsD   s   
zOpinosis._split_generatorsNc              
   c   s   t j|d}tjj|}|D ]w}t j||}|dd }tjj|d}|	 }W d   n1 s8w   Y  t j|d|}	g }
t
tjj|	D ],}t j|	|}tjj|d}|	  }|
| W d   n1 sww   Y  qP|
}|t|t|ifV  qdS )zYields examples.topicsz.txtr   rbNzsummaries-gold)osr   jointfiogfilelistdirsplitGFilereadsortedstripappendr   r   )r   r   topics_path	filenamesfilename	file_path
topic_namesrc_f
input_datasummaries_pathsummary_lstsumm_filenametgt_fdatasummary_datar   r   r   _generate_examplesN   s*   
zOpinosis._generate_examples)N)__name__
__module____qualname____doc__r   r   VersionVERSIONr   r'   rC   r   r   r   r   r   2   s    
r   )rG   
__future__r   r   r   r*   tensorflow.compat.v2compatv2r,   tensorflow_datasets.public_api
public_apir   r   r   r!   r   r   r   GeneratorBasedBuilderr   r   r   r   r   <module>   s   