o
    Ni                     @   s   d Z ddlmZ ddlmZ ddlmZ ddlZddlZddlm  m	Z
 ddlmZ dZdZdZd	Zd
ZG dd dejjZdS )BillSum Dataset.    )absolute_import)division)print_functionNz
@misc{kornilova2019billsum,
    title={BillSum: A Corpus for Automatic Summarization of US Legislation},
    author={Anastassia Kornilova and Vlad Eidelman},
    year={2019},
    eprint={1910.00523},
    archivePrefix={arXiv},
    primaryClass={cs.CL}
}
a;  
BillSum, summarization of US Congressional and California state bills.

There are several features:
  - text: bill text.
  - summary: summary of the bills.
  - title: title of the bills.
features for us bills. ca bills does not have.
  - text_len: number of chars in text.
  - sum_len: number of chars in summary.
zPhttps://drive.google.com/uc?export=download&id=1g89WgFHMRbr4QrvA0ngh26PY081Nv3lxtextsummaryc                   @   s6   e Zd ZdZejdZdd Zdd Z	d
dd	Z
dS )Billsumr   z3.0.0c                 C   sB   t jj| tt jtt j tt j dt j ittfdt	dS )Ntitlez%https://github.com/FiscalNote/BillSum)builderdescriptionfeaturessupervised_keyshomepagecitation)
tfdscoreDatasetInfo_DESCRIPTIONr   FeaturesDict	_DOCUMENTText_SUMMARY	_CITATION)self r   ]/home/ubuntu/.local/lib/python3.10/site-packages/tensorflow_datasets/summarization/billsum.py_infoB   s   


zBillsum._infoc                 C   sp   | t}tjjtjjtj	|ddddtjjtjj
tj	|ddddtjjdtj	|ddddgS )	zReturns SplitGenerators.z"us_train_data_final_OFFICIAL.jsonlbill_id)pathkey)name
gen_kwargsz!us_test_data_final_OFFICIAL.jsonlca_testz!ca_test_data_final_OFFICIAL.jsonlexternal_id)download_and_extract_URLr   r   SplitGeneratorSplitTRAINosr   joinTEST)r   
dl_managerdl_pathr   r   r   _split_generatorsP   s(   
		zBillsum._split_generatorsNc                 #   sl    t jj|$}|D ]}t|  |  fddttdfD fV  qW d   dS 1 s/w   Y  dS )zYields examples.c                    s   i | ]}| | qS r   r   ).0kdr   r   
<dictcomp>z   s    z.Billsum._generate_examples.<locals>.<dictcomp>r	   N)tfiogfileGFilejsonloadsr   r   )r   r   r   fliner   r1   r   _generate_examplesq   s   
$"zBillsum._generate_examples)NN)__name__
__module____qualname____doc__r   r   VersionVERSIONr   r.   r<   r   r   r   r   r   :   s    !r   )r@   
__future__r   r   r   r8   r)   tensorflow.compat.v2compatv2r4   tensorflow_datasets.public_api
public_apir   r   r   r%   r   r   r   GeneratorBasedBuilderr   r   r   r   r   <module>   s   