o
    Ni                     @   sn   d Z ddlmZ ddlmZ ddlmZ ddlZddlm  mZ	 ddl
mZ dZdZG dd	 d	ejjZdS )
zTiny Shakespeare dataset.    )absolute_import)division)print_functionNz@misc{
  author={Karpathy, Andrej},
  title={char-rnn},
  year={2015},
  howpublished={\url{https://github.com/karpathy/char-rnn}}
}a  40,000 lines of Shakespeare from a variety of Shakespeare's plays. Featured in Andrej Karpathy's blog post 'The Unreasonable Effectiveness of Recurrent Neural Networks': http://karpathy.github.io/2015/05/21/rnn-effectiveness/.

To use for e.g. character modelling:

```
d = tfds.load(name='tiny_shakespeare')['train']
d = d.map(lambda x: tf.strings.unicode_split(x['text'], 'UTF-8'))
# train split includes vocabulary for other splits
vocabulary = sorted(set(next(iter(d)).numpy()))
d = d.map(lambda x: {'cur_char': x[:-1], 'next_char': x[1:]})
d = d.unbatch()
seq_len = 100
batch_size = 2
d = d.batch(seq_len)
d = d.batch(batch_size)
```
c                   @   s4   e Zd ZdZejdZdd Zdd Z	dd Z
d	S )
TinyShakespearez!Tiny Shakespeare dataset builder.z1.0.0c                 C   s*   t jj| tt jdt j id dtdS )NtextzOhttps://github.com/karpathy/char-rnn/blob/master/data/tinyshakespeare/input.txt)builderdescriptionfeaturessupervised_keyshomepagecitation)tfdscoreDatasetInfo_DESCRIPTIONr	   FeaturesDictText	_CITATION)self r   ]/home/ubuntu/.local/lib/python3.10/site-packages/tensorflow_datasets/text/tiny_shakespeare.py_info@   s   zTinyShakespeare._infoc           
      C   s  | d}tjj|rtj|d}n|}tjj|d}|	 }W d   n1 s-w   Y  t
t|d }|d| ||d }}t
t|d }|d| ||d }}|}	tjjtjjd|dd	tjjtjjd
|dd	tjjtjjd|	dd	gS )zReturns SplitGenerators.zYhttps://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txtz	input.txtrNg?g      ?train)	split_key
split_text)name
gen_kwargs
validationtest)downloadtfiogfileisdirospathjoinGFilereadintlenr   r   SplitGeneratorSplitTRAIN
VALIDATIONTEST)
r   
dl_managerdownload_pathtxt_pathfr   i
train_textvalidation_text	test_textr   r   r   _split_generatorsJ   sB   
z!TinyShakespeare._split_generatorsc                 c   s    |}d|i}||fV  dS )zYields examples.r   Nr   )r   r   r   data_keyfeature_dictr   r   r   _generate_examplesw   s   z"TinyShakespeare._generate_examplesN)__name__
__module____qualname____doc__r   r   VersionVERSIONr   r9   r<   r   r   r   r   r   ;   s    
-r   )r@   
__future__r   r   r   r%   tensorflow.compat.v2compatv2r!   tensorflow_datasets.public_api
public_apir   r   r   r   GeneratorBasedBuilderr   r   r   r   r   <module>   s   