o
    Ni                     @   s   d Z ddlZddlmZ ddlmZ ddlm  mZ ej	j
Z
[ddlmZ ddlmZ ejZg dZeddd	 ed
dd edded dZdZdZdZdZdZdZdZdd Zdd Zdd Zdd Zdd Zd d! Z e!d"kr|e"e  dS dS )#zGenerate the minimal source code for a new dataset.

python -m tensorflow_datasets.scripts.create_new_dataset \
  --dataset dataset_name \
  --type dataset_type

    N)app)flags)naming)py_utils)
audioimageimage_classificationobject_detectionquestion_answering
structuredsummarizationtext	translatevideotfds_dirz&Root directory of tfds (auto-computed)datasetzDataset nametypezDataset typez"""{dataset_name} dataset."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

z/import tensorflow_datasets.public_api as tfds

zmimport tensorflow_datasets.public_api as tfds
from tensorflow_datasets.{dataset_type} import {dataset_name}

z/# {TODO}: BibTeX citation
_CITATION = """
"""

z"# {TODO}:
_DESCRIPTION = """
"""

a  
class {dataset_cls}(tfds.core.GeneratorBasedBuilder):
  """{TODO}: Short description of my dataset."""

  # {TODO}: Set up version.
  VERSION = tfds.core.Version('0.1.0')

  def _info(self):
    # {TODO}: Specifies the tfds.core.DatasetInfo object
    return tfds.core.DatasetInfo(
        builder=self,
        # This is the description that will appear on the datasets page.
        description=_DESCRIPTION,
        # tfds.features.FeatureConnectors
        features=tfds.features.FeaturesDict({{
            # These are the features of your dataset like images, labels ...
        }}),
        # If there's a common (input, target) tuple from the features,
        # specify them here. They'll be used if as_supervised=True in
        # builder.as_dataset.
        supervised_keys=(),
        # Homepage of the dataset for documentation
        homepage='https://dataset-homepage/',
        citation=_CITATION,
    )

  def _split_generators(self, dl_manager):
    """Returns SplitGenerators."""
    # {TODO}: Downloads the data and defines the splits
    # dl_manager is a tfds.download.DownloadManager that can be used to
    # download and extract URLs
    return [
        tfds.core.SplitGenerator(
            name=tfds.Split.TRAIN,
            # These kwargs will be passed to _generate_examples
            gen_kwargs={{}},
        ),
    ]

  def _generate_examples(self):
    """Yields examples."""
    # {TODO}: Yields (key, example) tuples from the dataset
    yield 'key', {{}}

a\  
class {dataset_cls}Test(tfds.testing.DatasetBuilderTestCase):
  # {TODO}:
  DATASET_CLASS = {dataset_name}.{dataset_cls}
  SPLITS = {{
      "train": 3,  # Number of fake train example
      "test": 1,  # Number of fake test example
  }}

  # If you are calling `download/download_and_extract` with a dict, like:
  #   dl_manager.download({{'some_key': 'http://a.org/out.txt', ...}})
  # then the tests needs to provide the fake output paths relative to the
  # fake data directory
  # DL_EXTRACT_RESULT = {{'some_key': 'output_file1.txt', ...}}


if __name__ == "__main__":
  tfds.testing.test_main()

z# {TODO}: If your dataset downloads files, then the checksums will be
# automatically added here when running the download_and_prepare script
# with --register_checksums.
c                 C   sx   t j| dd}tt t t t }t	|j
di |d}||j
di | W d   dS 1 s5w   Y  dS )z%Create a new dataset from a template.{dataset_type}z{dataset_name}.pywN )ospathjoin_HEADER_DATASET_DEFAULT_IMPORTS	_CITATION_DESCRIPTION_DATASET_DEFAULTSgfileGFileformatwriteroot_dirdata	file_pathcontextfr   r   b/home/ubuntu/.local/lib/python3.10/site-packages/tensorflow_datasets/scripts/create_new_dataset.pycreate_dataset_file   s   "r)   c                 C   sh   t j| dd}d}t|jdi |d}||jdi | W d   dS 1 s-w   Y  dS )z/Append the new dataset file to the __init__.py.r   z__init__.pyzjfrom tensorflow_datasets.{dataset_type}.{dataset_name} import {dataset_cls}  # {TODO} Sort alphabetically
aNr   )r   r   r   r   r   r    r!   )r#   r$   	init_filer&   r'   r   r   r(   add_the_init   s
   "r,   c                 C   sp   t j| dd}tt t }t|jdi |d}|	|jdi | W d   dS 1 s1w   Y  dS )z1Create the test file associated with the dataset.r   z{dataset_name}_test.pyr   Nr   )
r   r   r   r   _DATASET_TEST_DEFAULTS_IMPORTS_DATASET_TEST_DEFAULTSr   r   r    r!   r"   r   r   r(   create_dataset_test_file   s
   "r/   c                 C   s   t j| dddd}|jdi |}t| t j|d}t|d}|djdi | W d    d S 1 s;w   Y  d S )	Ntesting	test_datafake_examplesz{dataset_name}z(TODO-add_fake_data_in_this_directory.txtr   z'{TODO}: Add fake data in this directoryr   )r   r   r   r    r   makedirsr   r!   )r#   r$   fake_examples_dir	fake_pathr'   r   r   r(   create_fake_data   s   
"r6   c                 C   sd   t j| dd}t|jdi |d}|tjdi | W d    d S 1 s+w   Y  d S )Nurl_checksumsz{dataset_name}.txtr   r   )r   r   r   r   r   r    r!   _CHECKSUM_FILE)r#   r$   checksum_pathr'   r   r   r(   create_checksum_file   s   "r:   c                 C   s   t j}t j}t j}|st }t||t|d|d}t	|| t
|| t|| t|| t|| td|| d S )NzTODO({}))dataset_namedataset_typedataset_clsTODOzDataset generated in {}
You can start with searching TODO({}).
Please check this `https://github.com/tensorflow/datasets/blob/master/docs/add_dataset.md`for details.)FLAGSr   r   r   r   dictr   snake_to_camelcaser    r)   r,   r/   r6   r:   print)_r;   r<   r#   r$   r   r   r(   main   s(   




rD   __main__)#__doc__r   abslr   r   tensorflow.compat.v2compatv2tfior   tensorflow_datasets.corer   tensorflow_datasets.core.utilsr   r?   _DATASET_TYPEDEFINE_stringDEFINE_enumr   r   r-   r   r   r   r.   r8   r)   r,   r/   r6   r:   rD   __name__runr   r   r   r(   <module>   s>   	.		