o
    Ni!                     @   s  d Z ddlZddlZddlZddlZddlmZ ddlmZ ddlmZ ddl	m
  mZ ddlZddlZejZejejddZedd	d
 edd	d eddd eddd eddd eded eddd eddd eddd eddd eddd ejjjZed ejd!d" ejjD d# ed$dd% ed&g d' ed(dd) ed*dd+ ed,dd- ed.dd/ ed0dd1 ed2dd3 d4d5 Z d6d7 Z!d8d9 Z"d:d; Z#e$d<kre%  e&e# dS dS )=a~  Script to call download_and_prepare on DatasetBuilder.

Standalone script to generate specific dataset(s). This can be
used if you want to separate download/generation of dataset from actual usage.

By default, the dataset is generated in the default location
(~/tensorflow_datasets), which the same as when calling `tfds.load()`.

Instructions:

```
python -m tensorflow_datasets.scripts.download_and_prepare \
  --datasets=cifar10
```

If you have your dataset defined outside of `tensorflow_datasets`, use
`--module_import="path.to.my.dataset_module"` to have your Python module
containing your `DatasetBuilder` definition imported.


    N)appflags)logging~tensorflow_datasetsdatasets zNComma separated list of datasets to build, defaults to allregistered builders.exclude_datasetszFComma separated list of datasets to exclude,(no download, no prepare).module_importzModules to import. Use this when your DatasetBuilder is defined outside of tensorflow_datasets so that it is registered. Multiple imports can be passed by calling the flag multiple times, or using coma separated values.builder_config_idz?If given 1 dataset with BUILDER_CONFIGS, id of config to build.experimental_latest_versionFzISet to true to builder the latest version available, even if not default.data_dirzWhere to place the data.download_dirzWhere to place downloads.extract_dirzWhere to extract files.
manual_dirzBDirectory where dataset have manually been downloaded / extracted.checksums_dirzEFor external datasets, specify the location of the dataset checksums.add_name_to_manual_dirz4If true, append the dataset name to the `manual_dir`compute_statsc                 C   s   g | ]}|j qS  )value).0er   r   d/home/ubuntu/.local/lib/python3.10/site-packages/tensorflow_datasets/scripts/download_and_prepare.py
<listcomp>]   s    r   z1Whether to compute or not the dynamic statistics.max_examples_per_splitzGoptional max number of examples to write into each split (for testing).beam_pipeline_optionszA (comma-separated) list of flags to pass to `PipelineOptions` when preparing with Apache Beam. Example: `--beam_pipeline_options=job_name=my-job,project=my-project`register_checksumsz5If True, store size and checksum of downloaded files.force_checksums_validationz7If True, raise an error if the checksums are not found.debugz6If True, will drop into debugger after data generationdebug_startz+If True, will drop into debugger on startupsleep_startz.If True, will sleep on startup; useful for sshdisable_tqdmzIf True, disable tqdm.c                	   C   s,   t jjtjtjtjt jjjtj	tj
tjdS )N)r   r   r   download_moder   r   r   )tfdsdownloadDownloadConfigFLAGSr   r   r   GenerateModeREUSE_DATASET_IF_EXISTSr   r   r   r   r   r   r   download_config|   s   r*   c                 C   s   t d| jj t }t| tjjr,tjjj	}tj
jj|_|jjjdd tjD d|_tjr9tj|j| j|_| jtj|d tjt| jjdgd tj r_| j!tj"j#d}t$%  ~d	S d	S )
z"Generate data for a given dataset.z&download_and_prepare for dataset %s...c                 S   s   g | ]}d | qS )z--%sr   )r   optr   r   r   r      s    z(download_and_prepare.<locals>.<listcomp>r   )r   r*   bold)attrs)splitN)&r   info	full_namer*   
isinstancer$   coreBeamBasedBuilderlazy_importsapache_beamr%   ComputeStatsModeSKIPr   optionspipeline_optionsPipelineOptionsr'   r   beam_optionsr   ospathjoinr   namedownload_and_preparer   	termcolorcprintstras_protor   
as_datasetSplitTRAINpdb	set_trace)builder	dl_configbeamdatasetr   r   r   r@      s(   
r@   c                 C   s(   | D ]}| dD ]}t| q	qd S )N,)r.   	importlibimport_module)modulesmodulemr   r   r   import_modules   s
   rT   c                    s  t jrtt j t jrt  t jrtd t j	r#t
d t  t jr-tjt j tt jr7t jdp:t }|tt jd8 }t jrNddi ni  t
dd|  fdd	|D }t jd urt|d
krqtd|t| d  }|jstd|jt j }t
d|j tj|jft j |d }t!| d S |" D ](\}}|jrd|vr|jD ]}tj|jft j |d }t!| qqt!| qd S )Ni0*  zDisabling tqdm.rN   versionexperimental_latestz/Running download_and_prepare for dataset(s):
%s
c                    s&   i | ]}|t j|fd tji qS )r   )r$   rJ   r'   r   )r   r?   version_kwargr   r   
<dictcomp>   s    zmain.<locals>.<dictcomp>   zC--builder_config_id can only be used when building a single datasetr   z?--builder_config_id can only be used with datasets with configsz+Running download_and_prepare for config: %s)r   config/)#r'   r   rT   r    rH   rI   r!   timesleepr"   r   r/   r$   disable_progress_barr   r%   add_checksums_dirsetr   r.   list_buildersr
   r   r>   r   len
ValueErrorlistkeysBUILDER_CONFIGSr?   rJ   r   r@   items)_datasets_to_buildbuildersrJ   r\   builder_for_configr?   r   rX   r   main   sv   









rn   __main__)'__doc__rO   r<   rH   r^   abslr   r   r   tensorflow.compat.v2compatv2tfr   r$   rA   r'   r=   
expanduserr>   DEFAULT_DATA_DIRDEFINE_stringDEFINE_multi_stringDEFINE_integerDEFINE_booleanr%   r6   r7   default_compute_statsDEFINE_enumr   DEFINE_listr*   r@   rT   rn   __name__enable_v2_behaviorrunr   r   r   r   <module>   s   
C