o
    Ni                     @   s   d Z ddlmZ ddlmZ ddlmZ ddlZddlZddlZddlZddl	m
Z
 ddlZddlm  mZ ddlmZ ejdd	d
ZG dd deZG dd deZdS )zMock util for tfds.
    )absolute_import)division)print_functionN)absltest)features   c              
   #   s    dd }d
 fdd	}|s|}|st jt jtd}d}d}d	}tj||B tj||" tj|| dV  W d   n1 sHw   Y  W d   n1 sWw   Y  W d   dS W d   dS 1 sow   Y  dS )ah  Mock tfds to generate random data.

  This function requires the true metadata files (dataset_info.json, label.txt,
  vocabulary files) to be stored in `data_dir/dataset_name/version`, as they
  would be for the true dataset.
  The actual examples will be randomly generated using
  `builder.info.features.get_tensor_info()`.
  Download and prepare step will be skipped.

  Warning: As the mocked builder will use the true metadata (label names,...),
  the `info.split['train'].num_examples` won't match `len(list(ds_train))`.

  Usage (automated):

  ```
  with tfds.testing.mock_data(num_examples=5):
    ds = tfds.load('some_dataset', split='train')

    for ex in ds:  # ds will yield randomly generated examples.
      ex
  ```

  The examples will be deterministically generated. Train and test split will
  yield the same examples.

  If you want more fine grain control over the generated examples, you can
  manually overwrite the `DatasetBuilder._as_dataset` method.
  Usage (manual):

  ```
  def as_dataset(self, *args, **kwargs):
    return tf.data.Dataset.from_generator(
        lambda: ({
            'image': np.ones(shape=(28, 28, 1), dtype=np.uint8),
            'label': i % 10,
        } for i in range(num_examples)),
        output_types=self.info.features.dtype,
        output_shapes=self.info.features.shape,
    )

  with mock_data(as_dataset_fn=as_dataset):
    ds = tfds.load('some_dataset', split='train')

    for ex in ds:  # ds will yield the fake data example of 'as_dataset'.
      ex
  ```

  Args:
    num_examples: `int`, the number of fake example to generate.
    as_dataset_fn: if provided, will replace the default random example
      generator. This function mock the `FileAdapterBuilder._as_dataset`
    data_dir: `str`, `data_dir` folder from where to load the metadata.
      Will overwrite `data_dir` kwargs from `tfds.load`.

  Yields:
    None
  c                 _   s.   ~~t jj| jstdj| j| jdd S )NzTFDS has been mocked, but metadata files were not found in {}. You should copy the real metadata files, so that the dataset can be loaded properly, or set the data_dir kwarg of tfds.testing.mock_tfds(data_dir=...).)n)tfiogfileexists	_data_dir
ValueErrorformatname)selfargskwargs r   W/home/ubuntu/.local/lib/python3.10/site-packages/tensorflow_datasets/testing/mocking.pymock_download_and_prepare`   s   
z,mock_data.<locals>.mock_download_and_prepareNc                    s   ~~|du rt  jj }dd }nt jj }tjjjj|d}t	j
jj fddt	jdd |t	jdd |d}|j|t	j
jjd	 |S )
z-Function which overwrite builder._as_dataset.Nc                 S   s   | S Nr   )exr   r   r   <lambda>t   s    z4mock_data.<locals>.mock_as_dataset.<locals>.<lambda>)decodersc                      s    dS )N)buildernum_examplesr   r   )generator_clsr   r   r   r   r      s    c                 S      | j S r   )dtypetr   r   r   r          c                 S   r   r   )shaper    r   r   r   r      r"   )output_typesoutput_shapes)num_parallel_calls)RandomFakeGeneratorinfor   get_tensor_infoEncodedRandomFakeGeneratorget_serialized_info	functoolspartialdecode_exampler	   dataDatasetfrom_generatornestmap_structuremapexperimentalAUTOTUNE)r   splitr   r   specs	decode_fndsr   )r   r   r   mock_as_datasetl   s$   

z"mock_data.<locals>.mock_as_datasetmetadatazLtensorflow_datasets.core.dataset_builder.DatasetBuilder.download_and_preparezGtensorflow_datasets.core.dataset_builder.FileAdapterBuilder._as_datasetz+tensorflow_datasets.core.constants.DATA_DIRr   )ospathjoindirname__file__r   mockpatch)r   as_dataset_fndata_dirr   r<   download_and_prepare_pathas_dataset_pathdata_dir_pathr   r;   r   	mock_data$   s*   <P rJ   c                   @   s2   e Zd ZdZdddZdd Zdd Zd	d
 ZdS )r'   zDGenerator of fake examples randomly and deterministically generated.r   c                 C   s*   t j|| _t|| _|| _|| _d S r   )nprandomRandomState_rgnRandom_py_rng_builder_num_examples)r   r   r   seedr   r   r   __init__   s   
zRandomFakeGenerator.__init__c                    s    fdd|j D }t|tjr|j}nt|tjr!|jr!|j}nd}|j}|jr5 j	
d|||jS |jrB j	||jS |tjkrZd fddt j
dd	D S td
|)z/Generates a random tensor for a single feature.c                    s&   g | ]}|d u r j ddn|qS )N   2   )rN   randint).0sr   r   r   
<listcomp>   s    z>RandomFakeGenerator._generate_random_array.<locals>.<listcomp>   r    c                 3   s    | ]	} j d V  qdS )z abcdefghijN)rP   choice)rX   _rZ   r   r   	<genexpr>   s
    

z=RandomFakeGenerator._generate_random_array.<locals>.<genexpr>
      z$Fake generation not supported for {})r#   
isinstancefeatures_lib
ClassLabelnum_classesText
vocab_sizer   
is_integerrN   rW   astypeas_numpy_dtypeis_floatingrandom_sampler	   stringr@   rangerP   r   r   )r   featuretensor_infor#   	max_valuer   r   rZ   r   _generate_random_array   s$   

z*RandomFakeGenerator._generate_random_arrayc                    sD    j jj}||}|| } fddt||D }||S )zGenerate the next example.c                    s   g | ]
\}}  ||qS r   )rs   )rX   rp   rq   rZ   r   r   r[      s    
z9RandomFakeGenerator._generate_example.<locals>.<listcomp>)rQ   r(   r   _flattenr)   zip_nest)r   root_featureflat_featuresflat_tensor_infoflat_npr   rZ   r   _generate_example   s   



z%RandomFakeGenerator._generate_examplec                 c   s     t | jD ]}|  V  qdS zYields all fake examples.N)ro   rR   r{   )r   r_   r   r   r   __iter__   s   zRandomFakeGenerator.__iter__N)r   )__name__
__module____qualname____doc__rT   rs   r{   r}   r   r   r   r   r'      s    
r'   c                       s    e Zd ZdZ fddZ  ZS )r*   z#Generator of fake encoded examples.c                 #   s,    t t|  D ]}| jjj|V  qdS r|   )superr*   r}   rQ   r(   r   encode_example)r   r   	__class__r   r   r}      s   z#EncodedRandomFakeGenerator.__iter__)r~   r   r   r   r}   __classcell__r   r   r   r   r*      s    r*   )r   NN)r   
__future__r   r   r   
contextlibr,   r>   rL   absl.testingr   numpyrK   tensorflow.compat.v2compatv2r	   tensorflow_datasets.corer   rd   contextmanagerrJ   objectr'   r*   r   r   r   r   <module>   s    s9