o
    Ni}2                     @   s  d Z ddlmZ ddlmZ ddlmZ ddlZddlZddlm	Z	 ddl
m  mZ ddlmZ dZdZd	Zd
ZdZdZeedfZdZdZdZdZdZdZdZG dd dejj Z!G dd de!Z"G dd de!Z#G dd dejj$Z%G dd de!Z&dd  Z'd!d" Z(dS )#z(MNIST, Fashion MNIST, KMNIST and EMNIST.    )absolute_import)division)print_functionN)urllibz3https://storage.googleapis.com/cvdf-datasets/mnist/ztrain-images-idx3-ubyte.gzztrain-labels-idx1-ubyte.gzzt10k-images-idx3-ubyte.gzzt10k-labels-idx1-ubyte.gz      
   `  '  z@article{lecun2010mnist,
  title={MNIST handwritten digit database},
  author={LeCun, Yann and Cortes, Corinna and Burges, CJ},
  journal={ATT Labs [Online]. Available: http://yann.lecun.com/exdb/mnist},
  volume={2},
  year={2010}
}
an  @article{DBLP:journals/corr/abs-1708-07747,
  author    = {Han Xiao and
               Kashif Rasul and
               Roland Vollgraf},
  title     = {Fashion-MNIST: a Novel Image Dataset for Benchmarking Machine Learning
               Algorithms},
  journal   = {CoRR},
  volume    = {abs/1708.07747},
  year      = {2017},
  url       = {http://arxiv.org/abs/1708.07747},
  archivePrefix = {arXiv},
  eprint    = {1708.07747},
  timestamp = {Mon, 13 Aug 2018 16:47:27 +0200},
  biburl    = {https://dblp.org/rec/bib/journals/corr/abs-1708-07747},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
ar    @online{clanuwat2018deep,
  author       = {Tarin Clanuwat and Mikel Bober-Irizar and Asanobu Kitamoto and Alex Lamb and Kazuaki Yamamoto and David Ha},
  title        = {Deep Learning for Classical Japanese Literature},
  date         = {2018-12-03},
  year         = {2018},
  eprintclass  = {cs.CV},
  eprinttype   = {arXiv},
  eprint       = {cs.CV/1812.01718},
}
aF  @article{cohen_afshar_tapson_schaik_2017,
    title={EMNIST: Extending MNIST to handwritten letters},
    DOI={10.1109/ijcnn.2017.7966217},
    journal={2017 International Joint Conference on Neural Networks (IJCNN)},
    author={Cohen, Gregory and Afshar, Saeed and Tapson, Jonathan and Schaik, Andre Van},
    year={2017}
}
c                   @   s8   e Zd ZdZeZejdZ	dd Z
dd Zdd Zd	S )
MNISTzMNIST.z3.0.1c              	   C   s:   t jj| dt jt jjtdt jjtddddt	dS )Nz)The MNIST database of handwritten digits.shapenum_classesimagelabelz!http://yann.lecun.com/exdb/mnist/builderdescriptionfeaturessupervised_keyshomepagecitation)
tfdscoreDatasetInfor   FeaturesDictImageMNIST_IMAGE_SHAPE
ClassLabelMNIST_NUM_CLASSES_MNIST_CITATIONself r%   b/home/ubuntu/.local/lib/python3.10/site-packages/tensorflow_datasets/image_classification/mnist.py_infoe   s   zMNIST._infoc                    sv   t tttd}| fdd| D }tjjtj	j
tt|d |d ddtjjtj	jtt|d |d	 ddgS )
zReturns SplitGenerators.
train_datatrain_labels	test_datatest_labelsc                    s"   i | ]\}}|t j j|qS r%   )r   parseurljoinURL).0kvr#   r%   r&   
<dictcomp>|   s   " z+MNIST._split_generators.<locals>.<dictcomp>r)   r*   num_examples	data_path
label_pathname
gen_kwargsr+   r,   )_MNIST_TRAIN_DATA_FILENAME_MNIST_TRAIN_LABELS_FILENAME_MNIST_TEST_DATA_FILENAME_MNIST_TEST_LABELS_FILENAMEdownload_and_extractitemsr   r   SplitGeneratorSplitTRAINdict_TRAIN_EXAMPLESTEST_TEST_EXAMPLES)r$   
dl_manager	filenamesmnist_filesr%   r#   r&   _split_generatorsr   s2   zMNIST._split_generatorsc                 c   sR    t ||}t||}tt||}t|D ]\}\}}	||	d}
||
fV  qdS )zGenerate MNIST examples as dicts.

    Args:
      num_examples (int): The number of example.
      data_path (str): Path to the data files
      label_path (str): Path to the labels

    Yields:
      Generator yielding the next examples
    r   N)_extract_mnist_images_extract_mnist_labelslistzip	enumerate)r$   r5   r6   r7   imageslabelsdataindexr   r   recordr%   r%   r&   _generate_examples   s   


zMNIST._generate_examplesN)__name__
__module____qualname____doc__
_MNIST_URLr/   r   r   VersionVERSIONr'   rK   rV   r%   r%   r%   r&   r   _   s    r   c                   @      e Zd ZdZdd ZdS )FashionMNISTz;http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/c              	   C   >   t jj| dt jt jjtdt jjg dddddtdS )NzFashion-MNIST is a dataset of Zalando's article images consisting of a training set of 60,000 examples and a test set of 10,000 examples. Each example is a 28x28 grayscale image, associated with a label from 10 classes.r   )
zT-shirt/topTrouserPulloverDressCoatSandalShirtSneakerBagz
Ankle bootnamesr   z0https://github.com/zalandoresearch/fashion-mnistr   )	r   r   r   r   r   r   r   r    _FASHION_MNIST_CITATIONr#   r%   r%   r&   r'      s   	zFashionMNIST._infoNrW   rX   rY   r/   r'   r%   r%   r%   r&   r_      s    r_   c                   @   r^   )KMNISTz-http://codh.rois.ac.jp/kmnist/dataset/kmnist/c              	   C   r`   )Na,  Kuzushiji-MNIST is a drop-in replacement for the MNIST dataset (28x28 grayscale, 70,000 images), provided in the original MNIST format as well as a NumPy format. Since MNIST restricts us to 10 classes, we chose one character to represent each of the 10 rows of Hiragana when creating Kuzushiji-MNIST.r   )
okisutsunahamayarewori   r   z+http://codh.rois.ac.jp/kmnist/index.html.enr   )	r   r   r   r   r   r   r   r    _K_MNIST_CITATIONr#   r%   r%   r&   r'      s   zKMNIST._infoNrl   r%   r%   r%   r&   rm      s    rm   c                       s(   e Zd ZdZejj fddZ  ZS )EMNISTConfigz BuilderConfig for EMNIST CONFIG.c                    s:   t t| jddtjddi| || _|| _|| _dS )a?  BuilderConfig for EMNIST class number.

    Args:
      class_number: There are six different splits provided in this dataset. And
        have different class numbers.
      train_examples: number of train examples
      test_examples: number of test examples
      **kwargs: keyword arguments forwarded to super.
    versionz3.0.0z6New split API (https://tensorflow.org/datasets/splits)Nr%   )	superry   __init__r   r   r\   class_numbertrain_examplestest_examples)r$   r}   r~   r   kwargs	__class__r%   r&   r|      s   
zEMNISTConfig.__init__)	rW   rX   rY   rZ   r   r   disallow_positional_argsr|   __classcell__r%   r%   r   r&   ry      s    ry   c                   @   s   e Zd ZdZdZdZedddddd	ed
ddddd	edddddd	edddddd	edddddd	edddddd	gZdd  Zd!d" Z	dS )#EMNISTzEmnist dataset.z:https://www.itl.nist.gov/iaui/vip/cs_links/EMNIST/gzip.zipNbyclass>   iL
 ic zEMNIST ByClass)r9   r}   r~   r   r   bymerge/   zEMNIST ByMergebalancedi ipI  zEMNIST Balancedletters%   iZ i9  zEMNIST Lettersdigitsr   i i@  zEMNIST Digitsmnistr	   r
   zEMNIST MNISTc              	   C   s>   t jj| dt jt jjtdt jj| jj	ddddt
dS )Na  The EMNIST dataset is a set of handwritten character digits derived from the NIST Special Database 19 and converted to a 28x28 pixel image format and dataset structure that directly matches the MNIST dataset.

Note: Like the original EMNIST data, images provided here are inverted horizontally and rotated 90 anti-clockwise. You can use `tf.transpose` within `ds.map` to convert the images to a human-friendlier format.r   r   r   z=https://www.nist.gov/itl/products-and-services/emnist-datasetr   )r   r   r   r   r   r   r   r    builder_configr}   _EMNIST_CITATIONr#   r%   r%   r&   r'   &  s   zEMNIST._infoc                    s   d | jjd | jjd | jjd | jjd}tj|| jd | fdd|	 D }t
jjt
jjt| jj|d	 |d
 ddt
jjt
jjt| jj|d |d ddgS )Nz$emnist-{}-train-images-idx3-ubyte.gzz$emnist-{}-train-labels-idx1-ubyte.gzz#emnist-{}-test-images-idx3-ubyte.gzz#emnist-{}-test-labels-idx1-ubyte.gzr(   gzipc                    s    i | ]\}}|t j |qS r%   )ospathjoin)r0   r1   fnamedir_namer%   r&   r3   P  s    z,EMNIST._split_generators.<locals>.<dictcomp>r)   r*   r4   r8   r+   r,   )formatr   r9   r   r   r   r?   r/   extractr@   r   r   rA   rB   rC   rD   r~   rF   r   )r$   rH   rI   	extractedr%   r   r&   rK   ?  sD   zEMNIST._split_generators)
rW   rX   rY   rZ   r/   r]   ry   BUILDER_CONFIGSr'   rK   r%   r%   r%   r&   r      sb    .r   c                 C   sp   t jj| d&}|d |tt | }tj|tjd	|ttd}|W  d    S 1 s1w   Y  d S )Nrb   dtyper   )
tfiogfileGFileread_MNIST_IMAGE_SIZEnp
frombufferuint8reshape)image_filepath
num_imagesfbufrS   r%   r%   r&   rL   f  s   
$rL   c                 C   sd   t jj| d }|d ||}tj|tjdtj	}|W  d    S 1 s+w   Y  d S )Nr      r   )
r   r   r   r   r   r   r   r   astypeint64)labels_filepath
num_labelsr   r   rR   r%   r%   r&   rM   q  s   

$rM   ))rZ   
__future__r   r   r   r   numpyr   	six.movesr   tensorflow.compat.v2compatv2r   tensorflow_datasets.public_api
public_apir   r[   r;   r<   r=   r>   r   r   r!   rE   rG   r"   rk   rx   r   r   GeneratorBasedBuilderr   r_   rm   BuilderConfigry   r   rL   rM   r%   r%   r%   r&   <module>   s<   

Gs