o
    Ni                     @   s   d Z ddlmZ ddlmZ ddlmZ ddlZddlZddlZddlm	  m
Z ddlmZ dZdZdZd	Zg d
ZdgZG dd dejjZdS )zImagenet datasets.    )absolute_import)division)print_functionNa  ILSVRC 2012, aka ImageNet is an image dataset organized according to the
WordNet hierarchy. Each meaningful concept in WordNet, possibly described by
multiple words or word phrases, is called a "synonym set" or "synset". There are
more than 100,000 synsets in WordNet, majority of them are nouns (80,000+). In
ImageNet, we aim to provide on average 1000 images to illustrate each synset.
Images of each concept are quality-controlled and human-annotated. In its
completion, we hope ImageNet will offer tens of millions of cleanly sorted
images for most of the concepts in the WordNet hierarchy.

Note that labels were never publicly released for the test set, so we only
include splits for the training and validation sets here.
a  @article{ILSVRC15,
Author = {Olga Russakovsky and Jia Deng and Hao Su and Jonathan Krause and Sanjeev Satheesh and Sean Ma and Zhiheng Huang and Andrej Karpathy and Aditya Khosla and Michael Bernstein and Alexander C. Berg and Li Fei-Fei},
Title = {{ImageNet Large Scale Visual Recognition Challenge}},
Year = {2015},
journal   = {International Journal of Computer Vision (IJCV)},
doi = {10.1007/s11263-015-0816-y},
volume={115},
number={3},
pages={211-252}
}
z,image_classification/imagenet2012_labels.txtz7image_classification/imagenet2012_validation_labels.txt)zn01739381_1309.JPEGzn02077923_14822.JPEGzn02447366_23489.JPEGzn02492035_15739.JPEGzn02747177_10752.JPEGzn03018349_4028.JPEGzn03062245_4620.JPEGzn03347037_9675.JPEGzn03467068_12171.JPEGzn03529860_11437.JPEGzn03544143_17228.JPEGzn03633091_5218.JPEGzn03710637_5125.JPEGzn03961711_5286.JPEGzn04033995_2932.JPEGzn04258138_17003.JPEGzn04264628_27969.JPEGzn04336792_7448.JPEGzn04371774_5854.JPEGzn04596742_4225.JPEGzn07583066_647.JPEGzn13037406_4650.JPEGzn02105855_2933.JPEGc                   @   sX   e Zd ZdZejddZdZdd Z	e
dd Zd	d
 Zdd ZdddZdd ZdS )Imagenet2012zImagenet 2012, aka ILSVRC 2012.z5.0.0z6New split API (https://tensorflow.org/datasets/splits)z  manual_dir should contain two files: ILSVRC2012_img_train.tar and
  ILSVRC2012_img_val.tar.
  You need to register on http://www.image-net.org/download-images in order
  to get the link to download the dataset.
  c              	   C   sN   t jt}t jj| tt jt jjddt jj	|dt j
 dddtdS )Njpeg)encoding_format)
names_file)imagelabel	file_name)r	   r
   zhttp://image-net.org/)builderdescriptionfeaturessupervised_keyshomepagecitation)tfdscoreget_tfds_path_LABELS_FNAMEDatasetInfo_DESCRIPTIONr   FeaturesDictImage
ClassLabelText	_CITATION)selfr    r   e/home/ubuntu/.local/lib/python3.10/site-packages/tensorflow_datasets/image_classification/imagenet.py_infon   s   zImagenet2012._infoc                 C   s   t jt}tjj|}| 	 
 }W d   n1 s w   Y  tjj| d}tjd|d}t| }W d   n1 sEw   Y  tt||S )a  Returns labels for validation.

    Args:
      val_path: path to TAR file containing validation images. It is used to
      retrieve the name of pictures and associate them to labels.

    Returns:
      dict, mapping from image name (str) to label (str).
    Nrbzr:)modefileobj)r   r   r   _VALIDATION_LABELS_FNAMEtfiogfileGFilereadstrip
splitlinestarfileopensortedgetnamesdictzip)val_pathlabels_pathlabels_flabels	tar_f_objtarimagesr   r   r   _get_validation_labels}   s   z#Imagenet2012._get_validation_labelsc                 C   s   t j|jd}t j|jd}tjj|rtjj|s&td	||t
jjt
jjd||idt
jjt
jj||| |ddgS )NzILSVRC2012_img_train.tarzILSVRC2012_img_val.tarzpImageNet requires manual download of the data. Please download the train and val set and place them into: {}, {}archive)name
gen_kwargs)r:   validation_labels)ospathjoin
manual_dirr%   r&   r'   existsAssertionErrorformatr   r   SplitGeneratorSplitTRAINiter_archive
VALIDATIONr9   )r   
dl_manager
train_pathr2   r   r   r   _split_generators   s*   
zImagenet2012._split_generatorsc                 C   sV   | j dk r|S |tv rttjj| }|S |t	v r)ttjj
| }|S )z8Fix image color system and format starting from v 3.0.0.z3.0.0)versionCMYK_IMAGESr&   BytesIOr   r   utilsjpeg_cmyk_to_rgbr)   
PNG_IMAGESpng_to_jpeg)r   image_fnamer	   r   r   r   
_fix_image   s   
zImagenet2012._fix_imageNc                 c   s    |r|  ||D ]	\}}||fV  q	|D ]1\}}|dd }t| }tj|tjjjD ]\}	}
| 	|	|
}
|	|
|d}|	|fV  q0qdS )zYields examples.Nr   r	   r
   )
_generate_examples_validationr&   rO   r)   r   downloadrH   ExtractMethod
TAR_STREAMrU   )r   r:   r=   keyexamplefnamefobjr
   fobj_memrT   r	   recordr   r   r   _generate_examples   s(   
zImagenet2012._generate_examplesc                 c   s.    |D ]\}}|||| d}||fV  qd S )NrW   r   )r   r:   r5   r^   r_   ra   r   r   r   rX      s   z*Imagenet2012._generate_examples_validation)N)__name__
__module____qualname____doc__r   r   VersionVERSIONMANUAL_DOWNLOAD_INSTRUCTIONSr    staticmethodr9   rL   rU   rb   rX   r   r   r   r   r   a   s    


r   )rf   
__future__r   r   r   r&   r>   r,   tensorflow.compat.v2compatv2r%   tensorflow_datasets.public_api
public_apir   r   r   r   r$   rN   rR   r   GeneratorBasedBuilderr   r   r   r   r   <module>   s    