o
    Ni^                     @   s   d Z ddlmZ ddlmZ ddlmZ ddlZddlZddlm  m	Z
 ddlmZ ddlmZ dZdZd	Zd
ddZG dd deZdS )zImagenet subset datasets.    )absolute_import)division)print_functionN)Imagenet2012a  Imagenet2012Subset is a subset of original ImageNet ILSVRC 2012 dataset.
The dataset share the *same* validation set as the original ImageNet ILSVRC 2012
dataset. However, the training set is subsampled in a label balanced fashion.
In `1pct` configuration, 1%, or 12811, images are sampled, most classes have
the same number of images (average 12.8), some classes randomly have 1 more
example than others; and in `10pct` configuration, ~10%, or 128116, most classes
have the same number of images (average 128), and some classes randomly have 1
more example than others.

This is supposed to be used as a benchmark for semi-supervised learning, and
has been originally used in SimCLR paper (https://arxiv.org/abs/2002.05709).
a  @article{chen2020simple,
  title={A Simple Framework for Contrastive Learning of Visual Representations},
  author={Chen, Ting and Kornblith, Simon and Norouzi, Mohammad and Hinton, Geoffrey},
  journal={arXiv preprint arXiv:2002.05709},
  year={2020}
}
@article{ILSVRC15,
  Author = {Olga Russakovsky and Jia Deng and Hao Su and Jonathan Krause and Sanjeev Satheesh and Sean Ma and Zhiheng Huang and Andrej Karpathy and Aditya Khosla and Michael Bernstein and Alexander C. Berg and Li Fei-Fei},
  Title = {{ImageNet Large Scale Visual Recognition Challenge}},
  Year = {2015},
  journal   = {International Journal of Computer Vision (IJCV)},
  doi = {10.1007/s11263-015-0816-y},
  volume={115},
  number={3},
  pages={211-252}
}
z,image_classification/imagenet2012_labels.txtz]https://raw.githubusercontent.com/google-research/simclr/master/imagenet_subsets/1percent.txtz^https://raw.githubusercontent.com/google-research/simclr/master/imagenet_subsets/10percent.txt)1pct10pctc                   @   s8   e Zd ZdZdd eD Zdd Zdd Zdd	d
ZdS )Imagenet2012Subsetz/Class balanced subset of Imagenet 2012 dataset.c              	   C   s.   g | ]}t jj|d |t jdddqS )z"{} of total ImageNet training set.z5.0.0 )namedescriptionversion)tfdscoreBuilderConfigformatVersion).0subset_size r   p/home/ubuntu/.local/lib/python3.10/site-packages/tensorflow_datasets/image_classification/imagenet2012_subset.py
<listcomp>K   s    zImagenet2012Subset.<listcomp>c              	   C   sJ   t jt}t jj| tt jt j t jj	|dt j
 dddtdS )N)
names_file)imagelabel	file_name)r   r   zhttp://image-net.org/)builderr   featuressupervised_keyshomepagecitation)r   r   get_tfds_path_LABELS_FNAMEDatasetInfo_DESCRIPTIONr   FeaturesDictImage
ClassLabelText	_CITATION)selfr   r   r   r   _infoT   s   zImagenet2012Subset._infoc                 C   s   t j|jd}t j|jd}tjj|rtjj|s&td	|||
t| jj }t|tr8|d }tjj|}t|  }W d    n1 sRw   Y  tjjtjj|||ddtjjtjj||| |ddgS )NzILSVRC2012_img_train.tarzILSVRC2012_img_val.tarzpImageNet requires manual download of the data. Please download the train and val set and place them into: {}, {}r   )archivesubset)r
   
gen_kwargs)r+   validation_labels)ospathjoin
manual_dirtfiogfileexistsAssertionErrorr   downloadSUBSET2FILESbuilder_configr
   
isinstancelistGFilesetread
splitlinesr   r   SplitGeneratorSplitTRAINiter_archive
VALIDATION_get_validation_labels)r)   
dl_manager
train_pathval_pathsubset_filefpr,   r   r   r   _split_generatorsc   s8   
z$Imagenet2012Subset._split_generatorsNc                 c   s    |r|  ||D ]	\}}||fV  q	|D ]9\}}|dd }t| }	tj|	tjjjD ]\}
}| 	|
|}|du sB|
|v rM|
||d}|
|fV  q0qdS )zYields examples.N)r   r   r   )
_generate_examples_validationr4   BytesIOr?   r   r8   rD   ExtractMethod
TAR_STREAM
_fix_image)r)   r+   r,   r.   keyexamplefnamefobjr   fobj_memimage_fnamer   recordr   r   r   _generate_examples   s,   

z%Imagenet2012Subset._generate_examples)NN)	__name__
__module____qualname____doc__r9   BUILDER_CONFIGSr*   rL   rZ   r   r   r   r   r   H   s    	$r   )r^   
__future__r   r   r   r4   r/   tensorflow.compat.v2compatv2r3   1tensorflow_datasets.image_classification.imagenetr   tensorflow_datasets.public_api
public_apir   r#   r(   r!   r9   r   r   r   r   r   <module>   s    