o
    Ni                     @   s   d Z ddlmZ ddlmZ ddlmZ ddlZddlZddlm	  m
Z ddlmZ dZdZdZed	ZG d
d dejjZdS )zCode to build STL-10 dataset.    )absolute_import)division)print_functionNa,  
@inproceedings{coates2011stl10,
  title={{An Analysis of Single Layer Networks in Unsupervised Feature Learning}},
  author={Coates, Adam and Ng, Andrew and Lee, Honglak},
  booktitle={AISTATS},
  year={2011},
  note = {\url{https://cs.stanford.edu/~acoates/papers/coatesleeng_aistats_2011.pdf}},
}
ao  The STL-10 dataset is an image recognition dataset for developing unsupervised
feature learning, deep learning, self-taught learning algorithms. It is inspired
by the CIFAR-10 dataset but with some modifications. In particular, each class
has fewer labeled training examples than in CIFAR-10, but a very large set of 
unlabeled examples is provided to learn image models prior to supervised
training. The primary challenge is to make use of the unlabeled data (which
comes from a similar but different distribution from the labeled data) to build
a useful prior. All images were acquired from labeled examples on ImageNet.
z9http://ai.stanford.edu/~acoates/stl10/stl10_binary.tar.gz
unlabelledc                   @   s4   e Zd ZdZejdZdd Zdd Z	dd Z
d	S )
Stl10zSTL-10 dataset.z1.0.0c              	   C   s:   t jj| tt jt jjddt jjdddddtdS )N)`   r      )shape
   )num_classesimagelabelz&http://ai.stanford.edu/~acoates/stl10/)builderdescriptionfeaturessupervised_keyshomepagecitation)	tfdscoreDatasetInfo_DESCRIPTIONr   FeaturesDictImage
ClassLabel	_CITATION)self r   b/home/ubuntu/.local/lib/python3.10/site-packages/tensorflow_datasets/image_classification/stl10.py_info;   s   zStl10._infoc           	         s   ddg}ddg}dg}| t tj d  fdd}tjjt	|d	gd
}dd |D }W d   n1 s<w   Y  || j
jd _tjjtjjd||idtjjtjjd||idtjjtd||idg}|S )zReturns SplitGenerators.ztrain_X.binztrain_y.binz
test_X.binz
test_y.binzunlabeled_X.binzstl10_binary/c                 3   s     | D ]
}t j |V  qd S )N)ospathjoin)	filenamesf
stl10_pathr   r   gen_filenamesQ   s   z.Stl10._split_generators.<locals>.gen_filenameszclass_names.txtrc                 S   s   g | ]}| d qS )
)strip).0lr   r   r   
<listcomp>W   s    z+Stl10._split_generators.<locals>.<listcomp>Nr   	filepaths)name
gen_kwargs)download_and_extractURLr!   r"   r#   tfiogfileGFilenextinfor   namesr   r   SplitGeneratorSplitTRAINTEST
UNLABELLED)	r   
dl_managertrain_files
test_filesunlabeled_filesr(   r%   class_namessplitsr   r&   r   _split_generatorsH   s0   



zStl10._split_generatorsc           	      c   s   t |}|d }t|dkr|d nd}tjj|d}tj| tj	d}t
|d}t|d}W d   n1 s>w   Y  |rotjj|d}ttj| tj	d}|d8 }W d   n1 siw   Y  nd}t|D ]\}}|||dur|| ndd	fV  qudS )
zGenerate STL-10 examples as dicts.

    Args:
      filepaths (list[str]): The files to use to generate the data.

    Yields:
      The STL-10 examples, as defined in the dataset info features.
    r      Nrb)dtype)r   r   r   )r   r      rG   rJ   r   )listlenr4   r5   r6   r7   np
frombufferreaduint8reshape	transposecopy	enumerate)	r   r/   
image_path
label_pathr%   imageslabelsindexr   r   r   r   _generate_examplesi   s,   	
zStl10._generate_examplesN)__name__
__module____qualname____doc__r   r   VersionVERSIONr    rF   r[   r   r   r   r   r   6   s    !r   )r_   
__future__r   r   r   r!   numpyrN   tensorflow.compat.v2compatv2r4   tensorflow_datasets.public_api
public_apir   r   r   r3   r<   r?   r   GeneratorBasedBuilderr   r   r   r   r   <module>   s   

