o
    Ni                     @   s   d Z ddlmZ ddlmZ ddlmZ ddlZddlZddlm	  m
Z ddlmZ dZdZdZd	Zd
ZdZG dd dejjZdS )zCaltech images dataset.    )absolute_import)division)print_functionNa0  @article{FeiFei2004LearningGV,
  title={Learning Generative Visual Models from Few Training Examples: An Incremental Bayesian Approach Tested on 101 Object Categories},
  author={Li Fei-Fei and Rob Fergus and Pietro Perona},
  journal={Computer Vision and Pattern Recognition Workshop},
  year={2004},
}
a  Caltech-101 consists of pictures of objects belonging to 101 classes, plus
one `background clutter` class. Each image is labelled with a single object.
Each class contains roughly 40 to 800 images, totalling around 9k images.
Images are of variable sizes, with typical edge lengths of 200-300 pixels.
This version contains image-level labels only. The original dataset also
contains bounding boxes.
z*image_classification/caltech101_labels.txtz8http://www.vision.caltech.edu/Image_Datasets/Caltech101/z101_ObjectCategories.tar.gz   c                   @   s6   e Zd ZdZejddZdd Zdd Z	dd	 Z
d
S )
Caltech101zCaltech-101.z3.0.0z6New split API (https://tensorflow.org/datasets/splits)c              	   C   sJ   t jt}t jj| tt jt j t jj	|dt j
 ddttdS )N)
names_fileimagelabelzimage/file_name)r	   r
   )builderdescriptionfeaturessupervised_keyshomepagecitation)tfdscoreget_tfds_path_LABELS_FNAMEDatasetInfo_DESCRIPTIONr   FeaturesDictImage
ClassLabelText_URL	_CITATION)selfr    r   d/home/ubuntu/.local/lib/python3.10/site-packages/tensorflow_datasets/image_classification/caltech.py_info8   s   zCaltech101._infoc                 C   sH   | tjtt}tjjtj	j
|dddtjjtj	j|dddgS )NT)images_dir_pathis_train_split)name
gen_kwargsF)download_and_extractospathjoinr   _IMAGES_FNAMEr   r   SplitGeneratorSplitTRAINTEST)r   
dl_managerr'   r   r   r   _split_generatorsG   s   zCaltech101._split_generatorsc                 c   s.   t j }t jd tjj|d }tj	
||}tjj|}|D ]i}tjjtj	
||rtjjtj	
||D ]N\}}	}
tt|
krUtdtt|
|t jj|
tdd}t|
|}|ri|n|}|D ]}|drtj	
||}|| |d}d||f |fV  qmq?q%t j| d	S )
aL  Generates images and labels given the image directory path.

    As is usual for this dataset, 30 random examples from each class are added
    to the train split, and the remainder are added to the test split.

    Args:
      images_dir_path: path to the directory where the images are stored.
      is_train_split: bool, if true, generates the train split, else generates
        the test split.

    Yields:
      The image path, and its corresponding label and filename.

    Raises:
      ValueError: If too few points are present to create the train set for any
        class.
    i  r   z%Fewer than {} ({}) points in class {}F)replacez.jpgr   z%s/%sN)nprandom	get_stateseedtfiogfilelistdirr&   r'   r(   isdirwalk_TRAIN_POINTS_PER_CLASSlen
ValueErrorformatchoiceset
differenceendswithlower	set_state)r   r!   r"   numpy_original_state
parent_dirwalk_dirdirsd	full_path_fnamestrain_fnamestest_fnamesfnames_to_emit
image_file
image_pathrecordr   r   r   _generate_examplesX   s<   
"



zCaltech101._generate_examplesN)__name__
__module____qualname____doc__r   r   VersionVERSIONr    r/   rS   r   r   r   r   r   2   s    r   )rW   
__future__r   r   r   r&   numpyr1   tensorflow.compat.v2compatv2r5   tensorflow_datasets.public_api
public_apir   r   r   r   r   r)   r;   r   GeneratorBasedBuilderr   r   r   r   r   <module>   s   