o
    Ni:                     @   sL  d Z ddlmZ ddlmZ ddlmZ ddlZddlZddlZddlZddl	Z	ddl
mZ ddlZddlm  mZ ddlmZ dZdZd	d
 dD ejjdejjjdejjdejjjdddddddddddZedg dZedg dZg dZg dZ G dd  d ej!j"Z#G d!d" d"ej!j$Z%d)d#d$Z&d%d& Z'd'd( Z(dS )*zPOpen images datasets.

https://storage.googleapis.com/openimages/web/index.html
    )absolute_import)division)print_functionN)loggingaM  Open Images is a dataset of ~9M images that have been annotated with image-level
 labels and object bounding boxes.

The training set of V4 contains 14.6M bounding boxes for 600 object classes on
1.74M images, making it the largest existing dataset with object location
annotations. The boxes have been largely manually drawn by professional
annotators to ensure accuracy and consistency. The images are very diverse and
often contain complex scenes with several objects (8.4 per image on average).
Moreover, the dataset is annotated with image-level labels spanning thousands of
classes.
a  @article{OpenImages,
  author = {Alina Kuznetsova and
            Hassan Rom and
            Neil Alldrin and
            Jasper Uijlings and
            Ivan Krasin and
            Jordi Pont-Tuset and
            Shahab Kamali and
            Stefan Popov and
            Matteo Malloci and
            Tom Duerig and
            Vittorio Ferrari},
  title = {The Open Images Dataset V4: Unified image classification,
           object detection, and visual relationship detection at scale},
  year = {2018},
  journal = {arXiv:1811.00982}
}
@article{OpenImages2,
  author = {Krasin, Ivan and
            Duerig, Tom and
            Alldrin, Neil and
            Ferrari, Vittorio
            and Abu-El-Haija, Sami and
            Kuznetsova, Alina and
            Rom, Hassan and
            Uijlings, Jasper and
            Popov, Stefan and
            Kamali, Shahab and
            Malloci, Matteo and
            Pont-Tuset, Jordi and
            Veit, Andreas and
            Belongie, Serge and
            Gomes, Victor and
            Gupta, Abhinav and
            Sun, Chen and
            Chechik, Gal and
            Cai, David and
            Feng, Zheyun and
            Narayanan, Dhyanesh and
            Murphy, Kevin},
  title = {OpenImages: A public dataset for large-scale multi-label and
           multi-class image classification.},
  journal = {Dataset available from
             https://storage.googleapis.com/openimages/web/index.html},
  year={2017}
}
c                 C   s&   g | ]}t jjd | t jjjdqS )z?http://open-images-dataset.s3.amazonaws.com/tar/train_%s.tar.gzurlextract_method)tfdsdownloadResourceExtractMethodGZIP).0i_ r   d/home/ubuntu/.local/lib/python3.10/site-packages/tensorflow_datasets/object_detection/open_images.py
<listcomp>h   s    
r   0123456789abcdefz;http://open-images-dataset.s3.amazonaws.com/tar/test.tar.gzr   zAhttp://open-images-dataset.s3.amazonaws.com/tar/validation.tar.gzz_https://storage.googleapis.com/openimages/2018_04/train/train-annotations-human-imagelabels.csvzahttps://storage.googleapis.com/openimages/2018_04/train/train-annotations-machine-imagelabels.csvz]https://storage.googleapis.com/openimages/2018_04/test/test-annotations-human-imagelabels.csvz_https://storage.googleapis.com/openimages/2018_04/test/test-annotations-machine-imagelabels.csvzihttps://storage.googleapis.com/openimages/2018_04/validation/validation-annotations-human-imagelabels.csvzkhttps://storage.googleapis.com/openimages/2018_04/validation/validation-annotations-machine-imagelabels.csvzRhttps://storage.googleapis.com/openimages/2018_04/train/train-annotations-bbox.csvzPhttps://storage.googleapis.com/openimages/2018_04/test/test-annotations-bbox.csvz\https://storage.googleapis.com/openimages/2018_04/validation/validation-annotations-bbox.csv)train_imagestest_imagesvalidation_imagestrain_human_labelstrain_machine_labelstest_human_labelstest_machine_labelsvalidation_human_labelsvalidation_machine_labelstrain-annotations-bboxtest-annotations-bboxvalidation-annotations-bboxObjectlabel
confidencesourceBboxr"   r$   bboxis_occludedis_truncatedis_group_ofis_depiction	is_inside)verificationzcrowdsource-verificationmachine)freeformxclick	activemilc                       s.   e Zd ZdZd fdd	Zedd Z  ZS )OpenImagesV4ConfigzBuilderConfig for OpenImagesV4.Nc                    s2   t jdd|d< tt| jdi | || _dS )zBuilderConfig for OpenImagesV4.

    Args:
      target_pixels: If given, rescale the images so that the number of pixels
        is roughly this value.
      **kwargs: keyword arguments forward to super.
    z2.0.0z6New split API (https://tensorflow.org/datasets/splits)versionNr   )r	   coreVersionsuperr2   __init___target_pixels)selftarget_pixelskwargs	__class__r   r   r7      s
   
zOpenImagesV4Config.__init__c                 C   s   | j S N)r8   )r9   r   r   r   r:      s   z OpenImagesV4Config.target_pixelsr>   )__name__
__module____qualname____doc__r7   propertyr:   __classcell__r   r   r<   r   r2      s
    r2   c                   @   sR   e Zd ZdZedddedddded	d
ddgZdd Zdd Z	dddZdS )OpenImagesV4zOpen Images v4.originalz0Images at their original resolution and quality.)namedescription300kz7Images have roughly 300,000 pixels, at 72 JPEG quality.i )rG   rH   r:   200kz7Images have roughly 200,000 pixels, at 72 JPEG quality.i@ c                 C   s   t jjtt d}t jjt jtj	ddd}t jjt jtj	ddd}t jjt jtj	ddd}t jj
| tt jt j t j t j|tj|dt j|tj|dt j||t j tjtjtjtjtjdd	d
tdS )N)namesobject_detectionzopen_images_classes_all.txt)
names_filez!open_images_classes_trainable.txtzopen_images_classes_boxable.txtr!   r&   imagezimage/filenameobjectsobjects_trainablebobjectsz8https://storage.googleapis.com/openimages/web/index.html)builderrH   featureshomepagecitation)r	   rT   
ClassLabelIMAGE_LEVEL_SOURCESBBOX_SOURCESr4   get_tfds_pathospathjoinDatasetInfo_DESCRIPTIONFeaturesDictImageTextSequencetfint32BBoxFeatureint8	_CITATION)r9   source_class_labelall_class_labeltrainable_class_labelboxable_class_labelr   r   r   _info   s`   zOpenImagesV4._infoc           
   	      s   | t  fdd}|ddg}|ddg}|ddg} fd	d
}|d}|d}|d}	tjjtjjt d ||dddtjjtjjt d g||ddtjjtjj	t d g||	ddgS )zReturns SplitGenerators.c                    s*   dgt |  }tt fdd| D |S )Nr   c                    s   g | ]} | qS r   r   )r   rG   pathsr   r   r          z@OpenImagesV4._split_generators.<locals>.load.<locals>.<listcomp>)len	functoolspartial_load_objects)rK   csv_positionsrn   r   r   load   s   z,OpenImagesV4._split_generators.<locals>.loadr   r   r   r   r   r   c                    s   dg}t t |  |S )Nr   )rr   rs   _load_bboxes)rG   ru   rn   r   r   
load_boxes   s   z2OpenImagesV4._split_generators.<locals>.load_boxesr   r   r   r   r   )archive_pathsobjects_getterbboxes_getterprefixes)rG   
gen_kwargsr   )ry   rz   r{   r   )
download_and_extract_URLSr	   r4   SplitGeneratorSplitTRAINdictTEST
VALIDATION)
r9   
dl_managerrv   train_objectstest_objectsvalidation_objectsrx   
train_bbox	test_bboxvalidation_bboxr   rn   r   _split_generators   sD   


zOpenImagesV4._split_generatorsNc                 #   s   t | jjd d j t|D ]n\}}|r|| nd}||}||}	td| tj|tjj	j
}
|
D ]G\}}tj|}ttj|d d}dd ||g D }d	d |	|g D } fd
d|D }t|| jjd||||d}||fV  q6qdS )zYields examples.rQ   r"   NzOpening archive %s ...r      c                 S      g | ]}|  qS r   _asdictr   objr   r   r   r     rp   z3OpenImagesV4._generate_examples.<locals>.<listcomp>c                 S   r   r   r   )r   r'   r   r   r   r     rp   c                    s   g | ]
}|d   v r|qS )r"   r   r   trainable_classesr   r   r     s    )r:   rN   )setinforT   rK   	enumerater   r	   r
   iter_archiver   
TAR_STREAMr[   r\   basenameintsplitextget_resize_image_if_necessarybuilder_configr:   )r9   ry   rz   r{   r|   iarchive_pathprefixrP   bboxesarchivefpathfobjfnameimage_idimage_objectsimage_bboxesimage_objects_trainablerecordr   r   r   _generate_examples
  s>   

zOpenImagesV4._generate_examplesr>   )	r?   r@   rA   rB   r2   BUILDER_CONFIGSrm   r   r   r   r   r   r   rE      s*    .)rE   c           
      C   s   |du r| S t jjj}|jtj|  tjddd}|j	\}}}|| }||kr8t
|| }|j|d||d}|d|t|jdg\}}	t|	 S )zResize an image to have (roughly) the given number of target pixels.

  Args:
    image_fobj: File object containing the original image.
    target_pixels: If given, number of pixels that the image must have.

  Returns:
    A file object.
  N)dtype   )flags)dsizefxfyz.jpgH   )r	   r4   lazy_importscv2imdecodenp
fromstringreaduint8shapesqrtresizeimencoder   IMWRITE_JPEG_QUALITYioBytesIOtostring)

image_fobjr:   r   rO   heightwidth_actual_pixelsfactorbuffr   r   r   r   )  s   

r   c              	   C   s   t d| || tt}t| D ]c\}}tjj	|P}|| dkr+|
||  n|  t|}|D ].\}}	}
}|rF|d |krF n| ||< t|d}t|
tt|d |	}|| | q6W d   n1 sow   Y  qt|S )z.Returns objects listed within given CSV files.0Loading CSVs %s from positions %s with prefix %sr   r   
   N)r   r   collectionsdefaultdictlistr   rd   r   gfileGFileseekreadlinecsvreadertellr   _Objectfloatappendr   )	csv_pathsru   r   rP   r   labels_pathcsv_fr   r   r$   r"   r#   current_objr   r   r   rt   E  s*   


rt   c                 C   s>  t d| || tt}tjj| }|d dkr#|	|d  n|
  t|}|D ]P\}}}}	}
}}}}}}}}|rG|d |krG nB| |d< t|d}~	t||tjt|t|
t|t|t|t|t|t|t|}|| | q.W d   t|S W d   t|S 1 sw   Y  t|S )z3Returns bounded boxes listed within given CSV file.r   r   r   N)r   r   r   r   r   rd   r   r   r   r   r   r   r   r   r   _Bboxr	   rT   BBoxr   r   r   )csv_pathru   r   boxesr   r   r   r$   r"   r#   xminxmaxyminymaxr(   r)   r*   r+   r,   current_rowr   r   r   rw   [  sB   






rw   r>   ))rB   
__future__r   r   r   r   r   rr   r   r[   abslr   numpyr   tensorflow.compat.v2compatv2rd   tensorflow_datasets.public_api
public_apir	   r_   rh   r
   r   r   r   r   
namedtupler   r   rX   rY   r4   BuilderConfigr2   GeneratorBasedBuilderrE   r   rt   rw   r   r   r   r   <module>   s\   6 
