o
    Ni                     @   sX   d Z ddlmZ ddlmZ ddlmZ ddlmZ dZdZ	dZ
G d	d
 d
ejjZdS )zPatchCamelyon images dataset.    )absolute_import)division)print_functionNa  The PatchCamelyon benchmark is a new and challenging image classification
dataset. It consists of 327.680 color images (96 x 96px) extracted from
histopathologic scans of lymph node sections. Each image is annoted with a
binary label indicating presence of metastatic tissue. PCam provides a new
benchmark for machine learning models: bigger than CIFAR10, smaller than
Imagenet, trainable on a single GPU.
ap  @misc{b_s_veeling_j_linmans_j_winkens_t_cohen_2018_2546921,
  author       = {B. S. Veeling, J. Linmans, J. Winkens, T. Cohen, M. Welling},
  title        = {Rotation Equivariant CNNs for Digital Pathology},
  month        = sep,
  year         = 2018,
  doi          = {10.1007/978-3-030-00934-2_24},
  url          = {https://doi.org/10.1007/978-3-030-00934-2_24}
}
z*https://patchcamelyon.grand-challenge.org/c                   @   s6   e Zd ZdZejddZdd Zdd Z	dd	 Z
d
S )PatchCamelyonzPatchCamelyon.z2.0.0z6New split API (https://tensorflow.org/datasets/splits)c              
   C   sD   t jj| tt jt j t jjdddt jjddddt	t
dS )	N)`   r      png)shapeencoding_format   )num_classesidimagelabel)r   r   )builderdescriptionfeaturessupervised_keyshomepagecitation)tfdscoreDatasetInfo_DESCRIPTIONr   FeaturesDictTextImage
ClassLabel_URL	_CITATION)self r"   k/home/ubuntu/.local/lib/python3.10/site-packages/tensorflow_datasets/image_classification/patch_camelyon.py_info4   s   zPatchCamelyon._infoc                 C   s   d}|d |d |d |d |d |d d}| |}tjjtjjtd	|d
dtjjtjjtd|d
dtjjtjjtd|d
dgS )Nz(https://zenodo.org/record/2546921/files/z(camelyonpatch_level_2_split_test_x.h5.gzz(camelyonpatch_level_2_split_test_y.h5.gzz)camelyonpatch_level_2_split_train_x.h5.gzz)camelyonpatch_level_2_split_train_y.h5.gzz)camelyonpatch_level_2_split_valid_x.h5.gzz)camelyonpatch_level_2_split_valid_y.h5.gz)test_xtest_ytrain_xtrain_yvalid_xvalid_ytest)splitpaths)name
gen_kwargstrainvalid)	download_and_extractr   r   SplitGeneratorSplitTESTdictTRAIN
VALIDATION)r!   
dl_managerbase_url	resourcesr-   r"   r"   r#   _split_generatorsD   s,   



zPatchCamelyon._split_generatorsc              	   c   s    t jjj}||d  }||d  }||dT}||d5}|d }|d }	tt||	D ]\}
\}}| d }d||
f }|||d}||fV  q/W d	   n1 sWw   Y  W d	   d	S W d	   d	S 1 sow   Y  d	S )
a,  Generates images and labels given the image directory path.

    Args:
      split: name of the split to generate examples for (test, train, valid).
      paths: dictionary with the paths to the h5 files for each split.

    Yields:
      A dictionary with the image and the corresponding label.
    _x_yrxyr   z%s_%dr   N)r   r   lazy_importsh5pyFile	enumeratezipflatten)r!   r,   r-   rC   
filepath_x
filepath_yf_xf_yimageslabelsir   r   id_recordr"   r"   r#   _generate_examples[   s   

Pz PatchCamelyon._generate_examplesN)__name__
__module____qualname____doc__r   r   VersionVERSIONr$   r<   rQ   r"   r"   r"   r#   r   .   s    r   )rU   
__future__r   r   r   tensorflow_datasets.public_api
public_apir   r   r    r   r   GeneratorBasedBuilderr   r"   r"   r"   r#   <module>   s   
