o
    0i'                     @   s   d dl Z d dlZ d dlmZ d dlmZmZmZmZm	Z	m
Z
mZ d dlmZ ddlmZmZ ddlmZ g dZd	d
dddddZdddddddZG dd deZdS )    N)Path)AnyCallableDictListOptionalTupleUnion)Image   )download_and_extract_archiveverify_str_arg)VisionDataset)kingdomphylumclassorderfamilygenuszRhttps://ml-inat-competition-datasets.s3.amazonaws.com/2017/train_val_images.tar.gzzOhttps://ml-inat-competition-datasets.s3.amazonaws.com/2018/train_val2018.tar.gzzOhttps://ml-inat-competition-datasets.s3.amazonaws.com/2019/train_val2019.tar.gzzGhttps://ml-inat-competition-datasets.s3.amazonaws.com/2021/train.tar.gzzLhttps://ml-inat-competition-datasets.s3.amazonaws.com/2021/train_mini.tar.gzzEhttps://ml-inat-competition-datasets.s3.amazonaws.com/2021/val.tar.gz)201720182019
2021_train2021_train_mini
2021_valid 7c784ea5e424efaec655bd392f87301f b1c6952ce38f31868cc50ea72d066cc3 c60a6e2962c9b8ccbd458d12c8582644 e0526d53c7f7b2e3167b2b43bb2690ed db6ed8330e634445efc8fec83ae81442 f6f6e0e242e3d4c9569ba56400938afcc                       s   e Zd ZdZ					d deeef dedeee ef d	ee	 d
ee	 de
ddf fddZd!ddZd!ddZdedeeef fddZdefddZdededefddZde
fddZd!ddZ  ZS )"INaturalistaV  `iNaturalist <https://github.com/visipedia/inat_comp>`_ Dataset.

    Args:
        root (str or ``pathlib.Path``): Root directory of dataset where the image files are stored.
            This class does not require/use annotation files.
        version (string, optional): Which version of the dataset to download/use. One of
            '2017', '2018', '2019', '2021_train', '2021_train_mini', '2021_valid'.
            Default: `2021_train`.
        target_type (string or list, optional): Type of target to use, for 2021 versions, one of:

            - ``full``: the full category (species)
            - ``kingdom``: e.g. "Animalia"
            - ``phylum``: e.g. "Arthropoda"
            - ``class``: e.g. "Insecta"
            - ``order``: e.g. "Coleoptera"
            - ``family``: e.g. "Cleridae"
            - ``genus``: e.g. "Trichodes"

            for 2017-2019 versions, one of:

            - ``full``: the full (numeric) category
            - ``super``: the super category, e.g. "Amphibians"

            Can also be a list to output a tuple with all specified target types.
            Defaults to ``full``.
        transform (callable, optional): A function/transform that takes in a PIL image
            and returns a transformed version. E.g, ``transforms.RandomCrop``
        target_transform (callable, optional): A function/transform that takes in the
            target and transforms it.
        download (bool, optional): If true, downloads the dataset from the internet and
            puts it in root directory. If dataset is already downloaded, it is not
            downloaded again.
    r   fullNFrootversiontarget_type	transformtarget_transformdownloadreturnc                    s  t |dt | _t jtj||||d tj	|dd |r$| 
  |  s,tdg | _i | _g | _t|ts=|g}| jd d dkrSdd	 |D | _|   nd
d	 |D | _|   g | _t| jD ]\}}ttj| j|}	|	D ]
}
| j||
f qxqgd S )Nr$   )r&   r'   T)exist_okzHDataset not found or corrupted. You can use download=True to download it   2021c                 S   s    g | ]}t |d dgtR qS )r%   r"   )r   CATEGORIES_2021.0t r1   ^/home/ubuntu/SoloSpeech/.venv/lib/python3.10/site-packages/torchvision/datasets/inaturalist.py
<listcomp>b   s     z(INaturalist.__init__.<locals>.<listcomp>c                 S   s   g | ]}t |d dqS )r%   )r"   super)r   r.   r1   r1   r2   r3   e   s    )r   DATASET_URLSkeysr$   r4   __init__ospathjoinmakedirsr(   _check_integrityRuntimeErrorall_categoriescategories_indexcategories_map
isinstancelistr%   
_init_2021_init_pre2021index	enumeratelistdirr#   append)selfr#   r$   r%   r&   r'   r(   	dir_indexdir_namefilesfname	__class__r1   r2   r7   C   s0   	

zINaturalist.__init__c                 C   s   t t| j| _dd tD | _t| jD ]c\}}|d}t	|dkr-t
d| d|d |dkrBt
d	|d  d
|di }tt|dd D ]%\}}|| j| v r`| j| | }nt	| j| }|| j| |< |||< qM| j| qdS )zInitialize based on 2021 layoutc                 S   s   i | ]}|i qS r1   r1   )r/   kr1   r1   r2   
<dictcomp>v   s    z*INaturalist._init_2021.<locals>.<dictcomp>_   zUnexpected category name z, wrong number of piecesr   05dzUnexpected category id z, expecting r      N)sortedr8   rG   r#   r>   r-   r?   rF   splitlenr=   zipr@   rH   )rI   rJ   rK   piecescat_mapcatnamecat_idr1   r1   r2   rC   p   s"   

zINaturalist._init_2021c              
   C   sT  di i| _ d}tt| j}t|D ]\}}|| j d |< tttj| j|}|D ]f}| jdkr;|}|d7 }nzt	|}W n t
yO   td| w |t| jkrvt| j}| ji g|| d   | jdg|| d   | j| rtd| d|i| j|< tj||| j|< q-qt| jD ]\}	}
|
std|	 qd	S )
z$Initialize based on 2017-2019 layoutr4   r   r   r   z!Unexpected non-numeric dir name:  zDuplicate category zMissing category N)r?   rV   r8   rG   r#   rF   r9   r:   r$   int
ValueErrorr=   rX   r@   extendr>   )rI   	cat_indexsuper_categoriessindexscatsubcategoriessubcatsubcat_iold_lencindexcr1   r1   r2   rD      s:   




zINaturalist._init_pre2021rE   c                 C   s   | j | \}}ttj| j| j| |}g }| jD ]}|dkr'|	| q|	| j
| |  qt|dkr<t|n|d }| jdurJ| |}| jdurT| |}||fS )z
        Args:
            index (int): Index

        Returns:
            tuple: (image, target) where the type of target specified by target_type.
        r"   r   r   N)rE   r
   openr8   r9   r:   r#   r>   r%   rH   r@   rX   tupler&   r'   )rI   rE   r^   rM   imgtargetr0   r1   r1   r2   __getitem__   s   	




zINaturalist.__getitem__c                 C   s
   t | jS )N)rX   rE   rI   r1   r1   r2   __len__   s   
zINaturalist.__len__category_typecategory_idc                 C   sh   |dkr	| j | S || jvrtd| d| j|  D ]\}}||kr)|  S qtd| d| )a  
        Args:
            category_type(str): one of "full", "kingdom", "phylum", "class", "order", "family", "genus" or "super"
            category_id(int): an index (class id) from this category

        Returns:
            the name of the category
        r"   zInvalid category type ''zInvalid category id z for )r>   r?   ra   items)rI   rt   ru   r]   idr1   r1   r2   category_name   s   	

zINaturalist.category_namec                 C   s"   t j| jott | jdkS )Nr   )r8   r9   existsr#   rX   rG   rr   r1   r1   r2   r<      s   "zINaturalist._check_integrityc                 C   s   |   rtd| j dtj| j}tt| j || j dt	| j d tj
|tjt| j d}tj|sDtd| t|| j td| j d d S )	NzThe directory z[ already exists. If you want to re-download or re-extract the images, delete the directory.z.tgz)filenamemd5z.tar.gzz#Unable to find downloaded files at zDataset version 'z*' has been downloaded and prepared for use)r<   r=   r#   r8   r9   dirnamer   r5   r$   DATASET_MD5r:   basenamerstriprz   renameprint)rI   	base_rootorig_dir_namer1   r1   r2   r(      s   "zINaturalist.download)r   r"   NNF)r)   N)__name__
__module____qualname____doc__r	   strr   r   r   r   boolr7   rC   rD   r`   r   r   rq   rs   ry   r<   r(   __classcell__r1   r1   rN   r2   r!       s:    %

-
#r!   )r8   os.pathpathlibr   typingr   r   r   r   r   r   r	   PILr
   utilsr   r   visionr   r-   r5   r~   r!   r1   r1   r1   r2   <module>   s.    $

