o
    }oi.                     @   s~  d dl Z d dlZ d dlmZmZmZmZmZmZm	Z	 d dl
Zd dlmZ d dlmZ z
d dlmZ dZW n eefyA   dZY nw ded	eed
f defddZdedefddZ		d#dedeeef ded	eeed
f  deeegef  deeeef  fddZerG dd deZnG dd dZdZdedejfddZdedefddZdedefdd Z G d!d" d"eZ!dS )$    N)AnyCallableDictListOptionalTuplecast)Image)logging)VisionDatasetTFfilename
extensions.returnc                 C   s   |   |S )zChecks if a file is an allowed extension.
    Args:
        filename (string): path to a file
        extensions (tuple of strings): extensions to consider (lowercase)
    Returns:
        bool: True if the filename ends with one of given extensions
    )lowerendswith)r   r    r   f/home/ubuntu/.local/lib/python3.10/site-packages/nemo/collections/vision/data/megatron/image_folder.pyhas_file_allowed_extension!   s   r   c                 C   s
   t | tS )zChecks if a file is an allowed image extension.
    Args:
        filename (string): path to a file
    Returns:
        bool: True if the filename ends with a known image extension
    )r   IMG_EXTENSIONS)r   r   r   r   is_image_file,   s   
r   	directoryclass_to_idxdata_per_class_fractionis_valid_filec                    s"  g }t j| }  du o|du } duo|du}|s|r td dur/dtdtf fdd}tttgtf |}t|	 D ]O}|| }	t j
| |}
t j|
sSq?g }tt j|
ddD ] \}}}t|D ]}t j
||}||r}||	f}|| qgq^||d	tt||   q?|S )
a   Generates a list of samples of a form (path_to_sample, class).
    Args:
        directory (str): root dataset directory
        class_to_idx (Dict[str, int]): dictionary mapping class name to class index
        extensions (optional): A list of allowed extensions.
            Either extensions or is_valid_file should be passed. Defaults to None.
        is_valid_file (optional): A function that takes path of a file
            and checks if the file is a valid file
            (used to check of corrupt files) both extensions and
            is_valid_file should not be passed. Defaults to None.
    Raises:
        ValueError: In case ``extensions`` and ``is_valid_file`` are None or both are not None.
    Returns:
        List[Tuple[str, int]]: samples of a form (path_to_sample, class)
    NzMBoth extensions and is_valid_file cannot be None or not None at the same timexr   c                    s   t | tttdf  S )N.)r   r   r   str)r   r   r   r   r   T   s   z#make_dataset.<locals>.is_valid_fileT)followlinksr   )ospath
expanduser
ValueErrorr   boolr   r   sortedkeysjoinisdirwalkappendextendintlen)r   r   r   r   r   	instances	both_noneboth_somethingtarget_classclass_index
target_dirlocal_instancesroot_fnamesfnamer   itemr   r   r   make_dataset6   s2   
 r8   c                       s   e Zd ZdZ						ddedeegef deeedf  dee d	ee d
eeege	f  ddf fddZ
e		ddedeeef dedeeedf  d
eeege	f  deeeef  fddZdedeee eeef f fddZdedeeef fddZdefddZ  ZS )DatasetFoldera  A generic data loader where the samples are arranged in this way: ::
            root/class_x/xxx.ext
            root/class_x/xxy.ext
            root/class_x/[...]/xxz.ext
            root/class_y/123.ext
            root/class_y/nsdf3.ext
            root/class_y/[...]/asd932_.ext
        Args:
            root (string): Root directory path.
            loader (callable): A function to load a sample given its path.
            extensions (tuple[string]): A list of allowed extensions.
                both extensions and is_valid_file should not be passed.
            transform (callable, optional): A function/transform that takes in
                a sample and returns a transformed version.
                E.g, ``transforms.RandomCrop`` for images.
            target_transform (callable, optional): A function/transform that takes
                in the target and transforms it.
            is_valid_file (callable, optional): A function that takes path of a file
                and check if the file is a valid file (used to check of corrupt files)
                both extensions and is_valid_file should not be passed.
         Attributes:
            classes (list): List of the class names sorted alphabetically.
            class_to_idx (dict): Dict with items (class_name, class_index).
            samples (list): List of (sample path, class_index) tuples
            targets (list): The class_index value for each image in the dataset
        N      ?r3   loaderr   .	transformtarget_transformr   r   c	                    s   t t| j|||d || _|| _| | j\}	}
| | j|
| j||}t|dkrBd	| j}|d ur>|d	d
|7 }t||| _|| _t|| _|	| _|
| _|| _dd |D | _d S )N)r<   r=   r   z#Found 0 files in subfolders of: {}
zSupported extensions are: {},c                 S   s   g | ]}|d  qS )   r   ).0sr   r   r   
<listcomp>   s    z*DatasetFolder.__init__.<locals>.<listcomp>)superr9   __init__classes_fractionr   _find_classesr3   r8   r+   formatr%   RuntimeErrorr;   r   totalclassesr   samplestargets)selfr3   r;   r   r<   r=   rE   r   r   rJ   r   rK   msg	__class__r   r   rD      s&   
DatasetFolder.__init__r   r   r   c                 C   s   t | ||||dS )N)r   r   )r8   )r   r   r   r   r   r   r   r   r8      s   
zDatasetFolder.make_datasetdirc                 C   sP   dd t |D }|dtt|| j  }|  dd t|D }||fS )a[  
            Finds the class folders in a dataset.
            Args:
                dir (string): Root directory path.
            Returns:
                tuple: (classes, class_to_idx) where classes are relative to (dir), and class_to_idx is a dictionary.
            Ensures:
                No class is a subdirectory of another.
            c                 S   s   g | ]	}|  r|jqS r   )is_dirname)r@   dr   r   r   rB      s    z/DatasetFolder._find_classes.<locals>.<listcomp>r   c                 S   s   i | ]\}}||qS r   r   )r@   icls_namer   r   r   
<dictcomp>   s    z/DatasetFolder._find_classes.<locals>.<dictcomp>)r   scandirr*   r+   rE   sort	enumerate)rM   rR   all_classesrJ   r   r   r   r   rF      s
   
zDatasetFolder._find_classesindexc                 C   s   |}t | jD ]+}z| j| \}}| |}W  n ty2 } ztjd| j}W Y d}~qd}~ww | jdur=| |}| j	durG| 	|}||fS )z
            Args:
                index (int): Index
            Returns:
                tuple: (sample, target) where target is class_index of the target class.
            r   N)
rangerI   rK   r;   	Exceptionnprandomrandintr<   r=   )rM   r]   
curr_indexr   r   targetsampleer   r   r   __getitem__   s   




zDatasetFolder.__getitem__c                 C   s
   t | jS )N)r+   rK   rM   r   r   r   __len__   s   
zDatasetFolder.__len__)NNNr:   r:   NNN)__name__
__module____qualname____doc__r   r   r   r   r   r"   rD   staticmethodr   r*   floatr   r8   rF   rg   ri   __classcell__r   r   rO   r   r9   l   sT    	
 
&r9   c                       s   e Zd Z fddZ  ZS )r9   c                    s   t    td d S )Nz#Torchvision not found but required.)rC   rD   r
   errorrh   rO   r   r   rD      s   
rQ   )rk   rl   rm   rD   rq   r   r   rO   r   r9      s    )	z.jpgz.jpegz.pngz.ppmz.bmpz.pgmz.tifz.tiffz.webpr   c                 C   sB   t | d}t |}|dW  d    S 1 sw   Y  d S )NrbRGB)openr	   convert)r   fimgr   r   r   
pil_loader   s   
$ry   c                 C   s0   dd l }z|| W S  ty   t|  Y S w )Nr   )accimager	   IOErrorry   )r   rz   r   r   r   accimage_loader   s   r|   c                 C   s&   ddl m} | dkrt| S t| S )Nr   )get_image_backendrz   )torchvisionr}   r|   ry   )r   r}   r   r   r   default_loader   s   
r   c                       sd   e Zd ZdZddddedfdedee dee deegef deeege	f  f
 fd	d
Z
  ZS )ImageFoldera#  A generic data loader where the images are arranged in this way: ::
        root/dog/xxx.png
        root/dog/xxy.png
        root/dog/[...]/xxz.png
        root/cat/123.png
        root/cat/nsdf3.png
        root/cat/[...]/asd932_.png
    Args:
        root (string): Root directory path.
        transform (callable, optional): A function/transform that  takes in an PIL image
            and returns a transformed version. E.g, ``transforms.RandomCrop``
        target_transform (callable, optional): A function/transform that takes in the
            target and transforms it.
        loader (callable, optional): A function to load an image given its path.
        is_valid_file (callable, optional): A function that takes path of an Image file
            and check if the file is a valid file (used to check of corrupt files)
     Attributes:
        classes (list): List of the class names sorted alphabetically.
        class_to_idx (dict): Dict with items (class_name, class_index).
        imgs (list): List of (image path, class_index) tuples
    Nr:   r3   r<   r=   r;   r   c              
      s8   t t| j|||d u rtnd |||||d | j| _d S )N)r<   r=   rE   r   r   )rC   r   rD   r   rK   imgs)rM   r3   r<   r=   rE   r   r;   r   rO   r   r   rD     s   


zImageFolder.__init__)rk   rl   rm   rn   r   r   r   r   r   r"   rD   rq   r   r   rO   r   r     s&    r   rj   )"r   os.pathtypingr   r   r   r   r   r   r   numpyr`   PILr	   
nemo.utilsr
   torchvision.datasetsr   TORCHVISION_AVAILABLEImportErrorModuleNotFoundErrorr   r"   r   r   r*   rp   r8   r9   r   ry   r|   r   r   r   r   r   r   <module>   sJ   $

4u
	