o
    #i                     @   s  d dl Z d dlZd dlZd dlZd dlZd dlmZmZ d dlmZ d dl	m
Z
 d dlmZmZmZmZ d dlZd dlmZ ddlmZmZmZ dd	lmZ eejejeej ejf Zeejejeej f Zd
Ze jeddZ G dd deeZ!G dd de!Z"G dd de!Z#G dd de!Z$G dd de!Z%G dd de!Z&G dd de!Z'G dd de!Z(G dd de!Z)G dd de!Z*G d d! d!e!Z+dS )"    N)ABCabstractmethod)glob)Path)CallablecastOptionalUnion)Image   )	_read_pfmdownload_and_extract_archiveverify_str_arg)VisionDataset )slice_channelsc                       s   e Zd ZdZdZddeeef dee	 ddf fddZ
d	eeef dejfd
dZ	ddedee deeeee f  fddZed	edeeej eej f fddZdedeeef fddZdefddZ  ZS )StereoMatchingDatasetz+Base interface for Stereo matching datasetsFNroot
transformsreturnc                    s$   t  j|d || _g | _g | _dS )a}  
        Args:
            root(str): Root directory of the dataset.
            transforms(callable, optional): A function/transform that takes in Tuples of
                (images, disparities, valid_masks) and returns a transformed version of each of them.
                images is a Tuple of (``PIL.Image``, ``PIL.Image``)
                disparities is a Tuple of (``np.ndarray``, ``np.ndarray``) with shape (1, H, W)
                valid_masks is a Tuple of (``np.ndarray``, ``np.ndarray``) with shape (H, W)
                In some cases, when a dataset does not provide disparities, the ``disparities`` and
                ``valid_masks`` can be Tuples containing None values.
                For training splits generally the datasets provide a minimal guarantee of
                images: (``PIL.Image``, ``PIL.Image``)
                disparities: (``np.ndarray``, ``None``) with shape (1, H, W)
                Optionally, based on the dataset, it can return a ``mask`` as well:
                valid_masks: (``np.ndarray | None``, ``None``) with shape (H, W)
                For some test splits, the datasets provides outputs that look like:
                imgaes: (``PIL.Image``, ``PIL.Image``)
                disparities: (``None``, ``None``)
                Optionally, based on the dataset, it can return a ``mask`` as well:
                valid_masks: (``None``, ``None``)
        r   N)super__init__r   _images_disparities)selfr   r   	__class__r   b/home/ubuntu/veenaModal/venv/lib/python3.10/site-packages/torchvision/datasets/_stereo_matching.pyr      s   
zStereoMatchingDataset.__init__	file_pathc                 C   s"   t |}|jdkr|d}|S )NRGB)r
   openmodeconvert)r   r   imgr   r   r   	_read_img:   s   


zStereoMatchingDataset._read_imgpaths_left_patternpaths_right_patternc              
   C   s   t tt|}|rt tt|}n	t dd |D }|s%td| |s.td| t|t|krKtdt| dt| d| d| d	t d	d t||D }|S )
Nc                 s   s    | ]}d V  qd S Nr   .0_r   r   r   	<genexpr>L       z4StereoMatchingDataset._scan_pairs.<locals>.<genexpr>z0Could not find any files matching the patterns: zFound z left files but z# right files using:
 left pattern: z
right pattern: 
c                 s   s    | ]	\}}||fV  qd S r(   r   )r*   leftrightr   r   r   r,   [   s    )listsortedr   FileNotFoundErrorlen
ValueErrorzip)r   r&   r'   
left_pathsright_pathspathsr   r   r   _scan_pairs@   s$   z!StereoMatchingDataset._scan_pairsc                 C   s   d S r(   r   )r   r   r   r   r   _read_disparity^   s   z%StereoMatchingDataset._read_disparityindexc                 C   s   |  | j| d }|  | j| d }| | j| d \}}| | j| d \}}||f}||f}	||f}
| jdurG| ||	|
\}}	}
| jsP|
d durb|d |d |	d ttj|
d fS |d |d |	d fS )ao  Return example at given index.

        Args:
            index(int): The index of the example to retrieve

        Returns:
            tuple: A 3 or 4-tuple with ``(img_left, img_right, disparity, Optional[valid_mask])`` where ``valid_mask``
                can be a numpy boolean mask of shape (H, W) if the dataset provides a file
                indicating which disparity pixels are valid. The disparity is a numpy array of
                shape (1, H, W) and the images are PIL images. ``disparity`` is None for
                datasets on which for ``split="test"`` the authors did not provide annotations.
        r   r   N)	r%   r   r;   r   r   _has_built_in_disparity_maskr   npndarray)r   r<   img_left	img_rightdsp_map_leftvalid_mask_leftdsp_map_rightvalid_mask_rightimgsdsp_mapsvalid_masksr   r   r   __getitem__c   s    
$z!StereoMatchingDataset.__getitem__c                 C   s
   t | jS r(   )r4   r   )r   r   r   r   __len__   s   
zStereoMatchingDataset.__len__r(   )__name__
__module____qualname____doc__r=   r	   strr   r   r   r   r
   r%   r1   tupler:   r   r>   r?   r;   intT1T2rI   rJ   __classcell__r   r   r   r   r      s"    (	
(#r   c                       sn   e Zd ZdZddeeef dee ddf fddZ	dede
ejdf fd	d
Zdedef fddZ  ZS )CarlaStereoaz  
    Carla simulator data linked in the `CREStereo github repo <https://github.com/megvii-research/CREStereo>`_.

    The dataset is expected to have the following structure: ::

        root
            carla-highres
                trainingF
                    scene1
                        img0.png
                        img1.png
                        disp0GT.pfm
                        disp1GT.pfm
                        calib.txt
                    scene2
                        img0.png
                        img1.png
                        disp0GT.pfm
                        disp1GT.pfm
                        calib.txt
                    ...

    Args:
        root (str or ``pathlib.Path``): Root directory where `carla-highres` is located.
        transforms (callable, optional): A function/transform that takes in a sample and returns a transformed version.
    Nr   r   r   c           	         s   t  || t|d }t|d d d }t|d d d }| ||}|| _t|d d d }t|d d d }| ||}|| _d S )Nzcarla-highres	trainingF*im0.pngim1.pngdisp0GT.pfmzdisp1GT.pfmr   r   r   rO   r:   r   r   )	r   r   r   left_image_patternright_image_patternrF   left_disparity_patternright_disparity_patterndisparitiesr   r   r   r      s   
zCarlaStereo.__init__r   c                 C      t |}t|}d }||fS r(   _read_pfm_filer>   absr   r   disparity_map
valid_maskr   r   r   r;         
zCarlaStereo._read_disparityr<   c                       t tt |S a  Return example at given index.

        Args:
            index(int): The index of the example to retrieve

        Returns:
            tuple: A 3-tuple with ``(img_left, img_right, disparity)``.
            The disparity is a numpy array of shape (1, H, W) and the images are PIL images.
            If a ``valid_mask`` is generated within the ``transforms`` parameter,
            a 4-tuple with ``(img_left, img_right, disparity, valid_mask)`` is returned.
        r   rR   r   rI   r   r<   r   r   r   rI         zCarlaStereo.__getitem__r(   rK   rL   rM   rN   r	   rO   r   r   r   r   rP   r>   r?   r;   rQ   rR   rI   rT   r   r   r   r   rU      s
    (rU   c                	       z   e Zd ZdZdZddeeef dedee	 ddf fd	d
Z
dedeeej df fddZdedef fddZ  ZS )Kitti2012Stereoa
  
    KITTI dataset from the `2012 stereo evaluation benchmark <http://www.cvlibs.net/datasets/kitti/eval_stereo_flow.php>`_.
    Uses the RGB images for consistency with KITTI 2015.

    The dataset is expected to have the following structure: ::

        root
            Kitti2012
                testing
                    colored_0
                        1_10.png
                        2_10.png
                        ...
                    colored_1
                        1_10.png
                        2_10.png
                        ...
                training
                    colored_0
                        1_10.png
                        2_10.png
                        ...
                    colored_1
                        1_10.png
                        2_10.png
                        ...
                    disp_noc
                        1.png
                        2.png
                        ...
                    calib

    Args:
        root (str or ``pathlib.Path``): Root directory where `Kitti2012` is located.
        split (string, optional): The dataset split of scenes, either "train" (default) or "test".
        transforms (callable, optional): A function/transform that takes in a sample and returns a transformed version.
    TtrainNr   splitr   r   c                    s   t  || t|ddd t|d |d  }t|d d }t|d d }| ||| _|d	krDt|d
 d }| |d | _d S tdd | jD | _d S )Nrr   rq   testvalid_values	Kitti2012ing	colored_0z*_10.png	colored_1rq   disp_noc*.pngc                 s       | ]}d V  qdS NNNr   r)   r   r   r   r,     r-   z+Kitti2012Stereo.__init__.<locals>.<genexpr>	r   r   r   r   rO   r:   r   r   r1   )r   r   rr   r   left_img_patternright_img_patterndisparity_patternr   r   r   r      s   zKitti2012Stereo.__init__r   c                 C   B   |d u rdS t t|d }|d d d d d f }d }||fS Nr   g      p@r>   asarrayr
   r!   re   r   r   r   r;        zKitti2012Stereo._read_disparityr<   c                    ri   a  Return example at given index.

        Args:
            index(int): The index of the example to retrieve

        Returns:
            tuple: A 4-tuple with ``(img_left, img_right, disparity, valid_mask)``.
            The disparity is a numpy array of shape (1, H, W) and the images are PIL images.
            ``valid_mask`` is implicitly ``None`` if the ``transforms`` parameter does not
            generate a valid mask.
            Both ``disparity`` and ``valid_mask`` are ``None`` if the dataset split is test.
        rk   rl   r   r   r   rI        zKitti2012Stereo.__getitem__rq   NrK   rL   rM   rN   r=   r	   rO   r   r   r   r   rP   r>   r?   r;   rQ   rR   rI   rT   r   r   r   r   rp      s    &, rp   c                	       ro   )Kitti2015StereoaM  
    KITTI dataset from the `2015 stereo evaluation benchmark <http://www.cvlibs.net/datasets/kitti/eval_scene_flow.php>`_.

    The dataset is expected to have the following structure: ::

        root
            Kitti2015
                testing
                    image_2
                        img1.png
                        img2.png
                        ...
                    image_3
                        img1.png
                        img2.png
                        ...
                training
                    image_2
                        img1.png
                        img2.png
                        ...
                    image_3
                        img1.png
                        img2.png
                        ...
                    disp_occ_0
                        img1.png
                        img2.png
                        ...
                    disp_occ_1
                        img1.png
                        img2.png
                        ...
                    calib

    Args:
        root (str or ``pathlib.Path``): Root directory where `Kitti2015` is located.
        split (string, optional): The dataset split of scenes, either "train" (default) or "test".
        transforms (callable, optional): A function/transform that takes in a sample and returns a transformed version.
    Trq   Nr   rr   r   r   c                    s   t  || t|ddd t|d |d  }t|d d }t|d d }| ||| _|d	krLt|d
 d }t|d d }| ||| _d S tdd | jD | _d S )Nrr   rs   ru   	Kitti2015rx   image_2r|   image_3rq   
disp_occ_0
disp_occ_1c                 s   r}   r~   r   r)   r   r   r   r,   Z  r-   z+Kitti2015Stereo.__init__.<locals>.<genexpr>r   r   r   rr   r   r   r   r^   r_   r   r   r   r   K  s   zKitti2015Stereo.__init__r   c                 C   r   r   r   re   r   r   r   r;   \  r   zKitti2015Stereo._read_disparityr<   c                    ri   r   rk   rl   r   r   r   rI   g  r   zKitti2015Stereo.__getitem__r   r   r   r   r   r   r     s    ), r   c                       s   e Zd ZdZg dg dg ddZdZ					
		ddeeef dede	e de
de	e de
dd
f fddZdeeef dejf fddZdedeed eejejf f fddZdeeef dd
fddZdedef fddZ  ZS ) Middlebury2014StereoaZ	  Publicly available scenes from the Middlebury dataset `2014 version <https://vision.middlebury.edu/stereo/data/scenes2014/>`.

    The dataset mostly follows the original format, without containing the ambient subdirectories.  : ::

        root
            Middlebury2014
                train
                    scene1-{perfect,imperfect}
                        calib.txt
                        im{0,1}.png
                        im1E.png
                        im1L.png
                        disp{0,1}.pfm
                        disp{0,1}-n.png
                        disp{0,1}-sd.pfm
                        disp{0,1}y.pfm
                    scene2-{perfect,imperfect}
                        calib.txt
                        im{0,1}.png
                        im1E.png
                        im1L.png
                        disp{0,1}.pfm
                        disp{0,1}-n.png
                        disp{0,1}-sd.pfm
                        disp{0,1}y.pfm
                    ...
                additional
                    scene1-{perfect,imperfect}
                        calib.txt
                        im{0,1}.png
                        im1E.png
                        im1L.png
                        disp{0,1}.pfm
                        disp{0,1}-n.png
                        disp{0,1}-sd.pfm
                        disp{0,1}y.pfm
                    ...
                test
                    scene1
                        calib.txt
                        im{0,1}.png
                    scene2
                        calib.txt
                        im{0,1}.png
                    ...

    Args:
        root (str or ``pathlib.Path``): Root directory of the Middleburry 2014 Dataset.
        split (string, optional): The dataset split of scenes, either "train" (default), "test", or "additional"
        use_ambient_views (boolean, optional): Whether to use different expose or lightning views when possible.
            The dataset samples with equal probability between ``[im1.png, im1E.png, im1L.png]``.
        calibration (string, optional): Whether or not to use the calibrated (default) or uncalibrated scenes.
        transforms (callable, optional): A function/transform that takes in a sample and returns a transformed version.
        download (boolean, optional): Whether or not to download the dataset in the ``root`` directory.
    )

Adirondack	Jadeplant
MotorcyclePianoPipesPlayroom	PlaytableRecycleShelvesVintage)BackpackBicycle1Cable
Classroom1CouchFlowersMaskShopvacSticksStorageSword1Sword2Umbrella)PlantsClassroom2E
Classroom2	AustraliaDjembeLCrusadePCrusadeHoopsBicycle2	StaircaseNewkuba
AustraliaPDjembe
LivingroomComputer)rq   
additionalrt   Trq   perfectFNr   rr   calibrationuse_ambient_viewsr   downloadr   c                    s  t  || t|ddd || _|r#t|ddd |dkr"tdn|dkr2td| d	| d
|r9| | t|d }tj	|| sOt
d| d| j|  t fddt|| D slt
d| ddgdgdgddgd| }|D ]T}d| }	t|| |	 d }
t|| |	 d }|  j| |
|7  _|dkrtdd | jD | _q|t|| |	 d }t|| |	 d }|  j| ||7  _q||| _d S )Nrr   )rq   rt   r   ru   r   )r   	imperfectbothNrt   zMSplit 'test' has only no calibration settings, please set `calibration=None`.zSplit 'zr' has calibration settings, however None was provided as an argument.
Setting calibration to 'perfect' for split 'zF'. Available calibration settings are: 'perfect', 'imperfect', 'both'.Middlebury2014zThe z7 directory was not found in the provided root directoryc                 3   s$    | ]} D ]}| |V  qqd S r(   )
startswith)r*   scenessplit_scenesr   r   r,     s    z0Middlebury2014Stereo.__init__.<locals>.<genexpr>z:Provided root folder does not contain any scenes from the z split. z-perfectz
-imperfect)Nr   r   r   rW   rX   rY   c                 s   r}   r~   r   r)   r   r   r   r,     r-   z	disp0.pfmz	disp1.pfm)r   r   r   rr   r5   _download_datasetr   ospathexistsr3   splitsanylistdirrO   r   r:   r1   r   r   )r   r   rr   r   r   r   r   calibrartion_suffixescalibration_suffixscene_patternr   r   left_dispartity_patternright_dispartity_patternr   r   r   r     sT   	


zMiddlebury2014Stereo.__init__r   c                    sp   t |ts	t|}|jdkr2| jr2|j t fdddD }ttdd |}|| t	|}t
 |S )a  
        Function that reads either the original right image or an augmented view when ``use_ambient_views`` is True.
        When ``use_ambient_views`` is True, the dataset will return at random one of ``[im1.png, im1E.png, im1L.png]``
        as the right image.
        rY   c                 3   s    | ]} | V  qd S r(   r   )r*   	view_name	base_pathr   r   r,   0  s    z1Middlebury2014Stereo._read_img.<locals>.<genexpr>)zim1E.pngzim1L.pngc                 S   s   t j| S r(   )r   r   r   )pr   r   r   <lambda>2  s    z0Middlebury2014Stereo._read_img.<locals>.<lambda>)
isinstancer   namer   parentr1   filterappendrandomchoicer   r%   )r   r   ambient_file_pathsr   r   r   r%   "  s   


zMiddlebury2014Stereo._read_imgr   c                 C   sB   |d u rdS t |}t|}d||tjk< |dkd}||fS )Nr   r   )rc   r>   rd   infsqueezere   r   r   r   r;   8  s   
z$Middlebury2014Stereo._read_disparityc                    s@  d}t  d  | j}|dkrD| j| D ]-} | }dD ]$}| d| }| d| d}||  s@t|| dt|dd	 qqd S t d  t fd
d| jd D rd}	t|	t dd t	t d D ]%\}
}}|D ]} d }t |
| }tj|dd t
t|t| qtqmt
t d  d S d S )Nz8https://vision.middlebury.edu/stereo/data/scenes2014/zipr   rt   )r   r   -/z.zipT)urlfilenamedownload_rootremove_finishedc                 3   s"    | ]}|t  d  vV  qdS )rt   N)r   r   )r*   r   r   r   r   r,   Y  s     z9Middlebury2014Stereo._download_dataset.<locals>.<genexpr>zEhttps://vision.middlebury.edu/stereo/submit3/zip/MiddEval3-data-F.zip)r   r   r   zMiddEval3/testF)exist_ok	MiddEval3)r   rr   r   r   r   rO   r   makedirsr   walkshutilmovermtree)r   r   base_url
split_namesplit_scene
split_rootr   
scene_name	scene_urltest_set_url	scene_dirscene_namesr+   r   scene_dst_dirscene_src_dirr   r   r   r   C  s@   z&Middlebury2014Stereo._download_datasetr<   c                    ri   )az  Return example at given index.

        Args:
            index(int): The index of the example to retrieve

        Returns:
            tuple: A 4-tuple with ``(img_left, img_right, disparity, valid_mask)``.
            The disparity is a numpy array of shape (1, H, W) and the images are PIL images.
            ``valid_mask`` is implicitly ``None`` for `split=test`.
        r   rS   r   rI   rl   r   r   r   rI   i     z Middlebury2014Stereo.__getitem__)rq   r   FNF)rK   rL   rM   rN   r   r=   r	   rO   r   r   boolr   r   r
   r%   rP   r>   r?   r;   r   rQ   rS   rI   rT   r   r   r   r   r   w  s>    9/
 A*&r   c                       st   e Zd ZdZdZ	ddeeef dee	 ddf fddZ
d	edeejdf fd
dZdedef fddZ  ZS )	CREStereoa  Synthetic dataset used in training the `CREStereo <https://arxiv.org/pdf/2203.11483.pdf>`_ architecture.
    Dataset details on the official paper `repo <https://github.com/megvii-research/CREStereo>`_.

    The dataset is expected to have the following structure: ::

        root
            CREStereo
                tree
                    img1_left.jpg
                    img1_right.jpg
                    img1_left.disp.jpg
                    img1_right.disp.jpg
                    img2_left.jpg
                    img2_right.jpg
                    img2_left.disp.jpg
                    img2_right.disp.jpg
                    ...
                shapenet
                    img1_left.jpg
                    img1_right.jpg
                    img1_left.disp.jpg
                    img1_right.disp.jpg
                    ...
                reflective
                    img1_left.jpg
                    img1_right.jpg
                    img1_left.disp.jpg
                    img1_right.disp.jpg
                    ...
                hole
                    img1_left.jpg
                    img1_right.jpg
                    img1_left.disp.jpg
                    img1_right.disp.jpg
                    ...

    Args:
        root (str): Root directory of the dataset.
        transforms (callable, optional): A function/transform that takes in a sample and returns a transformed version.
    TNr   r   r   c                    s   t  || t|d }g d}|D ]<}t|| d }t|| d }| ||}|  j|7  _t|| d }t|| d }	| ||	}
|  j|
7  _qd S )Nr   )shapenet
reflectivetreeholez
*_left.jpgz*_right.jpgz*_left.disp.pngz*_right.disp.pngr[   )r   r   r   dirsr   r\   r]   rF   r^   r_   r`   r   r   r   r     s   zCREStereo.__init__r   c                 C   <   t jt|t jd}|d d d d d f d }d }||fS )Ndtypeg      @@r>   r   r
   r!   float32re   r   r   r   r;        zCREStereo._read_disparityr<   c                    ri   )a  Return example at given index.

        Args:
            index(int): The index of the example to retrieve

        Returns:
            tuple: A 4-tuple with ``(img_left, img_right, disparity, valid_mask)``.
            The disparity is a numpy array of shape (1, H, W) and the images are PIL images.
            ``valid_mask`` is implicitly ``None`` if the ``transforms`` parameter does not
            generate a valid mask.
        rk   rl   r   r   r   rI     rm   zCREStereo.__getitem__r(   r   r   r   r   r   r   w  s    )
r   c                	       r   e Zd ZdZddeeef dedee ddf fdd	Z	d
ede
ejdf fddZdedef fddZ  ZS )FallingThingsStereoa  `FallingThings <https://research.nvidia.com/publication/2018-06_falling-things-synthetic-dataset-3d-object-detection-and-pose-estimation>`_ dataset.

    The dataset is expected to have the following structure: ::

        root
            FallingThings
                single
                    dir1
                        scene1
                            _object_settings.json
                            _camera_settings.json
                            image1.left.depth.png
                            image1.right.depth.png
                            image1.left.jpg
                            image1.right.jpg
                            image2.left.depth.png
                            image2.right.depth.png
                            image2.left.jpg
                            image2.right
                            ...
                        scene2
                    ...
                mixed
                    scene1
                        _object_settings.json
                        _camera_settings.json
                        image1.left.depth.png
                        image1.right.depth.png
                        image1.left.jpg
                        image1.right.jpg
                        image2.left.depth.png
                        image2.right.depth.png
                        image2.left.jpg
                        image2.right
                        ...
                    scene2
                    ...

    Args:
        root (str or ``pathlib.Path``): Root directory where FallingThings is located.
        variant (string): Which variant to use. Either "single", "mixed", or "both".
        transforms (callable, optional): A function/transform that takes in a sample and returns a transformed version.
    singleNr   variantr   r   c                    s   t  || t|d }t|ddd dgdgddgd| }tdd tdd}|D ]H}t|| ||  d	 }t|| ||  d
 }|  j| ||7  _t|| ||  d }	t|| ||  d }
|  j| |	|
7  _q-d S )NFallingThingsr  )r  mixedr   ru   r  r  rW   )r  r  z
*.left.jpgz*.right.jpgz*.left.depth.pngz*.right.depth.pngr   r   r   r   rO   r   r:   r   )r   r   r  r   variantssplit_prefixr   r   r   r^   r_   r   r   r   r     s(   
zFallingThingsStereo.__init__r   c                 C   s   t t|}t|jd }t|8}t|}|d d d d }d\}}|| | |t j	 }	|	d d d d d f }	d }
|	|
fW  d    S 1 sNw   Y  d S )Nz_camera_settings.jsoncamera_settingsr   intrinsic_settingsfx)   d   )
r>   r   r
   r!   r   r   jsonloadastyper	  )r   r   depthcamera_settings_pathf
intrinsicsfocalbaselinepixel_constantrf   rg   r   r   r   r;     s   

$z#FallingThingsStereo._read_disparityr<   c                    ri   rj   rk   rl   r   r   r   rI   (  rm   zFallingThingsStereo.__getitem__)r  Nrn   r   r   r   r   r    s
    ,,r  c                       s|   e Zd ZdZ			ddeeef dededee d	df
 fd
dZ	ded	e
ejdf fddZded	ef fddZ  ZS )SceneFlowStereoa  Dataset interface for `Scene Flow <https://lmb.informatik.uni-freiburg.de/resources/datasets/SceneFlowDatasets.en.html>`_ datasets.
    This interface provides access to the `FlyingThings3D, `Monkaa` and `Driving` datasets.

    The dataset is expected to have the following structure: ::

        root
            SceneFlow
                Monkaa
                    frames_cleanpass
                        scene1
                            left
                                img1.png
                                img2.png
                            right
                                img1.png
                                img2.png
                        scene2
                            left
                                img1.png
                                img2.png
                            right
                                img1.png
                                img2.png
                    frames_finalpass
                        scene1
                            left
                                img1.png
                                img2.png
                            right
                                img1.png
                                img2.png
                        ...
                        ...
                    disparity
                        scene1
                            left
                                img1.pfm
                                img2.pfm
                            right
                                img1.pfm
                                img2.pfm
                FlyingThings3D
                    ...
                    ...

    Args:
        root (str or ``pathlib.Path``): Root directory where SceneFlow is located.
        variant (string): Which dataset variant to user, "FlyingThings3D" (default), "Monkaa" or "Driving".
        pass_name (string): Which pass to use, "clean" (default), "final" or "both".
        transforms (callable, optional): A function/transform that takes in a sample and returns a transformed version.

    FlyingThings3DcleanNr   r  	pass_namer   r   c                    s(  t  || t|d }t|ddd t|ddd dgdgddgd| }|| }td	td	d	 d	 td	d	 d	 d
}|D ]P}t|| ||  d d }t|| ||  d d }	|  j| ||	7  _t|d ||  d d }
t|d ||  d d }|  j| |
|7  _qAd S )N	SceneFlowr  )r$  DrivingMonkaaru   r&  )r%  finalr   frames_cleanpassframes_finalpassrW   )r)  r$  r(  r/   r|   r0   	disparityz*.pfmr  )r   r   r  r&  r   passesprefix_directoriesr   r\   r]   r^   r_   r   r   r   r   m  s.   zSceneFlowStereo.__init__r   c                 C   ra   r(   rb   re   r   r   r   r;     rh   zSceneFlowStereo._read_disparityr<   c                    ri   rj   rk   rl   r   r   r   rI     rm   zSceneFlowStereo.__getitem__)r$  r%  Nrn   r   r   r   r   r#  7  s$    8
%r#  c                	       s   e Zd ZdZdZddeeef dedee	 ddf fd	d
Z
dedeeef fddZdedeed eejejf f fddZdedef fddZ  ZS )SintelStereoa  Sintel `Stereo Dataset <http://sintel.is.tue.mpg.de/stereo>`_.

    The dataset is expected to have the following structure: ::

        root
            Sintel
                training
                    final_left
                        scene1
                            img1.png
                            img2.png
                            ...
                        ...
                    final_right
                        scene2
                            img1.png
                            img2.png
                            ...
                        ...
                    disparities
                        scene1
                            img1.png
                            img2.png
                            ...
                        ...
                    occlusions
                        scene1
                            img1.png
                            img2.png
                            ...
                        ...
                    outofframe
                        scene1
                            img1.png
                            img2.png
                            ...
                        ...

    Args:
        root (str or ``pathlib.Path``): Root directory where Sintel Stereo is located.
        pass_name (string): The name of the pass to use, either "final", "clean" or "both".
        transforms (callable, optional): A function/transform that takes in a sample and returns a transformed version.
    Tr*  Nr   r&  r   r   c           	         s   t  || t|ddd t|d }dgdgddgd| }|D ]B}t|d | d d	 d
 }t|d | d d	 d
 }|  j| ||7  _t|d d d	 d
 }|  j| |d 7  _q"d S )Nr&  )r*  r%  r   ru   Sintelr*  r%  training_leftrW   r|   _rightr`   )r   r   r   r   rO   r   r:   r   )	r   r   r&  r   
pass_namesr   r   r   r   r   r   r   r     s    zSintelStereo.__init__r   c                 C   s   t |}|j}|j}|jj}t|d |j | }t|d |j | }tj|s2td| dtj|s@td| d||fS )N
occlusions
outofframezOcclusion mask z does not existzOut of frame mask )r   r   r   rO   r   r   r   r3   )r   r   fpathbasenamescenedir	sampledirocclusion_pathoutofframe_pathr   r   r   _get_occlussion_mask_paths  s   z'SintelStereo._get_occlussion_mask_pathsr   c           
      C   s   |d u rdS t jt|t jd}t j|ddd\}}}|d |d  |d  }t |d	}| |\}}t t|d
k}t t|d
k}	t |	|}||fS )Nr   r     )axis   @   i @  )   r   r   r   )	r>   r   r
   r!   r	  rr   	transposer>  logical_and)
r   r   rf   rgbocclued_mask_pathout_of_frame_mask_pathrg   off_maskr   r   r   r;     s   zSintelStereo._read_disparityr<   c                    ri   )a  Return example at given index.

        Args:
            index(int): The index of the example to retrieve

        Returns:
            tuple: A 4-tuple with ``(img_left, img_right, disparity, valid_mask)`` is returned.
            The disparity is a numpy array of shape (1, H, W) and the images are PIL images whilst
            the valid_mask is a numpy array of shape (H, W).
        r   rl   r   r   r   rI     r   zSintelStereo.__getitem__)r*  N)rK   rL   rM   rN   r=   r	   rO   r   r   r   r   rP   r>  r>   r?   r;   rQ   rS   rI   rT   r   r   r   r   r0    s    ,,*r0  c                	       r  )
InStereo2ka  `InStereo2k <https://github.com/YuhuaXu/StereoDataset>`_ dataset.

    The dataset is expected to have the following structure: ::

        root
            InStereo2k
                train
                    scene1
                        left.png
                        right.png
                        left_disp.png
                        right_disp.png
                        ...
                    scene2
                    ...
                test
                    scene1
                        left.png
                        right.png
                        left_disp.png
                        right_disp.png
                        ...
                    scene2
                    ...

    Args:
        root (str or ``pathlib.Path``): Root directory where InStereo2k is located.
        split (string): Either "train" or "test".
        transforms (callable, optional): A function/transform that takes in a sample and returns a transformed version.
    rq   Nr   rr   r   r   c                    s   t  || t|d | }t|ddd t|d d }t|d d }| ||| _t|d d }t|d d	 }| ||| _d S )
NrM  rr   rs   ru   rW   zleft.pngz	right.pngzleft_disp.pngzright_disp.png)r   r   r   r   rO   r:   r   r   r   r   r   r   r   A  s   zInStereo2k.__init__r   c                 C   r  )Nr  g      @r  re   r   r   r   r;   P  r
  zInStereo2k._read_disparityr<   c                    ri   rj   rk   rl   r   r   r   rI   W  rm   zInStereo2k.__getitem__r   rn   r   r   r   r   rM  !  s
    ,rM  c                	       s   e Zd ZdZdZddeeef dedee	 ddf fd	d
Z
dedeed eejejf f fddZdedef fddZ  ZS )ETH3DStereoaf  ETH3D `Low-Res Two-View <https://www.eth3d.net/datasets>`_ dataset.

    The dataset is expected to have the following structure: ::

        root
            ETH3D
                two_view_training
                    scene1
                        im1.png
                        im0.png
                        images.txt
                        cameras.txt
                        calib.txt
                    scene2
                        im1.png
                        im0.png
                        images.txt
                        cameras.txt
                        calib.txt
                    ...
                two_view_training_gt
                    scene1
                        disp0GT.pfm
                        mask0nocc.png
                    scene2
                        disp0GT.pfm
                        mask0nocc.png
                    ...
                two_view_testing
                    scene1
                        im1.png
                        im0.png
                        images.txt
                        cameras.txt
                        calib.txt
                    scene2
                        im1.png
                        im0.png
                        images.txt
                        cameras.txt
                        calib.txt
                    ...

    Args:
        root (str or ``pathlib.Path``): Root directory of the ETH3D Dataset.
        split (string, optional): The dataset split of scenes, either "train" (default) or "test".
        transforms (callable, optional): A function/transform that takes in a sample and returns a transformed version.
    Trq   Nr   rr   r   r   c           	         s   t  || t|ddd t|d }|dkrdnd}d}t|| d	 d
 }t|| d	 d }| ||| _|dkrJtdd | jD | _d S t|| d	 d }| |d | _d S )Nrr   rs   ru   ETH3Drq   two_view_trainingtwo_view_testtwo_view_training_gtrW   rX   rY   rt   c                 s   r}   r~   r   r)   r   r   r   r,     r-   z'ETH3DStereo.__init__.<locals>.<genexpr>rZ   )	r   r   r   r   rO   r:   r   r1   r   )	r   r   rr   r   img_diranot_dirr   r   r   r   r   r   r     s   zETH3DStereo.__init__r   r   c                 C   sN   |d u rdS t |}t|}t|jd }t|}t|t	}||fS )Nr   zmask0nocc.png)
rc   r>   rd   r   r   r
   r!   r   r  r   )r   r   rf   	mask_pathrg   r   r   r   r;     s   

zETH3DStereo._read_disparityr<   c                    ri   r   r   rl   r   r   r   rI     r   zETH3DStereo.__getitem__r   )rK   rL   rM   rN   r=   r	   rO   r   r   r   r   rP   r>   r?   r;   rQ   rS   rI   rT   r   r   r   r   rN  f  s    1,*rN  ),	functoolsr  r   r   r   abcr   r   r   pathlibr   typingr   r   r   r	   numpyr>   PILr
   utilsr   r   r   visionr   rP   r?   rR   rS   __all__partialrc   r   rU   rp   r   r   r   r  r#  r0  rM  rN  r   r   r   r   <module>   s<    q@UX  XhpzE