o
    0iL                     @   sl  d dl Z d dlZd dlmZmZ d dlmZ d dlmZ d dlm	Z	m
Z
mZmZmZ d dlZd dlZd dlmZ ddlmZ d	d
lmZmZ d	dlmZ eejejeej eej f Zeejejeej f ZdZG dd deeZG dd deZG dd deZ G dd deZ!G dd deZ"G dd deZ#de$dejfddZ%de$deejejf fddZ&dS )    N)ABCabstractmethod)globPath)CallableListOptionalTupleUnion)Image   )_read_png_16   )	_read_pfmverify_str_arg)VisionDataset)	KittiFlowSintelFlyingThings3DFlyingChairsHD1Kc                       s   e Zd ZdZddeeef dee ddf fddZ	dede
j
fd	d
ZedefddZdedeeef fddZdefddZdedejjjfddZ  ZS )FlowDatasetFNroot
transformsreturnc                    s$   t  j|d || _g | _g | _d S )N)r   )super__init__r   
_flow_list_image_list)selfr   r   	__class__ `/home/ubuntu/SoloSpeech/.venv/lib/python3.10/site-packages/torchvision/datasets/_optical_flow.pyr   #   s   
zFlowDataset.__init__	file_namec                 C   s"   t |}|jdkr|d}|S )NRGB)r   openmodeconvert)r    r%   imgr#   r#   r$   	_read_img+   s   


zFlowDataset._read_imgc                 C   s   d S Nr#   r    r%   r#   r#   r$   
_read_flow1   s   zFlowDataset._read_flowindexc                 C   s   |  | j| d }|  | j| d }| jr*| | j| }| jr'|\}}nd }nd  }}| jd ur?| ||||\}}}}| jsF|d urL||||fS |||fS )Nr   r   )r+   r   r   r.   _has_builtin_flow_maskr   )r    r/   img1img2flowvalid_flow_maskr#   r#   r$   __getitem__6   s   


zFlowDataset.__getitem__c                 C   s
   t | jS r,   )lenr   )r    r#   r#   r$   __len__M   s   
zFlowDataset.__len__vc                 C   s   t jj| g| S r,   )torchutilsdataConcatDataset)r    r8   r#   r#   r$   __rmul__P   s   zFlowDataset.__rmul__r,   )__name__
__module____qualname__r0   r   strr   r	   r   r   r   r+   r   r.   intT1T2r5   r7   r9   r:   r;   r<   r=   __classcell__r#   r#   r!   r$   r      s    ( r   c                       s|   e Zd ZdZ			ddeeef dededee d	df
 fd
dZ	de
d	eeef f fddZded	ejfddZ  ZS )r   a  `Sintel <http://sintel.is.tue.mpg.de/>`_ Dataset for optical flow.

    The dataset is expected to have the following structure: ::

        root
            Sintel
                testing
                    clean
                        scene_1
                        scene_2
                        ...
                    final
                        scene_1
                        scene_2
                        ...
                training
                    clean
                        scene_1
                        scene_2
                        ...
                    final
                        scene_1
                        scene_2
                        ...
                    flow
                        scene_1
                        scene_2
                        ...

    Args:
        root (str or ``pathlib.Path``): Root directory of the Sintel Dataset.
        split (string, optional): The dataset split, either "train" (default) or "test"
        pass_name (string, optional): The pass to use, either "clean" (default), "final", or "both". See link above for
            details on the different passes.
        transforms (callable, optional): A function/transform that takes in
            ``img1, img2, flow, valid_flow_mask`` and returns a transformed version.
            ``valid_flow_mask`` is expected for consistency with other datasets which
            return a built-in valid mask, such as :class:`~torchvision.datasets.KittiFlow`.
    traincleanNr   split	pass_namer   r   c              	      s  t  j||d t|ddd t|ddd |dkrdd	gn|g}t|d
 }|d d }|D ]T}|dkr7dn|}|| | }t|D ]>}	ttt||	 d }
t	t
|
d D ]}|  j|
| |
|d  gg7  _qZ|dkr|  jttt||	 d 7  _qDq/d S )Nr   r   rH   rF   testvalid_valuesrI   rG   finalbothrQ   rG   rP   r   trainingr3   rF   *.pngr   *.flo)r   r   r   r   oslistdirsortedr   rA   ranger6   r   r   )r    r   rH   rI   r   passes	flow_root	split_dir
image_rootscene
image_listir!   r#   r$   r   }   s$   ""zSintel.__init__r/   c                       t  |S a  Return example at given index.

        Args:
            index(int): The index of the example to retrieve

        Returns:
            tuple: A 3-tuple with ``(img1, img2, flow)``.
            The flow is a numpy array of shape (2, H, W) and the images are PIL images.
            ``flow`` is None if ``split="test"``.
            If a valid flow mask is generated within the ``transforms`` parameter,
            a 4-tuple with ``(img1, img2, flow, valid_flow_mask)`` is returned.
        r   r5   r    r/   r!   r#   r$   r5         zSintel.__getitem__r%   c                 C      t |S r,   	_read_flor-   r#   r#   r$   r.         zSintel._read_flow)rF   rG   Nr>   r?   r@   __doc__r   rA   r   r	   r   r   rB   rC   rD   r5   npndarrayr.   rE   r#   r#   r!   r$   r   T   s$    +
r   c                	       s   e Zd ZdZdZddeeef dedee	 ddf fd	d
Z
dedeeef f fddZdedeejejf fddZ  ZS )r   a  `KITTI <http://www.cvlibs.net/datasets/kitti/eval_scene_flow.php?benchmark=flow>`__ dataset for optical flow (2015).

    The dataset is expected to have the following structure: ::

        root
            KittiFlow
                testing
                    image_2
                training
                    image_2
                    flow_occ

    Args:
        root (str or ``pathlib.Path``): Root directory of the KittiFlow Dataset.
        split (string, optional): The dataset split, either "train" (default) or "test"
        transforms (callable, optional): A function/transform that takes in
            ``img1, img2, flow, valid_flow_mask`` and returns a transformed version.
    TrF   Nr   rH   r   r   c                    s   t  j||d t|ddd t|d |d  }ttt|d d }ttt|d d	 }|r5|s9td
t||D ]\}}|  j	||gg7  _	q>|dkr`ttt|d d | _
d S d S )NrJ   rH   rK   rM   r   ingimage_2z*_10.pngz*_11.pngzZCould not find the Kitti flow images. Please make sure the directory structure is correct.rF   flow_occ)r   r   r   r   rW   r   rA   FileNotFoundErrorzipr   r   )r    r   rH   r   images1images2r1   r2   r!   r#   r$   r      s   zKittiFlow.__init__r/   c                    r`   )a  Return example at given index.

        Args:
            index(int): The index of the example to retrieve

        Returns:
            tuple: A 4-tuple with ``(img1, img2, flow, valid_flow_mask)``
            where ``valid_flow_mask`` is a numpy boolean mask of shape (H, W)
            indicating which flow values are valid. The flow is a numpy array of
            shape (2, H, W) and the images are PIL images. ``flow`` and ``valid_flow_mask`` are None if
            ``split="test"``.
        rb   rc   r!   r#   r$   r5      rd   zKittiFlow.__getitem__r%   c                 C   re   r,   )_read_16bits_png_with_flow_and_valid_maskr-   r#   r#   r$   r.      rh   zKittiFlow._read_flowrF   N)r>   r?   r@   rj   r0   r   rA   r   r	   r   r   rB   rC   rD   r5   r
   rk   rl   r.   rE   r#   r#   r!   r$   r      s    ,&r   c                	       sr   e Zd ZdZddeeef dedee ddf fdd	Z	d
e
deeef f fddZdedejfddZ  ZS )r   a  `FlyingChairs <https://lmb.informatik.uni-freiburg.de/resources/datasets/FlyingChairs.en.html#flyingchairs>`_ Dataset for optical flow.

    You will also need to download the FlyingChairs_train_val.txt file from the dataset page.

    The dataset is expected to have the following structure: ::

        root
            FlyingChairs
                data
                    00001_flow.flo
                    00001_img1.ppm
                    00001_img2.ppm
                    ...
                FlyingChairs_train_val.txt


    Args:
        root (str or ``pathlib.Path``): Root directory of the FlyingChairs Dataset.
        split (string, optional): The dataset split, either "train" (default) or "val"
        transforms (callable, optional): A function/transform that takes in
            ``img1, img2, flow, valid_flow_mask`` and returns a transformed version.
            ``valid_flow_mask`` is expected for consistency with other datasets which
            return a built-in valid mask, such as :class:`~torchvision.datasets.KittiFlow`.
    rF   Nr   rH   r   r   c           
         s  t  j||d t|ddd t|d }ttt|d d }ttt|d d }d	}tj	|| s;t
d
tjt|| tjd}tt|D ]4}|| }	|dkr[|	dksc|dkr|	dkr|  j|| g7  _|  j|d|  |d| d  gg7  _qMd S )NrJ   rH   )rF   valrM   r   r;   z*.ppmrT   zFlyingChairs_train_val.txtzmThe FlyingChairs_train_val.txt file was not found - please download it from the dataset page (see docstring).)dtyperF   r   rw   r   )r   r   r   r   rW   r   rA   rU   pathexistsrp   rk   loadtxtint32rX   r6   r   r   )
r    r   rH   r   imagesflowssplit_file_name
split_listr_   split_idr!   r#   r$   r     s$    (zFlyingChairs.__init__r/   c                    r`   )a  Return example at given index.

        Args:
            index(int): The index of the example to retrieve

        Returns:
            tuple: A 3-tuple with ``(img1, img2, flow)``.
            The flow is a numpy array of shape (2, H, W) and the images are PIL images.
            ``flow`` is None if ``split="val"``.
            If a valid flow mask is generated within the ``transforms`` parameter,
            a 4-tuple with ``(img1, img2, flow, valid_flow_mask)`` is returned.
        rb   rc   r!   r#   r$   r5     rd   zFlyingChairs.__getitem__r%   c                 C   re   r,   rf   r-   r#   r#   r$   r.   (  rh   zFlyingChairs._read_flowrv   ri   r#   r#   r!   r$   r      s
    ,r   c                       s   e Zd ZdZ				ddeeef deded	ed
ee ddf fddZ	de
deeef f fddZdedejfddZ  ZS )r   a  `FlyingThings3D <https://lmb.informatik.uni-freiburg.de/resources/datasets/SceneFlowDatasets.en.html>`_ dataset for optical flow.

    The dataset is expected to have the following structure: ::

        root
            FlyingThings3D
                frames_cleanpass
                    TEST
                    TRAIN
                frames_finalpass
                    TEST
                    TRAIN
                optical_flow
                    TEST
                    TRAIN

    Args:
        root (str or ``pathlib.Path``): Root directory of the intel FlyingThings3D Dataset.
        split (string, optional): The dataset split, either "train" (default) or "test"
        pass_name (string, optional): The pass to use, either "clean" (default) or "final" or "both". See link above for
            details on the different passes.
        camera (string, optional): Which camera to return images from. Can be either "left" (default) or "right" or "both".
        transforms (callable, optional): A function/transform that takes in
            ``img1, img2, flow, valid_flow_mask`` and returns a transformed version.
            ``valid_flow_mask`` is expected for consistency with other datasets which
            return a built-in valid mask, such as :class:`~torchvision.datasets.KittiFlow`.
    rF   rG   leftNr   rH   rI   camerar   r   c              	      s  t  j||d t|ddd | }t|ddd dgdgddgd| }t d	d
d  dkr5ddgn g}t|d }d}t|||D ]\} ttt	|| | d }	t fdd|	D }	ttt	|d | d }
t fdd|
D }
|	r|
st
dt|	|
D ]b\}}ttt	|d }ttt	|d }tt|d D ]A}dkr|  j|| ||d  gg7  _|  j|| g7  _qdkr|  j||d  || gg7  _|  j||d  g7  _qqqGd S )NrJ   rH   rK   rM   rI   rO   frames_cleanpassframes_finalpassr   )r   rightrQ   rQ   r   r   r   )into_future	into_pastz*/*c                 3   s    | ]	}t |  V  qd S r,   r   ).0	image_dir)r   r#   r$   	<genexpr>e  s    z*FlyingThings3D.__init__.<locals>.<genexpr>optical_flowc                 3   s     | ]}t |   V  qd S r,   r   )r   flow_dirr   	directionr#   r$   r   h  s    zcCould not find the FlyingThings3D flow images. Please make sure the directory structure is correct.rS   z*.pfmr   r   r   )r   r   r   upperr   	itertoolsproductrW   r   rA   rp   rq   rX   r6   r   r   )r    r   rH   rI   r   r   rY   cameras
directions
image_dirs	flow_dirsr   r   r}   r~   r_   r!   r   r$   r   I  sJ     zFlyingThings3D.__init__r/   c                    r`   ra   rb   rc   r!   r#   r$   r5   {  rd   zFlyingThings3D.__getitem__r%   c                 C   re   r,   )r   r-   r#   r#   r$   r.     rh   zFlyingThings3D._read_flow)rF   rG   r   Nri   r#   r#   r!   r$   r   ,  s*    
2r   c                	       s   e Zd ZdZdZddeeef dedee	 ddf fd	d
Z
dedeejejf fddZdedeeef f fddZ  ZS )r   a  `HD1K <http://hci-benchmark.iwr.uni-heidelberg.de/>`__ dataset for optical flow.

    The dataset is expected to have the following structure: ::

        root
            hd1k
                hd1k_challenge
                    image_2
                hd1k_flow_gt
                    flow_occ
                hd1k_input
                    image_2

    Args:
        root (str or ``pathlib.Path``): Root directory of the HD1K Dataset.
        split (string, optional): The dataset split, either "train" (default) or "test"
        transforms (callable, optional): A function/transform that takes in
            ``img1, img2, flow, valid_flow_mask`` and returns a transformed version.
    TrF   Nr   rH   r   r   c                    sF  t  j||d t|ddd t|d }|dkrjtdD ]K}ttt|d d	 |d
d }ttt|d d |d
d }tt|d D ]}|  j	|| g7  _	|  j
|| ||d  gg7  _
qKqn0ttt|d d d }ttt|d d d }	t||	D ]\}
}|  j
|
|gg7  _
q| j
stdd S )NrJ   rH   rK   rM   hd1krF   $   hd1k_flow_gtro   06dz_*.png
hd1k_inputrn   r   hd1k_challengez*10.pngz*11.pngzTCould not find the HD1K images. Please make sure the directory structure is correct.)r   r   r   r   rX   rW   r   rA   r6   r   r   rq   rp   )r    r   rH   r   seq_idxr~   r}   r_   rr   rs   image1image2r!   r#   r$   r     s*   $$"zHD1K.__init__r%   c                 C   re   r,   rt   r-   r#   r#   r$   r.     rh   zHD1K._read_flowr/   c                    r`   )a  Return example at given index.

        Args:
            index(int): The index of the example to retrieve

        Returns:
            tuple: A 4-tuple with ``(img1, img2, flow, valid_flow_mask)`` where ``valid_flow_mask``
            is a numpy boolean mask of shape (H, W)
            indicating which flow values are valid. The flow is a numpy array of
            shape (2, H, W) and the images are PIL images. ``flow`` and ``valid_flow_mask`` are None if
            ``split="test"``.
        rb   rc   r!   r#   r$   r5     rd   zHD1K.__getitem__rv   )r>   r?   r@   rj   r0   r   rA   r   r	   r   r   r
   rk   rl   r.   rB   rC   rD   r5   rE   r#   r#   r!   r$   r     s    ,&r   r%   r   c                 C   s   t | dF}tj|ddd }|dkrtdttj|ddd}ttj|ddd}tj|d	d
| | d}|||d
d
ddW  d   S 1 sNw   Y  dS )z#Read .flo file in Middlebury formatrbc   )counts   PIEHz)Magic number incorrect. Invalid .flo filez<i4r   z<f4r   r   N)r'   rk   fromfiletobytes
ValueErrorrB   reshape	transpose)r%   fmagicwhr;   r#   r#   r$   rg     s   $rg   c                 C   sf   t | tj}|d dd d d d f |dd d d d f }}|d d }| }| | fS )Nr   i   @   )r   tor9   float32boolnumpy)r%   flow_and_validr3   r4   r#   r#   r$   ru     s
   2ru   )'r   rU   abcr   r   r   pathlibr   typingr   r   r	   r
   r   r   rk   r9   PILr   io.imager   r:   r   r   visionr   rl   rC   rD   __all__r   r   r   r   r   r   rA   rg   ru   r#   r#   r#   r$   <module>   s.     	7W=DbC"