o
    oi                     @  sh   d dl mZ d dlmZ d dlZd dlmZmZ ddlm	Z	 ddl
mZ ddlmZ G d	d
 d
eZdS )    )annotations)OptionalN)ModuleTensor   )Unet)heatmap_to_keypoints)DISKFeaturesc                      sR   e Zd ZdZd%d& fd
dZd'ddZ				d(d)ddZed*d+d#d$Z  Z	S ),DISKa!  Module which detects and described local features in an image using the DISK method.

    See :cite:`tyszkiewicz2020disk` for details.

    .. image:: _static/img/disk_outdoor_depth.jpg

    Args:
        desc_dim: The dimension of the descriptor.
        unet: The U-Net to use. If None, a default U-Net is used. Kornia doesn't provide the training code for DISK
              so this is only useful when using a custom checkpoint trained using the code released with the paper.
              The unet should take as input a tensor of shape :math:`(B, C, H, W)` and output a tensor of shape
              :math:`(B, \mathrm{desc\_dim} + 1, H, W)`.

    Example:
        >>> disk = DISK.from_pretrained('depth')
        >>> images = torch.rand(1, 3, 256, 256)
        >>> features = disk(images)

       Ndesc_dimintunetNone | ModulereturnNonec              	     sB   t    || _|d u rtddg dddd|d gd}|| _d S )N      )       @   r   r   r   r   )in_featuressizedownup)super__init__r   r   r   )selfr   r   	__class__ L/home/ubuntu/.local/lib/python3.10/site-packages/kornia/feature/disk/disk.pyr   4   s
   
 
zDISK.__init__imagesr   tuple[Tensor, Tensor]c                 C  sp   |  |}|jd | jd krtd|jd  d| j d|ddd| jf }|dd| jdf }||fS )a  Return the heatmap and the dense descriptors.

        .. image:: _static/img/DISK.png

        Args:
            images: The image to detect features in. Shape :math:`(B, 3, H, W)`.

        Returns:
            A tuple of dense detection scores and descriptors.
            Shapes are :math:`(B, 1, H, W)` and :math:`(B, D, H, W)`, where
            :math:`D` is the descriptor dimension.

        r   zU-Net output has z& channels, but expected self.desc_dim=z + 1.N)r   shaper   
ValueError)r   r"   unet_outputdescriptorsheatmapsr    r    r!   heatmap_and_dense_descriptors=   s   
z"DISK.heatmap_and_dense_descriptorsr           FnOptional[int]window_sizescore_thresholdfloatpad_if_not_divisibleboollist[DISKFeatures]c                 C  s   |j d }|r:|j dd \}}|d dkrd|d  nd}	|d dkr*d|d  nd}
tjjj|d|
d|	fdd}| |\}}|rY|dd|d|f }|dd|d|f }t||||d}g }t|D ]}||| 	||  qg|S )	a  Detect features in an image, returning keypoint locations, descriptors and detection scores.

        Args:
            images: The image to detect features in. Shape :math:`(B, 3, H, W)`.
            n: The maximum number of keypoints to detect. If None, all keypoints are returned.
            window_size: The size of the non-maxima suppression window used to filter detections.
            score_threshold: The minimum score a detection must have to be returned.
                             See :py:class:`DISKFeatures` for details.
            pad_if_not_divisible: if True, the non-16 divisible input is zero-padded to the closest 16-multiply

        Returns:
            A list of length :math:`B` containing the detected features.

        r      Nr   r*   )value.)r+   r-   r.   )
r$   torchnn
functionalpadr)   r   rangeappendmerge_with_descriptors)r   r"   r+   r-   r.   r0   Bhwpd_hpd_wr(   r'   	keypointsfeaturesir    r    r!   forwardW   s   
zDISK.forwarddepth
checkpointstrdeviceOptional[torch.device]c                 C  sl   ddd}||vrt d| |du rtd}tjj|| |d}|  |}||d  |  |S )	a~  Load a pretrained model.

        Depth model was trained using depth map supervision and is slightly more precise but biased to detect keypoints
        only where SfM depth is available. Epipolar model was trained using epipolar geometry supervision and
        is less precise but detects keypoints everywhere where they are matchable. The difference is especially
        pronounced on thin structures and on edges of objects.

        Args:
            checkpoint: The checkpoint to load. One of 'depth' or 'epipolar'.
            device: The device to load the model to.

        Returns:
            The pretrained model.

        zGhttps://raw.githubusercontent.com/cvlab-epfl/disk/master/depth-save.pthzJhttps://raw.githubusercontent.com/cvlab-epfl/disk/master/epipolar-save.pth)rE   epipolarzUnknown pretrained model: Ncpu)map_location	extractor)r%   r5   rH   hubload_state_dict_from_urltoload_state_dicteval)clsrF   rH   urlspretrained_dictmodelr    r    r!   from_pretrained   s   
zDISK.from_pretrained)r   N)r   r   r   r   r   r   )r"   r   r   r#   )Nr   r*   F)r"   r   r+   r,   r-   r   r.   r/   r0   r1   r   r2   )rE   N)rF   rG   rH   rI   r   r
   )
__name__
__module____qualname____doc__r   r)   rD   classmethodrW   __classcell__r    r    r   r!   r
      s    
	*r
   )
__future__r   typingr   r5   kornia.corer   r   _unetsr   detectorr   structsr	   r
   r    r    r    r!   <module>   s   