o
    oi,                     @   s   U d dl mZmZmZmZ d dlZd dlm  mZ	 d dl
mZmZ d dlmZ d dlmZ d dlmZ ddlmZmZmZmZ dd	lmZmZ d
ddddddddddddZeeeeef f ed< G dd deZdS )    )DictLiteralOptionalTupleN)ModuleTensor)KORNIA_CHECK_SHAPE)	Normalize)map_location_to_cpu   )DeDoDeDescriptorDeDoDeDetectorget_descriptorget_detector)$dedode_denormalize_pixel_coordinatessample_keypointszchttps://github.com/Parskatt/DeDoDe/releases/download/dedode_pretrained_models/dedode_detector_L.pthz`https://github.com/georg-bn/rotation-steerers/releases/download/release-2/dedode_detector_C4.pthzahttps://github.com/georg-bn/rotation-steerers/releases/download/release-2/dedode_detector_SO2.pthzPhttps://github.com/Parskatt/DeDoDe/releases/download/v2/dedode_detector_L_v2.pth)z	L-uprightzL-C4zL-SO2L-C4-v2zehttps://github.com/Parskatt/DeDoDe/releases/download/dedode_pretrained_models/dedode_descriptor_B.pthzlhttps://github.com/georg-bn/rotation-steerers/releases/download/release-2/B_C4_Perm_descriptor_setting_C.pthzohttps://github.com/georg-bn/rotation-steerers/releases/download/release-2/B_SO2_Spread_descriptor_setting_C.pthzehttps://github.com/Parskatt/DeDoDe/releases/download/dedode_pretrained_models/dedode_descriptor_G.pthzlhttps://github.com/georg-bn/rotation-steerers/releases/download/release-2/G_C4_Perm_descriptor_setting_C.pthzohttps://github.com/georg-bn/rotation-steerers/releases/download/release-2/G_SO2_Spread_descriptor_setting_C.pth)z	B-uprightzB-C4zB-SO2	G-uprightzG-C4zG-SO2)detector
descriptorurlsc                       s2  e Zd ZdZddejfded ded dejdd	f fd
dZ			d!de	de
e dededee	e	e	f f
ddZe 							d"de	de
e dedede
e de
e dee	e	f fddZe 							d#de	de
e	 dede
e de
e de	fddZeddejfdededejdefdd Z  ZS )$DeDoDeaG  Module which detects and/or describes local features in an image using the DeDode method.

    See :cite:`edstedt2024dedode` for details.

    .. note:: DeDode takes ImageNet normalized images as input (not in range [0, 1]).

    Args:
        detector_model: The detector model kind. Available options are: `L`.
        descriptor_model: The descriptor model kind. Available options are: `G` or `B`
        amp_dtype: The automatic mixed precision desired.

    Example:
        >>> dedode = DeDoDe.from_pretrained(detector_weights="L-C4-v2", descriptor_weights="B-upright")
        >>> images = torch.randn(1, 3, 256, 256)
        >>> keypoints, scores = dedode.detect(images)
        >>> descriptions = dedode.describe(images, keypoints = keypoints)
        >>> keypoints, scores, features = dedode(images) # alternatively do both

    LGdetector_modeldescriptor_model)r   B	amp_dtypereturnNc                    sH   t    t||| _t||| _ttg dtg dd| _	d S )N)g
ףp=
?gv/?gCl?)gZd;O?gy&1?g?)std)
super__init__r   r   r   r   r	   torchtensor
normalizer)selfr   r   r   	__class__ P/home/ubuntu/.local/lib/python3.10/site-packages/kornia/feature/dedode/dedode.pyr!   G   s   
&zDeDoDe.__init__'  Timagesnapply_imagenet_normalizationpad_if_not_divisiblec                 C   s   |r|  |}|j\}}}}|jdd \}	}
|rC|	d dkr%d|	d  nd}|
d dkr3d|
d  nd}tjjj|d|d|fdd}| j||d|	|
d\}}| j||d|	|
d	}t|||||fS )
aO  Detect and describe keypoints in the input images.

        Args:
            images: A tensor of shape :math:`(B, 3, H, W)` containing the ImageNet-Normalized input images.
            n: The number of keypoints to detect.
            apply_imagenet_normalization: Whether to apply ImageNet normalization to the input images.
            pad_if_not_divisible: pad image shape if not evenly divisible.

        Returns:
            keypoints: A tensor of shape :math:`(B, N, 2)` containing the detected keypoints in the image range,
            unlike `.detect()` function
            scores: A tensor of shape :math:`(B, N)` containing the scores of the detected keypoints.
            descriptions: A tensor of shape :math:`(B, N, DIM)` containing the descriptions of the detected keypoints.
            DIM is 256 for B and 512 for G.

           N   r           valueF)r,   r-   crop_hcrop_w)r-   r4   r5   )	r$   shaper"   nn
functionalpaddetectdescriber   )r%   r+   r,   r-   r.   _B_CHWhwpd_hpd_w	keypointsscoresdescriptionsr(   r(   r)   forwardR   s   
zDeDoDe.forwardr4   r5   c                 C   s*  t |g d | d |j\}}}	}
|rH|jdd \}}|d dkr*d|d  nd}|d dkr8d|d  nd}tjjj|d|d|fdd}|rO| |}| j	|}|d	d|	d|
f }|durx|durx|d	d|d|f }||}	}
|
||	|
 jd
d
||	|
}t||d\}}||fS )aP  Detect keypoints in the input images.

        Args:
            images: A tensor of shape :math:`(B, 3, H, W)` containing the input images.
            n: The number of keypoints to detect.
            apply_imagenet_normalization: Whether to apply ImageNet normalization to the input images.
            pad_if_not_divisible: pad image shape if not evenly divisible.
            crop_h: The height of the crop to be used for detection. If None, the full image is used.
            crop_w: The width of the crop to be used for detection. If None, the full image is used.

        Returns:
            keypoints: A tensor of shape :math:`(B, N, 2)` containing the detected keypoints,
            normalized to the range :math:`[-1, 1]`.
            scores: A tensor of shape :math:`(B, N)` containing the scores of the detected keypoints.

        r   3r>   r?   Fr/   Nr0   r   r1   r2   .)dim)num_samples)r   trainr6   r"   r7   r8   r9   r$   r   rG   reshapesoftmaxr   )r%   r+   r,   r-   r.   r4   r5   r   r=   r>   r?   r@   rA   rB   rC   logitsscoremaprD   
confidencer(   r(   r)   r:   u   s$   


"zDeDoDe.detectrD   c                 C   s   t |g d |j\}}}}	|durt |g d |r | |}| d | j|}
|durC|durC|
dd|d|f }
||}}	|durdtj|
 |dddf ddddddddf j	}|S |
S )	aV  Describe keypoints in the input images. If keypoints are not provided, returns the dense descriptors.

        Args:
            images: A tensor of shape :math:`(B, 3, H, W)` containing the input images.
            keypoints: An optional tensor of shape :math:`(B, N, 2)` containing the detected keypoints.
            apply_imagenet_normalization: Whether to apply ImageNet normalization to the input images.
            crop_h: The height of the crop to be used for description. If None, the full image is used.
            crop_w: The width of the crop to be used for description. If None, the full image is used.

        Returns:
            descriptions: A tensor of shape :math:`(B, N, DIM)` containing the descriptions of the detected keypoints.
            If the dense descriptors are requested, the shape is :math:`(B, DIM, H, W)`.

        rH   N)r   N2F.bilinear)modealign_cornersr   )
r   r6   r$   rM   r   rG   Fgrid_samplefloatmT)r%   r+   rD   r-   r4   r5   r<   r=   r>   r?   rF   described_keypointsr(   r(   r)   r;      s*   


zDeDoDe.describer   r   detector_weightsdescriptor_weightsc                 C   sn   | |d |d |d}|j tjjtd | tdd |jtjjtd | tdd |  |S )a+  Load a pretrained model.

        Args:
            detector_weights: The weights to load for the detector.
                One of 'L-upright' (original paper, https://arxiv.org/abs/2308.08479),
                'L-C4', 'L-SO2' (from steerers, better for rotations, https://arxiv.org/abs/2312.02152),
                'L-C4-v2' (from dedode v2, better at rotations, less clustering, https://arxiv.org/abs/2404.08928)
                Default is 'L-C4-v2', but perhaps it should be 'L-C4-v2'?
            descriptor_weights: The weights to load for the descriptor.
                One of 'B-upright','G-upright' (original paper, https://arxiv.org/abs/2308.08479),
                'B-C4', 'B-SO2', 'G-C4', 'G-SO2' (from steerers, better for rotations, https://arxiv.org/abs/2312.02152).
                Default is 'G-upright'.
            amp_dtype: the dtype to use for the model. One of torch.float16 or torch.float32.
            Default is torch.float16, suitable for CUDA. Use torch.float32 for CPU or MPS

        Returns:
            The pretrained model.

        r   )r   r   r   r   cpu)map_locationr   )	r   load_state_dictr"   hubload_state_dict_from_urlr   devicer   eval)clsr]   r^   r   modelr(   r(   r)   from_pretrained   s   zDeDoDe.from_pretrained)r*   TT)r*   TTNN)NTNN)__name__
__module____qualname____doc__r"   float16r   dtyper!   r   r   intboolr   rG   inference_moder:   r;   classmethodstrr   rh   __classcell__r(   r(   r&   r)   r   1   s    
#
-)r   )typingr   r   r   r   r"   torch.nn.functionalr7   r8   rX   kornia.corer   r   kornia.core.checkr   kornia.enhance.normalizer	   kornia.utils.helpersr
   dedode_modelsr   r   r   r   utilsr   r   r   rs   __annotations__r   r(   r(   r(   r)   <module>   s.   