o
    €o™iô,  ã                   @   sÞ   U d dl mZmZmZmZ d dlZd dlm  mZ	 d dl
mZmZ d dlmZ d dlmZ d dlmZ ddlmZmZmZmZ dd	lmZmZ d
ddddœdddddddœdœZeeeeef f ed< G dd„ deƒZdS )é    )ÚDictÚLiteralÚOptionalÚTupleN)ÚModuleÚTensor)ÚKORNIA_CHECK_SHAPE)Ú	Normalize)Úmap_location_to_cpué   )ÚDeDoDeDescriptorÚDeDoDeDetectorÚget_descriptorÚget_detector)Ú$dedode_denormalize_pixel_coordinatesÚsample_keypointszchttps://github.com/Parskatt/DeDoDe/releases/download/dedode_pretrained_models/dedode_detector_L.pthz`https://github.com/georg-bn/rotation-steerers/releases/download/release-2/dedode_detector_C4.pthzahttps://github.com/georg-bn/rotation-steerers/releases/download/release-2/dedode_detector_SO2.pthzPhttps://github.com/Parskatt/DeDoDe/releases/download/v2/dedode_detector_L_v2.pth)z	L-uprightzL-C4zL-SO2úL-C4-v2zehttps://github.com/Parskatt/DeDoDe/releases/download/dedode_pretrained_models/dedode_descriptor_B.pthzlhttps://github.com/georg-bn/rotation-steerers/releases/download/release-2/B_C4_Perm_descriptor_setting_C.pthzohttps://github.com/georg-bn/rotation-steerers/releases/download/release-2/B_SO2_Spread_descriptor_setting_C.pthzehttps://github.com/Parskatt/DeDoDe/releases/download/dedode_pretrained_models/dedode_descriptor_G.pthzlhttps://github.com/georg-bn/rotation-steerers/releases/download/release-2/G_C4_Perm_descriptor_setting_C.pthzohttps://github.com/georg-bn/rotation-steerers/releases/download/release-2/G_SO2_Spread_descriptor_setting_C.pth)z	B-uprightzB-C4zB-SO2ú	G-uprightzG-C4zG-SO2)ÚdetectorÚ
descriptorÚurlsc                       s2  e Zd ZdZddejfded ded dejdd	f‡ fd
d„Z			d!de	de
e dededee	e	e	f f
dd„Ze ¡ 							d"de	de
e dedede
e de
e dee	e	f fdd„ƒZe ¡ 							d#de	de
e	 dede
e de
e de	fdd„ƒZeddejfdededejdefdd „ƒZ‡  ZS )$ÚDeDoDeaG  Module which detects and/or describes local features in an image using the DeDode method.

    See :cite:`edstedt2024dedode` for details.

    .. note:: DeDode takes ImageNet normalized images as input (not in range [0, 1]).

    Args:
        detector_model: The detector model kind. Available options are: `L`.
        descriptor_model: The descriptor model kind. Available options are: `G` or `B`
        amp_dtype: The automatic mixed precision desired.

    Example:
        >>> dedode = DeDoDe.from_pretrained(detector_weights="L-C4-v2", descriptor_weights="B-upright")
        >>> images = torch.randn(1, 3, 256, 256)
        >>> keypoints, scores = dedode.detect(images)
        >>> descriptions = dedode.describe(images, keypoints = keypoints)
        >>> keypoints, scores, features = dedode(images) # alternatively do both

    ÚLÚGÚdetector_modelÚdescriptor_model)r   ÚBÚ	amp_dtypeÚreturnNc                    sH   t ƒ  ¡  t||ƒ| _t||ƒ| _tt g d¢¡t g d¢¡d| _	d S )N)g
×£p=
ß?gÉv¾Ÿ/Ý?g–C‹lçûÙ?)gZd;ßOÍ?gyé&1¬Ì?gÍÌÌÌÌÌÌ?)Ústd)
ÚsuperÚ__init__r   r   r   r   r	   ÚtorchÚtensorÚ
normalizer)Úselfr   r   r   ©Ú	__class__© úP/home/ubuntu/.local/lib/python3.10/site-packages/kornia/feature/dedode/dedode.pyr!   G   s   
&zDeDoDe.__init__é'  TÚimagesÚnÚapply_imagenet_normalizationÚpad_if_not_divisiblec                 C   sÄ   |r|   |¡}|j\}}}}|jdd… \}	}
|rC|	d dkr%d|	d  nd}|
d dkr3d|
d  nd}tjjj|d|d|fdd}| j||d|	|
d\}}| j||d|	|
d	}t|||ƒ||fS )
aO  Detect and describe keypoints in the input images.

        Args:
            images: A tensor of shape :math:`(B, 3, H, W)` containing the ImageNet-Normalized input images.
            n: The number of keypoints to detect.
            apply_imagenet_normalization: Whether to apply ImageNet normalization to the input images.
            pad_if_not_divisible: pad image shape if not evenly divisible.

        Returns:
            keypoints: A tensor of shape :math:`(B, N, 2)` containing the detected keypoints in the image range,
            unlike `.detect()` function
            scores: A tensor of shape :math:`(B, N)` containing the scores of the detected keypoints.
            descriptions: A tensor of shape :math:`(B, N, DIM)` containing the descriptions of the detected keypoints.
            DIM is 256 for B and 512 for G.

        é   Né   r   ç        ©ÚvalueF)r,   r-   Úcrop_hÚcrop_w)r-   r4   r5   )	r$   Úshaper"   ÚnnÚ
functionalÚpadÚdetectÚdescriber   )r%   r+   r,   r-   r.   Ú_BÚ_CÚHÚWÚhÚwÚpd_hÚpd_wÚ	keypointsÚscoresÚdescriptionsr(   r(   r)   ÚforwardR   s   
zDeDoDe.forwardr4   r5   c                 C   s*  t |g d¢ƒ |  d¡ |j\}}}	}
|rH|jdd… \}}|d dkr*d|d  nd}|d dkr8d|d  nd}tjjj|d|d|fdd}|rO|  |¡}| j 	|¡}|d	d|	…d|
…f }|durx|durx|d	d|…d|…f }||}	}
| 
||	|
 ¡jd
d 
||	|
¡}t||d\}}||fS )aP  Detect keypoints in the input images.

        Args:
            images: A tensor of shape :math:`(B, 3, H, W)` containing the input images.
            n: The number of keypoints to detect.
            apply_imagenet_normalization: Whether to apply ImageNet normalization to the input images.
            pad_if_not_divisible: pad image shape if not evenly divisible.
            crop_h: The height of the crop to be used for detection. If None, the full image is used.
            crop_w: The width of the crop to be used for detection. If None, the full image is used.

        Returns:
            keypoints: A tensor of shape :math:`(B, N, 2)` containing the detected keypoints,
            normalized to the range :math:`[-1, 1]`.
            scores: A tensor of shape :math:`(B, N)` containing the scores of the detected keypoints.

        ©r   Ú3r>   r?   Fr/   Nr0   r   r1   r2   .éÿÿÿÿ)Údim)Únum_samples)r   Útrainr6   r"   r7   r8   r9   r$   r   rG   ÚreshapeÚsoftmaxr   )r%   r+   r,   r-   r.   r4   r5   r   r=   r>   r?   r@   rA   rB   rC   ÚlogitsÚscoremaprD   Ú
confidencer(   r(   r)   r:   u   s$   


"zDeDoDe.detectrD   c                 C   sÌ   t |g d¢ƒ |j\}}}}	|durt |g d¢ƒ |r |  |¡}|  d¡ | j |¡}
|durC|durC|
dd|…d|…f }
||}}	|durdtj|
 ¡ |dd…df ddddd…dd…df j	}|S |
S )	aV  Describe keypoints in the input images. If keypoints are not provided, returns the dense descriptors.

        Args:
            images: A tensor of shape :math:`(B, 3, H, W)` containing the input images.
            keypoints: An optional tensor of shape :math:`(B, N, 2)` containing the detected keypoints.
            apply_imagenet_normalization: Whether to apply ImageNet normalization to the input images.
            crop_h: The height of the crop to be used for description. If None, the full image is used.
            crop_w: The width of the crop to be used for description. If None, the full image is used.

        Returns:
            descriptions: A tensor of shape :math:`(B, N, DIM)` containing the descriptions of the detected keypoints.
            If the dense descriptors are requested, the shape is :math:`(B, DIM, H, W)`.

        rH   N)r   ÚNÚ2F.Úbilinear)ÚmodeÚalign_cornersr   )
r   r6   r$   rM   r   rG   ÚFÚgrid_sampleÚfloatÚmT)r%   r+   rD   r-   r4   r5   r<   r=   r>   r?   rF   Údescribed_keypointsr(   r(   r)   r;   £   s*   


ÿþþzDeDoDe.describer   r   Údetector_weightsÚdescriptor_weightsc                 C   sn   | |d |d |d}|j  tjjtd | t d¡d¡ |j tjjtd | t d¡d¡ | ¡  |S )a+  Load a pretrained model.

        Args:
            detector_weights: The weights to load for the detector.
                One of 'L-upright' (original paper, https://arxiv.org/abs/2308.08479),
                'L-C4', 'L-SO2' (from steerers, better for rotations, https://arxiv.org/abs/2312.02152),
                'L-C4-v2' (from dedode v2, better at rotations, less clustering, https://arxiv.org/abs/2404.08928)
                Default is 'L-C4-v2', but perhaps it should be 'L-C4-v2'?
            descriptor_weights: The weights to load for the descriptor.
                One of 'B-upright','G-upright' (original paper, https://arxiv.org/abs/2308.08479),
                'B-C4', 'B-SO2', 'G-C4', 'G-SO2' (from steerers, better for rotations, https://arxiv.org/abs/2312.02152).
                Default is 'G-upright'.
            amp_dtype: the dtype to use for the model. One of torch.float16 or torch.float32.
            Default is torch.float16, suitable for CUDA. Use torch.float32 for CPU or MPS

        Returns:
            The pretrained model.

        r   )r   r   r   r   Úcpu)Úmap_locationr   )	r   Úload_state_dictr"   ÚhubÚload_state_dict_from_urlr   Údevicer   Úeval)Úclsr]   r^   r   Úmodelr(   r(   r)   Úfrom_pretrainedÍ   s   ýÿÿzDeDoDe.from_pretrained)r*   TT)r*   TTNN)NTNN)Ú__name__Ú
__module__Ú__qualname__Ú__doc__r"   Úfloat16r   Údtyper!   r   r   ÚintÚboolr   rG   Úinference_moder:   r;   ÚclassmethodÚstrr   rh   Ú__classcell__r(   r(   r&   r)   r   1   s     üþýüûûþýüû
ú#ùþýüûúù
ø-úþýüûúù)üþýüûr   )Útypingr   r   r   r   r"   Útorch.nn.functionalr7   r8   rX   Úkornia.corer   r   Úkornia.core.checkr   Úkornia.enhance.normalizer	   Úkornia.utils.helpersr
   Údedode_modelsr   r   r   r   Úutilsr   r   r   rs   Ú__annotations__r   r(   r(   r(   r)   Ú<module>   s.   üúù