o
    €o™iñ/  ã                   @  sÖ   d Z ddlmZ ddlZddlmZ ddlmZ ddlm	Z	 ddl
Z
ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddddddœZG dd„ deƒZeG dd„ dƒƒZG dd„ dee ƒZdS )z#Based on code from PaddleDetection.é    )ÚannotationsN)Ú	dataclass)ÚEnum)ÚOptional)Ú	ModelBase)Ú	PPHGNetV2)ÚHybridEncoder)ÚResNetD)Ú
RTDETRHead)ÚTensorzdhttps://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetr_r18vd_dec3_6x_coco_from_paddle.pthzdhttps://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetr_r34vd_dec4_6x_coco_from_paddle.pthzahttps://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetr_r50vd_m_6x_coco_from_paddle.pthz_https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetr_r50vd_6x_coco_from_paddle.pthz`https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetr_r101vd_6x_coco_from_paddle.pth)Úrtdetr_r18vdÚrtdetr_r34vdÚrtdetr_r50vd_mÚrtdetr_r50vdÚrtdetr_r101vdc                   @  s,   e Zd ZdZdZdZdZdZdZdZ	dZ
d	S )
ÚRTDETRModelTypez(Enum class that maps RT-DETR model type.r   é   é   é   é   é   é   N)Ú__name__Ú
__module__Ú__qualname__Ú__doc__Ú	resnet18dÚ	resnet34dÚ	resnet50dÚ
resnet101dÚ	hgnetv2_lÚ	hgnetv2_xÚresnet50d_m© r#   r#   úW/home/ubuntu/.local/lib/python3.10/site-packages/kornia/contrib/models/rt_detr/model.pyr   -   s    r   c                   @  sž   e Zd ZU dZded< ded< dZded< dZd	ed
< dZded< dZded< dZ	ded< dZ
ded< dZded< dZded< dZded< edddd„ƒZdS ) ÚRTDETRConfiga`  Configuration to construct RT-DETR model.

    Args:
        model_type: model variant. Available models are

            - ResNetD-18: ``0``, ``'resnet18d'`` or :attr:`RTDETRModelType.resnet18d`
            - ResNetD-34: ``1``, ``'resnet34d'`` or :attr:`RTDETRModelType.resnet34d`
            - ResNetD-50: ``2``, ``'resnet50d'`` or :attr:`RTDETRModelType.resnet50d`
            - ResNetD-101: ``3``, ``'resnet101d'`` or :attr:`RTDETRModelType.resnet101d`
            - HGNetV2-L: ``4``, ``'hgnetv2_l'`` or :attr:`RTDETRModelType.hgnetv2_l`
            - HGNetV2-X: ``5``, ``'hgnetv2_x'`` or :attr:`RTDETRModelType.hgnetv2_x`

        num_classes: number of classes.
        checkpoint: URL or local path of model weights.
        neck_hidden_dim: hidden dim for neck.
        neck_dim_feedforward: feed-forward network dim for neck.
        neck_expansion: expansion ratio for neck.
        head_hidden_dim: hidden dim for head.
        head_num_queries: number of queries for Deformable DETR transformer decoder.
        head_num_decoder_layers: number of decoder layers for Deformable DETR transformer decoder.

    zRTDETRModelType | str | intÚ
model_typeÚintÚnum_classesé€  Ú
input_sizeNzOptional[str]Ú
checkpointzOptional[int]Úneck_hidden_dimÚneck_dim_feedforwardzOptional[float]Úneck_expansioné   Úhead_hidden_dimi,  Úhead_num_queriesÚhead_num_decoder_layersg333333Ó?ÚfloatÚconfidence_thresholdéP   Ú
model_nameÚstrÚreturnc                 C  s   | dkrt tj|dd}|S | dkrt tj|dd}|S | dkr*t tj|dd}|S | dkr8t tj|dd}|S | dkrFt tj|dd}|S t‚)zàLoad model without pretrained weights.

        Args:
            model_name: 'rtdetr_r18vd', 'rtdetr_r34vd', 'rtdetr_r50vd_m', 'rtdetr_r50vd', 'rtdetr_r101vd'.
            num_classes: Number of classes to detect.

        r   r)   )r*   r   r   r   r   )r%   r   r   r   r"   r   r   Ú
ValueError)r6   r(   Úconfigr#   r#   r$   Ú	from_name_   s    	õ
÷ùûþzRTDETRConfig.from_name©r5   )r6   r7   r(   r'   r8   r%   )r   r   r   r   Ú__annotations__r*   r+   r,   r-   r.   r0   r1   r2   r4   Ústaticmethodr;   r#   r#   r#   r$   r%   9   s   
 r%   c                      sX   e Zd ZdZd‡ fdd	„Zeddd„ƒZeddd„ƒZed d!dd„ƒZd"dd„Z	‡  Z
S )#ÚRTDETRzQRT-DETR Object Detection model, as described in https://arxiv.org/abs/2304.08069.ÚbackboneúResNetD | PPHGNetV2Úencoderr   Údecoderr
   c                   s    t ƒ  ¡  || _|| _|| _dS )a  Construct RT-DETR Object Detection model.

        Args:
            backbone: backbone network for feature extraction.
            encoder: neck network for feature fusion.
            decoder: head network to decode features into detection results.

        N)ÚsuperÚ__init__r@   rB   rC   )Úselfr@   rB   rC   ©Ú	__class__r#   r$   rE   {   s   
	
zRTDETR.__init__r:   r%   r8   c              
   C  s(  | j }t|tƒrt|ƒ}n
t|tƒrtt|ƒ}|tjkr6t d¡}| j	p%d}| j
p*d}| jp/d}| jp4d}n¹|tjkrUt d¡}| j	pDd}| j
pId}| jpNd}| jpSd}nš|tjkrtt d¡}| j	pcd}| j
phd}| jpmd	}| jprd
}n{|tjkr“t d¡}| j	p‚d}| j
p‡d}| jpŒd	}| jp‘d}n\|tjkr²t d¡}| j	p¡d}| j
p¦d}| jp«d	}| jp°d
}n=|tjkrÑt d¡}| j	pÀd}| j
pÅd}| jpÊd	}| jpÏd
}n|tjkrït d¡}| j	pßd}| j
päd}| jpéd	}| jpîd
}t|t|j|||ƒt| j| j| j|gd |dƒ}| jr| | j¡ |S )aè  Construct RT-DETR Object Detection model from a config object.

        Args:
            config: configuration object for RT-DETR.

        .. note::
            For ``config.neck_hidden_dim``, ``config.neck_dim_feedforward``, ``config.neck_expansion``, and
            ``config.head_num_decoder_layers``, if they are ``None``, their values will be replaced with the
            default values depending on the ``config.model_type``. See the source code for the default values.

        é   r/   i   r   g      à?é"   r   é2   r   g      ð?ée   i€  i   ÚLÚX)r(   Ú
hidden_dimÚnum_queriesÚin_channelsÚnum_decoder_layers)r&   Ú
isinstancer'   r   r7   Úgetattrr   r	   Úfrom_configr,   r-   r2   r.   r   r   r"   r   r    r   r!   r?   r   Úout_channelsr
   r(   r0   r1   r+   Úload_checkpoint)r:   r&   r@   r,   r-   r2   r.   Úmodelr#   r#   r$   rU   ‰   sz   







































ûýzRTDETR.from_configr6   r7   c                   s€   | t vrtd| › dtt  ¡ ƒ› dƒ‚tjjt |  tj ¡ r dndd}dd
d„‰ d‡ fdd„}t	j
| dd}| ||ƒ¡ |S )z§Load model from pretrained weights.

        Args:
            model_name: 'rtdetr_r18vd', 'rtdetr_r34vd', 'rtdetr_r50vd_m', 'rtdetr_r50vd', 'rtdetr_r101vd'.

        zNo pretrained model for 'z'. Please select from Ú.zcuda:0Úcpu)Úmap_locationÚold_namer7   r8   c                 S  sj   | }t  dd|¡}t  dd|¡}t  dd|¡}t  dd|¡}t  d	d
|¡}t  dd|¡}t  dd|¡}|S )Nzencoder.pan_blockszencoder.ccfm.pan_blockszencoder.downsample_convszencoder.ccfm.downsample_convszencoder.fpn_blockszencoder.ccfm.fpn_blockszencoder.lateral_convszencoder.ccfm.lateral_convsz
.branch2b.z.convs.branch2b.z
.branch2a.z.convs.branch2a.z
.branch2c.z.convs.branch2c.)ÚreÚsub)r\   Únew_namer#   r#   r$   Úmap_nameî   s   z(RTDETR.from_pretrained.<locals>.map_nameÚ
state_dictúdict[str, Tensor]c                   s6   | d d } i }|   ¡ D ]}ˆ |ƒ}| | ||< q|S )NÚemaÚmodule)Úkeys)ra   Únew_state_dictr\   r_   ©r`   r#   r$   Ú_state_dict_procþ   s   z0RTDETR.from_pretrained.<locals>._state_dict_procr5   )r(   N)r\   r7   r8   r7   )ra   rb   r8   rb   )ÚURLsr9   Úlistre   ÚtorchÚhubÚload_state_dict_from_urlÚcudaÚis_availabler?   r;   Úload_state_dict)r6   ra   rh   rX   r#   rg   r$   Úfrom_pretrainedß   s   ÿ
zRTDETR.from_pretrainedr5   r(   r'   c                 C  s   t  t | |¡¡}|S )zàLoad model without pretrained weights.

        Args:
            model_name: 'rtdetr_r18vd', 'rtdetr_r34vd', 'rtdetr_r50vd_m', 'rtdetr_r50vd', 'rtdetr_r101vd'.
            num_classes: number of classes to detect.

        )r?   rU   r%   r;   )r6   r(   rX   r#   r#   r$   r;     s   	zRTDETR.from_nameÚimagesr   útuple[Tensor, Tensor]c                 C  s*   |   |¡}|  |¡}|  |¡\}}||fS )aŽ  Detect objects in an image.

        Args:
            images: images to be detected. Shape :math:`(N, C, H, W)`.

        Returns:
            - **logits** - Tensor of shape :math:`(N, Q, K)`, where :math:`Q` is the number of queries,
              :math:`K` is the number of classes.
            - **boxes** - Tensor of shape :math:`(N, Q, 4)`, where :math:`Q` is the number of queries.

        )r@   rB   rC   )rF   rr   ÚfeatsÚ	feats_bufÚlogitsÚboxesr#   r#   r$   Úforward  s   

zRTDETR.forward)r@   rA   rB   r   rC   r
   )r:   r%   r8   r?   )r6   r7   r8   r?   r<   )r6   r7   r(   r'   r8   r?   )rr   r   r8   rs   )r   r   r   r   rE   r>   rU   rq   r;   rx   Ú__classcell__r#   r#   rG   r$   r?   x   s    U.r?   )r   Ú
__future__r   r]   Údataclassesr   Úenumr   Útypingr   rk   Úkornia.contrib.models.baser   Ú2kornia.contrib.models.rt_detr.architecture.hgnetv2r   Ú9kornia.contrib.models.rt_detr.architecture.hybrid_encoderr   Ú3kornia.contrib.models.rt_detr.architecture.resnet_dr	   Ú6kornia.contrib.models.rt_detr.architecture.rtdetr_headr
   Úkornia.corer   ri   r   r%   r?   r#   r#   r#   r$   Ú<module>   s.   û	>