o
    پiH                     @   s~  d Z ddlmZ ddlmZ ddlZddlmZ ddlmZm	Z	 ddl
mZ ddlmZmZmZ d	d
lmZ d	dlmZmZmZ dgZG dd deZdpddZdqddZei deddddeddddeddddeddddd d!edd"d#d$d%edd&d#d$d'edd(d#d$d)edd*ddd#d+d,edd-dd.edd/ddd d0edd1dd2edd3dd4edd5ddd d6edd7dd8edd9ddd d:edd;dd<edd=dd>edd?ddd edd@dd>eddAdd>eddBddd eddCdd>eddDddd eddEdd>dFZedrdGefdHdIZedrdGefdJdKZedrdGefdLdMZedrdGefdNdOZedrdGefdPdQZedrdGefdRdSZ edrdGefdTdUZ!edrdGefdVdWZ"edrdGefdXdYZ#edrdGefdZd[Z$edrdGefd\d]Z%edrdGefd^d_Z&edrdGefd`daZ'edrdGefdbdcZ(edrdGefdddeZ)edrdGefdfdgZ*ee+d<dhdidjdkdldmdndo dS )sa[   DeiT - Data-efficient Image Transformers

DeiT model defs and weights from https://github.com/facebookresearch/deit, original copyright below

paper: `DeiT: Data-efficient Image Transformers` - https://arxiv.org/abs/2012.12877

paper: `DeiT III: Revenge of the ViT` - https://arxiv.org/abs/2204.07118

Modifications copyright 2021, Ross Wightman
    )partial)OptionalN)nnIMAGENET_DEFAULT_MEANIMAGENET_DEFAULT_STD)resample_abs_pos_embed)VisionTransformertrunc_normal_checkpoint_filter_fn   )build_model_with_cfg)generate_default_cfgsregister_modelregister_model_deprecationsVisionTransformerDistilledc                       s   e Zd ZdZ fddZd fdd	Zejjddd	Z	ejjd
e
jfddZddedee fddZejjdddZdd Zdded
ejfddZ  ZS )r   z Vision Transformer w/ Distillation Token and Head

    Distillation token & head support for `DeiT: Data-efficient Image Transformers`
        - https://arxiv.org/abs/2012.12877
    c                    s   | dd}t j|i |ddi | jdv sJ d| _ttdd| j	| _
ttd| jj| j | j	| _| jdkrGt| j	| jnt | _d| _| | d S )	Nweight_init skip)token   r   r   F)popsuper__init__global_poolnum_prefix_tokensr   	Parametertorchzeros	embed_dim
dist_tokenpatch_embednum_patches	pos_embednum_classesLinearIdentity	head_distdistilled_traininginit_weights)selfargskwargsr   	__class__ D/home/ubuntu/.local/lib/python3.10/site-packages/timm/models/deit.pyr   #   s   $z#VisionTransformerDistilled.__init__r   c                    s    t | jdd t j|d d S )Ng{Gz?)std)mode)r
   r    r   r)   )r*   r2   r-   r/   r0   r)   1   s   z'VisionTransformerDistilled.init_weightsFc                 C   s   t dddgdS )Nz+^cls_token|pos_embed|patch_embed|dist_token)z^blocks\.(\d+)N)z^norm)i )stemblocks)dict)r*   coarser/   r/   r0   group_matcher5   s   z(VisionTransformerDistilled.group_matcherreturnc                 C   s   | j | jfS Nheadr'   )r*   r/   r/   r0   get_classifier>   s   z)VisionTransformerDistilled.get_classifierNr$   r   c                 C   sR   || _ |dkrt| j|nt | _|dkr"t| j| j | _d S t | _d S )Nr   )r$   r   r%   r   r&   r;   r'   )r*   r$   r   r/   r/   r0   reset_classifierB   s    ,z+VisionTransformerDistilled.reset_classifierTc                 C   s
   || _ d S r9   )r(   )r*   enabler/   r/   r0   set_distilled_trainingG   s   
z1VisionTransformerDistilled.set_distilled_trainingc                 C   s   | j r'|j\}}}}| jj}t| j||f|| jrdn| jd}||d|}n| j}| jrN|| }t	j
| j|jd dd| j|jd dd|fdd}n t	j
| j|jd dd| j|jd dd|fdd}|| }| |S )Nr   )new_sizeold_sizer   r   )dim)dynamic_img_sizeshaper!   	grid_sizer   r#   no_embed_classr   viewr   cat	cls_tokenexpandr    pos_drop)r*   xBHWCprev_grid_sizer#   r/   r/   r0   
_pos_embedK   s:   
z%VisionTransformerDistilled._pos_embed
pre_logitsc                 C   sp   |d d df |d d df }}|r|| d S |  |}| |}| jr2| jr2tj s2||fS || d S )Nr   r   r   )r;   r'   r(   trainingr   jitis_scripting)r*   rM   rT   x_distr/   r/   r0   forward_headl   s   "

z'VisionTransformerDistilled.forward_headr   Fr9   )T)__name__
__module____qualname____doc__r   r)   r   rV   ignorer7   r   Moduler<   intr   strr=   r?   rS   boolTensorrY   __classcell__r/   r/   r-   r0   r      s    !Fc                 K   sF   | dd}|r
tnt}t|| |fttddt|ddd|}|S )Nout_indices   T)adapt_layer_scalegetter)rg   feature_cls)pretrained_filter_fnfeature_cfg)r   r   r	   r   r   r   r5   )variant
pretrained	distilledr,   rg   	model_clsmodelr/   r/   r0   _create_deitz   s   

rs   r   c                 K   s    | ddd dddt tddd|S )	Ni  )rh      rt   g?bicubicTzpatch_embed.projr;   )urlr$   
input_size	pool_sizecrop_pctinterpolationfixed_input_sizemeanr1   
first_conv
classifierr   )rv   r,   r/   r/   r0   _cfg   s   r   zdeit_tiny_patch16_224.fb_in1kztimm/zFhttps://dl.fbaipublicfiles.com/deit/deit_tiny_patch16_224-a1311bcf.pth)	hf_hub_idrv   zdeit_small_patch16_224.fb_in1kzGhttps://dl.fbaipublicfiles.com/deit/deit_small_patch16_224-cd65a155.pthzdeit_base_patch16_224.fb_in1kzFhttps://dl.fbaipublicfiles.com/deit/deit_base_patch16_224-b5f2ef4d.pthzdeit_base_patch16_384.fb_in1kzFhttps://dl.fbaipublicfiles.com/deit/deit_base_patch16_384-8de9b5d1.pth)rh     r   g      ?)r   rv   rw   ry   z'deit_tiny_distilled_patch16_224.fb_in1kzPhttps://dl.fbaipublicfiles.com/deit/deit_tiny_distilled_patch16_224-b40b3cf7.pthr:   )r   rv   r~   z(deit_small_distilled_patch16_224.fb_in1kzQhttps://dl.fbaipublicfiles.com/deit/deit_small_distilled_patch16_224-649709d9.pthz'deit_base_distilled_patch16_224.fb_in1kzPhttps://dl.fbaipublicfiles.com/deit/deit_base_distilled_patch16_224-df68dfff.pthz'deit_base_distilled_patch16_384.fb_in1kzPhttps://dl.fbaipublicfiles.com/deit/deit_base_distilled_patch16_384-d0272ac0.pth)r   rv   rw   ry   r~   zdeit3_small_patch16_224.fb_in1kz;https://dl.fbaipublicfiles.com/deit/deit_3_small_224_1k.pthzdeit3_small_patch16_384.fb_in1kz;https://dl.fbaipublicfiles.com/deit/deit_3_small_384_1k.pthz deit3_medium_patch16_224.fb_in1kz<https://dl.fbaipublicfiles.com/deit/deit_3_medium_224_1k.pthzdeit3_base_patch16_224.fb_in1kz:https://dl.fbaipublicfiles.com/deit/deit_3_base_224_1k.pthzdeit3_base_patch16_384.fb_in1kz:https://dl.fbaipublicfiles.com/deit/deit_3_base_384_1k.pthzdeit3_large_patch16_224.fb_in1kz;https://dl.fbaipublicfiles.com/deit/deit_3_large_224_1k.pthzdeit3_large_patch16_384.fb_in1kz;https://dl.fbaipublicfiles.com/deit/deit_3_large_384_1k.pthzdeit3_huge_patch14_224.fb_in1kz:https://dl.fbaipublicfiles.com/deit/deit_3_huge_224_1k.pthz(deit3_small_patch16_224.fb_in22k_ft_in1kz<https://dl.fbaipublicfiles.com/deit/deit_3_small_224_21k.pth)r   rv   ry   z<https://dl.fbaipublicfiles.com/deit/deit_3_small_384_21k.pthz=https://dl.fbaipublicfiles.com/deit/deit_3_medium_224_21k.pthz;https://dl.fbaipublicfiles.com/deit/deit_3_base_224_21k.pthz;https://dl.fbaipublicfiles.com/deit/deit_3_base_384_21k.pthz<https://dl.fbaipublicfiles.com/deit/deit_3_large_224_21k.pthz<https://dl.fbaipublicfiles.com/deit/deit_3_large_384_21k.pthz>https://dl.fbaipublicfiles.com/deit/deit_3_huge_224_21k_v1.pth)(deit3_small_patch16_384.fb_in22k_ft_in1k)deit3_medium_patch16_224.fb_in22k_ft_in1k'deit3_base_patch16_224.fb_in22k_ft_in1k'deit3_base_patch16_384.fb_in22k_ft_in1k(deit3_large_patch16_224.fb_in22k_ft_in1k(deit3_large_patch16_384.fb_in22k_ft_in1k'deit3_huge_patch14_224.fb_in22k_ft_in1kr8   c                 K   2   t ddddd}td	d| it |fi |}|S )
z DeiT-tiny model @ 224x224 from paper (https://arxiv.org/abs/2012.12877).
    ImageNet-1k weights from https://github.com/facebookresearch/deit.
             rh   
patch_sizer   depth	num_headsdeit_tiny_patch16_224ro   N)r   r5   rs   ro   r,   
model_argsrr   r/   r/   r0   r         r   c                 K   r   )
z DeiT-small model @ 224x224 from paper (https://arxiv.org/abs/2012.12877).
    ImageNet-1k weights from https://github.com/facebookresearch/deit.
    r   r   r      r   deit_small_patch16_224ro   N)r   r   r   r/   r/   r0   r      r   r   c                 K   2   t ddddd}tdd| it |fi |}|S )	z DeiT base model @ 224x224 from paper (https://arxiv.org/abs/2012.12877).
    ImageNet-1k weights from https://github.com/facebookresearch/deit.
    r      r   r   deit_base_patch16_224ro   N)r   r   r   r/   r/   r0   r     r   r   c                 K   r   )	z DeiT base model @ 384x384 from paper (https://arxiv.org/abs/2012.12877).
    ImageNet-1k weights from https://github.com/facebookresearch/deit.
    r   r   r   r   deit_base_patch16_384ro   N)r   r   r   r/   r/   r0   r     r   r   c                 K   6   t ddddd}t	d
| ddt |fi |}|S )z DeiT-tiny distilled model @ 224x224 from paper (https://arxiv.org/abs/2012.12877).
    ImageNet-1k weights from https://github.com/facebookresearch/deit.
    r   r   r   rh   r   deit_tiny_distilled_patch16_224Tro   rp   N)r   r   r   r/   r/   r0   r        r   c                 K   r   )z DeiT-small distilled model @ 224x224 from paper (https://arxiv.org/abs/2012.12877).
    ImageNet-1k weights from https://github.com/facebookresearch/deit.
    r   r   r   r   r    deit_small_distilled_patch16_224Tr   N)r   r   r   r/   r/   r0   r   '  r   r   c                 K   6   t ddddd}t	d	| ddt |fi |}|S )
z DeiT-base distilled model @ 224x224 from paper (https://arxiv.org/abs/2012.12877).
    ImageNet-1k weights from https://github.com/facebookresearch/deit.
    r   r   r   r   deit_base_distilled_patch16_224Tr   N)r   r   r   r/   r/   r0   r   2  r   r   c                 K   r   )
z DeiT-base distilled model @ 384x384 from paper (https://arxiv.org/abs/2012.12877).
    ImageNet-1k weights from https://github.com/facebookresearch/deit.
    r   r   r   r   deit_base_distilled_patch16_384Tr   N)r   r   r   r/   r/   r0   r   =  r   r   c                 K   6   t ddddddd}tdd	| it |fi |}|S )z DeiT-3 small model @ 224x224 from paper (https://arxiv.org/abs/2204.07118).
    ImageNet-1k weights from https://github.com/facebookresearch/deit.
    r   r   r   r   Tư>r   r   r   r   rG   init_valuesdeit3_small_patch16_224ro   N)r   r   r   r/   r/   r0   r   H     r   c                 K   r   )z DeiT-3 small model @ 384x384 from paper (https://arxiv.org/abs/2204.07118).
    ImageNet-1k weights from https://github.com/facebookresearch/deit.
    r   r   r   r   Tr   r   deit3_small_patch16_384ro   N)r   r   r   r/   r/   r0   r   R  r   r   c                 K   r   )z DeiT-3 medium model @ 224x224 (https://arxiv.org/abs/2012.12877).
    ImageNet-1k weights from https://github.com/facebookresearch/deit.
    r   i   r      Tr   r   deit3_medium_patch16_224ro   N)r   r   r   r/   r/   r0   r   \  r   r   c                 K   6   t ddddddd}td
d| it |fi |}|S )z DeiT-3 base model @ 224x224 from paper (https://arxiv.org/abs/2204.07118).
    ImageNet-1k weights from https://github.com/facebookresearch/deit.
    r   r   r   Tr   r   deit3_base_patch16_224ro   N)r   r   r   r/   r/   r0   r   f  r   r   c                 K   r   ) DeiT-3 base model @ 384x384 from paper (https://arxiv.org/abs/2204.07118).
    ImageNet-1k weights from https://github.com/facebookresearch/deit.
    r   r   r   Tr   r   deit3_base_patch16_384ro   N)r   r   r   r/   r/   r0   r   p  r   r   c                 K   6   t ddddddd}td
d| it |fi |}|S )z DeiT-3 large model @ 224x224 from paper (https://arxiv.org/abs/2204.07118).
    ImageNet-1k weights from https://github.com/facebookresearch/deit.
    r         Tr   r   deit3_large_patch16_224ro   N)r   r   r   r/   r/   r0   r   z  r   r   c                 K   r   )z DeiT-3 large model @ 384x384 from paper (https://arxiv.org/abs/2204.07118).
    ImageNet-1k weights from https://github.com/facebookresearch/deit.
    r   r   r   Tr   r   deit3_large_patch16_384ro   N)r   r   r   r/   r/   r0   r     r   r   c                 K   r   )r      i       r   Tr   r   deit3_huge_patch14_224ro   N)r   r   r   r/   r/   r0   r     r   r   r   r   r   r   r   r   r   ) deit3_small_patch16_224_in21ft1k deit3_small_patch16_384_in21ft1k!deit3_medium_patch16_224_in21ft1kdeit3_base_patch16_224_in21ft1kdeit3_base_patch16_384_in21ft1k deit3_large_patch16_224_in21ft1k deit3_large_patch16_384_in21ft1kdeit3_huge_patch14_224_in21ft1k)FFrZ   r[   ),r_   	functoolsr   typingr   r   r   	timm.datar   r   timm.layersr   timm.models.vision_transformerr	   r
   r   _builderr   	_registryr   r   r   __all__r   rs   r   default_cfgsr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r\   r/   r/   r/   r0   <module>   sx   
^
"%),/36:>B
a				



								