o
    پiT                     @   sb  d Z ddlmZ ddlmZmZmZmZ ddlZddl	m
Z
 ddlmZmZ ddlmZmZmZmZmZ ddlmZ dd	lmZ dd
lmZmZ ddlmZmZ g dZG dd de
jZ G dd de
jZ!G dd de
jZ"G dd de
jZ#G dd de
jZ$dDddZ%dEddZ&dFddZ'ee'dd d!d"e'dd#d$e'dd%d!d"e'dd&d$e'dd'd$e'dd(d!d"e'dd)d$e'dd*d$e'dd+d$e'dd,d-d"d.
Z(edEd/e$fd0d1Z)edEd/e$fd2d3Z*edEd/e$fd4d5Z+edEd/e$fd6d7Z,edEd/e$fd8d9Z-edEd/e$fd:d;Z.edEd/e$fd<d=Z/edEd/e$fd>d?Z0edEd/e$fd@dAZ1edEd/e$fdBdCZ2dS )Ga3   Class-Attention in Image Transformers (CaiT)

Paper: 'Going deeper with Image Transformers' - https://arxiv.org/abs/2103.17239

Original code and weights from https://github.com/facebookresearch/deit, copyright below

Modifications and additions for timm hacked together by / Copyright 2021, Ross Wightman
    )partial)ListOptionalTupleUnionNIMAGENET_DEFAULT_MEANIMAGENET_DEFAULT_STD)
PatchEmbedMlpDropPathtrunc_normal_use_fused_attn   )build_model_with_cfg)feature_take_indices)
checkpointcheckpoint_seq)register_modelgenerate_default_cfgs)Cait	ClassAttnLayerScaleBlockClassAttnLayerScaleBlockTalkingHeadAttnc                       s8   e Zd ZU ejje ed< d	 fdd	Zdd Z	  Z
S )
r   
fused_attn   F        c                    s   t    || _|| }|d | _t | _tj|||d| _tj|||d| _	tj|||d| _
t|| _t||| _t|| _d S )N      ࿩bias)super__init__	num_headsscaler   r   nnLinearqkvDropout	attn_dropproj	proj_dropselfdimr#   qkv_biasr+   r-   head_dim	__class__ D/home/ubuntu/.local/lib/python3.10/site-packages/timm/models/cait.pyr"       s   

zClassAttn.__init__c           
      C   s.  |j \}}}| |d d df d|d| j|| j dddd}| |||| j|| j dddd}| |||| j|| j dddd}| jrdt	j
jj|||| jr_| jjndd}n|| j }||dd }	|	jdd	}	| |	}	|	| }|dd|d|}| |}| |}|S )
Nr   r         r   )	dropout_pr0   )shaper'   	unsqueezereshaper#   permuter(   r)   r   torchr%   
functionalscaled_dot_product_attentiontrainingr+   pr$   	transposesoftmaxr,   r-   )
r/   xBNCr'   r(   r)   x_clsattnr5   r5   r6   forward.   s$   <**



zClassAttn.forwardr   Fr   r   )__name__
__module____qualname__rA   jitFinalbool__annotations__r"   rN   __classcell__r5   r5   r3   r6   r      s   
 r   c                
       >   e Zd Zdddddejejeedf
 fdd	Zdd Z	  Z
S )	r         @Fr   -C6?c                       t    |	|| _|
|||||d| _|dkrt|nt | _|	|| _t	|| }|||||d| _
t|t| | _t|t| | _d S N)r#   r1   r+   r-   r   )in_featureshidden_features	act_layerdropr!   r"   norm1rM   r   r%   Identity	drop_pathnorm2intmlp	ParameterrA   onesgamma_1gamma_2r/   r0   r#   	mlp_ratior1   r-   r+   rd   r_   
norm_layer
attn_block	mlp_blockinit_valuesmlp_hidden_dimr3   r5   r6   r"   J   (   


z!LayerScaleBlockClassAttn.__init__c              	   C   sV   t j||fdd}|| | j| | |  }|| | j| | |  }|S )Nr   r<   )	rA   catrd   rj   rM   rb   rk   rg   re   )r/   rH   rL   ur5   r5   r6   rN   n   s     z LayerScaleBlockClassAttn.forward)rP   rQ   rR   r%   GELU	LayerNormr   r   r"   rN   rW   r5   r5   r3   r6   r   G       $r   c                       s&   e Zd Zd fdd	Zdd Z  ZS )	r   r   Fr   c                    s~   t    || _|| }|d | _tj||d |d| _t|| _t||| _	t||| _
t||| _t|| _d S )Nr   r8   r   )r!   r"   r#   r$   r%   r&   qkvr*   r+   r,   proj_lproj_wr-   r.   r3   r5   r6   r"   x   s   

zTalkingHeadAttn.__init__c           
      C   s   |j \}}}| |||d| j|| j ddddd}|d | j |d |d }}}||dd }	| |	dddddddd}	|	jdd}	| 	|	dddddddd}	| 
|	}	|	| dd|||}| |}| |}|S )	Nr8   r7   r   r      r:   r;   r<   )r=   ry   r?   r#   r@   r$   rF   rz   rG   r{   r+   r,   r-   )
r/   rH   rI   rJ   rK   ry   r'   r(   r)   rM   r5   r5   r6   rN      s   ."""


zTalkingHeadAttn.forwardrO   )rP   rQ   rR   r"   rN   rW   r5   r5   r3   r6   r   u   s    r   c                
       rX   )	r   rY   Fr   rZ   c                    r[   r\   ra   rl   r3   r5   r6   r"      rs   zLayerScaleBlock.__init__c              	   C   sD   ||  | j| | |  }||  | j| | |  }|S N)rd   rj   rM   rb   rk   rg   re   r/   rH   r5   r5   r6   rN      s     zLayerScaleBlock.forward)rP   rQ   rR   r%   rv   rw   r   r   r"   rN   rW   r5   r5   r3   r6   r      rx   r   c                       sx  e Zd Zdddddddddd	d
d
d
d
d
eeeeejddej	e
edeeddf fdd	Zdd Zejjdd Zejjd7ddZejjd8ddZejjdejfddZd9dedee fd d!Z				"	d:d#ejd$eeeee f  d%ed&ed'ed(edeeej eejeej f f fd)d*Z 	+			d;d$eeee f d,ed-efd.d/Z!d0d1 Z"d8d2efd3d4Z#d5d6 Z$  Z%S )<r         r8     token      rY   Tr   ư>)epsrZ   r7   c                    s  t    |dv sJ || _|| _ | _ | _| _d| _||||d| _| jj	}t
| jdr5| j n|ttdd| _ttd|| _tj|d| _fddt|D tj 	
fd	dt|D  | _fd
dt|D | _t 	f
ddt|D | _| _t|| _|dkrt|nt | _t| jdd t| jdd |  | j! d S )N r   avgF)img_size
patch_sizein_chans	embed_dim
feat_ratior   )rE   c                    s   g | ]} qS r5   r5   .0i)drop_path_rater5   r6   
<listcomp>  s    z!Cait.__init__.<locals>.<listcomp>c                    s0   g | ]}
| 	 d qS ))r0   r#   rm   r1   r-   r+   rd   rn   r_   ro   rp   rq   r5   r   )r_   ro   attn_drop_rateblock_layersdprr   rq   rp   rm   rn   r#   proj_drop_rater1   r5   r6   r     s     
c                    s    g | ]}t  d | dqS )blocks.)num_chs	reductionmodule)dictr   )r   rr5   r6   r     s     c                    s&   g | ]}	 d 	qS ))	r0   r#   rm   r1   rn   r_   ro   rp   rq   r5   )r   _)
r_   attn_block_token_onlyblock_layers_tokenr   rq   mlp_block_token_onlymlp_ratio_token_onlyrn   r#   r1   r5   r6   r     s    

r   {Gz?std)"r!   r"   num_classesglobal_poolnum_featureshead_hidden_sizer   grad_checkpointingpatch_embednum_patcheshasattrr   r%   rh   rA   zeros	cls_token	pos_embedr*   pos_droprange
Sequentialblocksfeature_info
ModuleListblocks_token_onlynorm	head_dropr&   rc   headr   apply_init_weights)r/   r   r   r   r   r   r   depthr#   rm   r1   	drop_ratepos_drop_rater   r   r   r   r   patch_layerrn   r_   ro   rp   rq   r   r   depth_token_onlyr   r   r3   )r_   ro   r   r   r   r   r   r   r   rq   rp   r   rm   r   rn   r#   r   r1   r   r6   r"      s>   
&
 


zCait.__init__c                 C   s   t |tjr&t|jdd t |tjr"|jd ur$tj|jd d S d S d S t |tjr>tj|jd tj|jd d S d S )Nr   r   r         ?)	
isinstancer%   r&   r   weightr    init	constant_rw   )r/   mr5   r5   r6   r   (  s   zCait._init_weightsc                 C   s   ddhS )Nr   r   r5   r/   r5   r5   r6   no_weight_decay1  s   zCait.no_weight_decayc                 C   s
   || _ d S r}   )r   )r/   enabler5   r5   r6   set_grad_checkpointing5  s   
zCait.set_grad_checkpointingFc                    s    fdd}|S )Nc                    s   t  fdddD rdS  drt dd d S  dr9tjtj d }t dd | S  d	rCtjS td
S )Nc                    s   g | ]}  |qS r5   )
startswith)r   nnamer5   r6   r   <  s    z8Cait.group_matcher.<locals>._matcher.<locals>.<listcomp>)r   r   r   r   r   .r   zblocks_token_only.znorm.inf)anyr   rf   splitlenr   r   float)r   	to_offsetr   r   r6   _matcher;  s   



z$Cait.group_matcher.<locals>._matcherr5   )r/   coarser   r5   r   r6   group_matcher9  s   zCait.group_matcherreturnc                 C   s   | j S r}   )r   r   r5   r5   r6   get_classifierJ  s   zCait.get_classifierNr   r   c                 C   sJ   || _ |d ur|dv sJ || _|dkrt| j|| _d S t | _d S )Nr   r   )r   r   r%   r&   r   rc   r   )r/   r   r   r5   r5   r6   reset_classifierN  s
   *zCait.reset_classifierNCHWrH   indicesr   
stop_early
output_fmtintermediates_onlyc                    sh  |dv sJ d|dk}g }t t| j|\}	}
|j\ }}}| |}|| j }| |}tj	 s5|s9| j}n	| jd|
d  }t
|D ]&\}}| jrXtj	 sXt||}n||}||	v rl||ri| |n| qF|r| j||f\ fdd|D }|r|S | j|jd d	d	}t
| jD ]	\}}|||}qtj||fdd
}| |}||fS )a   Forward features that returns intermediates.

        Args:
            x: Input image tensor
            indices: Take last n blocks if int, all if None, select matching indices if sequence
            norm: Apply norm layer to all intermediates
            stop_early: Stop iterating over blocks when last desired intermediate hit
            output_fmt: Shape of intermediate feature outputs
            intermediates_only: Only return intermediate features
        )r   NLCz)Output format must be one of NCHW or NLC.r   Nr   c                    s,   g | ]}|  d dddd qS )r;   r   r8   r   r7   )r?   r@   
contiguous)r   yrI   HWr5   r6   r     s   , z.Cait.forward_intermediates.<locals>.<listcomp>r   r;   r<   )r   r   r   r=   r   r   r   rA   rS   is_scripting	enumerater   r   appendr   dynamic_feat_sizer   expandr   rt   )r/   rH   r   r   r   r   r   r?   intermediatestake_indices	max_indexr   heightwidthr   r   blk
cls_tokensr5   r   r6   forward_intermediatesU  s:   



zCait.forward_intermediatesr   
prune_norm
prune_headc                 C   sT   t t| j|\}}| jd|d  | _|rt | _|r(t | _| dd |S )z@ Prune layers not required for specified intermediates.
        Nr   r   r   )	r   r   r   r%   rc   r   r   r   r   )r/   r   r   r   r   r   r5   r5   r6   prune_intermediate_layers  s   

zCait.prune_intermediate_layersc                 C   s   |  |}|| j }| |}| jrtj st| j|}n| |}| j	
|jd dd}t| jD ]	\}}|||}q3tj||fdd}| |}|S )Nr   r;   r   r<   )r   r   r   r   rA   rS   r   r   r   r   r   r=   r   r   rt   r   )r/   rH   r   r   r   r5   r5   r6   forward_features  s   




zCait.forward_features
pre_logitsc                 C   sX   | j r| j dkr|d d dd f jddn|d d df }| |}|r'|S | |S )Nr   r   r<   r   )r   meanr   r   )r/   rH   r   r5   r5   r6   forward_head  s   6
zCait.forward_headc                 C   s   |  |}| |}|S r}   )r   r   r~   r5   r5   r6   rN     s   

zCait.forward)TFr}   )NFFr   F)r   FT)&rP   rQ   rR   r   r   r
   r   r%   rw   rv   r   r   r   r"   r   rA   rS   ignorer   r   r   Moduler   rf   r   strr   Tensorr   r   rU   r   r   r   r   r   rN   rW   r5   r5   r3   r6   r      s    Y	

 
?
r   c                 C   s:   d| v r| d } i }|   D ]\}}|||dd< q|S )Nmodelzmodule.r   )itemsreplace)
state_dictr  checkpoint_no_moduler(   r)   r5   r5   r6   checkpoint_filter_fn  s   r  Fc                 K   s2   | dd}tt| |ftt|ddd|}|S )Nout_indicesr8   getter)r  feature_cls)pretrained_filter_fnfeature_cfg)popr   r   r  r   )variant
pretrainedkwargsr  r  r5   r5   r6   _create_cait  s   
r  r   c                 K   s    | ddd dddt tddd|S )	Nr   )r8     r  r   bicubicTzpatch_embed.projr   )urlr   
input_size	pool_sizecrop_pctinterpolationfixed_input_sizer   r   
first_conv
classifierr   )r  r  r5   r5   r6   _cfg  s   r  ztimm/z1https://dl.fbaipublicfiles.com/deit/XXS24_224.pth)r8   r   r   )	hf_hub_idr  r  z1https://dl.fbaipublicfiles.com/deit/XXS24_384.pth)r  r  z1https://dl.fbaipublicfiles.com/deit/XXS36_224.pthz1https://dl.fbaipublicfiles.com/deit/XXS36_384.pthz0https://dl.fbaipublicfiles.com/deit/XS24_384.pthz/https://dl.fbaipublicfiles.com/deit/S24_224.pthz/https://dl.fbaipublicfiles.com/deit/S24_384.pthz/https://dl.fbaipublicfiles.com/deit/S36_384.pthz/https://dl.fbaipublicfiles.com/deit/M36_384.pthz/https://dl.fbaipublicfiles.com/deit/M48_448.pth)r8     r  )
zcait_xxs24_224.fb_dist_in1kzcait_xxs24_384.fb_dist_in1kzcait_xxs36_224.fb_dist_in1kzcait_xxs36_384.fb_dist_in1kzcait_xs24_384.fb_dist_in1kzcait_s24_224.fb_dist_in1kzcait_s24_384.fb_dist_in1kzcait_s36_384.fb_dist_in1kzcait_m36_384.fb_dist_in1kzcait_m48_448.fb_dist_in1kr   c                 K   4   t dddddd}td	d| it |fi |}|S )
Nr         r|   h㈵>r   r   r   r#   rq   cait_xxs24_224r  )r#  r   r  r  r  
model_argsr  r5   r5   r6   r#       r#  c                 K   r  )
Nr   r  r   r|   r!  r"  cait_xxs24_384r  )r(  r$  r%  r5   r5   r6   r(    r'  r(  c                 K   r  )
Nr   r  $   r|   r!  r"  cait_xxs36_224r  )r*  r$  r%  r5   r5   r6   r*    r'  r*  c                 K   r  )
Nr   r  r)  r|   r!  r"  cait_xxs36_384r  )r+  r$  r%  r5   r5   r6   r+  $  r'  r+  c                 K   r  )
Nr   i   r      r!  r"  cait_xs24_384r  )r-  r$  r%  r5   r5   r6   r-  +  r'  r-  c                 K   r  )
Nr   r  r   r   r!  r"  cait_s24_224r  )r.  r$  r%  r5   r5   r6   r.  2  r'  r.  c                 K   r  )
Nr   r  r   r   r!  r"  cait_s24_384r  )r/  r$  r%  r5   r5   r6   r/  9  r'  r/  c                 K   r  )
Nr   r  r)  r   r   r"  cait_s36_384r  )r0  r$  r%  r5   r5   r6   r0  @  r'  r0  c                 K   4   t dddddd}tdd| it |fi |}|S )	Nr   r   r)  r   r"  cait_m36_384r  )r2  r$  r%  r5   r5   r6   r2  G  r'  r2  c                 K   r1  )	Nr   r   0   r   r"  cait_m48_448r  )r4  r$  r%  r5   r5   r6   r4  N  r'  r4  r}   r   )r   )3__doc__	functoolsr   typingr   r   r   r   rA   torch.nnr%   	timm.datar   r	   timm.layersr
   r   r   r   r   _builderr   	_featuresr   _manipulater   r   	_registryr   r   __all__r   r   r   r   r   r   r  r  r  default_cfgsr#  r(  r*  r+  r-  r.  r/  r0  r2  r4  r5   r5   r5   r6   <module>   s    
,.*- 
s
	
0