o
    پiU                     @   s
  d Z ddlmZmZmZmZmZ ddlZddlm	Z	 ddl
mZmZ ddlmZmZmZmZmZ ddlmZ ddlmZ dd	lmZ dd
lmZmZ dgZddddZddddZG dd dej	jZ G dd de	j!Z"G dd de	jZ#G dd de	jZ$G dd de	jZ%G dd de	jZ&G dd  d e	jZ'G d!d" d"e	jZ(G d#d$ d$e	jZ)G d%d& d&e	jZ*G d'd( d(e	jZ+G d)d de	jZ,d*d+ Z-d<d-d.Z.ee.d/d0e.d/d0e.d/d0d1Z/d=d3d4Z0ed=d5e,fd6d7Z1ed=d5e,fd8d9Z2ed=d5e,fd:d;Z3dS )>a   EfficientFormer

@article{li2022efficientformer,
  title={EfficientFormer: Vision Transformers at MobileNet Speed},
  author={Li, Yanyu and Yuan, Geng and Wen, Yang and Hu, Eric and Evangelidis, Georgios and Tulyakov,
   Sergey and Wang, Yanzhi and Ren, Jian},
  journal={arXiv preprint arXiv:2206.01191},
  year={2022}
}

Based on Apache 2.0 licensed code at https://github.com/snap-research/EfficientFormer, Copyright (c) 2022 Snap Inc.

Modifications and timm support by / Copyright 2022, Ross Wightman
    )DictListOptionalTupleUnionNIMAGENET_DEFAULT_MEANIMAGENET_DEFAULT_STD)DropPathtrunc_normal_	to_2tupleMlpndgrid   )build_model_with_cfg)feature_take_indices)checkpoint_seq)generate_default_cfgsregister_modelEfficientFormer)0   `      i  )@      i@  i   )r        i   )l1l3l7)            )r#   r#      r"   )r"   r"         c                       sp   e Zd ZU eeejf ed< 					d fdd	Ze	 d fd
d	Z
dejdejfddZdd Z  ZS )	Attentionattention_bias_cacher       r&   r#      c                    s  t    || _|d | _|| _|| | _t|| | _| j| | _|| _	t
|| jd | j | _t
| j|| _t|}ttt|d t|d d}|dd d d f |dd d d f   }|d |d  |d  }tj
t||d |d  | _| d| i | _d S )Ng      r!   r   r   .attention_bias_idxs)super__init__	num_headsscalekey_dimkey_attn_dimintval_dimval_attn_dim
attn_rationnLinearqkvprojr   torchstackr   arangeflattenabs	Parameterzerosattention_biasesregister_bufferr(   )selfdimr0   r.   r5   
resolutionposrel_pos	__class__ O/home/ubuntu/.local/lib/python3.10/site-packages/timm/models/efficientformer.pyr-   .   s"   


*("
zAttention.__init__Tc                    s(   t  | |r| jri | _d S d S d S N)r,   trainr(   )rC   moderH   rJ   rK   rM   J   s   

zAttention.traindevicereturnc                 C   sZ   t j s| jr| jd d | jf S t|}|| jvr(| jd d | jf | j|< | j| S rL   )r:   jit
is_tracingtrainingrA   r+   strr(   )rC   rO   
device_keyrJ   rJ   rK   get_attention_biasesP   s   

zAttention.get_attention_biasesc           
      C   s   |j \}}}| |}|||| jddddd}|j| j| j| jgdd\}}}||dd | j	 }	|	| 
|j }	|	jdd}	|	| dd||| j}| |}|S )Nr   r!   r   r    rD   )shaper8   reshaper.   permutesplitr0   r3   	transposer/   rV   rO   softmaxr4   r9   )
rC   xBNCr8   qkvattnrJ   rJ   rK   forwardY   s   
 
zAttention.forward)r   r)   r&   r#   r*   T)__name__
__module____qualname__r   rT   r:   Tensor__annotations__r-   no_gradrM   rO   rV   rh   __classcell__rJ   rJ   rH   rK   r'   +   s   
 	r'   c                       s&   e Zd Zejejf fdd	Z  ZS )Stem4c              
      s   t    d| _| dtj||d dddd | d||d  | d|  | d	tj|d |dddd | d
|| | d|  d S )Nr#   conv1r!   r    r   kernel_sizestridepaddingnorm1act1conv2norm2act2)r,   r-   ru   
add_moduler6   Conv2d)rC   in_chsout_chs	act_layer
norm_layerrH   rJ   rK   r-   i   s   
  zStem4.__init__)rj   rk   rl   r6   ReLUBatchNorm2dr-   rp   rJ   rJ   rH   rK   rq   h   s    rq   c                       s4   e Zd ZdZdddejf fdd	Zdd Z  ZS )	
Downsamplez
    Downsampling via strided conv w/ norm
    Input: tensor in shape [B, C, H, W]
    Output: tensor in shape [B, C, H/stride, W/stride]
    r    r!   Nc                    s>   t    |d u r|d }tj|||||d| _||| _d S )Nr!   rs   )r,   r-   r6   r}   convnorm)rC   r~   r   rt   ru   rv   r   rH   rJ   rK   r-   |   s
   
zDownsample.__init__c                 C      |  |}| |}|S rL   )r   r   rC   r`   rJ   rJ   rK   rh         

zDownsample.forward)	rj   rk   rl   __doc__r6   r   r-   rh   rp   rJ   rJ   rH   rK   r   u   s    r   c                       s$   e Zd Z fddZdd Z  ZS )Flatc                    s   t    d S rL   )r,   r-   rC   rH   rJ   rK   r-         zFlat.__init__c                 C   s   | ddd}|S )Nr!   r   )r=   r^   r   rJ   rJ   rK   rh      s   zFlat.forwardrj   rk   rl   r-   rh   rp   rJ   rJ   rH   rK   r      s    r   c                       s*   e Zd ZdZd fdd	Zdd Z  ZS )PoolingzP
    Implementation of pooling for PoolFormer
    --pool_size: pooling size
    r    c                    s&   t    tj|d|d dd| _d S )Nr   r!   F)ru   rv   count_include_pad)r,   r-   r6   	AvgPool2dpool)rC   	pool_sizerH   rJ   rK   r-      s   
zPooling.__init__c                 C   s   |  || S rL   )r   r   rJ   rJ   rK   rh      r   zPooling.forward)r    )rj   rk   rl   r   r-   rh   rp   rJ   rJ   rH   rK   r      s    r   c                       s8   e Zd ZdZddejejdf fdd	Zdd Z  Z	S )ConvMlpWithNormz`
    Implementation of MLP with 1*1 convolutions.
    Input: tensor with shape [B, C, H, W]
    N        c                    s   t    |p|}|p|}t||d| _|d ur||nt | _| | _t||d| _|d ur6||nt | _	t
|| _d S )Nr   )r,   r-   r6   r}   fc1Identityrw   actfc2rz   Dropoutdrop)rC   in_featureshidden_featuresout_featuresr   r   r   rH   rJ   rK   r-      s   
	zConvMlpWithNorm.__init__c                 C   sJ   |  |}| |}| |}| |}| |}| |}| |}|S rL   )r   rw   r   r   r   rz   r   rJ   rJ   rK   rh      s   






zConvMlpWithNorm.forward)
rj   rk   rl   r   r6   GELUr   r-   rh   rp   rJ   rJ   rH   rK   r      s    r   c                       &   e Zd Zd fdd	Zdd Z  ZS )
LayerScaleh㈵>Fc                    *   t    || _t|t| | _d S rL   r,   r-   inplacer6   r?   r:   onesgammarC   rD   init_valuesr   rH   rJ   rK   r-         
zLayerScale.__init__c                 C   s   | j r	|| jS || j S rL   )r   mul_r   r   rJ   rJ   rK   rh      s   zLayerScale.forwardr   Fr   rJ   rJ   rH   rK   r          r   c                       s6   e Zd Zdejejdddf fdd	Zdd Z  ZS )MetaBlock1d      @r   r   c                    sx   t    ||| _t|| _||| _t|t|| ||d| _|dkr)t	|nt
 | _t||| _t||| _d S )N)r   r   r   r   r   )r,   r-   rw   r'   token_mixerrz   r   r2   mlpr
   r6   r   	drop_pathr   ls1ls2)rC   rD   	mlp_ratior   r   	proj_dropr   layer_scale_init_valuerH   rJ   rK   r-      s   





zMetaBlock1d.__init__c              
   C   sD   ||  | | | | }||  | | | | }|S rL   )r   r   r   rw   r   r   rz   r   rJ   rJ   rK   rh      s     zMetaBlock1d.forward)	rj   rk   rl   r6   r   	LayerNormr-   rh   rp   rJ   rJ   rH   rK   r      s    r   c                       r   )LayerScale2dr   Fc                    r   rL   r   r   rH   rJ   rK   r-      r   zLayerScale2d.__init__c                 C   s*   | j dddd}| jr||S || S )Nr   rW   )r   viewr   r   )rC   r`   r   rJ   rJ   rK   rh      s   zLayerScale2d.forwardr   r   rJ   rJ   rH   rK   r      r   r   c                       s8   e Zd Zddejejdddf fdd	Zdd Z  ZS )	MetaBlock2dr    r   r   r   c	           	         s   t    t|d| _t||| _|dkrt|nt | _	t
|t|| |||d| _t||| _|dkr=t|| _d S t | _d S )N)r   r   )r   r   r   r   )r,   r-   r   r   r   r   r
   r6   r   
drop_path1r   r2   r   r   
drop_path2)	rC   rD   r   r   r   r   r   r   r   rH   rJ   rK   r-      s   

$zMetaBlock2d.__init__c                 C   s8   ||  | | | }|| | | | }|S rL   )r   r   r   r   r   r   r   rJ   rJ   rK   rh     s   zMetaBlock2d.forward)	rj   rk   rl   r6   r   r   r-   rh   rp   rJ   rJ   rH   rK   r      s    r   c                
       s@   e Zd Zddddejejejdddf
 fdd	Zd	d
 Z  Z	S )EfficientFormerStageTr   r    r   r   r   c                    s   t    d| _|rt|||	d| _|}n||ksJ t | _g }|r.||kr.|t  t	|D ]<}|| d }|rQ||krQ|t
||||
||| |d q2|t|||||	||| |d |rn||krn|t  q2tj| | _d S )NF)r~   r   r   r   )r   r   r   r   r   r   )r   r   r   r   r   r   r   )r,   r-   grad_checkpointingr   
downsampler6   r   appendr   ranger   r   
Sequentialblocks)rC   rD   dim_outdepthr   num_vitr   r   r   r   norm_layer_clr   r   r   r   	block_idx
remain_idxrH   rJ   rK   r-     sP   

zEfficientFormerStage.__init__c                 C   s8   |  |}| jrtj st| j|}|S | |}|S rL   )r   r   r:   rQ   is_scriptingr   r   r   rJ   rJ   rK   rh   [  s   

zEfficientFormerStage.forward)
rj   rk   rl   r6   r   r   r   r-   rh   rp   rJ   rJ   rH   rK   r     s    <r   c                       sl  e Zd Zdddddddddejejejdddf fd	d
	Zdd Ze	j
jdd Ze	j
jd3ddZe	j
jd4ddZe	j
jdejfddZd5dedee fddZe	j
jd4ddZ					d6de	jd eeeee f  d!ed"ed#ed$edeee	j ee	jee	j f f fd%d&Z	'		d7d eeee f d(ed)efd*d+Zd,d- Zd3d.efd/d0Zd1d2 Z  Z S )8r   Nr      avgr   r#   r   r   c                    s  t    || _|| _t||d |d| _|d }t|| _| jd }dd t	d|t
||D }|p>dd| jd   }g }g | _t| jD ]@}t||| || || ||kr\|nd|	||||||| |
d}|| }|| |  jt|| d	|d	  d
| dg7  _qItj| | _|d  | _| _|| j| _t|| _|dkrt| j|nt | _|dkrt|d |nt | _d| _| | j d S )Nr   )r   r   c                 S   s   g | ]}|  qS rJ   )tolist).0r`   rJ   rJ   rK   
<listcomp>  s    z,EfficientFormer.__init__.<locals>.<listcomp>Fri   )
r   r   r   r   r   r   r   r   r   r   r!   stages.)num_chs	reductionmodulerW   F) r,   r-   num_classesglobal_poolrq   stemlen
num_stagesr:   linspacesumr]   feature_infor   r   r   dictr6   r   stagesnum_featureshead_hidden_sizer   r   	head_dropr7   r   head	head_distdistilled_trainingapply_init_weights)rC   depths
embed_dimsin_chansr   r   downsamplesr   
mlp_ratiosr   r   r   r   r   	drop_rateproj_drop_ratedrop_path_ratekwargsprev_dim
last_stagedprr   istagerH   rJ   rK   r-   f  sL   


"
. "zEfficientFormer.__init__c                 C   sP   t |tjr"t|jdd t |tjr$|jd ur&tj|jd d S d S d S d S )Ng{Gz?)stdr   )
isinstancer6   r7   r   weightbiasinit	constant_)rC   mrJ   rJ   rK   r     s   zEfficientFormer._init_weightsc                 C   s   dd |   D S )Nc                 S   s   h | ]
\}}d |v r|qS )rA   rJ   )r   re   _rJ   rJ   rK   	<setcomp>  s    z2EfficientFormer.no_weight_decay.<locals>.<setcomp>)named_parametersr   rJ   rJ   rK   no_weight_decay  s   zEfficientFormer.no_weight_decayFc                 C   s   t dddgd}|S )Nz^stem)z^stages\.(\d+)N)z^norm)i )r   r   )r   )rC   coarsematcherrJ   rJ   rK   group_matcher  s
   zEfficientFormer.group_matcherTc                 C   s   | j D ]}||_qd S rL   )r   r   )rC   enablesrJ   rJ   rK   set_grad_checkpointing  s   
z&EfficientFormer.set_grad_checkpointingrP   c                 C   s   | j | jfS rL   r   r   r   rJ   rJ   rK   get_classifier  s   zEfficientFormer.get_classifierr   r   c                 C   s^   || _ |d ur
|| _|dkrt| j|nt | _|dkr(t| j|| _d S t | _d S )Nr   )r   r   r6   r7   r   r   r   r   )rC   r   r   rJ   rJ   rK   reset_classifier  s
    *z EfficientFormer.reset_classifierc                 C   s
   || _ d S rL   )r   )rC   r  rJ   rJ   rK   set_distilled_training  s   
z&EfficientFormer.set_distilled_trainingNCHWr`   indicesr   
stop_early
output_fmtintermediates_onlyc              	   C   s   |dv sJ dg }t t| j|\}}	| |}|j\}
}}}| jd }tj s,|s0| j}n	| jd|	d  }d}t	|D ]?\}}||}||k rR|j\}
}}}||v r~||kry|ra| 
|n|}|||
|d |d ddddd q?|| q?|r|S ||kr| 
|}||fS )	a   Forward features that returns intermediates.

        Args:
            x: Input image tensor
            indices: Take last n blocks if int, all if None, select matching indices if sequence
            norm: Apply norm layer to compatible intermediates
            stop_early: Stop iterating over blocks when last desired intermediate hit
            output_fmt: Shape of intermediate feature outputs
            intermediates_only: Only return intermediate features
        Returns:

        )r  zOutput shape must be NCHW.r   Nr   r!   rW   r    )r   r   r   r   rZ   r   r:   rQ   r   	enumerater   r   r[   r\   )rC   r`   r  r   r  r  r  intermediatestake_indices	max_indexra   rc   HWlast_idxr   feat_idxr   x_interrJ   rJ   rK   forward_intermediates  s2   

,

z%EfficientFormer.forward_intermediatesr   
prune_norm
prune_headc                 C   sJ   t t| j|\}}| jd|d  | _|rt | _|r#| dd |S )z@ Prune layers not required for specified intermediates.
        Nr   r    )r   r   r   r6   r   r   r	  )rC   r  r  r  r  r  rJ   rJ   rK   prune_intermediate_layers  s   
z)EfficientFormer.prune_intermediate_layersc                 C   s"   |  |}| |}| |}|S rL   )r   r   r   r   rJ   rJ   rK   forward_features  s   


z EfficientFormer.forward_features
pre_logitsc                 C   sh   | j dkr|jdd}| |}|r|S | || |}}| jr.| jr.tj	 s.||fS || d S )Nr   r   rX   r!   )
r   meanr   r   r   r   rS   r:   rQ   r   )rC   r`   r  x_distrJ   rJ   rK   forward_head  s   

zEfficientFormer.forward_headc                 C   r   rL   )r  r"  r   rJ   rJ   rK   rh   )  r   zEfficientFormer.forwardr   ri   rL   )NFFr  F)r   FT)!rj   rk   rl   r6   r   r   r   r-   r   r:   rQ   ignorer   r  r  Moduler  r2   r   rT   r	  r
  rm   r   r   boolr   r  r  r  r"  rh   rp   rJ   rJ   rH   rK   r   d  s|    C
 
8
c                 C   s   d| v r| S i }ddl }d}|  D ]]\}}|dr3|dd}|dd}|d	d
}|dd}|d|r=|d7 }|dd| d|}|dd| d|}|dd| d|}|dd|}|dd}|||< q|S )z$ Remap original checkpoints -> timm zstem.0.weightr   Npatch_embedzpatch_embed.0
stem.conv1zpatch_embed.1z
stem.norm1zpatch_embed.3z
stem.conv2zpatch_embed.4z
stem.norm2znetwork\.(\d+)\.proj\.weightr   znetwork.(\d+).(\d+)r   z
.blocks.\2znetwork.(\d+).projz.downsample.convznetwork.(\d+).normz.downsample.normzlayer_scale_([0-9])z
ls\1.gamma	dist_headr   )reitems
startswithreplacematchsub)
state_dictmodelout_dictr)  	stage_idxre   rf   rJ   rJ   rK   checkpoint_filter_fn/  s(   

r3  r  c                 K   s    | ddd dddt tddd|S )	Nr   )r    r   r   Tgffffff?bicubicr'  r  )urlr   
input_sizer   fixed_input_sizecrop_pctinterpolationr   r   
first_conv
classifierr   )r5  r   rJ   rJ   rK   _cfgJ  s   r<  ztimm/)	hf_hub_id)z!efficientformer_l1.snap_dist_in1kz!efficientformer_l3.snap_dist_in1kz!efficientformer_l7.snap_dist_in1kFc                 K   s2   | dd}tt| |ftt|ddd|}|S )Nout_indicesr#   getter)r>  feature_cls)pretrained_filter_fnfeature_cfg)popr   r   r3  r   )variant
pretrainedr   r>  r0  rJ   rJ   rK   _create_efficientformerb  s   
rF  rP   c                 K   4   t td td dd}tdd| it |fi |S )Nr   r   r   r   r   efficientformer_l1rE  )rI  r   EfficientFormer_depthEfficientFormer_widthrF  rE  r   
model_argsrJ   rJ   rK   rI  m     rI  c                 K   rG  )Nr   r#   rH  efficientformer_l3rE  )rP  rJ  rM  rJ   rJ   rK   rP  w  rO  rP  c                 K   rG  )Nr   r&   rH  efficientformer_l7rE  )rQ  rJ  rM  rJ   rJ   rK   rQ    rO  rQ  )r  r   )4r   typingr   r   r   r   r   r:   torch.nnr6   	timm.datar   r	   timm.layersr
   r   r   r   r   _builderr   	_featuresr   _manipulater   	_registryr   r   __all__rL  rK  r$  r'   r   rq   r   r   r   r   r   r   r   r   r   r   r3  r<  default_cfgsrF  rI  rP  rQ  rJ   rJ   rJ   rK   <module>   sh    =
$
!"G L

		