o
    پi<I                     @   sD  d Z ddlmZmZmZmZ ddlZddlmZ ddl	m
Z
mZ ddlmZmZmZmZ ddlmZ ddlmZ dd	lmZmZ dd
lmZmZ dgZG dd dejZG dd dejZG dd dejZ G dd dejZ!G dd dejZ"G dd dejZ#G dd dejZ$G dd dejZ%G dd dejZ&G dd dejZ'd8d d!Z(ee(d"d#e(d"d#e(d"d#e(d"d#e(d"d#e(d"d#e(d"d#e(d"d#e(d"d#e(d"d#e(d"d#e(d"d#e(d"d#d$Z)d9d&d'Z*ed9d(d)Z+ed9d*d+Z,ed9d,d-Z-ed9d.d/Z.ed9d0d1Z/ed9d2d3Z0ed9d4d5Z1ed9d6d7Z2dS ):a   RepViT

Paper: `RepViT: Revisiting Mobile CNN From ViT Perspective`
    - https://arxiv.org/abs/2307.09283

@misc{wang2023repvit,
      title={RepViT: Revisiting Mobile CNN From ViT Perspective},
      author={Ao Wang and Hui Chen and Zijia Lin and Hengjun Pu and Guiguang Ding},
      year={2023},
      eprint={2307.09283},
      archivePrefix={arXiv},
      primaryClass={cs.CV}
}

Adapted from official impl at https://github.com/jameslahm/RepViT
    )ListOptionalTupleUnionNIMAGENET_DEFAULT_MEANIMAGENET_DEFAULT_STD)SqueezeExcitetrunc_normal_	to_ntuple	to_2tuple   )build_model_with_cfg)feature_take_indices)
checkpointcheckpoint_seq)register_modelgenerate_default_cfgsRepVitc                       .   e Zd Zd fdd	Ze dd Z  ZS )ConvNormr   r   c	           	         sf   t    | dtj|||||||dd | dt| tj| jj	| tj| jj
d d S )NcFbiasbnr   )super__init__
add_modulennConv2dBatchNorm2dinit	constant_r   weightr   )	selfin_dimout_dimksstridepaddilationgroupsbn_weight_init	__class__ F/home/ubuntu/.local/lib/python3.10/site-packages/timm/models/repvit.pyr   !   s
   
"zConvNorm.__init__c              
   C   s   | j  \}}|j|j|j d  }|j|d d d d d f  }|j|j|j |j|j d   }tj|	d| j
j |	d|jdd  | j
j| j
j| j
j| j
j|jjd}|jj| |jj| |S )N      ?r   r      )r(   paddingr*   r+   device)_modulesvaluesr#   running_varepsr   running_meanr   r   sizer   r+   shaper(   r3   r*   r4   datacopy_)r$   r   r   wbmr/   r/   r0   fuse(   s"   "
zConvNorm.fuse)r   r   r   r   r   r   __name__
__module____qualname__r   torchno_gradrA   __classcell__r/   r/   r-   r0   r       s    r   c                       r   )
NormLinearT{Gz?c                    sb   t    | dt| | dtj|||d t| jj|d |r/tj	
| jjd d S d S )Nr   lr   )stdr   )r   r   r   r   BatchNorm1dLinearr
   rK   r#   r!   r"   r   )r$   r%   r&   r   rL   r-   r/   r0   r   >   s   
zNormLinear.__init__c                 C   s   | j  \}}|j|j|j d  }|j| jj| jj |j|j d   }|j|d d d f  }|jd u r=|| jjj	 }n|j|d d d f  
d| jj }tj|d|d|jjd}|jj| |jj| |S )Nr1   r   r   r4   )r5   r6   r#   r7   r8   r   r   r9   rK   Tviewr   rN   r:   r4   r<   r=   )r$   r   rK   r>   r?   r@   r/   r/   r0   rA   F   s   &
$ zNormLinear.fuse)TrJ   rB   r/   r/   r-   r0   rI   =   s    rI   c                       s6   e Zd Zd fdd	Zdd Ze dd Z  ZS )	RepVggDwFc                    s   t    t|||d|d d |d| _|r't||ddd|d| _t | _ntj||ddd|d| _t	|| _|| _
|| _d S )Nr   r2   r+   r   )r   r   r   convconv1r   Identityr   r   r    dimlegacy)r$   edkernel_sizerY   r-   r/   r0   r   W   s   

zRepVggDw.__init__c                 C   s   |  | || | | S N)r   rU   rV   r$   xr/   r/   r0   forwardd   s   zRepVggDw.forwardc              	   C   s0  | j  }| jr| j }n| j}|j}|j}|j}|j}tj|g d}tjt	j
|jd |jd dd|jdg d}|| | }|| }	|jj| |jj|	 | js| j}
|
j|
j|
j d  }|j|d d d d d f  }|
j|j|
j |
j |
j|
j d   }|jj| |jj| |S )N)r   r   r   r   r   r   rP   r1   )rU   rA   rY   rV   r#   r   r   
functionalr)   rF   onesr;   r4   r<   r=   r   r7   r8   r9   )r$   rU   rV   conv_wconv_bconv1_wconv1_bidentityfinal_conv_wfinal_conv_br   r>   r?   r/   r/   r0   rA   g   s0   
&(zRepVggDw.fuseF	rC   rD   rE   r   r_   rF   rG   rA   rH   r/   r/   r-   r0   rS   V   s
    rS   c                       $   e Zd Z fddZdd Z  ZS )	RepVitMlpc                    s>   t    t||ddd| _| | _t||ddddd| _d S )Nr   r   )r,   )r   r   r   rV   actconv2)r$   r%   
hidden_dim	act_layerr-   r/   r0   r      s   
zRepVitMlp.__init__c                 C      |  | | |S r\   )rn   rm   rV   r]   r/   r/   r0   r_         zRepVitMlp.forwardrC   rD   rE   r   r_   rH   r/   r/   r-   r0   rl      s    rl   c                       &   e Zd Zd fdd	Zdd Z  ZS )RepViTBlockFc                    sJ   t t|   t|||| _|rt|dnt | _t	||| || _
d S )Ng      ?)r   ru   r   rS   token_mixerr	   r   rW   serl   channel_mixer)r$   r%   	mlp_ratior[   use_serp   rY   r-   r/   r0   r      s   zRepViTBlock.__init__c                 C   s*   |  |}| |}|}| |}|| S r\   )rv   rw   rx   r$   r^   rf   r/   r/   r0   r_      s
   


zRepViTBlock.forwardri   rs   r/   r/   r-   r0   ru          ru   c                       rk   )
RepVitStemc                    sH   t    t||d ddd| _| | _t|d |ddd| _d| _d S )Nr2      r      )r   r   r   rV   act1rn   r(   )r$   in_chsout_chsrp   r-   r/   r0   r      s
   

zRepVitStem.__init__c                 C   rq   r\   )rn   r   rV   r]   r/   r/   r0   r_      rr   zRepVitStem.forwardrs   r/   r/   r-   r0   r}      s    r}   c                       rt   )RepVitDownsampleFc                    sd   t    t|||d||d| _t|||d|d d |d| _t||dd| _t||| || _d S )NF)rz   rp   rY   r2   r   rT   )	r   r   ru   	pre_blockr   spatial_downsamplechannel_downsamplerl   ffn)r$   r%   ry   r&   r[   rp   rY   r-   r/   r0   r      s
   
zRepVitDownsample.__init__c                 C   s4   |  |}| |}| |}|}| |}|| S r\   )r   r   r   r   r{   r/   r/   r0   r_      s   



zRepVitDownsample.forwardri   rs   r/   r/   r-   r0   r      r|   r   c                       s6   e Zd Zd	 fdd	Zdd Ze dd Z  ZS )
RepVitClassifierF        c                    sl   t    t|| _|dkrt||nt | _|| _d| _	|| _
|r4|dkr-t||nt | _d S d S )Nr   F)r   r   r   Dropout	head_droprI   rW   headdistillationdistilled_trainingnum_classes	head_dist)r$   rX   r   r   dropr-   r/   r0   r      s   
 zRepVitClassifier.__init__c                 C   s^   |  |}| jr(| || |}}| jr"| jr"tj s"||fS || d S | |}|S )Nr2   )	r   r   r   r   trainingr   rF   jitis_scripting)r$   r^   x1x2r/   r/   r0   r_      s   

zRepVitClassifier.forwardc                 C   sp   | j dks	t S | j }| jr6| j }| j|j7  _| j|j7  _| jd  _| jd  _|S |S )Nr   r2   )	r   r   rW   r   rA   r   r   r#   r   )r$   r   r   r/   r/   r0   rA      s   


zRepVitClassifier.fuse)Fr   rj   r/   r/   r-   r0   r      s
    
r   c                       s&   e Zd Zd fdd	Zdd Z  ZS )	RepVitStager~   TFc	              
      s~   t    |rt||||||| _n||ksJ t | _g }	d}
t|D ]}|	t||||
|| |
 }
q%tj	|	 | _
d S )NT)r   r   r   
downsampler   rW   rangeappendru   
Sequentialblocks)r$   r%   r&   depthry   rp   r[   r   rY   r   rz   _r-   r/   r0   r      s   

zRepVitStage.__init__c                 C      |  |}| |}|S r\   )r   r   r]   r/   r/   r0   r_         

zRepVitStage.forward)r~   TFrs   r/   r/   r-   r0   r      s    r   c                       s^  e Zd Zddddddddejdd	d
f fdd	Zejjd3ddZ	ejjd4ddZ
ejjdejfddZd5dedee defddZejjd4ddZ		
	
		
d6dejdeeeee f  ded ed!ed"edeeej eejeej f f fd#d$Z	%	
	d7deeee f d&ed'efd(d)Zd*d+ Zd3d,efd-d.Zd/d0 Ze d1d2 Z  ZS )8r   r~      )0   )r2   r2   avg  Tr   Fc                    s^  t t|   d| _|| _|| _|| _|d }t|||	| _| jj	}t
dd tt|t|D }t|}t||}g | _g }t|D ]J}|dkrMdnd}|t||| || || |	|||d |rhdnd | 9 }t
 fd	d|D }|  jt|| |d
| dg7  _|| }qEtj| | _|d  | _| _t|| _t|d ||
| _d S )NFr   c                 S   s   g | ]\}}|| qS r/   r/   ).0ipr/   r/   r0   
<listcomp>  s    z#RepVit.__init__.<locals>.<listcomp>T)ry   rp   r[   r   rY   r2   r   c                    s   g | ]
}|d    d  qS )r   r/   )r   rstage_strider/   r0   r   1  s    zstages.)num_chs	reductionmodulerO   )r   r   r   grad_checkpointingglobal_pool	embed_dimr   r}   stemr(   tuplezipr   lenr   feature_infor   r   r   dictr   r   stagesnum_featureshead_hidden_sizer   r   r   r   )r$   in_chansimg_sizer   r   ry   r   r[   r   rp   r   	drop_raterY   r%   r(   
resolution
num_stages
mlp_ratiosr   r   r   r-   r   r0   r     sH    $
zRepVit.__init__c                 C   s   t dddgd}|S )Nz^stem)z^blocks\.(\d+)N)z^norm)i )r   r   )r   )r$   coarsematcherr/   r/   r0   group_matcher:  s   zRepVit.group_matcherc                 C   s
   || _ d S r\   )r   r$   enabler/   r/   r0   set_grad_checkpointing?  s   
zRepVit.set_grad_checkpointingreturnc                 C   s   | j S r\   )r   r$   r/   r/   r0   get_classifierC  s   zRepVit.get_classifierNr   r   r   c                 C   s,   || _ |d ur
|| _t| jd ||| _d S )NrO   )r   r   r   r   r   )r$   r   r   r   r/   r/   r0   reset_classifierG  s   zRepVit.reset_classifierc                 C   s   || j _d S r\   )r   r   r   r/   r/   r0   set_distilled_trainingM  s   zRepVit.set_distilled_trainingNCHWr^   indicesnorm
stop_early
output_fmtintermediates_onlyc                 C   s   |dv sJ dg }t t| j|\}}	| |}tj s |s$| j}
n	| jd|	d  }
t|
D ]\}}| jrCtj sCt	||}n||}||v rP|
| q1|rU|S ||fS )a   Forward features that returns intermediates.

        Args:
            x: Input image tensor
            indices: Take last n blocks if int, all if None, select matching indices if sequence
            norm: Apply norm layer to compatible intermediates
            stop_early: Stop iterating over blocks when last desired intermediate hit
            output_fmt: Shape of intermediate feature outputs
            intermediates_only: Only return intermediate features
        Returns:

        )r   zOutput shape must be NCHW.Nr   )r   r   r   r   rF   r   r   	enumerater   r   r   )r$   r^   r   r   r   r   r   intermediatestake_indices	max_indexr   feat_idxstager/   r/   r0   forward_intermediatesQ  s"   

zRepVit.forward_intermediatesr   
prune_norm
prune_headc                 C   s<   t t| j|\}}| jd|d  | _|r| dd |S )z@ Prune layers not required for specified intermediates.
        Nr   r    )r   r   r   r   )r$   r   r   r   r   r   r/   r/   r0   prune_intermediate_layers~  s
   z RepVit.prune_intermediate_layersc                 C   s8   |  |}| jrtj st| j|}|S | |}|S r\   )r   r   rF   r   r   r   r   r]   r/   r/   r0   forward_features  s   

zRepVit.forward_features
pre_logitsc                 C   s4   | j dkr|jddd}| |}|r|S | |S )Nr   )r2   r~   F)keepdim)r   meanr   r   )r$   r^   r   r/   r/   r0   forward_head  s   


zRepVit.forward_headc                 C   r   r\   )r   r   r]   r/   r/   r0   r_     r   zRepVit.forwardc                    s    fdd  |  d S )Nc                    sF   |   D ]\}}t|dr| }t| ||  | q | qd S )NrA   )named_childrenhasattrrA   setattr)net
child_namechildfusedfuse_childrenr/   r0   r     s   


z"RepVit.fuse.<locals>.fuse_childrenr/   r   r/   r   r0   rA     s   	zRepVit.fuseri   )T)NF)NFFr   F)r   FT)rC   rD   rE   r   GELUr   rF   r   ignorer   r   Moduler   intr   strboolr   r   Tensorr   r   r   r   r   r   r   r_   rG   rA   rH   r/   r/   r-   r0   r     st    8 
/
r   c                 K   s   | dddddt tddd
|S )	Nr   )r~   r   r   )   r   gffffff?bicubiczstem.conv1.c)zhead.head.lzhead.head_dist.l)
urlr   
input_size	pool_sizecrop_pctinterpolationr   rL   
first_conv
classifierr   )r   kwargsr/   r/   r0   _cfg  s   r   ztimm/)	hf_hub_id)zrepvit_m1.dist_in1kzrepvit_m2.dist_in1kzrepvit_m3.dist_in1kzrepvit_m0_9.dist_300e_in1kzrepvit_m0_9.dist_450e_in1kzrepvit_m1_0.dist_300e_in1kzrepvit_m1_0.dist_450e_in1kzrepvit_m1_1.dist_300e_in1kzrepvit_m1_1.dist_450e_in1kzrepvit_m1_5.dist_300e_in1kzrepvit_m1_5.dist_450e_in1kzrepvit_m2_3.dist_300e_in1kzrepvit_m2_3.dist_450e_in1kFc                 K   s0   | dd}tt| |fdtd|di|}|S )Nout_indices)r   r   r2   r~   feature_cfgT)flatten_sequentialr   )popr   r   r   )variant
pretrainedr   r   modelr/   r/   r0   _create_repvit  s   
r  c                 K   ,   t dddd}tdd| it |fi |S )	z&
    Constructs a RepViT-M1 model
    r   `      i  r2   r2      r2   Tr   r   rY   	repvit_m1r  N)r  r   r  r  r   
model_argsr/   r/   r0   r       r  c                 K   r  )	z&
    Constructs a RepViT-M2 model
    @         i   r2   r2      r2   Tr  	repvit_m2r  N)r  r  r  r/   r/   r0   r    r  r  c                 K   r  )	z&
    Constructs a RepViT-M3 model
    r  )r   r      r2   Tr  	repvit_m3r  N)r  r  r  r/   r/   r0   r  
  r  r  c                 K   *   t ddd}tdd| it |fi |S )z(
    Constructs a RepViT-M0.9 model
    r  r
  r   r   repvit_m0_9r  N)r  r  r  r/   r/   r0   r       r  c                 K   r  )z(
    Constructs a RepViT-M1.0 model
    )8   p   r   i  r
  r  repvit_m1_0r  N)r!  r  r  r/   r/   r0   r!    r  r!  c                 K   r  )z(
    Constructs a RepViT-M1.1 model
    r  r  r  repvit_m1_1r  N)r"  r  r  r/   r/   r0   r"  %  r  r"  c                 K   r  )z(
    Constructs a RepViT-M1.5 model
    r  )r   r      r   r  repvit_m1_5r  N)r$  r  r  r/   r/   r0   r$  .  r  r$  c                 K   r  )z(
    Constructs a RepViT-M2.3 model
    )P      i@  i  )   r'  "   r2   r  repvit_m2_3r  N)r)  r  r  r/   r/   r0   r)  7  r  r)  )r   ri   )3__doc__typingr   r   r   r   rF   torch.nnr   	timm.datar   r   timm.layersr	   r
   r   r   _builderr   	_featuresr   _manipulater   r   	_registryr   r   __all__r   r   rI   r   rS   rl   ru   r}   r   r   r   r   r   default_cfgsr  r  r  r  r  r!  r"  r$  r)  r/   r/   r/   r0   <module>   s    5' 
/
-