o
    پi[                     @   s*  d Z ddlZddlmZ ddlmZmZmZmZ ddl	Z	ddl
m  mZ ddl	mZ ddlmZmZ ddlmZmZmZmZmZmZmZ dd	lmZ dd
lmZ ddlmZ ddlm Z m!Z! ddl"m#Z#m$Z$ dgZ%eG dd dej&Z'G dd dej&Z(G dd dej&Z)G dd dej&Z*G dd dej&Z+G dd dej&Z,d7ddZ-dd Z.d8d d!Z/d9d#d$Z0e$e0d%d&dd'e0d%d&dd'e0d%d(d)dd*e0d%d(d)dd*e0d%d(d)dd*e0d%d)dd'd+Z1e#d8d,e,fd-d.Z2e#d8d,e,fd/d0Z3e#d8d,e,fd1d2Z4e#d8d,e,fd3d4Z5e#d8d,e,fd5d6Z6dS ):a#   EdgeNeXt

Paper: `EdgeNeXt: Efficiently Amalgamated CNN-Transformer Architecture for Mobile Vision Applications`
 - https://arxiv.org/abs/2206.10589

Original code and weights from https://github.com/mmaaz60/EdgeNeXt

Modifications and additions for timm by / Copyright 2022, Ross Wightman
    N)partial)ListOptionalTupleUnion)nnIMAGENET_DEFAULT_MEANIMAGENET_DEFAULT_STD)trunc_normal_tf_DropPathLayerNorm2dMlpcreate_conv2dNormMlpClassifierHeadClassifierHead   )build_model_with_cfg)feature_take_indices)register_notrace_module)named_applycheckpoint_seq)register_modelgenerate_default_cfgsEdgeNeXtc                       s6   e Zd Zd	 fdd	Zdeeeef fddZ  ZS )
PositionalEncodingFourier       '  c                    sB   t    tj|d |dd| _dtj | _|| _|| _	|| _
d S )N   r   )kernel_size)super__init__r   Conv2dtoken_projectionmathpiscaletemperature
hidden_dimdim)selfr)   r*   r(   	__class__ H/home/ubuntu/.local/lib/python3.10/site-packages/timm/models/edgenext.pyr"       s   

z"PositionalEncodingFourier.__init__shapec              	   C   s  | j jj}| j jj}t|j|tjd }|jdtj	d}|jdtj	d}d}||d d dd d d f |  | j
 }||d d d d dd f |  | j
 }tj| jtj|dtj	}| jdtj|ddd	 | j  }|d d d d d d d f | }	|d d d d d d d f | }
tj|	d d d d d d d
d df  |	d d d d d d dd df  fddd}	tj|
d d d d d d d
d df  |
d d d d d d dd df  fddd}
tj|
|	fddd
ddd}|  ||}|S )N)devicedtyper   )r2   r   ư>)r2   r1   floor)rounding_moder      r*      )r$   weightr1   r2   torchzerostoboolcumsumfloat32r'   aranger)   int64r(   divstacksincosflattencatpermute)r+   r0   r1   r2   inv_masky_embedx_embedepsdim_tpos_xpos_yposr.   r.   r/   forward(   s>   

((   $$$$z!PositionalEncodingFourier.forward)r   r   r   )__name__
__module____qualname__r"   r   intrR   __classcell__r.   r.   r,   r/   r      s     r   c                
       sD   e Zd Zddddddeejddejdf	 fd	d
	Zdd Z  Z	S )	ConvBlockN   r   Tr7   r3   rM           c                    s   t    |p|}|dkp||k| _t||||d|d| _||| _t|t|| |	d| _|dkr<t	
|t| nd | _|
dkrJt|
| _d S t	 | _d S )Nr   T)r    stride	depthwisebias	act_layerr   r[   )r!   r"   shortcut_after_dwr   conv_dwnormr   rV   mlpr   	Parameterr;   onesgammar   Identity	drop_path)r+   r*   dim_outr    r\   	conv_biasexpand_ratiols_init_value
norm_layerr`   ri   r,   r.   r/   r"   D   s   

"$zConvBlock.__init__c                 C   sr   |}|  |}| jr|}|dddd}| |}| |}| jd ur(| j| }|dddd}|| | }|S )Nr   r   r9   r   )rb   ra   rI   rc   rd   rg   ri   )r+   xshortcutr.   r.   r/   rR   [   s   




zConvBlock.forward
rS   rT   rU   r   r   	LayerNormGELUr"   rR   rW   r.   r.   r,   r/   rX   C   s    rX   c                       s>   e Zd Z				d
 fdd	Zdd Zejjdd	 Z  Z	S )CrossCovarianceAttn   Fr[   c                    sf   t    || _tt|dd| _tj||d |d| _	t
|| _t||| _t
|| _d S )Nr   r9   )r^   )r!   r"   	num_headsr   re   r;   rf   r(   LinearqkvDropout	attn_dropproj	proj_drop)r+   r*   rv   qkv_biasrz   r|   r,   r.   r/   r"   m   s   
zCrossCovarianceAttn.__init__c           
      C   s   |j \}}}| |||d| jdddddd}|d\}}}tj|ddtj|dddd | j	 }	|	j
dd}	| |	}	|	| }|dddd|||}| |}| |}|S )	Nr9   r4   r   r   r7   r   r8   )r0   rx   reshaperv   rI   unbindF	normalize	transposer(   softmaxrz   r{   r|   )
r+   ro   BNCrx   qkvattnr.   r.   r/   rR   ~   s   (*


zCrossCovarianceAttn.forwardc                 C   s   dhS )Nr(   r.   r+   r.   r.   r/   no_weight_decay   s   z#CrossCovarianceAttn.no_weight_decay)ru   Fr[   r[   )
rS   rT   rU   r"   rR   r;   jitignorer   rW   r.   r.   r,   r/   rt   l   s    rt   c                       sJ   e Zd Zdddddddeejddejdddf fdd		Zd
d Z  Z	S )SplitTransposeBlockr   ru   r7   Tr3   rZ   r[   c              
      s@  t    ttt|| tt|| }|| _td|d | _g }t	| jD ]}|
t||dd|d q*t|| _d | _|rJt|d| _|	|| _|dkr]t|t| nd | _t|||||d| _|	|dd	| _t|t|| |
d
| _|dkrt|t| nd | _|dkrt|| _d S t | _d S )Nr   r9   T)r    r]   r^   r8   r   )rv   r}   rz   r|   r3   rZ   r_   r[   )r!   r"   maxrV   r%   ceilr5   width
num_scalesrangeappendr   r   
ModuleListconvspos_embdr   norm_xcare   r;   rf   	gamma_xcart   xcarc   r   rd   rg   r   rh   ri   )r+   r*   r   rv   rl   use_pos_embrk   r}   rm   rn   r`   ri   rz   r|   r   r   ir,   r.   r/   r"      s(   
&
"
"$zSplitTransposeBlock.__init__c              	   C   s\  |}|j t| jd dd}g }|d }t| jD ]\}}|dkr'|||  }||}|| q||d  t|d}|j\}}	}
}|||	|
| 	ddd}| j
d urq| 
||
|f|d|jd 	ddd}|| }|| | j| | |  }|||
||	}| |}| |}| jd ur| j| }|	dddd}|| | }|S )Nr   r8   r   r4   r   r9   )chunklenr   	enumerater   r;   rH   r0   r   rI   r   ri   r   r   r   rc   rd   rg   )r+   ro   rp   spxspospr   convr   r   HWpos_encodingr.   r.   r/   rR      s2   
* 



zSplitTransposeBlock.forwardrq   r.   r.   r,   r/   r      s    'r   c                       sP   e Zd Zddddddddddddeeejd	d
ejf fdd	Zdd Z	  Z
S )EdgeNeXtStager   r   r7   rY   FT      ?Nr3   rZ   c                    s   t    d| _|s|dkrt | _nt||tj||dd|d| _|}g }t|D ]9}||| k rQ|	t
|||rB|dkrB|nd|||	||| ||d
 n|	t||||	|
|||| ||d
 |}q-tj| | _d S )NFr   r   r    r\   r^   r   )
r*   rj   r\   rk   r    rl   rm   ri   rn   r`   )
r*   r   rv   rl   r   rk   rm   ri   rn   r`   )r!   r"   grad_checkpointingr   rh   
downsample
Sequentialr#   r   r   rX   r   blocks)r+   in_chsout_chsr\   depthnum_global_blocksrv   scalesr    rl   r   downsample_blockrk   rm   drop_path_ratesrn   norm_layer_clr`   stage_blocksr   r,   r.   r/   r"      sT   
zEdgeNeXtStage.__init__c                 C   s8   |  |}| jrtj st| j|}|S | |}|S N)r   r   r;   r   is_scriptingr   r   r+   ro   r.   r.   r/   rR   #  s   

zEdgeNeXtStage.forward)rS   rT   rU   r   r   r   rr   rs   r"   rR   rW   r.   r.   r,   r/   r      s$    Cr   c                       sH  e Zd Zddddddddd	d
dddddddejddf fdd	Zejjd5ddZ	ejjd6ddZ
ejjdejfddZd7dedee fddZ				 	d8d!ejd"eeeee f  d#ed$ed%ed&edeeej eejeej f f fd'd(Z	)		d9d"eeee f d*ed+efd,d-Zd.d/ Zd5d0efd1d2Zd3d4 Z  ZS ):r   r9     avg   0   X      r9   r9   	   r9   )r   r   r   r   )r9      rY   r   )ru   ru   ru   ru   )r   r   r9   r7   )FTFFr3   r   r7   FTpatchr[   c              
      sD  t    || _|| _|| _ttdd}ttjdd}g | _	|dv s$J |dkr>t
tj||d dd|d||d | _nt
tj||d ddd|d	||d | _d}g }d
d td|t||D }|d }tdD ]q}|dks{|dkr}dnd}||9 }|td%i d|d|| d|d|| d|| d|| d|| d|	| d|d|| d|
| d|d|d|d|d|d| || }|  j	t||d| d g7  _	qqtj
| | _|d!  | _| _|r|| j| _t| j||| jd"| _nt | _t| j||| j|d#| _ttt|d$|  d S )&Nr3   rZ   )r   overlapr   r   r7   r   r   )r    r\   paddingr^   c                 S   s   g | ]}|  qS r.   )tolist).0ro   r.   r.   r/   
<listcomp>Z  s    z%EdgeNeXt.__init__.<locals>.<listcomp>r   r   r   r   r\   r   r   rv   r   r   rl   r    r   rm   r   rk   rn   r   r`   zstages.)num_chs	reductionmoduler4   )	pool_type	drop_rate)r   r   rn   )head_init_scaler.   )r!   r"   num_classesglobal_poolr   r   r   r   rr   feature_infor   r#   stemr;   linspacesumsplitr   r   r   dictstagesnum_featureshead_hidden_sizenorm_prer   headrh   r   r   _init_weights)r+   in_chansr   r   dimsdepthsglobal_block_countskernel_sizesheads	d2_scalesr   rm   r   rl   r   rk   	stem_typehead_norm_firstr`   drop_path_rater   rn   r   curr_strider   dp_ratesr   r   r\   r,   r.   r/   r"   -  s   


"	
"

zEdgeNeXt.__init__c                 C   s   t d|rddS g ddS )Nz^stemz^stages\.(\d+)))z^stages\.(\d+)\.downsample)r   )z^stages\.(\d+)\.blocks\.(\d+)N)z	^norm_pre)i )r   r   )r   )r+   coarser.   r.   r/   group_matcher  s   zEdgeNeXt.group_matcherc                 C   s   | j D ]}||_qd S r   )r   r   )r+   enablesr.   r.   r/   set_grad_checkpointing  s   
zEdgeNeXt.set_grad_checkpointingreturnc                 C   s   | j jS r   )r   fcr   r.   r.   r/   get_classifier  s   zEdgeNeXt.get_classifierNr   r   c                 C   s   || _ | j|| d S r   )r   r   reset)r+   r   r   r.   r.   r/   reset_classifier  s   zEdgeNeXt.reset_classifierNCHWro   indicesrc   
stop_early
output_fmtintermediates_onlyc                 C   s   |dv sJ dg }t t| j|\}}	| |}t| jd }
tj s'|s+| j}n	| jd|	d  }t|D ]\}}||}||v rW|rP||
krP| |}n|}|	| q8|r\|S ||
kre| |}||fS )a   Forward features that returns intermediates.

        Args:
            x: Input image tensor
            indices: Take last n blocks if int, all if None, select matching indices if sequence
            norm: Apply norm layer to compatible intermediates
            stop_early: Stop iterating over blocks when last desired intermediate hit
            output_fmt: Shape of intermediate feature outputs
            intermediates_only: Only return intermediate features
        Returns:

        )r   zOutput shape must be NCHW.r   N)
r   r   r   r   r;   r   r   r   r   r   )r+   ro   r   rc   r   r   r   intermediatestake_indices	max_indexlast_idxr   feat_idxstagex_interr.   r.   r/   forward_intermediates  s*   


zEdgeNeXt.forward_intermediatesr   
prune_norm
prune_headc                 C   sJ   t t| j|\}}| jd|d  | _|rt | _|r#| dd |S )z@ Prune layers not required for specified intermediates.
        Nr   r    )r   r   r   r   rh   r   r   )r+   r   r  r  r   r   r.   r.   r/   prune_intermediate_layers  s   
z"EdgeNeXt.prune_intermediate_layersc                 C   s"   |  |}| |}| |}|S r   )r   r   r   r   r.   r.   r/   forward_features  s   


zEdgeNeXt.forward_features
pre_logitsc                 C   s   |r	| j |ddS |  |S )NT)r  )r   )r+   ro   r  r.   r.   r/   forward_head  s   zEdgeNeXt.forward_headc                 C   s   |  |}| |}|S r   )r  r	  r   r.   r.   r/   rR     s   

zEdgeNeXt.forwardF)Tr   )NFFr   F)r   FT)rS   rT   rU   r   rs   r"   r;   r   r   r   r   Moduler   rV   r   strr   Tensorr   r   r>   r   r  r  r  r	  rR   rW   r.   r.   r,   r/   r   ,  s|    a
 
4
r   c                 C   s   t | tjrt| jdd | jd urtj| j d S d S t | tjrGt| jdd tj| j |rId|v rK| jj	
| | jj	
| d S d S d S d S )Ng{Gz?)stdhead.)
isinstancer   r#   r   r:   r^   initzeros_rw   datamul_)r   namer   r.   r.   r/   r     s   
r   c                 C   s   d| v sd| v r
| S d| v r| d } nd| v r| d } nd| v r$| d } i }ddl }|  D ]N\}}|dd	}|d
d|}|dd|}|dd}|dd}|dd}|drc|dd}|jdkrxd|vrx| | j}||}|||< q.|S )z Remap FB checkpoints -> timm zhead.norm.weightznorm_pre.weight	model_emamodel
state_dictr   Nzdownsample_layers.0.zstem.zstages.([0-9]+).([0-9]+)zstages.\1.blocks.\2z#downsample_layers.([0-9]+).([0-9]+)zstages.\1.downsample.\2dwconvrb   pwconvzmlp.fcr  zhead.fc.znorm.rc   z	head.normr   r   )	reitemsreplacesub
startswithndimr  r0   r   )r  r  out_dictr  r   r   model_shaper.   r.   r/   checkpoint_filter_fn  s0   




r#  Fc                 K   s&   t t| |fttdddd|}|S )N)r   r   r   r9   T)out_indicesflatten_sequential)pretrained_filter_fnfeature_cfg)r   r   r#  r   )variant
pretrainedkwargsr  r.   r.   r/   _create_edgenext#  s   
r+  r  c                 K   s   | dddddt tddd
|S )	Nr   )r9      r,  )ru   ru   g?bicubiczstem.0zhead.fc)
urlr   
input_size	pool_sizecrop_pctinterpolationmeanr  
first_conv
classifierr   )r.  r*  r.   r.   r/   _cfg,  s   r6  ztimm/)r9      r7  )	hf_hub_idtest_input_sizetest_crop_pctgffffff?)r9   @  r;  )r8  r1  r9  r:  )zedgenext_xx_small.in1kzedgenext_x_small.in1kzedgenext_small.usi_in1kzedgenext_base.usi_in1kzedgenext_base.in21k_ft_in1kzedgenext_small_rw.sw_in1kr   c                 K   ,   t dddd}tdd| it |fi |S )N)r   r      r   r   r7   r7   r7   r7   r   r   r   edgenext_xx_smallr)  )r@  r   r+  r)  r*  
model_argsr.   r.   r/   r@  Q     r@  c                 K   r<  )Nr   )r   @   d      r>  r?  edgenext_x_smallr)  )rH  rA  rB  r.   r.   r/   rH  \  rD  rH  c                 K   s*   t ddd}tdd| it |fi |S )Nr   )r   `      i0  r   r   edgenext_smallr)  )rL  rA  rB  r.   r.   r/   rL  g  s   rL  c                 K   s2   t g dg dd}tdd| it |fi |S )Nr   )P   rJ  r7  iH  rK  edgenext_baser)  )rN  rA  rB  r.   r.   r/   rN  r  s   rN  c                 K   s0   t dddddd}td	d| it |fi |S )
Nr   )r   rI  rG  i  TFr   )r   r   r   rk   r   edgenext_small_rwr)  )rO  rA  rB  r.   r.   r/   rO  }  s
   rO  )Nr   r
  )r  )7__doc__r%   	functoolsr   typingr   r   r   r   r;   torch.nn.functionalr   
functionalr   	timm.datar	   r
   timm.layersr   r   r   r   r   r   r   _builderr   	_featuresr   _features_fxr   _manipulater   r   	_registryr   r   __all__r  r   rX   rt   r   r   r   r   r#  r+  r6  default_cfgsr@  rH  rL  rN  rO  r.   r.   r.   r/   <module>   s~    	$$)'LM 
L

	



