o
    پiY                     @   sd  d Z ddlmZ ddlmZmZmZmZ ddlZddlm	Z	 ddl
mZmZ ddlmZmZmZmZmZmZ dd	lmZ dd
lmZ ddlmZ ddlmZmZ G dd de	jZG dd de	jZG dd de	jZ G dd de	jZ!G dd de	jZ"G dd de	jZ#G dd de	jZ$dd Z%dIddZ&ee&d d!e&d d!e&d d!e&d d!e&d d!e&d d!e&d d"d#d$e&d d"d#d$e&d d"d#d$e&d d!e&d d%d%d&d'd(e&d d)d*e&d+d,d-d.d/Z'dJd1d2Z(edJd3d4Z)edJd5d6Z*edJd7d8Z+edJd9d:Z,edJd;d<Z-edJd=d>Z.edJd?d@Z/edJdAdBZ0edJdCdDZ1edJdEdFZ2edJdGdHZ3dS )Kz
MambaOut models for image classification.
Some implementations are modified from:
timm (https://github.com/rwightman/pytorch-image-models),
MetaFormer (https://github.com/sail-sg/metaformer),
InceptionNeXt (https://github.com/sail-sg/inceptionnext)
    )OrderedDict)ListOptionalTupleUnionN)nnIMAGENET_DEFAULT_MEANIMAGENET_DEFAULT_STD)trunc_normal_DropPath	LayerNorm
LayerScaleClNormMlpClassifierHeadget_act_layer   )build_model_with_cfg)feature_take_indices)checkpoint_seq)register_modelgenerate_default_cfgsc                       s<   e Zd ZdZdddejefdef fddZdd	 Z	  Z
S )
StemzV Code modified from InternImage:
        https://github.com/OpenGVLab/InternImage
       `   Tmid_normc                    sj   t    tj||d dddd| _|r||d nd | _| | _tj|d |dddd| _||| _d S )N   r   r   kernel_sizestridepadding)	super__init__r   Conv2dconv1norm1actconv2norm2)selfin_chsout_chsr   	act_layer
norm_layer	__class__ H/home/ubuntu/.local/lib/python3.10/site-packages/timm/models/mambaout.pyr!      s$   
zStem.__init__c                 C   sp   |  |}| jd ur|dddd}| |}|dddd}| |}| |}|dddd}| |}|S )Nr   r   r   r   )r#   r$   permuter%   r&   r'   r(   xr/   r/   r0   forward6   s   





zStem.forward)__name__
__module____qualname____doc__r   GELUr   boolr!   r4   __classcell__r/   r/   r-   r0   r      s    r   c                       ,   e Zd Zddef fdd	Zdd Z  ZS )DownsampleNormFirstr      c                    s.   t    ||| _tj||dddd| _d S Nr   r   r   r   )r    r!   normr   r"   convr(   r)   r*   r,   r-   r/   r0   r!   E   s   

zDownsampleNormFirst.__init__c                 C   s8   |  |}|dddd}| |}|dddd}|S Nr   r   r   r   )r@   r1   rA   r2   r/   r/   r0   r4   U   s
   

zDownsampleNormFirst.forwardr5   r6   r7   r   r!   r4   r;   r/   r/   r-   r0   r=   C       r=   c                       r<   )
Downsampler   r>   c                    s.   t    tj||dddd| _||| _d S r?   )r    r!   r   r"   rA   r@   rB   r-   r/   r0   r!   _   s   
zDownsample.__init__c                 C   s8   | dddd}| |}| dddd}| |}|S rC   )r1   rA   r@   r2   r/   r/   r0   r4   o   s
   

zDownsample.forwardrD   r/   r/   r-   r0   rF   ]   rE   rF   c                       s^   e Zd ZdZddejdeddf fdd	Zddede	e
 defddZddefddZ  ZS )MlpHeadz MLP classification head
      avg           Tc	           
         s   t    |d urt|| }	nd }	|| _|| _|	p|| _||| _|	r@tt	dt
||	fd| fd||	fg| _|	| _n|| _t | _|dkrUtj
| j||dnt | _t|| _d S )Nfcr%   r@   r   )bias)r    r!   int	pool_typein_featureshidden_sizer@   r   
Sequentialr   Linear
pre_logitsnum_featuresIdentityrL   Dropouthead_dropout)
r(   rP   num_classesrO   r+   	mlp_ratior,   	drop_raterM   rQ   r-   r/   r0   r!   {   s&   





$zMlpHead.__init__NFrY   rO   reset_otherc                 C   sX   |d ur|| _ |rt | _t | _| j| _|dkr%t| j|| _d S t | _d S )Nr   )	rO   r   rV   r@   rT   rP   rU   rS   rL   )r(   rY   rO   r\   r/   r/   r0   reset   s   

*zMlpHead.resetrT   c                 C   sH   | j dkr
|d}| |}| |}| |}|r|S | |}|S )NrI   )r   r   )rO   meanr@   rT   rX   rL   r(   r3   rT   r/   r/   r0   r4      s   





zMlpHead.forward)NFF)r5   r6   r7   r8   r   r9   r   r!   rN   r   strr:   r]   r4   r;   r/   r/   r-   r0   rG   w   s    #	rG   c                       s:   e Zd ZdZddddeejdf fdd	Zd	d
 Z  Z	S )GatedConvBlocka   Our implementation of Gated CNN Block: https://arxiv.org/pdf/1612.08083
    Args:
        conv_ratio: control the number of channels to conduct depthwise convolution.
            Conduct convolution on partial channels can improve paraitcal efficiency.
            The idea of partial channels is from ShuffleNet V2 (https://arxiv.org/abs/1807.11164) and
            also used by InceptionNeXt (https://arxiv.org/abs/2303.16900) and FasterNet (https://arxiv.org/abs/2303.03667)
    UUUUUU@         ?NrK   c	                    s   t    ||| _t|| }
t||
d | _| | _t|| }|
|
| |f| _tj	||||d |d| _
t|
|| _|d urGt|nt | _|dkrWt|| _d S t | _d S )Nr   )r   r   groupsrK   )r    r!   r@   rN   r   rS   fc1r%   split_indicesr"   rA   fc2r   rV   lsr   	drop_path)r(   dimexpansion_ratior   
conv_ratiols_init_valuer,   r+   rk   kwargshiddenconv_channelsr-   r/   r0   r!      s"   

$zGatedConvBlock.__init__c                 C   s   |}|  |}| |}tj|| jdd\}}}|dddd}| |}|dddd}| | |tj	||fdd }| 
|}| |}|| S )N)rl   r   r   r   r   )r@   rg   torchsplitrh   r1   rA   ri   r%   catrj   rk   )r(   r3   shortcutgicr/   r/   r0   r4      s   


"

zGatedConvBlock.forward)
r5   r6   r7   r8   r   r   r9   r!   r4   r;   r/   r/   r-   r0   rb      s    rb   c                
       sV   e Zd Zdddddddeejdf
dee d	ed
edee	 f fddZ
dd Z  ZS )MambaOutStageNrJ   rc   rd   re    rK   dim_outdepth
downsamplero   c              	      s   t    p|d| _|dkrt|d| _n|dkr&t|d| _n|ks,J t | _tj fddt	|D  | _
d S )NFrA   )r,   conv_nfc                    s:   g | ]}t  tttfr| nd qS ))rl   rm   r   rn   ro   r,   r+   rk   )rb   
isinstancelisttuple).0jr+   rn   r}   rk   rm   r   ro   r,   r/   r0   
<listcomp>  s    z*MambaOutStage.__init__.<locals>.<listcomp>)r    r!   grad_checkpointingrF   r   r=   r   rV   rR   rangeblocks)r(   rl   r}   r~   rm   r   rn   r   ro   r,   r+   rk   r-   r   r0   r!      s   

zMambaOutStage.__init__c                 C   s8   |  |}| jrtj st| j|}|S | |}|S N)r   r   rt   jitis_scriptingr   r   r2   r/   r/   r0   r4     s   

zMambaOutStage.forward)r5   r6   r7   r   r   r9   r   rN   ra   floatr!   r4   r;   r/   r/   r-   r0   r{      s*    	(r{   c                       sL  e Zd ZdZdddddeejddd	d
dddddf fdd	Zdd Ze	j
jd4ddZe	j
jd5ddZe	j
jdejfddZd6dedee fddZ					d7d e	jd!eeeee f  d"ed#ed$ed%edeee	j ee	jee	j f f fd&d'Z	(		
d8d!eeee f d)ed*efd+d,Zd-d. Zd4d/efd0d1Zd2d3 Z  ZS )9MambaOuta<   MetaFormer
        A PyTorch impl of : `MetaFormer Baselines for Vision`  -
          https://arxiv.org/abs/2210.13452

    Args:
        in_chans (int): Number of input image channels. Default: 3.
        num_classes (int): Number of classes for classification head. Default: 1000.
        depths (list or tuple): Number of blocks at each stage. Default: [3, 3, 9, 3].
        dims (int): Feature dimension at each stage. Default: [96, 192, 384, 576].
        downsample_layers: (list or tuple): Downsampling layers before each stage.
        drop_path_rate (float): Stochastic depth rate. Default: 0.
        output_norm: norm before classifier head. Default: partial(nn.LayerNorm, eps=1e-6).
        head_fn: classification head. Default: nn.Linear.
        head_dropout (float): dropout for MLP classifier. Default: 0.
    r   rH   rI   r   r   	   r   r        i@  re   rc   rd   TNrA   rK   defaultc                    s  t    || _|| _d| _t|ttfs|g}t|ttfs"|g}t|}t	|}|| _
g | _t||d |||d| _|d }dd td|t||D }d}d}t | _t|D ]M}|| }|dksl|dkrndnd}||9 }t|||| |
||	|dkr|nd	||||| d
}| j| |}|  jt||d| dg7  _||| 7 }q^|dkrt|||||d| _nt||t|d |||d| _|| _| jj| _| | j  d S )NNHWCr   )r   r+   r,   c                 S   s   g | ]}|  qS r/   )tolist)r   r3   r/   r/   r0   r   U  s    z%MambaOut.__init__.<locals>.<listcomp>rJ   r   r   r|   )rl   r}   r~   r   rn   rm   r   ro   r,   r+   rk   zstages.)num_chs	reductionmoduler   )rO   r[   r,   )rQ   rO   r,   r[   )!r    r!   rY   r[   
output_fmtr   r   r   r   len	num_stagefeature_infor   stemrt   linspacesumru   r   rR   stagesr   r{   appenddictrG   headr   rN   rU   head_hidden_sizeapply_init_weights)r(   in_chansrY   global_pooldepthsdimsr,   r+   rn   rm   r   stem_mid_normro   r   drop_path_rater[   head_fnr   prev_dimdp_ratescurcurr_stridery   rl   r   stager-   r/   r0   r!   ,  s   
"
 
	

zMambaOut.__init__c                 C   sF   t |tjtjfrt|jdd |jd ur!tj|jd d S d S d S )Ng{Gz?)stdr   )	r   r   r"   rS   r   weightrM   init	constant_)r(   mr/   r/   r0   r     s   
zMambaOut._init_weightsFc                 C   s   t d|rddS ddgdS )Nz^stemz^stages\.(\d+))z^stages\.(\d+)\.downsample)r   )z^stages\.(\d+)\.blocks\.(\d+)N)r   r   )r   )r(   coarser/   r/   r0   group_matcher  s   zMambaOut.group_matcherc                 C   s   | j D ]}||_qd S r   )r   r   )r(   enablesr/   r/   r0   set_grad_checkpointing  s   
zMambaOut.set_grad_checkpointingreturnc                 C   s   | j jS r   )r   rL   )r(   r/   r/   r0   get_classifier  s   zMambaOut.get_classifierrY   r   c                 C   s   || _ | j|| d S r   )rY   r   r]   )r(   rY   r   r/   r/   r0   reset_classifier  s   zMambaOut.reset_classifierNCHWr3   indicesr@   
stop_earlyr   intermediates_onlyc                 C   s   |dv sJ d|dk}g }t t| j|\}	}
| |}tj s$|s(| j}n	| jd|
d  }t|D ]\}}||}||	v rF|| q5|rPdd |D }|rT|S ||fS )a   Forward features that returns intermediates.

        Args:
            x: Input image tensor
            indices: Take last n blocks if int, all if None, select matching indices if sequence
            norm: Apply norm layer to compatible intermediates
            stop_early: Stop iterating over blocks when last desired intermediate hit
            output_fmt: Shape of intermediate feature outputs
            intermediates_only: Only return intermediate features
        Returns:

        )r   r   z*Output format must be one of NCHW or NHWC.r   Nr   c                 S   s    g | ]}| d ddd qS )r   r   r   r   )r1   
contiguous)r   yr/   r/   r0   r     s     z2MambaOut.forward_intermediates.<locals>.<listcomp>)	r   r   r   r   rt   r   r   	enumerater   )r(   r3   r   r@   r   r   r   channel_firstintermediatestake_indices	max_indexr   feat_idxr   r/   r/   r0   forward_intermediates  s$   

zMambaOut.forward_intermediatesr   
prune_norm
prune_headc                 C   s<   t t| j|\}}| jd|d  | _|r| dd |S )z@ Prune layers not required for specified intermediates.
        Nr   r   r|   )r   r   r   r   )r(   r   r   r   r   r   r/   r/   r0   prune_intermediate_layers  s
   z"MambaOut.prune_intermediate_layersc                 C      |  |}| |}|S r   )r   r   r2   r/   r/   r0   forward_features     

zMambaOut.forward_featuresrT   c                 C   s$   |r| j ||d}|S |  |}|S )N)rT   )r   r_   r/   r/   r0   forward_head  s   
zMambaOut.forward_headc                 C   r   r   )r   r   r2   r/   r/   r0   r4     r   zMambaOut.forwardr`   )Tr   )NFFr   F)r   FT)r5   r6   r7   r8   r   r   r9   r!   r   rt   r   ignorer   r   Moduler   rN   r   ra   r   Tensorr   r   r:   r   r   r   r   r   r4   r;   r/   r/   r-   r0   r     sx    \	 
1
r   c                 C   s   d| v r| d } d| v r| S dd l }i }|  D ]?\}}|dd}|dd|}|dd	|}|d
r<|d
d}n|drS|dd}|dd}|dd}|||< q|S )Nmodelzstem.conv1.weightr   zdownsample_layers.0.zstem.zstages.([0-9]+).([0-9]+)zstages.\1.blocks.\2zdownsample_layers.([0-9]+)zstages.\1.downsampleznorm.z
head.norm.zhead.z	head.fc1.zhead.pre_logits.fc.zhead.pre_logits.norm.z	head.fc2.zhead.fc.)reitemsreplacesub
startswith)
state_dictr   r   out_dictkvr/   r/   r0   checkpoint_filter_fn  s$   


r   r|   c                 K   s    | ddddddt tddd	|S )
NrH   )r      r   )r      r   )rd   rd   re   bicubicz
stem.conv1zhead.fc)urlrY   
input_sizetest_input_size	pool_sizecrop_pctinterpolationr^   r   
first_conv
classifierr   )r   rp   r/   r/   r0   _cfg
  s   r   ztimm/)	hf_hub_idgffffff?re   )r   r   test_crop_pct)r   r   r   squash)   r   )r   r   r   	crop_moder   i-.  )r   rY   )r      r   )r   r   r   )   r   )r   r   r   )zmambaout_femto.in1kzmambaout_kobe.in1kzmambaout_tiny.in1kzmambaout_small.in1kzmambaout_base.in1kzmambaout_small_rw.sw_e450_in1kz#mambaout_base_short_rw.sw_e500_in1kz"mambaout_base_tall_rw.sw_e500_in1kz"mambaout_base_wide_rw.sw_e500_in1kz+mambaout_base_plus_rw.sw_e150_in12k_ft_in1kz0mambaout_base_plus_rw.sw_e150_r384_in12k_ft_in1kz#mambaout_base_plus_rw.sw_e150_in12ktest_mambaoutFc                 K   s&   t t| |fttdddd|}|S )N)r   r   r   r   T)out_indicesflatten_sequential)pretrained_filter_fnfeature_cfg)r   r   r   r   )variant
pretrainedrp   r   r/   r/   r0   _create_mambaoutA  s   
r   c                 K   s*   t ddd}tdd| it |fi |S )Nr   0   r   r   r   r   r   mambaout_femtor   )r  r   r   r   rp   
model_argsr/   r/   r0   r  L  s   r  c                 K   2   t g dg dd}tdd| it |fi |S )N)r   r      r   r   r  mambaout_kober   )r  r  r  r/   r/   r0   r  R     r  c                 K   r  )Nr   r   r  mambaout_tinyr   )r
  r  r  r/   r/   r0   r
  W  r	  r
  c                 K   r  )Nr   rJ      r   r   r  mambaout_smallr   )r  r  r  r/   r/   r0   r  ]  r	  r  c                 K   r  )Nr        i   i   r  mambaout_baser   )r  r  r  r/   r/   r0   r  c  r	  r  c                 K   s:   t g dg dddddd}td
d	| it |fi |S )Nr  r   Fr   ư>norm_mlp)r   r   r   r   ro   r   mambaout_small_rwr   )r  r  r  r/   r/   r0   r  i  s   r  c              
   K   6   t ddddddddd	}tdd| it |fi |S )N)r   r      r   r        @      ?Fr   r  r  r   r   rm   rn   r   r   ro   r   mambaout_base_short_rwr   )r  r  r  r/   r/   r0   r  v     
r  c              
   K   r  )Nr   rJ      r   r  g      @r  Fr   r  r  r  mambaout_base_tall_rwr   )r  r  r  r/   r/   r0   r    r  r  c                 K   8   t ddddddddd	d
	}tdd| it |fi |S )Nr  r  r        ?Fr   r  silur  	r   r   rm   rn   r   r   ro   r+   r   mambaout_base_wide_rwr   )r#  r  r  r/   r/   r0   r#       r#  c                 K   r  )Nr  r  r  r   Fr   r  r!  r  r"  mambaout_base_plus_rwr   )r%  r  r  r/   r/   r0   r%    r$  r%  c              
   K   r  )N)r   r   r   r   )       r   @   r   Fr   g-C6?r!  r  )r   r   rm   r   r   ro   r+   r   r   r   )r   r  r  r/   r/   r0   r     r  r   )r|   r`   )4r8   collectionsr   typingr   r   r   r   rt   r   	timm.datar	   r
   timm.layersr   r   r   r   r   r   _builderr   	_featuresr   _manipulater   	_registryr   r   r   r   r=   rF   rG   rb   r{   r   r   r   default_cfgsr   r  r  r
  r  r  r  r  r  r#  r%  r   r/   r/   r/   r0   <module>   s     -<53 W

,