o
    پiC                     @   s  d Z ddlmZ ddlmZmZmZmZ ddlZddl	m
Z
 ddlmZmZ ddlmZmZmZmZmZ ddlmZ dd	lmZ dd
lmZ ddlmZmZ dgZG dd de
jZG dd de
jZ G dd de
jZ!G dd de
jZ"G dd de
jZ#G dd de
jZ$d/ddZ%ee%dde%dde%dde%ddde%ddd d!d"d#Z&d0d%d&Z'ed0d'd(Z(ed0d)d*Z)ed0d+d,Z*ed0d-d.Z+dS )1z
InceptionNeXt paper: https://arxiv.org/abs/2303.16900
Original implementation & weights from: https://github.com/sail-sg/inceptionnext
    )partial)ListOptionalTupleUnionNIMAGENET_DEFAULT_MEANIMAGENET_DEFAULT_STD)trunc_normal_DropPath	to_2tupleget_paddingSelectAdaptivePool2d   )build_model_with_cfg)feature_take_indices)checkpoint_seq)register_modelgenerate_default_cfgsMetaNeXtc                       s2   e Zd ZdZ				d
 fdd	Zdd	 Z  ZS )InceptionDWConv2dz% Inception depthwise convolution
                ?r   c           	         s   t    t|| }t||d}t||d}tj||||||d| _tj||d|fd|fd|f|d| _tj|||df|df|df|d| _|d|  |||f| _	d S )Ndilation)paddingr   groupsr   r   r   )
super__init__intr   nnConv2d	dwconv_hwdwconv_wdwconv_hsplit_indexes)	selfin_chssquare_kernel_sizeband_kernel_sizebranch_ratior   gcsquare_paddingband_padding	__class__ N/home/ubuntu/.local/lib/python3.10/site-packages/timm/models/inception_next.pyr      s"   


zInceptionDWConv2d.__init__c                 C   sB   t j|| jdd\}}}}t j|| || || |fddS )Nr   )dim)torchsplitr&   catr#   r$   r%   )r'   xx_idx_hwx_wx_hr1   r1   r2   forward2   s   zInceptionDWConv2d.forward)r   r   r   r   )__name__
__module____qualname____doc__r   r<   __classcell__r1   r1   r/   r2   r      s    r   c                       s8   e Zd ZdZddejdddf fdd	Zdd Z  ZS )	ConvMlpz MLP using 1x1 convs that keeps spatial dims
    copied from timm: https://github.com/huggingface/pytorch-image-models/blob/v0.6.11/timm/models/layers/mlp.py
    NT        c                    s   t    |p|}|p|}t|}tj||d|d d| _|r#||nt | _| | _t	|| _
tj||d|d d| _d S )Nr   r   )kernel_sizebias)r   r   r   r!   r"   fc1IdentitynormactDropoutdropfc2)r'   in_featureshidden_featuresout_features	act_layer
norm_layerrE   rK   r/   r1   r2   r   B   s   

zConvMlp.__init__c                 C   s6   |  |}| |}| |}| |}| |}|S N)rF   rH   rI   rK   rL   r'   r7   r1   r1   r2   r<   W   s   




zConvMlp.forward)	r=   r>   r?   r@   r!   ReLUr   r<   rA   r1   r1   r/   r2   rB   =   s    rB   c                       sd   e Zd ZdZdddejeejddddf fd	d
	Zdde	de
e fddZddefddZ  ZS )MlpClassifierHeadz MLP classification head
      avgr   ư>)epsrC   Tc	           
         s   t    d| _|| _t||  | _}	|sJ dt|dd| _tj	|| j
  |	|d| _| | _||	| _tj	|	||d| _t|| _d S )NFCannot disable poolingT	pool_typeflatten)rE   )r   r   use_convrM   r    num_featuresr   global_poolr!   Linear	feat_multrF   rI   rH   rL   rJ   rK   )
r'   rM   num_classesr\   	mlp_ratiorP   rQ   rK   rE   rN   r/   r1   r2   r   d   s   

zMlpClassifierHead.__init__Nrc   r\   c                 C   sL   |d ur|s
J dt |dd| _|dkrt| j|| _d S t | _d S )NrZ   Tr[   r   )r   r`   r!   ra   r_   rG   rL   )r'   rc   r\   r1   r1   r2   reset}   s   *zMlpClassifierHead.resetF
pre_logitsc                 C   sD   |  |}| |}| |}| |}| |}|r|S | |S rR   )r`   rF   rI   rH   rK   rL   r'   r7   rf   r1   r1   r2   r<      s   




zMlpClassifierHead.forwardrR   F)r=   r>   r?   r@   r!   GELUr   	LayerNormr   r    r   strre   boolr<   rA   r1   r1   r/   r2   rU   `   s    rU   c                       s>   e Zd ZdZdeejedejddf fdd	Z	dd	 Z
  ZS )
MetaNeXtBlockz MetaNeXtBlock Block
    Args:
        dim (int): Number of input channels.
        drop_path (float): Stochastic depth rate. Default: 0.0
        ls_init_value (float): Init value for Layer Scale. Default: 1e-6.
    r      rX   rC   c
           
         s|   t    |||d| _||| _||t|| |d| _|r)t|t	| nd | _
|	dkr7t|	| _d S t | _d S )Nr   )rP   rC   )r   r   token_mixerrH   r    mlpr!   	Parameterr4   onesgammar   rG   	drop_path)
r'   r3   r   ro   rQ   	mlp_layerrd   rP   ls_init_valuert   r/   r1   r2   r      s   

$zMetaNeXtBlock.__init__c                 C   sV   |}|  |}| |}| |}| jd ur"|| jdddd}| || }|S )Nr   )ro   rH   rp   rs   mulreshapert   )r'   r7   shortcutr1   r1   r2   r<      s   



zMetaNeXtBlock.forward)r=   r>   r?   r@   r   r!   BatchNorm2drB   ri   r   r<   rA   r1   r1   r/   r2   rm      s    
rm   c                	       s:   e Zd Zdddddeejddf	 fdd	Zdd	 Z  ZS )
MetaNeXtStage   )r   r   N      ?rn   c                    s   t    d| _|dks|d |d kr(t|
|tj||d||d d| _nt | _|p3dg| }g }t|D ]}|	t
||d || |||	|
|d q:tj| | _d S )NFr   r   r}   )rD   strider   rC   )r3   r   rt   rv   ro   rP   rQ   rd   )r   r   grad_checkpointingr!   
Sequentialr"   
downsamplerG   rangeappendrm   blocks)r'   r(   out_chsr   depthr   drop_path_ratesrv   ro   rP   rQ   rd   stage_blocksir/   r1   r2   r      s8   



zMetaNeXtStage.__init__c                 C   s8   |  |}| jrtj st| j|}|S | |}|S rR   )r   r   r4   jitis_scriptingr   r   rS   r1   r1   r2   r<      s   

zMetaNeXtStage.forward)	r=   r>   r?   r   r!   ri   r   r<   rA   r1   r1   r/   r2   r|      s    -r|   c                       sX  e Zd ZdZddddddeejejdd	d	d
f fdd	Zdd Z	e
jjd4ddZe
jjdejfddZd5dedee fddZe
jjd6ddZe
jjdd Z					d7d e
jd!eeeee f  d"ed#ed$ed%edeee
j ee
jee
j f f fd&d'Z	(		d8d!eeee f d)ed*efd+d,Zd-d. Zd4d/efd0d1Zd2d3 Z  Z S )9r   a   MetaNeXt
        A PyTorch impl of : `InceptionNeXt: When Inception Meets ConvNeXt` - https://arxiv.org/abs/2303.16900

    Args:
        in_chans (int): Number of input image channels. Default: 3
        num_classes (int): Number of classes for classification head. Default: 1000
        depths (tuple(int)): Number of blocks at each stage. Default: (3, 3, 9, 3)
        dims (tuple(int)): Feature dimension at each stage. Default: (96, 192, 384, 768)
        token_mixers: Token mixer function. Default: nn.Identity
        norm_layer: Normalization layer. Default: nn.BatchNorm2d
        act_layer: Activation function for MLP. Default: nn.GELU
        mlp_ratios (int or tuple(int)): MLP ratios. Default: (4, 4, 4, 3)
        drop_rate (float): Head dropout rate
        drop_path_rate (float): Stochastic depth rate. Default: 0.
        ls_init_value (float): Init value for Layer Scale. Default: 1e-6.
    r   rV   rW       r   r   	   r   `        i   )rn   rn   rn   r   rC   rX   c                    s  t    t|}t|ttfs|g| }t|
ttfs!|
g| }
|| _|| _|| _g | _	t
t
j||d ddd||d | _dd td|t||D }|d }d}d}t
 | _t|D ]a}|dksm|dkrodnd}||kr|dkr||9 }d}||9 }|dv rdnd}|| }| jt|||dkr|nd||f|| || ||	|| ||
| d	 |}|  j	t||d
| dg7  _	qc|| _t| j|| j|d| _| jj| _| | j d S )Nr   rn   )rD   r   c                 S   s   g | ]}|  qS r1   )tolist).0r7   r1   r1   r2   
<listcomp>  s    z%MetaNeXt.__init__.<locals>.<listcomp>r   r}   )r   r}   )	r   r   r   r   rv   rP   ro   rQ   rd   zstages.)num_chs	reductionmodule)r\   rK   )r   r   len
isinstancelisttuplerc   r`   	drop_ratefeature_infor!   r   r"   stemr4   linspacesumr5   stagesr   r   r|   dictr_   rU   headhead_hidden_sizeapply_init_weights)r'   in_chansrc   r`   output_stridedepthsdimstoken_mixersrQ   rP   
mlp_ratiosr   drop_path_raterv   	num_stagedp_ratesprev_chscurr_strider   r   r   first_dilationr   r/   r1   r2   r      s\   



"
"
zMetaNeXt.__init__c                 C   sF   t |tjtjfrt|jdd |jd ur!tj|jd d S d S d S )Ng{Gz?)stdr   )	r   r!   r"   ra   r
   weightrE   init	constant_)r'   mr1   r1   r2   r   @  s   
zMetaNeXt._init_weightsFc                 C   s   t d|rddS ddgdS )Nz^stemz^stages\.(\d+))z^stages\.(\d+)\.downsample)r   )z^stages\.(\d+)\.blocks\.(\d+)N)r   r   )r   )r'   coarser1   r1   r2   group_matcherF  s   zMetaNeXt.group_matcherreturnc                 C   s   | j jS rR   )r   rL   r'   r1   r1   r2   get_classifierP  s   zMetaNeXt.get_classifierNrc   r`   c                 C   s   || _ | j|| d S rR   )rc   r   re   )r'   rc   r`   r1   r1   r2   reset_classifierT  s   zMetaNeXt.reset_classifierTc                 C   s   | j D ]}||_qd S rR   )r   r   )r'   enablesr1   r1   r2   set_grad_checkpointingX  s   
zMetaNeXt.set_grad_checkpointingc                 C   s   t  S rR   )setr   r1   r1   r2   no_weight_decay]  s   zMetaNeXt.no_weight_decayNCHWr7   indicesrH   
stop_early
output_fmtintermediates_onlyc                 C   s   |dv sJ dg }t t| j|\}}	| |}tj s |s$| j}
n	| jd|	d  }
t|
D ]\}}||}||v rB|| q1|rG|S ||fS )a   Forward features that returns intermediates.

        Args:
            x: Input image tensor
            indices: Take last n blocks if int, all if None, select matching indices if sequence
            norm: Apply norm layer to compatible intermediates
            stop_early: Stop iterating over blocks when last desired intermediate hit
            output_fmt: Shape of intermediate feature outputs
            intermediates_only: Only return intermediate features
        Returns:

        )r   zOutput shape must be NCHW.Nr   )	r   r   r   r   r4   r   r   	enumerater   )r'   r7   r   rH   r   r   r   intermediatestake_indices	max_indexr   feat_idxstager1   r1   r2   forward_intermediatesa  s   

zMetaNeXt.forward_intermediatesr   
prune_norm
prune_headc                 C   s<   t t| j|\}}| jd|d  | _|r| dd |S )z@ Prune layers not required for specified intermediates.
        Nr   r   rW   )r   r   r   r   )r'   r   r   r   r   r   r1   r1   r2   prune_intermediate_layers  s
   z"MetaNeXt.prune_intermediate_layersc                 C      |  |}| |}|S rR   )r   r   rS   r1   r1   r2   forward_features     

zMetaNeXt.forward_featuresrf   c                 C   s   |r	| j ||dS |  |S )N)rf   )r   rg   r1   r1   r2   forward_head  s   zMetaNeXt.forward_headc                 C   r   rR   )r   r   rS   r1   r1   r2   r<     r   zMetaNeXt.forwardrh   rR   )T)NFFr   F)r   FT)!r=   r>   r?   r@   r   r!   r{   ri   r   r   r4   r   ignorer   Moduler   r    r   rk   r   r   r   Tensorr   r   rl   r   r   r   r   r   r<   rA   r1   r1   r/   r2   r      sv    C	
 
,
 c                 K   s   | dddddt tddd
|S )	NrV   )r      r   )   r   g      ?bicubiczstem.0zhead.fc2)
urlrc   
input_size	pool_sizecrop_pctinterpolationmeanr   
first_conv
classifierr   )r   kwargsr1   r1   r2   _cfg  s   r   ztimm/)	hf_hub_idgffffff?)r   r   )r   r   r   )   r   r~   )r   r   r   r   )zinception_next_atto.sail_in1kzinception_next_tiny.sail_in1kzinception_next_small.sail_in1kzinception_next_base.sail_in1kz!inception_next_base.sail_in1k_384Fc                 K   s$   t t| |fdtdddi|}|S )Nfeature_cfg)r   r   r}   r   T)out_indicesflatten_sequential)r   r   r   )variant
pretrainedr   modelr1   r1   r2   _create_inception_next  s   
r   c                 K   s6   t ddttdddd}td	d| it |fi |S )
N)r}   r}      r}   )(   P      i@  r   g      ?)r*   r+   r   r   r   inception_next_attor   )r   )r   r   r   r   r   r   
model_argsr1   r1   r2   r     s
   r   c                 K   ,   t ddtd}tdd| it |fi |S )Nr   r   r   inception_next_tinyr   )r   r   r   r   r   r1   r1   r2   r     
   r   c                 K   r   )Nr   r      r   r   r   inception_next_smallr   )r   r   r   r1   r1   r2   r     r   r   c                 K   r   )Nr   )      i   i   r   inception_next_baser   )r  r   r   r1   r1   r2   r    r   r  )r   rh   ),r@   	functoolsr   typingr   r   r   r   r4   torch.nnr!   	timm.datar   r	   timm.layersr
   r   r   r   r   _builderr   	_featuresr   _manipulater   	_registryr   r   __all__r   r   rB   rU   rm   r|   r   r   default_cfgsr   r   r   r   r  r1   r1   r1   r2   <module>   s`    '#-'7 
=
	