o
    پi                     @   s  d Z ddlmZ ddlmZmZ ddlmZ ddlm	Z	m
Z
mZmZmZ ddlZddlmZ ddlmZmZ ddlmZmZmZmZmZmZmZmZmZ d	d
lmZ d	dl m!Z! d	dl"m#Z# d	dl$m%Z%m&Z& ddgZ'eG dd dZ(G dd dej)Z*dde+de,de
fddZ-G dd dej)Z.e!G dd dej)Z/				dde0d e0d!e+d"ee
 d#ee
 d$e1deej2e0ee+e	f f fd%d&Z3e4dd'd(d)d*d+d,d-d.d/d0d1d2d3d4d5Z5G d6d dej)Z6	7		8		dd9ee0d:f d;ee0d:f d<ee0 d#e+d=ee+ d>eee+e	f  de(fd?d@Z7dd9ee0d:f d;ee0d:f de(fdBdCZ8	D	E	F	G	H	I	dd9ee0d:f d;ee0d:f d<e0dJe,dKe,d#e+d=e+d>eee+e	f  de(fdLdMZ9	D	H	dd9ee0d:f d;ee0d:f d#e+dNe1de(f
dOdPZ:e4di dQe:dRdSdTe:dUdSdVe:dWdSdXe:dYdSdZe:d[dSd\e:d]dSd^e:d_dSd`e9dRdSdae9dUdSdbe9dWdSdce9dYdSdde9d[dSdee9d]dSdfe9d_dSdge9dhdSdie9dRdjdkdle4dldmdndodpdqe9dRdjdkdldre4 dodsdte9dUdudkdldre4 dodsdve9dWdudkdldre4 dodsdwe9dYdudkdldre4 dodsdxe8dydSdze8d{dSd|e8d}d~dde8dddde8dddde8dddde7ddSde7ddSde7ddSde7ddIe4dddde7ddIe4dddde7ddIe4dddde7ddre4 dde7ddre4 dde7ddre4 dde9dddjdmdle4dldmdndodZ;dde+de1de	de6fddZ<dde+de	dee+e	f fddZ=e%i de=ddddddddde=ddddddddde=ddddddddde=ddddddddde=ddddddddde=ddddddddde=ddddddddd`e=dddddύdae=dddddύdbe=dddddύdce=dddddύdde=dddddύdee=dddddύdfe=dddddύdge=dddddύde=ddddddd֍de=ddddddd֍i de=ddddddd֍de=ddddddd֍dwe=ddddddލdxe=ddddddde=dddddddd|e=ddddddde=ddddddde=ddddddde=ddddddde=dddde=ddddddddde=dddde=dddde=dddde=dddde=dddde=ddde=ddde=ddddddddZ>e&dde1de	de6fddQZ?e&dde1de	de6fddTZ@e&dde1de	de6fddVZAe&dde1de	de6fddXZBe&dde1de	de6fddZZCe&dde1de	de6fdd\ZDe&dde1de	de6fdd^ZEe&dde1de	de6fdd`ZFe&dde1de	de6fddaZGe&dde1de	de6fddbZHe&dde1de	de6fddcZIe&dde1de	de6fdddZJe&dde1de	de6fddeZKe&dde1de	de6fddfZLe&dde1de	de6fd dgZMe&dde1de	de6fddiZNe&dde1de	de6fddqZOe&dde1de	de6fddtZPe&dde1de	de6fddvZQe&dde1de	de6fddwZRe&dde1de	de6fddxZSe&dde1de	de6fddzZTe&dde1de	de6fdd|ZUe&dde1de	de6fd	dZVe&dde1de	de6fd
dZWe&dde1de	de6fddZXe&dde1de	de6fddZYe&dde1de	de6fddZZe&dde1de	de6fddZ[e&dde1de	de6fddZ\e&dde1de	de6fddZ]e&dde1de	de6fddZ^e&dde1de	de6fddZ_e&dde1de	de6fddZ`e&dde1de	de6fddZae&dde1de	de6fddZbdS (  a   Normalization Free Nets. NFNet, NF-RegNet, NF-ResNet (pre-activation) Models

Paper: `Characterizing signal propagation to close the performance gap in unnormalized ResNets`
    - https://arxiv.org/abs/2101.08692

Paper: `High-Performance Large-Scale Image Recognition Without Normalization`
    - https://arxiv.org/abs/2102.06171

Official Deepmind JAX code: https://github.com/deepmind/deepmind-research/tree/master/nfnets

Status:
* These models are a work in progress, experiments ongoing.
* Pretrained weights for two models so far, more to come.
* Model details updated to closer match official JAX code now that it's released
* NF-ResNet, NF-RegNet-B, and NFNet-F models supported

Hacked together by / copyright Ross Wightman, 2021.
    )OrderedDict)	dataclassreplace)partial)AnyCallableDictOptionalTupleNIMAGENET_DEFAULT_MEANIMAGENET_DEFAULT_STD)	ClassifierHeadDropPathAvgPool2dSameScaledStdConv2dScaledStdConv2dSameget_act_layer
get_act_fnget_attnmake_divisible   )build_model_with_cfg)register_notrace_module)checkpoint_seq)generate_default_cfgsregister_modelNormFreeNetNfCfgc                   @   s6  e Zd ZU dZeeeeef ed< eeeeef ed< dZeed< dZ	e
ed< dZee ed	< dZee ed
< dZee
 ed< dZeee
ef  ed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< d Ze
ed!< dS )"r   z.Configuration for Normalization-Free Networks.depthschannelsg?alpha3x3	stem_typeNstem_chs
group_size
attn_layerattn_kwargs       @	attn_gain      ?width_factor      ?bottle_ratior   num_features   ch_divFreg
extra_convgamma_in_actsame_paddinggh㈵>std_conv_epsskipinitzero_init_fcsilu	act_layer) __name__
__module____qualname____doc__r
   int__annotations__r!   floatr#   strr$   r	   r%   r&   r'   r   r   r)   r+   r-   r.   r0   r1   boolr2   r3   r4   r5   r6   r7   r9    rC   rC   E/home/ubuntu/.local/lib/python3.10/site-packages/timm/models/nfnet.pyr   &   s.   
 c                       sF   e Zd ZdZddededef fdd	Zd
ej	dej	fddZ
  ZS )GammaActz.Activation function with gamma scaling factor.relur*   Fact_typegammainplacec                    s$   t    t|| _|| _|| _dS )zInitialize GammaAct.

        Args:
            act_type: Type of activation function.
            gamma: Scaling factor for activation output.
            inplace: Whether to perform activation in-place.
        N)super__init__r   act_fnrH   rI   )selfrG   rH   rI   	__class__rC   rD   rK   C   s   


zGammaAct.__init__xreturnc                 C   s   | j || jd| jS )zzForward pass.

        Args:
            x: Input tensor.

        Returns:
            Scaled activation output.
        rI   )rL   rI   mul_rH   rM   rP   rC   rC   rD   forwardP   s   	zGammaAct.forward)rF   r*   F)r:   r;   r<   r=   rA   r@   rB   rK   torchTensorrU   __classcell__rC   rC   rN   rD   rE   @   s    rE   r*   rG   rH   rQ   c                    s   ddt dtf fdd}|S )zCreate activation function factory with gamma scaling.

    Args:
        act_type: Type of activation function.
        gamma: Scaling factor for activation output.

    Returns:
        Activation function factory.
    FrI   rQ   c                    s   t  | dS )N)rH   rI   )rE   rR   rG   rH   rC   rD   _createf   s   zact_with_gamma.<locals>._createNF)rB   rE   )rG   rH   rZ   rC   rY   rD   act_with_gamma\   s   
r\   c                       s^   e Zd ZdZdddefdededededee d	ef fd
dZde	j
de	j
fddZ  ZS )DownsampleAvgzEAvgPool downsampling as in 'D' ResNet variants with dilation support.r   Nin_chsout_chsstridedilationfirst_dilation
conv_layerc           	         s|   t t|   |dkr|nd}|dks|dkr.|dkr!|dkr!tntj}|d|ddd| _nt | _|||ddd| _dS )a7  Initialize DownsampleAvg.

        Args:
            in_chs: Input channels.
            out_chs: Output channels.
            stride: Stride for downsampling.
            dilation: Dilation rate.
            first_dilation: First dilation rate (unused).
            conv_layer: Convolution layer type.
        r      TF)	ceil_modecount_include_pad)r`   N)	rJ   r]   rK   r   nn	AvgPool2dpoolIdentityconv)	rM   r^   r_   r`   ra   rb   rc   
avg_strideavg_pool_fnrN   rC   rD   rK   n   s   
zDownsampleAvg.__init__rP   rQ   c                 C   s   |  | |S )ztForward pass.

        Args:
            x: Input tensor.

        Returns:
            Downsampled tensor.
        )rk   ri   rT   rC   rC   rD   rU      s   	zDownsampleAvg.forward)r:   r;   r<   r=   r   r>   r	   r   rK   rV   rW   rU   rX   rC   rC   rN   rD   r]   k   s(    r]   c                %       s   e Zd ZdZddddddddddddddded	fd
edee dededee dedededee dededededee	 dedee	 de	def$ fddZ
dejdejfd d!Z  ZS )"NormFreeBlockz-Normalization-Free pre-activation block.
    Nr   r*         ?TFr(           r^   r_   r`   ra   rb   r!   betar-   r%   r0   r1   r2   r6   r&   r)   r9   rc   drop_path_ratec                    s  t    |p|}|p|}t|r|| n|| |
}|	sdn||	 }|	r.|	|
 dkr.|	| }|| _|| _|| _||ksC|dksC||krOt||||||d| _nd| _| | _|||d| _	|dd| _
|||d|||d| _|r|dd| _|||dd||d| _nd| _d| _|r|dur||| _nd| _| | _|||d|rd	nd
d| _|s|dur||| _nd| _|dkrt|nt | _|rttd
| _dS d| _dS )a  Initialize NormFreeBlock.

        Args:
            in_chs: Input channels.
            out_chs: Output channels.
            stride: Stride for convolution.
            dilation: Dilation rate.
            first_dilation: First dilation rate.
            alpha: Alpha scaling factor for residual.
            beta: Beta scaling factor for pre-activation.
            bottle_ratio: Bottleneck ratio.
            group_size: Group convolution size.
            ch_div: Channel divisor for rounding.
            reg: Use RegNet-style configuration.
            extra_conv: Add extra 3x3 convolution.
            skipinit: Use skipinit initialization.
            attn_layer: Attention layer type.
            attn_gain: Attention gain factor.
            act_layer: Activation layer type.
            conv_layer: Convolution layer type.
            drop_path_rate: Stochastic depth drop rate.
        r   r   )r`   ra   rb   rc   NTrR      )r`   ra   groupsr*   rp   )	gain_init)rJ   rK   r   r!   rq   r)   r]   
downsampleact1conv1act2conv2act2bconv2battnact3conv3	attn_lastr   rg   rj   	drop_path	ParameterrV   tensorskipinit_gain)rM   r^   r_   r`   ra   rb   r!   rq   r-   r%   r0   r1   r2   r6   r&   r)   r9   rc   rr   mid_chsrt   rN   rC   rD   rK      sN   
+
	$zNormFreeBlock.__init__rP   rQ   c                 C   s   |  || j }|}| jdur| |}| |}| | |}| jdur.| | |}| jdur;| j	| | }| 
| |}| jdurP| j	| | }| |}| jdur`|| j || j | }|S )zoForward pass.

        Args:
            x: Input tensor.

        Returns:
            Output tensor.
        N)rw   rq   rv   rx   rz   ry   r|   r{   r}   r)   r   r~   r   r   r   rS   r!   )rM   rP   outshortcutrC   rC   rD   rU      s$   	







zNormFreeBlock.forward)r:   r;   r<   r=   r   r>   r	   r@   rB   r   rK   rV   rW   rU   rX   rC   rC   rN   rD   rn      sr    	
Zrn    Tr^   r_   r#   rc   r9   preact_featurec                 C   s  d}t |ddd}t }|dv sJ d|v rd|v r:d|vs J |d |d	 |d |f}	d
}
d	}t |d ddd}n$d|v rJd| d |d |f}	n	|d |d |f}	d}
t |d ddd}t|	d }tt|	|
D ]'\}\}}|| |d|d|d|d  < ||kr|dd|d|d  < |} qknd|v r|| |ddd|d< n
|| |ddd|d< d|v rtjdddd|d< d	}t|||fS )a  Create stem module for NFNet models.

    Args:
        in_chs: Input channels.
        out_chs: Output channels.
        stem_type: Type of stem ('', 'deep', 'deep_tiered', 'deep_quad', '3x3', '7x7', etc.).
        conv_layer: Convolution layer type.
        act_layer: Activation layer type.
        preact_feature: Use pre-activation feature.

    Returns:
        Tuple of (stem_module, stem_stride, stem_feature_info).
    rd   	stem.convnum_chs	reductionmodule)	r   deepdeep_tiered	deep_quadr"   7x7	deep_pool3x3_pool7x7_poolr   quadri   r/      )rd   r   r   rd   z
stem.conv3tieredrs   )rd   r   r   z
stem.conv2r   )kernel_sizer`   rk   TrR   actr"      )r`   padding)dictr   len	enumerateziprg   	MaxPool2d
Sequential)r^   r_   r#   rc   r9   r   stem_stridestem_featurestemr$   strideslast_idxicsrC   rC   rD   create_stem  s<   r   g   `U?g   yX?g   \9?g   `aK?g   ?g    ?g    `l?g   `i?g   |?g    7@g   -?g   @g   `?g   ?)identityceluelugelu
leaky_relulog_sigmoidlog_softmaxrF   relu6selusigmoidr8   softsignsoftplustanhc                       s  e Zd ZdZ						d'deded	ed
ededededef fddZ	e
jjd(dedeeef fddZe
jjd)deddfddZe
jjdejfddZd*ded
ee ddfddZde
jde
jfd d!Zd(de
jd"ede
jfd#d$Zde
jde
jfd%d&Z  ZS )+r   a*   Normalization-Free Network

    As described in :
    `Characterizing signal propagation to close the performance gap in unnormalized ResNets`
        - https://arxiv.org/abs/2101.08692
    and
    `High-Performance Large-Scale Image Recognition Without Normalization` - https://arxiv.org/abs/2102.06171

    This model aims to cover both the NFRegNet-Bx models as detailed in the paper's code snippets and
    the (preact) ResNet models described earlier in the paper.

    There are a few differences:
        * channels are rounded to be divisible by 8 by default (keep tensor core kernels happy),
            this changes channel dim and param counts slightly from the paper models
        * activation correcting gamma constants are moved into the ScaledStdConv as it has less performance
            impact in PyTorch when done with the weight scaling there. This likely wasn't a concern in the JAX impl.
        * a config option `gamma_in_act` can be enabled to not apply gamma in StdConv as described above, but
            apply it in each activation. This is slightly slower, numerically different, but matches official impl.
        * skipinit is disabled by default, it seems to have a rather drastic impact on GPU memory use and throughput
            for what it is/does. Approx 8-10% throughput loss.
      rs   avg    rp   cfgnum_classesin_chansglobal_pooloutput_stride	drop_raterr   kwargsc              
      s4  t    || _|| _d| _t|fi |}|jtv s$J d|j d|jr)t	nt
}	|jr@t|jt|j d}
t|	|jd}	nt|j}
t|	t|j |jd}	|jr_tt|jfi |jnd}t|jpi|jd |j |j}t|||j|	|
d	\| _}}|g| _d
d td|t|j |jD }|}|}d}d}g }t!|jD ]\}}|dkr|dkrdnd}||kr|dkr||9 }d}||9 }|dv rdnd}g }t"|j| D ]}|dko|dk}t|j| |j |j}|t#d/i d|d|d|j$dd|d  d|dkr
|ndd|d|d|j%d|j&r |r dn|j'd|jd|j&d|j(d|j)d|d|j*d |
d!|	d"|| | g7 }|dkrRd}||j$d 7 }|}|}q|  jt+||d#| d$g7  _|t,j-| g7 }qt,j-| | _.|j/rt|j|j/ |j| _/|	|| j/d| _0t+| j/|d%d$| jd&< n|| _/t,1 | _0|
|j/dkd'| _2| j/| _3t4| j/||| jd(| _5| 6 D ]S\}}d)|v rt7|t,j8r|j9rt,j:;|j< n	t,j:=|j<d*d+ |j>durt,j:;|j> qt7|t,j?rt,j:j@|j<d,d-d. |j>durt,j:;|j> qdS )0a  
        Args:
            cfg: Model architecture configuration.
            num_classes: Number of classifier classes.
            in_chans: Number of input channels.
            global_pool: Global pooling type.
            output_stride: Output stride of network, one of (8, 16, 32).
            drop_rate: Dropout rate.
            drop_path_rate: Stochastic depth drop-path rate.
            **kwargs: Extra kwargs overlayed onto cfg.
        Fz3Please add non-linearity constants for activation (z).)rH   )eps)rH   r   Nr   )rc   r9   c                 S   s   g | ]}|  qS rC   )tolist).0rP   rC   rC   rD   
<listcomp>  s    z(NormFreeNet.__init__.<locals>.<listcomp>r   r*   rd   )r   rd   r^   r_   r!   rq   r,   r`   ra   rb   r%   r-   r0   r1   r2   r6   r&   r)   r9   rc   rr   zstages.r   
final_convrR   )	pool_typer   fcrp   g{Gz?fan_inlinear)modenonlinearityrC   )ArJ   rK   r   r   grad_checkpointingr   r9   _nonlin_gammar4   r   r   r3   r\   r   r5   r   r&   r   r'   r   r$   r    r+   r0   r   r#   r   feature_inforV   linspacesumr   splitr   rangern   r!   r%   r1   r-   r2   r6   r)   r   rg   r   stagesr.   r   rj   	final_acthead_hidden_sizer   headnamed_modules
isinstanceLinearr7   initzeros_weightnormal_biasConv2dkaiming_normal_)rM   r   r   r   r   r   r   rr   r   rc   r9   r&   r$   r   	stem_featdrop_path_ratesprev_chs
net_stridera   expected_varr   	stage_idxstage_depthr`   rb   blocks	block_idxfirst_blockr_   nmrN   rC   rD   rK   }  s   

"&
	

 
zNormFreeNet.__init__FcoarserQ   c                 C   s    t d|rdnddfdgd}|S )z"Group parameters for optimization.z^stemz^stages\.(\d+)z^stages\.(\d+)\.(\d+)N)z^final_conv)i )r   r   )r   )rM   r   matcherrC   rC   rD   group_matcher  s   zNormFreeNet.group_matcherTenableNc                 C   s
   || _ dS )z)Enable or disable gradient checkpointing.N)r   )rM   r   rC   rC   rD   set_grad_checkpointing  s   
z"NormFreeNet.set_grad_checkpointingc                 C   s   | j jS )zGet the classifier head.)r   r   )rM   rC   rC   rD   get_classifier  s   zNormFreeNet.get_classifierc                 C   s   || _ | j|| dS )zReset the classifier head.

        Args:
            num_classes: Number of classes for new classifier.
            global_pool: Global pooling type.
        N)r   r   reset)rM   r   r   rC   rC   rD   reset_classifier  s   zNormFreeNet.reset_classifierrP   c                 C   sJ   |  |}| jrtj st| j|}n| |}| |}| |}|S )zForward pass through feature extraction layers.

        Args:
            x: Input tensor.

        Returns:
            Feature tensor.
        )	r   r   rV   jitis_scriptingr   r   r   r   rT   rC   rC   rD   forward_features  s   
	


zNormFreeNet.forward_features
pre_logitsc                 C   s   |r	| j ||dS |  |S )zForward pass through classifier head.

        Args:
            x: Input features.
            pre_logits: Return features before final linear layer.

        Returns:
            Classification logits or features.
        )r   )r   )rM   rP   r   rC   rC   rD   forward_head,  s   
zNormFreeNet.forward_headc                 C   s   |  |}| |}|S )zoForward pass.

        Args:
            x: Input tensor.

        Returns:
            Output logits.
        )r   r   rT   rC   rC   rD   rU   8  s   
	
zNormFreeNet.forward)r   rs   r   r   rp   rp   r[   )T)N)r:   r;   r<   r=   r   r>   rA   r@   r   rK   rV   r   ignorerB   r   r   r   rg   Moduler   r	   r   rW   r   r   rU   rX   rC   rC   rN   rD   r   g  sF    	}
      i   i   rF   r   .r    r%   r&   r'   c                 C   s&   |pi }t | |ddd||||d	}|S )ar  Create NFNet ResNet configuration.

    Args:
        depths: Number of blocks in each stage.
        channels: Channel dimensions for each stage.
        group_size: Group convolution size.
        act_layer: Activation layer type.
        attn_layer: Attention layer type.
        attn_kwargs: Attention layer arguments.

    Returns:
        NFNet configuration.
    r   @   ro   )	r   r    r#   r$   r-   r%   r9   r&   r'   )r   )r   r    r%   r9   r&   r'   r   rC   rC   rD   
_nfres_cfgF  s   r  0   h        c                 C   s:   d|d  d }t dd}t| |dddd	|d
d|d
}|S )zCreate NFNet RegNet configuration.

    Args:
        depths: Number of blocks in each stage.
        channels: Channel dimensions for each stage.

    Returns:
        NFNet configuration.
    i   r   r  r,   rd_ratior"   r/   g      ?g      @Tse)
r   r    r#   r%   r+   r-   r.   r1   r&   r'   )r   r   )r   r    r.   r'   r   rC   rC   rD   
_nfreg_cfgj  s   

r  r   r     r     r,   r(   r   r  r-   	feat_multc           
      C   sH   t |d | }|dur|ntdd}t| |dd||d||||d}	|	S )	a  Create NFNet configuration.

    Args:
        depths: Number of blocks in each stage.
        channels: Channel dimensions for each stage.
        group_size: Group convolution size.
        bottle_ratio: Bottleneck ratio.
        feat_mult: Feature multiplier for final layer.
        act_layer: Activation layer type.
        attn_layer: Attention layer type.
        attn_kwargs: Attention layer arguments.

    Returns:
        NFNet configuration.
    r   Nr,   r	  r   r  T)r   r    r#   r$   r%   r-   r2   r.   r9   r&   r'   )r>   r   r   )
r   r    r%   r-   r  r9   r&   r'   r.   r   rC   rC   rD   
_nfnet_cfg  s    r  r6   c                 C   s:   t | |ddddddd|t|d d |dtddd	}|S )
a  Create DeepMind NFNet configuration.

    Args:
        depths: Number of blocks in each stage.
        channels: Channel dimensions for each stage.
        act_layer: Activation layer type.
        skipinit: Use skipinit initialization.

    Returns:
        NFNet configuration.
    r   r  r,   Tr   r(   r  r	  )r   r    r#   r$   r%   r-   r2   r3   r4   r6   r.   r9   r&   r'   )r   r>   r   )r   r    r9   r6   r   rC   rC   rD   _dm_nfnet_cfg  s"   r  dm_nfnet_f0)r   rd      rs   )r   dm_nfnet_f1)rd   r      r  dm_nfnet_f2)rs   r     	   dm_nfnet_f3)r   r/      r  dm_nfnet_f4)   
         dm_nfnet_f5)r  r  $   r  dm_nfnet_f6)r      *      nfnet_f0nfnet_f1nfnet_f2nfnet_f3nfnet_f4nfnet_f5nfnet_f6nfnet_f7)r/      r  r  nfnet_l0g      ?r  ro   r/   )r
  
rd_divisorr8   )r   r  r%   r-   r'   r9   eca_nfnet_l0eca)r   r  r%   r-   r&   r'   r9   eca_nfnet_l1rd   eca_nfnet_l2eca_nfnet_l3nf_regnet_b0)r   rs   r  r  nf_regnet_b1)rd   r   r   r   nf_regnet_b2)rd   r   r/   r/   )8   p      i  )r   r    nf_regnet_b3)rd   r  r  r  )r:  r     i  nf_regnet_b4)rd   r     r@  )r        ih  nf_regnet_b5)rs   r   r$  r$  )P      iP  i  nf_resnet26)rd   rd   rd   rd   nf_resnet50)rs   r   r  rs   nf_resnet101)rs   r      rs   nf_seresnet26g      ?r	  )r   r&   r'   nf_seresnet50nf_seresnet101nf_ecaresnet26nf_ecaresnet50nf_ecaresnet101
test_nfnet)r   r   r   r   )r   r  `   r  )r   r    r  r%   r-   r'   r9   Fvariant
pretrainedr   c                 K   s,   t |  }tdd}tt| |f||d|S )zCreate a NormFreeNet model.

    Args:
        variant: Model variant name.
        pretrained: Load pretrained weights.
        **kwargs: Additional model arguments.

    Returns:
        NormFreeNet model instance.
    T)flatten_sequential)	model_cfgfeature_cfg)
model_cfgsr   r   r   )rR  rS  r   rU  rV  rC   rC   rD   _create_normfreenet  s   
rX  urlc                 K   s   | dddddt tddd
|S )	zCreate default configuration dictionary.

    Args:
        url: Model weight URL.
        **kwargs: Additional configuration options.

    Returns:
        Configuration dictionary.
    r   rs      r[  r   r   ?bicubicz
stem.conv1zhead.fc)
rY  r   
input_size	pool_sizecrop_pctinterpolationmeanstd
first_conv
classifierr   )rY  r   rC   rC   rD   _dcfg,  s   rg  zdm_nfnet_f0.dm_in1kztimm/zmhttps://github.com/rwightman/pytorch-image-models/releases/download/v0.1-dnf-weights/dm_nfnet_f0-604f9c3a.pth)r  r  )rs      rh  )rs   r   r   r]  squash)	hf_hub_idrY  r`  r_  test_input_sizera  	crop_modezdm_nfnet_f1.dm_in1kzmhttps://github.com/rwightman/pytorch-image-models/releases/download/v0.1-dnf-weights/dm_nfnet_f1-fc540f82.pthr\  rZ  )rs   @  rm  gQ?zdm_nfnet_f2.dm_in1kzmhttps://github.com/rwightman/pytorch-image-models/releases/download/v0.1-dnf-weights/dm_nfnet_f2-89875923.pth)r/   r/   )rs   `  rn  gq=
ףp?zdm_nfnet_f3.dm_in1kzmhttps://github.com/rwightman/pytorch-image-models/releases/download/v0.1-dnf-weights/dm_nfnet_f3-d74ab3aa.pth)r  r  )rs     ro  gGz?zdm_nfnet_f4.dm_in1kzmhttps://github.com/rwightman/pytorch-image-models/releases/download/v0.1-dnf-weights/dm_nfnet_f4-0ac5b10b.pth)r  r  )rs     rp  )rs   r  r  g;On?zdm_nfnet_f5.dm_in1kzmhttps://github.com/rwightman/pytorch-image-models/releases/download/v0.1-dnf-weights/dm_nfnet_f5-ecb20ab1.pth)   rq  )rs      rr  gI+?zdm_nfnet_f6.dm_in1kzmhttps://github.com/rwightman/pytorch-image-models/releases/download/v0.1-dnf-weights/dm_nfnet_f6-e0f12116.pth)r$  r$  )rs     rs  )rs   @  rt  gd;O?)rY  r`  r_  rk  )r   r   )rs     ru  )rs   `  rv  znfnet_l0.ra2_in1kzjhttps://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/nfnet_l0_ra2-45c6688d.pth)rs   rB  rB  )rj  rY  r`  r_  rk  test_crop_pctzeca_nfnet_l0.ra2_in1kzmhttps://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/ecanfnet_l0_ra2-e3e9ac50.pthzeca_nfnet_l1.ra2_in1kzmhttps://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/ecanfnet_l1_ra2-7dce93cd.pthzeca_nfnet_l2.ra3_in1kzmhttps://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/ecanfnet_l2_ra3-da781a61.pth)r@  r@  )rY  r`  r_  rk  rw  r   )rY  r`  r_  rk  re  znf_regnet_b1.ra2_in1kzrhttps://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/nf_regnet_b1_256_ra2-ad85cfef.pth)rj  rY  r`  r_  rk  re  )rs      rx  )rs     ry  )r  r  )rs     rz  )rY  re  znf_resnet50.ra2_in1kzmhttps://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/nf_resnet50_ra2-9f236009.pth)rj  rY  r`  r_  rk  ra  re  )r,   r,   r,   gffffff?)rs      r{  )r  r  )rj  rc  rd  ra  r_  r`  )rO  ztest_nfnet.r160_in1kc                 K      t dd| i|S )z&NFNet-F0 (DeepMind weight compatible).r  rS  N)r  rX  rS  r   rC   rC   rD   r       c                 K   r|  )z&NFNet-F1 (DeepMind weight compatible).r  rS  N)r  r}  r~  rC   rC   rD   r    r  c                 K   r|  )z&NFNet-F2 (DeepMind weight compatible).r  rS  N)r  r}  r~  rC   rC   rD   r    r  c                 K   r|  )z&NFNet-F3 (DeepMind weight compatible).r  rS  N)r  r}  r~  rC   rC   rD   r    r  c                 K   r|  )z&NFNet-F4 (DeepMind weight compatible).r  rS  N)r  r}  r~  rC   rC   rD   r    r  c                 K   r|  )z&NFNet-F5 (DeepMind weight compatible).r!  rS  N)r!  r}  r~  rC   rC   rD   r!    r  c                 K   r|  )z&NFNet-F6 (DeepMind weight compatible).r#  rS  N)r#  r}  r~  rC   rC   rD   r#    r  c                 K   r|  )z	NFNet-F0.r'  rS  N)r'  r}  r~  rC   rC   rD   r'    r  c                 K   r|  )z	NFNet-F1.r(  rS  N)r(  r}  r~  rC   rC   rD   r(    r  c                 K   r|  )z	NFNet-F2.r)  rS  N)r)  r}  r~  rC   rC   rD   r)    r  c                 K   r|  )z	NFNet-F3.r*  rS  N)r*  r}  r~  rC   rC   rD   r*    r  c                 K   r|  )z	NFNet-F4.r+  rS  N)r+  r}  r~  rC   rC   rD   r+    r  c                 K   r|  )z	NFNet-F5.r,  rS  N)r,  r}  r~  rC   rC   rD   r,    r  c                 K   r|  )z	NFNet-F6.r-  rS  N)r-  r}  r~  rC   rC   rD   r-    r  c                 K   r|  )z	NFNet-F7.r.  rS  N)r.  r}  r~  rC   rC   rD   r.    r  c                 K   r|  )zNFNet-L0b w/ SiLU.

    My experimental 'light' model w/ F0 repeats, 1.5x final_conv mult, 64 group_size, .25 bottleneck & SE ratio
    r0  rS  N)r0  r}  r~  rC   rC   rD   r0       c                 K   r|  )zECA-NFNet-L0 w/ SiLU.

    My experimental 'light' model w/ F0 repeats, 1.5x final_conv mult, 64 group_size, .25 bottleneck & ECA attn
    r2  rS  N)r2  r}  r~  rC   rC   rD   r2    r  c                 K   r|  )zECA-NFNet-L1 w/ SiLU.

    My experimental 'light' model w/ F1 repeats, 2.0x final_conv mult, 64 group_size, .25 bottleneck & ECA attn
    r4  rS  N)r4  r}  r~  rC   rC   rD   r4    r  c                 K   r|  )zECA-NFNet-L2 w/ SiLU.

    My experimental 'light' model w/ F2 repeats, 2.0x final_conv mult, 64 group_size, .25 bottleneck & ECA attn
    r5  rS  N)r5  r}  r~  rC   rC   rD   r5    r  c                 K   r|  )zECA-NFNet-L3 w/ SiLU.

    My experimental 'light' model w/ F3 repeats, 2.0x final_conv mult, 64 group_size, .25 bottleneck & ECA attn
    r6  rS  N)r6  r}  r~  rC   rC   rD   r6  &  r  c                 K   r|  )z"Normalization-Free RegNet-B0.
    r7  rS  N)r7  r}  r~  rC   rC   rD   r7  /     c                 K   r|  )z"Normalization-Free RegNet-B1.
    r8  rS  N)r8  r}  r~  rC   rC   rD   r8  6  r  c                 K   r|  )z"Normalization-Free RegNet-B2.
    r9  rS  N)r9  r}  r~  rC   rC   rD   r9  =  r  c                 K   r|  )z"Normalization-Free RegNet-B3.
    r=  rS  N)r=  r}  r~  rC   rC   rD   r=  D  r  c                 K   r|  )z"Normalization-Free RegNet-B4.
    r?  rS  N)r?  r}  r~  rC   rC   rD   r?  K  r  c                 K   r|  )z"Normalization-Free RegNet-B5.
    rC  rS  N)rC  r}  r~  rC   rC   rD   rC  R  r  c                 K   r|  )z"Normalization-Free ResNet-26.
    rF  rS  N)rF  r}  r~  rC   rC   rD   rF  Y  r  c                 K   r|  )z"Normalization-Free ResNet-50.
    rG  rS  N)rG  r}  r~  rC   rC   rD   rG  `  r  c                 K   r|  )z#Normalization-Free ResNet-101.
    rH  rS  N)rH  r}  r~  rC   rC   rD   rH  g  r  c                 K   r|  )zNormalization-Free SE-ResNet26.rJ  rS  N)rJ  r}  r~  rC   rC   rD   rJ  n  r  c                 K   r|  )zNormalization-Free SE-ResNet50.rK  rS  N)rK  r}  r~  rC   rC   rD   rK  t  r  c                 K   r|  )z Normalization-Free SE-ResNet101.rL  rS  N)rL  r}  r~  rC   rC   rD   rL  z  r  c                 K   r|  )z Normalization-Free ECA-ResNet26.rM  rS  N)rM  r}  r~  rC   rC   rD   rM    r  c                 K   r|  )z Normalization-Free ECA-ResNet50.rN  rS  N)rN  r}  r~  rC   rC   rD   rN    r  c                 K   r|  )z!Normalization-Free ECA-ResNet101.rO  rS  N)rO  r}  r~  rC   rC   rD   rO    r  c                 K   r|  )z%Test NFNet model for experimentation.rP  rS  N)rP  r}  r~  rC   rC   rD   rP    r  )r*   )r   NNT)r   NrF   NN)r  )r  r  r,   r(   r   r  N)r  r   TrC   r[   )r   )cr=   collectionsr   dataclassesr   r   	functoolsr   typingr   r   r   r	   r
   rV   torch.nnrg   	timm.datar   r   timm.layersr   r   r   r   r   r   r   r   r   _builderr   _features_fxr   _manipulater   	_registryr   r   __all__r   r   rE   rA   r@   r\   r]   rn   r>   rB   r   r   r   r   r   r  r  r  r  rW  rX  rg  default_cfgsr  r  r  r  r  r!  r#  r'  r(  r)  r*  r+  r,  r-  r.  r0  r2  r4  r5  r6  r7  r8  r9  r=  r?  rC  rF  rG  rH  rJ  rK  rL  rM  rN  rO  rP  rC   rC   rC   rD   <module>   sv   ,+ 
= b


*$

	
-



$














!
'
()*+,
/
0
1345789;
A"

	




 "$&(*,/37;?C
EI
K
M
O
RS
WYZ[]^
_
h"