o
    پi                     @   s  d Z ddlZddlmZmZ ddlmZ ddlmZm	Z	m
Z
mZmZmZmZ ddlZddlmZ ddlmZmZ ddlmZmZmZmZmZmZ ddlmZmZmZmZ d	d
l m!Z! d	dl"m#Z# d	dl$m%Z%m&Z& d	dl'm(Z(m)Z)m*Z* ddgZ+eG dd dZ,de-de.de.fddZ/	ddee. dee- dee. de-deee. ee. f f
ddZ0	dde-de.d e-d!e.d"e.d#e.deee. e.ee. f fd$d%Z1								&dd'e.d(e.d)e.d*e.d+e.d,ee	 d-e2dej3fd.d/Z4								&dd'e.d(e.d)e.d*e.d+e.d,ee	 d-e2dej5fd0d1Z6	2		&dd3ee7 d'e.d(e.d)e.d*e.d+ee.e.f d,ee	 d-e2deej3 fd4d5Z8G d6d7 d7ej3Z9G d8d9 d9ej3Z:G d:d; d;ej3Z;G d<d dej3Z<dd>ej3d?e7d@e2ddfdAdBZ=dCe
e7ef de
e7ef fdDdEZ>e?di dFe,dGdHdIddJdKdLe,dGdMdNdOdPdKdQe,dGdMdNdOdPdRdSdTe,dUdVdWdGdOdKdXe,dYdZd[dOdOdKd\e,d]d^d_dGd`dKdae,dbdcd_dUdddKdee,dfdgdhdidjdKdke,dldmdndYdodKdpe,d]dqdrdsdjdKdte,dudvdwdxdydKdze,d{d|d}d~dPdKde,ddddudjdKde,dGdHdIddJddde,dUddddOddde,dUdddOdddde,dYdddOdddde,dYdddOdddRdde,dUdddGdddde,d]dddGdddde,dfddWddPddde,dxdddddddde,ddddYdoddde,ddddYdoddRdde,dudvdwdxdyddde,ddddxd`ddde,dddddddde,dddddddde,dddddddde,dddddddde,dfddWddPddeedOddde,dPdfddWdddddčde,dddxddddddddǍ	de,ddOdddddddddd΍de,ddUdddddddddd΍de,ddUdddddddddd΍Z@de7de2de<fdd؄ZAdde7de
e7ef fddۄZBdde7de
e7ef fdd݄ZCdde7de
e7ef fdd߄ZDe(i deBddddeBddddeBddddeBddddeBdddeBdddeBdddeBddddeBddddeBd=ddeBdddddeBdddddeBd=ddeBdddddd ddeBdddddd ddeBddddeDdddi deDdd	dd
eDddddeDddddeDddddeDddddeDddddeDddddeDddddeDddddeDddddeDddddeDdddd eDdd!dd"eDdd#d$d%d&dd'd(eDdd)d$d%d&dd'd*eDdd+d$d%d&dd'd,eDdd-d$d.i d/eDdd0d$d.d1eDdd2d$d.d3eDdd4d5d6d%d&dd7d8eDdd4d5d9d%d&dd7d:eDdd4d5d;d%d&dd7d<eDdd4d5d=d%d&dd7d>eDdd?dd4d5d@dAeDddBdd4d5d@dCeDddDdd4d5d@dEeCdddFeCdddGeCdddHeCdddIeCdddJeCdddKeCdddLeCddi dMeCdddNeCdddOeCdddPeCdddQeCdddReCdddSeCdddTeCdddUeCdddVeCdddWeCdddXeCdddYeCdddZeCddd[eCddd\eCddZEe)dde2de<fd]dFZFe)dde2de<fd^dLZGe)dde2de<fd_dQZHe)dde2de<fd`dTZIe)dde2de<fdadXZJe)dde2de<fdbd\ZKe)dde2de<fdcdaZLe)dde2de<fdddeZMe)dde2de<fdedkZNe)dde2de<fdfdpZOe)dde2de<fdgdtZPe)dde2de<fdhdzZQe)dde2de<fdidZRe)dde2de<fdjdZSe)dde2de<fdkdZTe)dde2de<fdldZUe)dde2de<fdmdZVe)dde2de<fdndZWe)dde2de<fdodZXe)dde2de<fdpdZYe)dde2de<fdqdZZe)dde2de<fdrdZ[e)dde2de<fdsdZ\e)dde2de<fdtdZ]e)dde2de<fdudZ^e)dde2de<fdvdZ_e)dde2de<fdwdZ`e)dde2de<fdxdZae)dde2de<fdydZbe)dde2de<fdzdZce)dde2de<fd{dZde)dde2de<fd|dZee)dde2de<fd}dńZfe)dde2de<fd~dȄZge)dde2de<fddτZhe)dde2de<fddӄZie*ejddi dS (  a  RegNet X, Y, Z, and more

Paper: `Designing Network Design Spaces` - https://arxiv.org/abs/2003.13678
Original Impl: https://github.com/facebookresearch/pycls/blob/master/pycls/models/regnet.py

Paper: `Fast and Accurate Model Scaling` - https://arxiv.org/abs/2103.06877
Original Impl: None

Based on original PyTorch impl linked above, but re-wrote to use my own blocks (adapted from ResNet here)
and cleaned up with more descriptive variable names.

Weights from original pycls impl have been modified:
* first layer from BGR -> RGB as most PyTorch models are
* removed training specific dict entries from checkpoints and keep model state_dict only
* remap names to match the ones here

Supports weight loading from torchvision and classy-vision (incl VISSL SEER)

A number of custom timm model definitions additions including:
* stochastic depth, gradient checkpointing, layer-decay, configurable dilation
* a pre-activation 'V' variant
* only known RegNet-Z model definitions with pretrained weights

Hacked together by / Copyright 2020 Ross Wightman
    N)	dataclassreplace)partial)AnyCallableDictListOptionalUnionTupleIMAGENET_DEFAULT_MEANIMAGENET_DEFAULT_STD)ClassifierHeadAvgPool2dSameConvNormActSEModuleDropPathGroupNormAct)get_act_layerget_norm_act_layercreate_conv2dmake_divisible   )build_model_with_cfg)feature_take_indices)checkpoint_seqnamed_apply)generate_default_cfgsregister_modelregister_model_deprecationsRegNet	RegNetCfgc                   @   s   e Zd ZU dZdZeed< dZeed< dZe	ed< dZ
e	ed	< d
Zeed< dZe	ed< dZe	ed< dZe	ed< dZeed< dZee ed< dZeed< dZeed< dZeed< dZeeef ed< dZeeef ed< dS )r"   z"RegNet architecture configuration.   depthP   w0q=
ףPE@waHzG@wm   
group_size      ?bottle_ratio        se_ratiogroup_min_ratio    
stem_widthconv1x1
downsampleF
linear_outpreactr   num_featuresrelu	act_layer	batchnorm
norm_layerN)__name__
__module____qualname____doc__r$   int__annotations__r&   r(   floatr*   r,   r.   r0   r1   r3   r5   r	   strr6   boolr7   r8   r:   r
   r   r<    rF   rF   F/home/ubuntu/.local/lib/python3.10/site-packages/timm/models/regnet.pyr"   -   s"   
 fqreturnc                 C   s   t t| | | S )zConverts a float to the closest non-zero int divisible by q.

    Args:
        f: Input float value.
        q: Quantization divisor.

    Returns:
        Quantized integer value.
    )rA   round)rH   rI   rF   rF   rG   quantize_floatA   s   
rL   r/   widthsbottle_ratiosgroups	min_ratioc                    sv   dd t | |D }dd t ||D } r# fddt ||D }n
dd t ||D }dd t ||D } | |fS )a,  Adjusts the compatibility of widths and groups.

    Args:
        widths: List of channel widths.
        bottle_ratios: List of bottleneck ratios.
        groups: List of group sizes.
        min_ratio: Minimum ratio for divisibility.

    Returns:
        Tuple of adjusted widths and groups.
    c                 S   s   g | ]
\}}t || qS rF   rA   ).0wbrF   rF   rG   
<listcomp>_       z-adjust_widths_groups_comp.<locals>.<listcomp>c                 S      g | ]	\}}t ||qS rF   )min)rR   gw_botrF   rF   rG   rU   `       c                    s   g | ]
\}}t || qS rF   )r   rR   rZ   rY   rP   rF   rG   rU   c   rV   c                 S   rW   rF   )rL   r\   rF   rF   rG   rU   e   r[   c                 S   s   g | ]
\}}t || qS rF   rQ   )rR   rZ   rT   rF   rF   rG   rU   f   rV   )zip)rM   rN   rO   rP   bottleneck_widthsrF   r]   rG   adjust_widths_groups_compN   s   r`      width_slopewidth_initial
width_multr$   r,   quantc                    s   | dkr|dkr|dkr|| dksJ t j|t jd|  | }t t || t| }t |t || | | }tt |t	|
  d }	}
t j fddt|	D t jd}|	  |	| fS )au  Generates per block widths from RegNet parameters.

    Args:
        width_slope: Slope parameter for width progression.
        width_initial: Initial width.
        width_mult: Width multiplier.
        depth: Network depth.
        group_size: Group convolution size.
        quant: Quantization factor.

    Returns:
        Tuple of (widths, num_stages, groups).
    r   r   )dtypec                    s   g | ]} qS rF   rF   rR   _r,   rF   rG   rU      s    z#generate_regnet.<locals>.<listcomp>)torcharangefloat32rK   logmathpowlenuniquerA   maxitemtensorrangeint32tolist)rb   rc   rd   r$   r,   re   widths_cont
width_expsrM   
num_stages	max_stagerO   rF   ri   rG   generate_regnetj   s   ($"r|   Fin_chsout_chskernel_sizestridedilationr<   r7   c              	   C   s^   |pt j}|dkr|dkrdn|}|dkr|nd}|r$t| ||||dS t| |||||ddS )am  Create convolutional downsampling module.

    Args:
        in_chs: Input channels.
        out_chs: Output channels.
        kernel_size: Convolution kernel size.
        stride: Convolution stride.
        dilation: Convolution dilation.
        norm_layer: Normalization layer.
        preact: Use pre-activation.

    Returns:
        Downsampling module.
    r   )r   r   F)r   r   r<   	apply_act)nnBatchNorm2dr   r   )r}   r~   r   r   r   r<   r7   rF   rF   rG   downsample_conv   s(   
r   c                 C   s   |pt j}|dkr|nd}t  }|dks|dkr.|dkr#|dkr#tnt j}	|	d|ddd}|r9t| |ddd}
n
t| |dd|dd}
t j||
g S )a  Create average pool downsampling module.

    AvgPool Downsampling as in 'D' ResNet variants. This is not in RegNet space but I might experiment.

    Args:
        in_chs: Input channels.
        out_chs: Output channels.
        kernel_size: Convolution kernel size.
        stride: Convolution stride.
        dilation: Convolution dilation.
        norm_layer: Normalization layer.
        preact: Use pre-activation.

    Returns:
        Sequential downsampling module.
    r      TF)	ceil_modecount_include_padr   )r   r<   r   )r   r   Identityr   	AvgPool2dr   r   
Sequential)r}   r~   r   r   r   r<   r7   
avg_stridepoolavg_pool_fnconvrF   rF   rG   downsample_avg   s   
r   r   r   downsample_typec           	      C   s   | dv sJ ||ks|dks|d |d kr<t ||d ||d}| s$dS | dkr1t||fi |S t||fd|i|S t S )a  Create shortcut connection for residual blocks.

    Args:
        downsample_type: Type of downsampling ('avg', 'conv1x1', or None).
        in_chs: Input channels.
        out_chs: Output channels.
        kernel_size: Kernel size for conv downsampling.
        stride: Stride for downsampling.
        dilation: Dilation rates.
        norm_layer: Normalization layer.
        preact: Use pre-activation.

    Returns:
        Shortcut module or None.
    )avgr4    Nr   r   )r   r   r<   r7   Nr   r   )dictr   r   r   r   )	r   r}   r~   r   r   r   r<   r7   dargsrF   rF   rG   create_shortcut   s    r   c                          e Zd ZdZdddddddejejddfd	ed
ededeeef de	dede	de
dededede	f fddZdddZdejdejfddZ  ZS )
BottleneckzRegNet Bottleneck block.

    This is almost exactly the same as a ResNet Bottleneck. The main difference is the SE block is moved from
    after conv3 to after conv2. Otherwise, it's just redefining the arguments for groups/bottleneck channels.
    r   r         ?r4   FNr/   r}   r~   r   r   r.   r,   r0   r5   r6   r:   r<   drop_path_ratec              	      s  t t|   t|
}
tt|| }|| }t|
|d}t||fddi|| _t||fd||d ||d|| _	|rNtt|| }t
|||
d| _nt | _t||fddd	|| _|	rft n|
 | _t|||d|||d
| _|dkrt|| _dS t | _dS )a  Initialize RegNet Bottleneck block.

        Args:
            in_chs: Input channels.
            out_chs: Output channels.
            stride: Convolution stride.
            dilation: Dilation rates for conv2 and shortcut.
            bottle_ratio: Bottleneck ratio (reduction factor).
            group_size: Group convolution size.
            se_ratio: Squeeze-and-excitation ratio.
            downsample: Shortcut downsampling type.
            linear_out: Use linear activation for output.
            act_layer: Activation layer.
            norm_layer: Normalization layer.
            drop_block: Drop block layer.
            drop_path_rate: Stochastic depth drop rate.
        r:   r<   r   r      r   )r   r   r   rO   
drop_layerrd_channelsr:   F)r   r   )r   r   r   r<   N)superr   __init__r   rA   rK   r   r   conv1conv2r   ser   r   conv3act3r   r5   r   	drop_path)selfr}   r~   r   r   r.   r,   r0   r5   r6   r:   r<   
drop_blockr   bottleneck_chsrO   cargsse_channels	__class__rF   rG   r     sD   !

$	zBottleneck.__init__rJ   c                 C   s   t j| jjj dS )z1Zero-initialize the last batch norm in the block.N)r   initzeros_r   bnweightr   rF   rF   rG   zero_init_lastP  s   zBottleneck.zero_init_lastxc                 C   sX   |}|  |}| |}| |}| |}| jdur%| || | }| |}|S zoForward pass.

        Args:
            x: Input tensor.

        Returns:
            Output tensor.
        N)r   r   r   r   r5   r   r   r   r   shortcutrF   rF   rG   forwardT  s   	





zBottleneck.forwardrJ   Nr=   r>   r?   r@   r   ReLUr   rA   r   rC   rD   rE   r   r   r   rj   Tensorr   __classcell__rF   rF   r   rG   r     sP    

	

Dr   c                       r   )PreBottleneckznPre-activation RegNet Bottleneck block.

    Similar to Bottleneck but with pre-activation normalization.
    r   r   r   r4   FNr/   r}   r~   r   r   r.   r,   r0   r5   r6   r:   r<   r   c              	      s   t t|   t||
}tt|| }|| }||| _t||dd| _||| _	t||d||d |d| _
|rJtt|| }t|||
d| _nt | _||| _t||dd| _t|||d||dd| _|dkrst|| _d	S t | _d	S )
a  Initialize pre-activation RegNet Bottleneck block.

        Args:
            in_chs: Input channels.
            out_chs: Output channels.
            stride: Convolution stride.
            dilation: Dilation rates for conv2 and shortcut.
            bottle_ratio: Bottleneck ratio (reduction factor).
            group_size: Group convolution size.
            se_ratio: Squeeze-and-excitation ratio.
            downsample: Shortcut downsampling type.
            linear_out: Use linear activation for output.
            act_layer: Activation layer.
            norm_layer: Normalization layer.
            drop_block: Drop block layer.
            drop_path_rate: Stochastic depth drop rate.
        r   )r   r   r   )r   r   r   rO   r   T)r   r   r   r7   N)r   r   r   r   rA   rK   norm1r   r   norm2r   r   r   r   r   norm3r   r   r5   r   r   )r   r}   r~   r   r   r.   r,   r0   r5   r6   r:   r<   r   r   norm_act_layerr   rO   r   r   rF   rG   r   p  s>   !




$	zPreBottleneck.__init__rJ   c                 C   s   dS )z?Zero-initialize the last batch norm (no-op for pre-activation).NrF   r   rF   rF   rG   r     s   zPreBottleneck.zero_init_lastr   c                 C   sl   |  |}|}| |}| |}| |}| |}| |}| |}| jdur4| || | }|S r   )	r   r   r   r   r   r   r   r5   r   r   rF   rF   rG   r     s   
	






zPreBottleneck.forwardr   r   rF   rF   r   rG   r   j  sP    	
	

Cr   c                       sb   e Zd ZdZdefdedededededeee  d	e	f fd
dZ
dejdejfddZ  ZS )RegStagezRegNet stage (sequence of blocks with the same output shape).

    A stage consists of multiple bottleneck blocks with the same output dimensions.
    Nr$   r}   r~   r   r   drop_path_ratesblock_fnc              
      s   t t|   d| _|dv rdnd}	t|D ]:}
|
dkr|nd}|
dkr&|n|}|	|f}|dur4||
 nd}d|
d }| ||||f|||d	| |}	qdS )
a  Initialize RegNet stage.

        Args:
            depth: Number of blocks in stage.
            in_chs: Input channels.
            out_chs: Output channels.
            stride: Stride for first block.
            dilation: Dilation rate.
            drop_path_rates: Drop path rates for each block.
            block_fn: Block class to use.
            **block_kwargs: Additional block arguments.
        F)r   r   r   r   r   Nr/   zb{})r   r   r   )r   r   r   grad_checkpointingru   format
add_module)r   r$   r}   r~   r   r   r   r   block_kwargsfirst_dilationiblock_strideblock_in_chsblock_dilationdprnamer   rF   rG   r     s0   zRegStage.__init__r   rJ   c                 C   s<   | j rtj st|  |}|S |  D ]}||}q|S )zForward pass through all blocks in the stage.

        Args:
            x: Input tensor.

        Returns:
            Output tensor.
        )r   rj   jitis_scriptingr   children)r   r   blockrF   rF   rG   r     s   	
zRegStage.forward)r=   r>   r?   r@   r   rA   r	   r   rC   r   r   rj   r   r   r   rF   rF   r   rG   r     s(    
.r   c                       s  e Zd ZdZ							d8ded	ed
edededededef fddZ				d9dedededede
eeeef  eeef f f
ddZejjd:dedeeef fddZejjd;deddfddZejjdejfdd Zd<d
edee ddfd!d"Z				#	d=d$ejd%eeeee f  d&ed'ed(ed)edeeej e
ejeej f f fd*d+Z	,		d>d%eeee f d-ed.edee fd/d0Zd$ejdejfd1d2Zd:d$ejd3edejfd4d5Zd$ejdejfd6d7Z  Z S )?r!   zRegNet-X, Y, and Z Models.

    Paper: https://arxiv.org/abs/2003.13678
    Original Impl: https://github.com/facebookresearch/pycls/blob/master/pycls/models/regnet.py
    r     r2   r   r/   Tcfgin_chansnum_classesoutput_strideglobal_pool	drop_rater   r   c	              	      s  t    || _|| _|dv sJ t|fi |	}|j}
t|j|jd}|j	r1t
||
ddd| _nt||
dfddi|| _t|
dddg| _|
}d}| j|||d	\}}t|d
ks]J |j	rbtnt}t|D ]2\}}d|d }| |td||d|| |d }||d 9 }|  jt|||dg7  _qh|jrt||jfddi|| _|j| _n|jp|j	}|rt|j nt | _|| _| j| _t| j|||d| _tt t!|d|  dS )a  Initialize RegNet model.

        Args:
            cfg: Model architecture configuration.
            in_chans: Number of input channels.
            num_classes: Number of classifier classes.
            output_stride: Output stride of network, one of (8, 16, 32).
            global_pool: Global pooling type.
            drop_rate: Dropout rate.
            drop_path_rate: Stochastic depth drop-path rate.
            zero_init_last: Zero-init last weight of residual path.
            kwargs: Extra kwargs overlayed onto cfg.
        )ra      r2   r   r   r   r   r   stem)num_chs	reductionmodule)r   r      zs{}r   )r}   r   r~   r   )in_featuresr   	pool_typer   )r   NrF   )"r   r   r   r   r   r3   r   r:   r<   r7   r   r   r   feature_info_get_stage_argsrp   r   r   	enumerater   r   r   r8   
final_convr6   r   r   r   head_hidden_sizer   headr   r   _init_weights)r   r   r   r   r   r   r   r   r   kwargsr3   na_args
prev_widthcurr_strideper_stage_argscommon_argsr   r   
stage_args
stage_name	final_actr   rF   rG   r     sd   

	
zRegNet.__init__r   default_striderJ   c              	      sd  t jjjjj\}}}tjt|dd\}}	|	 |		 }}	fddt
|D }
g }g }d}d}t
|D ]}||krI||9 }d}n|}||9 }|| || q<td|t|	}tjt|	dd	 dd
}t||	 }dd |D }t||
|jd\}}g d  fddt||||	|
||D }tjjjjjd}||fS )aM  Generate stage arguments from configuration.

        Args:`
            cfg: RegNet configuration.
            default_stride: Default stride for stages.
            output_stride: Target output stride.
            drop_path_rate: Stochastic depth rate.

        Returns:
            Tuple of (per_stage_args, common_args).
        T)return_countsc                    s   g | ]} j qS rF   )r.   rg   )r   rF   rG   rU     s    z*RegNet._get_stage_args.<locals>.<listcomp>r   r   r   N)dimc                 S   s   g | ]}|  qS rF   )rw   )rR   r   rF   rF   rG   rU     s    r]   )r~   r   r   r$   r.   r,   r   c                    s   g | ]	}t t |qS rF   )r   r^   )rR   params)	arg_namesrF   rG   rU     s    )r5   r0   r6   r:   r<   )r|   r(   r&   r*   r$   r,   rj   rq   rt   rw   ru   appendlinspacesumcumsumtensor_splitr`   r1   r^   r   r5   r0   r6   r:   r<   )r   r   r   r   r   rM   rz   stage_gsstage_widthsstage_depthsstage_brstage_stridesstage_dilations
net_strider   rh   r   
dpr_tensorsplit_indices	stage_dprr   r   rF   )r   r   rG   r   n  sF    



zRegNet._get_stage_argsFcoarsec                 C   s   t d|rddS ddS )z"Group parameters for optimization.z^stemz^s(\d+)z^s(\d+)\.b(\d+))r   blocks)r   )r   r  rF   rF   rG   group_matcher  s   zRegNet.group_matcherenableNc                 C   s$   t |  dd D ]}||_q
dS )z)Enable or disable gradient checkpointing.r   r   N)listr   r   )r   r  srF   rF   rG   set_grad_checkpointing  s   zRegNet.set_grad_checkpointingc                 C   s   | j jS )zGet the classifier head.)r   fcr   rF   rF   rG   get_classifier  s   zRegNet.get_classifierc                 C   s   || _ | jj||d dS )zReset the classifier head.

        Args:
            num_classes: Number of classes for new classifier.
            global_pool: Global pooling type.
        )r   N)r   r   reset)r   r   r   rF   rF   rG   reset_classifier  s   zRegNet.reset_classifierNCHWr   indicesnorm
stop_early
output_fmtintermediates_onlyc                 C   s   |dv sJ dg }t d|\}}	d}
| |}|
|v r!|| d}|r+|d|	 }|D ]}|
d7 }
t| ||}|
|v rC|| q-|rH|S |
dkrQ| |}||fS )	a   Forward features that returns intermediates.

        Args:
            x: Input image tensor
            indices: Take last n blocks if int, all if None, select matching indices if sequence
            norm: Apply norm layer to compatible intermediates
            stop_early: Stop iterating over blocks when last desired intermediate hit
            output_fmt: Shape of intermediate feature outputs
            intermediates_only: Only return intermediate features
        Returns:

        )r  zOutput shape must be NCHW.   r   s1s2s3s4Nr   r   )r   r   r   getattrr   )r   r   r  r  r  r  r  intermediatestake_indices	max_indexfeat_idxlayer_namesnrF   rF   rG   forward_intermediates  s*   



zRegNet.forward_intermediatesr   
prune_norm
prune_headc                 C   s^   t d|\}}d}||d }|D ]
}t| |t  q|dk r%t | _|r-| dd |S )aE  Prune layers not required for specified intermediates.

        Args:
            indices: Indices of intermediate layers to keep.
            prune_norm: Whether to prune normalization layer.
            prune_head: Whether to prune the classifier head.

        Returns:
            List of indices that were kept.
        r  r  Nr   r   r   )r   setattrr   r   r   r  )r   r  r+  r,  r%  r&  r(  r)  rF   rF   rG   prune_intermediate_layers  s   
z RegNet.prune_intermediate_layersc                 C   s@   |  |}| |}| |}| |}| |}| |}|S )zForward pass through feature extraction layers.

        Args:
            x: Input tensor.

        Returns:
            Feature tensor.
        )r   r  r   r!  r"  r   r   r   rF   rF   rG   forward_features  s   
	




zRegNet.forward_features
pre_logitsc                 C   s   |r	| j ||dS |  |S )zForward pass through classifier head.

        Args:
            x: Input features.
            pre_logits: Return features before final linear layer.

        Returns:
            Classification logits or features.
        )r1  )r   )r   r   r1  rF   rF   rG   forward_head#  s   
zRegNet.forward_headc                 C   s   |  |}| |}|S )zoForward pass.

        Args:
            x: Input tensor.

        Returns:
            Output logits.
        )r0  r2  r/  rF   rF   rG   r   /  s   
	
zRegNet.forward)r   r   r2   r   r/   r/   T)r   r2   r/   F)T)N)NFFr  F)r   FT)!r=   r>   r?   r@   r"   rA   rD   rC   rE   r   r   r   r   r   r   rj   r   ignorer  r  r   Moduler  r	   r  r   r
   r*  r.  r0  r2  r   r   rF   rF   r   rG   r!     s    		V
< 
2
r   r   r   r   c                 C   s   t | tjr4| jd | jd  | j }|| j }| jjdt	
d|  | jdur2| jj  dS dS t | tjrTtjj| jddd | jdurRtj| j dS dS |rat| drc|   dS dS dS )	zInitialize module weights.

    Args:
        module: PyTorch module to initialize.
        name: Module name.
        zero_init_last: Zero-initialize last layer weights.
    r   r          @Nr/   g{Gz?)meanstdr   )
isinstancer   Conv2dr   out_channelsrO   r   datanormal_rn   sqrtbiaszero_Linearr   r   hasattrr   )r   r   r   fan_outrF   rF   rG   r   =  s   


r   
state_dictc                 C   sn  |  d| } g d}d| v rtddl}| d d d } i }| d  D ]1\}}|dd	}|d
d}|ddd |}|dd|}|D ]
\}}|||}qD|||< q"| d  D ]\}}d|v sfd|v rgqZ|dd}|||< qZ|S d| v rddl}i }|  D ]0\}}|dd	}|dd}|ddd |}|D ]
\}}|||}q|dd}|||< q|S | S )zFilter and remap state dict keys for compatibility.

    Args:
        state_dict: Raw state dictionary.

    Returns:
        Filtered state dictionary.
    model))zf.a.0z
conv1.conv)zf.a.1zconv1.bn)zf.b.0z
conv2.conv)zf.b.1zconv2.bn)z
f.final_bnconv3.bn)zf.se.excitation.0zse.fc1)zf.se.excitation.2zse.fc2)zf.ser   )zf.c.0
conv3.conv)zf.c.1rF  )zf.crG  )zproj.0downsample.conv)zproj.1zdownsample.bn)projrH  classy_state_dictr   N
base_modeltrunkz_feature_blocks.conv1.stem.0	stem.convz_feature_blocks.conv1.stem.1zstem.bnz&^_feature_blocks.res\d.block(\d)-(\d+)c                 S   (   dt | d dt | dd  S )Nr  r   .br   rA   groupr   rF   rF   rG   <lambda>w     ( z_filter_fn.<locals>.<lambda>zs(\d)\.b(\d+)\.bnzs\1.b\2.downsample.bnheadsprojection_head
prototypesz0.clf.0head.fczstem.0.weightzstem.0zstem.1z)trunk_output.block(\d)\.block(\d+)\-(\d+)c                 S   rN  )Nr  r   rO  r   rP  rR  rF   rF   rG   rS    rT  zfc.zhead.fc.)getreitemsr   sub)rD  replacesrZ  outkvr  rrF   rF   rG   
_filter_fnS  sN   	


rb  regnetx_002r+   gQ8B@gQ@   )r&   r(   r*   r,   r$   regnetx_004g{Gz8@gRQ@r      regnetx_004_tvg?)r&   r(   r*   r,   r$   r1   regnetx_0060   g\(|B@gQ@regnetx_0088   g=
ףpA@g=
ףp=@regnetx_016r%   gzGA@g      @   regnetx_032X   g(\O:@   regnetx_040`   g33333SC@gq=
ףp@(      regnetx_064   g
ףp=jN@g(\ @   regnetx_080gHzH@g
ףp=
@x   regnetx_120   gףp=
WR@g(\@p      regnetx_160   gQK@g @   regnetx_320@  gףp=
wQ@r6  regnety_002r   )r&   r(   r*   r,   r$   r0   regnety_004gp=
;@gQ @regnety_006gQE@@g(\@   regnety_008gQkC@g333333@   regnety_008_tv)r&   r(   r*   r,   r$   r0   r1   regnety_016g(\µ4@g333333@   regnety_032r'   r)   r#   regnety_040g)\h?@@   regnety_064g\(@@g)\(@H   regnety_080   gGz4S@gQ@regnety_080_tvregnety_120regnety_160   gQZ@gףp=
@regnety_320   g)\\@g=
ףp=@   regnety_640i`  g(\ob@iH  regnety_1280i  g(\d@g)\(@i  regnety_2560i  g(\l@iu  regnety_040_sgnsiluri   )r&   r(   r*   r,   r$   r0   r:   r<   regnetv_040T)r$   r&   r(   r*   r,   r0   r7   r:   regnetv_064r   )	r$   r&   r(   r*   r,   r0   r7   r:   r5   regnetz_005gffffff%@gGz@r   g      @i   )r$   r&   r(   r*   r,   r.   r0   r5   r6   r8   r:   regnetz_040   g      -@g+@regnetz_040_hi   variant
pretrainedc                 K   s   t t| |ft|  td|S )zCreate a RegNet model.

    Args:
        variant: Model variant name.
        pretrained: Load pretrained weights.
        **kwargs: Additional model arguments.

    Returns:
        RegNet model instance.
    )	model_cfgpretrained_filter_fn)r   r!   
model_cfgsrb  )r  r  r   rF   rF   rG   _create_regnet  s   r  urlc                 K   s"   | dddddddt tdd	d
|S )zCreate default configuration dictionary.

    Args:
        url: Model weight URL.
        **kwargs: Additional configuration options.

    Returns:
        Configuration dictionary.
    r   r      r     r  )r      r  gffffff?r-   bicubicrM  rX  )r  r   
input_size	pool_sizetest_input_sizecrop_pcttest_crop_pctinterpolationr7  r8  
first_conv
classifierr   r  r   rF   rF   rG   _cfg  s   r  c                 K   "   | dddddt tdddd	d
|S )zCreate pycls configuration dictionary.

    Args:
        url: Model weight URL.
        **kwargs: Additional configuration options.

    Returns:
        Configuration dictionary.
    r   r  r  g      ?r  rM  rX  mitz)https://github.com/facebookresearch/pyclsr  r   r  r  r  r  r7  r8  r  r  license
origin_urlr   r  rF   rF   rG   _cfgpyc     r  c                 K   r  )zCreate torchvision v2 configuration dictionary.

    Args:
        url: Model weight URL.
        **kwargs: Additional configuration options.

    Returns:
        Configuration dictionary.
    r   r  r  gzG?r  rM  rX  zbsd-3-clausez!https://github.com/pytorch/visionr  r   r  rF   rF   rG   _cfgtv2  r  r  zregnety_032.ra_in1kztimm/znhttps://github.com/huggingface/pytorch-image-models/releases/download/v0.1-weights/regnety_032_ra-7f2439f9.pth)	hf_hub_idr  zregnety_040.ra3_in1kzshttps://github.com/huggingface/pytorch-image-models/releases/download/v0.1-tpu-weights/regnety_040_ra3-670e1166.pthzregnety_064.ra3_in1kzshttps://github.com/huggingface/pytorch-image-models/releases/download/v0.1-tpu-weights/regnety_064_ra3-aa26dc7d.pthzregnety_080.ra3_in1kzshttps://github.com/huggingface/pytorch-image-models/releases/download/v0.1-tpu-weights/regnety_080_ra3-1fdc4344.pthzregnety_120.sw_in12k_ft_in1k)r  zregnety_160.sw_in12k_ft_in1kzregnety_160.lion_in12k_ft_in1kzregnety_120.sw_in12ki-.  )r  r   zregnety_160.sw_in12kzregnety_040_sgn.untrained)r  zregnetv_040.ra3_in1kzshttps://github.com/huggingface/pytorch-image-models/releases/download/v0.1-tpu-weights/regnetv_040_ra3-c248f51f.pthr   )r  r  r  zregnetv_064.ra3_in1kzshttps://github.com/huggingface/pytorch-image-models/releases/download/v0.1-tpu-weights/regnetv_064_ra3-530616c2.pthzregnetz_005.untrainedzregnetz_040.ra3_in1kzshttps://github.com/huggingface/pytorch-image-models/releases/download/v0.1-tpu-weights/regnetz_040_ra3-9007edf5.pth)r      r  )ra   ra   r-   )r   r  r  )r  r  r  r  r  r  zregnetz_040_h.ra3_in1kzthttps://github.com/huggingface/pytorch-image-models/releases/download/v0.1-tpu-weights/regnetz_040h_ra3-f594343b.pthzregnety_160.deit_in1kz<https://dl.fbaipublicfiles.com/deit/regnety_160-a5fe301d.pthzregnetx_004_tv.tv2_in1kz?https://download.pytorch.org/models/regnet_x_400mf-62229a5f.pthzregnetx_008.tv2_in1kz?https://download.pytorch.org/models/regnet_x_800mf-94a99ebd.pthzregnetx_016.tv2_in1kz?https://download.pytorch.org/models/regnet_x_1_6gf-a12f2b72.pthzregnetx_032.tv2_in1kz?https://download.pytorch.org/models/regnet_x_3_2gf-7071aa85.pthzregnetx_080.tv2_in1kz=https://download.pytorch.org/models/regnet_x_8gf-2b70d774.pthzregnetx_160.tv2_in1kz>https://download.pytorch.org/models/regnet_x_16gf-ba3796d7.pthzregnetx_320.tv2_in1kz>https://download.pytorch.org/models/regnet_x_32gf-6eb8fdc6.pthzregnety_004.tv2_in1kz?https://download.pytorch.org/models/regnet_y_400mf-e6988f5f.pthzregnety_008_tv.tv2_in1kz?https://download.pytorch.org/models/regnet_y_800mf-58fc7688.pthzregnety_016.tv2_in1kz?https://download.pytorch.org/models/regnet_y_1_6gf-0d7bc02a.pthzregnety_032.tv2_in1kz?https://download.pytorch.org/models/regnet_y_3_2gf-9180c971.pthzregnety_080_tv.tv2_in1kz=https://download.pytorch.org/models/regnet_y_8gf-dc2b1b54.pthzregnety_160.tv2_in1kz>https://download.pytorch.org/models/regnet_y_16gf-3e4a00f9.pthzregnety_320.tv2_in1kz>https://download.pytorch.org/models/regnet_y_32gf-8db6d4b5.pthzregnety_160.swag_ft_in1kzChttps://download.pytorch.org/models/regnet_y_16gf_swag-43afe44d.pthzcc-by-nc-4.0)r     r  )   r  )r  r  r  r  r  r  zregnety_320.swag_ft_in1kzChttps://download.pytorch.org/models/regnet_y_32gf_swag-04fdfa75.pthzregnety_1280.swag_ft_in1kzDhttps://download.pytorch.org/models/regnet_y_128gf_swag-c8ce3e52.pthzregnety_160.swag_lc_in1kzFhttps://download.pytorch.org/models/regnet_y_16gf_lc_swag-f3ec0043.pth)r  r  r  zregnety_320.swag_lc_in1kzFhttps://download.pytorch.org/models/regnet_y_32gf_lc_swag-e1583746.pthzregnety_1280.swag_lc_in1kzGhttps://download.pytorch.org/models/regnet_y_128gf_lc_swag-cbe8ce12.pthzregnety_320.seer_ft_in1kotherz)https://github.com/facebookresearch/visslzhttps://dl.fbaipublicfiles.com/vissl/model_zoo/seer_finetuned/seer_regnet32_finetuned_in1k_model_final_checkpoint_phase78.torch)r  r  r  r  r  r  r  zregnety_640.seer_ft_in1kzhttps://dl.fbaipublicfiles.com/vissl/model_zoo/seer_finetuned/seer_regnet64_finetuned_in1k_model_final_checkpoint_phase78.torchzregnety_1280.seer_ft_in1kzhttps://dl.fbaipublicfiles.com/vissl/model_zoo/seer_finetuned/seer_regnet128_finetuned_in1k_model_final_checkpoint_phase78.torchzregnety_2560.seer_ft_in1kzhttps://dl.fbaipublicfiles.com/vissl/model_zoo/seer_finetuned/seer_regnet256_finetuned_in1k_model_final_checkpoint_phase38.torchzregnety_320.seerzihttps://dl.fbaipublicfiles.com/vissl/model_zoo/seer_regnet32d/seer_regnet32gf_model_iteration244000.torch)r  r  r   r  r  zregnety_640.seerzphttps://dl.fbaipublicfiles.com/vissl/model_zoo/seer_regnet64/seer_regnet64gf_model_final_checkpoint_phase0.torchzregnety_1280.seerzhttps://dl.fbaipublicfiles.com/vissl/model_zoo/swav_ig1b_regnet128Gf_cnstant_bs32_node16_sinkhorn10_proto16k_syncBN64_warmup8k/model_final_checkpoint_phase0.torchzregnetx_002.pycls_in1kzregnetx_004.pycls_in1kzregnetx_006.pycls_in1kzregnetx_008.pycls_in1kzregnetx_016.pycls_in1kzregnetx_032.pycls_in1kzregnetx_040.pycls_in1kzregnetx_064.pycls_in1kzregnetx_080.pycls_in1kzregnetx_120.pycls_in1kzregnetx_160.pycls_in1kzregnetx_320.pycls_in1kzregnety_002.pycls_in1kzregnety_004.pycls_in1kzregnety_006.pycls_in1kzregnety_008.pycls_in1kzregnety_016.pycls_in1kzregnety_032.pycls_in1kzregnety_040.pycls_in1kzregnety_064.pycls_in1kzregnety_080.pycls_in1kzregnety_120.pycls_in1kzregnety_160.pycls_in1kzregnety_320.pycls_in1kc                 K      t d| fi |S )zRegNetX-200MFrc  r  r  r   rF   rF   rG   rc       c                 K   r  )zRegNetX-400MFre  r  r  rF   rF   rG   re    r  c                 K   r  )z+RegNetX-400MF w/ torchvision group roundingrg  r  r  rF   rF   rG   rg    r  c                 K   r  )zRegNetX-600MFrh  r  r  rF   rF   rG   rh    r  c                 K   r  )zRegNetX-800MFrj  r  r  rF   rF   rG   rj    r  c                 K   r  )zRegNetX-1.6GFrl  r  r  rF   rF   rG   rl    r  c                 K   r  )zRegNetX-3.2GFrn  r  r  rF   rF   rG   rn    r  c                 K   r  )zRegNetX-4.0GFrq  r  r  rF   rF   rG   rq    r  c                 K   r  )zRegNetX-6.4GFru  r  r  rF   rF   rG   ru    r  c                 K   r  )zRegNetX-8.0GFrx  r  r  rF   rF   rG   rx    r  c                 K   r  )zRegNetX-12GFrz  r  r  rF   rF   rG   rz    r  c                 K   r  )zRegNetX-16GFr~  r  r  rF   rF   rG   r~    r  c                 K   r  )zRegNetX-32GFr  r  r  rF   rF   rG   r     r  c                 K   r  )zRegNetY-200MFr  r  r  rF   rF   rG   r  &  r  c                 K   r  )zRegNetY-400MFr  r  r  rF   rF   rG   r  ,  r  c                 K   r  )zRegNetY-600MFr  r  r  rF   rF   rG   r  2  r  c                 K   r  )zRegNetY-800MFr  r  r  rF   rF   rG   r  8  r  c                 K   r  )z+RegNetY-800MF w/ torchvision group roundingr  r  r  rF   rF   rG   r  >  r  c                 K   r  )zRegNetY-1.6GFr  r  r  rF   rF   rG   r  D  r  c                 K   r  )zRegNetY-3.2GFr  r  r  rF   rF   rG   r  J  r  c                 K   r  )zRegNetY-4.0GFr  r  r  rF   rF   rG   r  P  r  c                 K   r  )zRegNetY-6.4GFr  r  r  rF   rF   rG   r  V  r  c                 K   r  )zRegNetY-8.0GFr  r  r  rF   rF   rG   r  \  r  c                 K   r  )z+RegNetY-8.0GF w/ torchvision group roundingr  r  r  rF   rF   rG   r  b  r  c                 K   r  )zRegNetY-12GFr  r  r  rF   rF   rG   r  h  r  c                 K   r  )zRegNetY-16GFr  r  r  rF   rF   rG   r  n  r  c                 K   r  )zRegNetY-32GFr  r  r  rF   rF   rG   r  t  r  c                 K   r  )zRegNetY-64GFr  r  r  rF   rF   rG   r  z  r  c                 K   r  )zRegNetY-128GFr  r  r  rF   rF   rG   r    r  c                 K   r  )zRegNetY-256GFr  r  r  rF   rF   rG   r    r  c                 K   r  )zRegNetY-4.0GF w/ GroupNorm r  r  r  rF   rF   rG   r    r  c                 K   r  )zRegNetV-4.0GF (pre-activation)r  r  r  rF   rF   rG   r    r  c                 K   r  )zRegNetV-6.4GF (pre-activation)r  r  r  rF   rF   rG   r    r  c                 K      t d| fddi|S )zRegNetZ-500MF
    NOTE: config found in https://github.com/facebookresearch/ClassyVision/blob/main/classy_vision/models/regnet.py
    but it's not clear it is equivalent to paper model as not detailed in the paper.
    r  r   Fr  r  rF   rF   rG   r       c                 K   r  )RegNetZ-4.0GF
    NOTE: config found in https://github.com/facebookresearch/ClassyVision/blob/main/classy_vision/models/regnet.py
    but it's not clear it is equivalent to paper model as not detailed in the paper.
    r  r   Fr  r  rF   rF   rG   r    r  c                 K   r  )r  r  r   Fr  r  rF   rF   rG   r    r  regnetz_040h)r/   )ra   )r   r   r   NF)r   NF)r   FrF   )r   r3  )kr@   rn   dataclassesr   r   	functoolsr   typingr   r   r   r   r	   r
   r   rj   torch.nnr   	timm.datar   r   timm.layersr   r   r   r   r   r   r   r   r   r   _builderr   	_featuresr   _manipulater   r   	_registryr   r   r    __all__r"   rC   rA   rL   r`   r|   rE   r5  r   r   r   rD   r   r   r   r   r!   r   rb  r   r  r  r  r  r  default_cfgsrc  re  rg  rh  rj  rl  rn  rq  ru  rx  rz  r~  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r=   rF   rF   rF   rG   <module>   s   $ 
"
$
1
,
	
&eeE   +"
B	
 !%*,159@



 
%&
*
0369<?BEILORUX[_
c
g
lorv
{
 
  
  
  
  
           !  "  #  $  %  &  '  (  *  +  ,  -  .  /  0  1  2  3  4  5  9
