""" Convolution with Weight Standardization (StdConv and ScaledStdConv)

StdConv:
@article{weightstandardization,
  author    = {Siyuan Qiao and Huiyu Wang and Chenxi Liu and Wei Shen and Alan Yuille},
  title     = {Weight Standardization},
  journal   = {arXiv preprint arXiv:1903.10520},
  year      = {2019},
}
Code: https://github.com/joe-siyuan-qiao/WeightStandardization

ScaledStdConv:
Paper: `Characterizing signal propagation to close the performance gap in unnormalized ResNets`
    - https://arxiv.org/abs/2101.08692
Official Deepmind JAX code: https://github.com/deepmind/deepmind-research/tree/master/nfnets

Hacked together by / copyright Ross Wightman, 2021.
"""
import torch
import torch.nn as nn
import torch.nn.functional as F

from .padding import get_padding, get_padding_value, pad_same


class StdConv2d(nn.Conv2d):
    """Conv2d with Weight Standardization. Used for BiT ResNet-V2 models.

    Paper: `Micro-Batch Training with Batch-Channel Normalization and Weight Standardization` -
        https://arxiv.org/abs/1903.10520v2
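
    Usage sketch (an illustrative addition, not part of the original docstring; assumes the
    module is importable as ``timm.layers.std_conv``):

        >>> import torch
        >>> from timm.layers.std_conv import StdConv2d
        >>> conv = StdConv2d(16, 32, kernel_size=3)  # default padding keeps spatial size
        >>> conv(torch.randn(1, 16, 8, 8)).shape
        torch.Size([1, 32, 8, 8])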
    """
    def __init__(
            self, in_channel, out_channels, kernel_size, stride=1, padding=None,
            dilation=1, groups=1, bias=False, eps=1e-6):
        if padding is None:
            padding = get_padding(kernel_size, stride, dilation)
        super().__init__(
            in_channel, out_channels, kernel_size, stride=stride, padding=padding,
            dilation=dilation, groups=groups, bias=bias)
        self.eps = eps

    def forward(self, x):
        # Standardize each output filter to zero mean / unit variance over its fan-in by
        # (ab)using batch_norm in training mode with no running stats, then convolve as usual.
        weight = F.batch_norm(
            self.weight.reshape(1, self.out_channels, -1), None, None,
            training=True, momentum=0., eps=self.eps).reshape_as(self.weight)
        x = F.conv2d(x, weight, self.bias, self.stride, self.padding, self.dilation, self.groups)
        return x


class StdConv2dSame(nn.Conv2d):
    """Conv2d with Weight Standardization. TF compatible SAME padding. Used for ViT Hybrid model.

    Paper: `Micro-Batch Training with Batch-Channel Normalization and Weight Standardization` -
        https://arxiv.org/abs/1903.10520v2
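
    SAME-padding sketch (illustrative addition, not part of the original docstring): with a
    stride > 1 the output size is ``ceil(input / stride)``, matching TF 'SAME' behaviour.

        >>> import torch
        >>> from timm.layers.std_conv import StdConv2dSame
        >>> conv = StdConv2dSame(16, 32, kernel_size=3, stride=2)
        >>> conv(torch.randn(1, 16, 12, 12)).shape
        torch.Size([1, 32, 6, 6])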
    """
    def __init__(
            self, in_channel, out_channels, kernel_size, stride=1, padding='SAME',
            dilation=1, groups=1, bias=False, eps=1e-6):
        padding, is_dynamic = get_padding_value(padding, kernel_size, stride=stride, dilation=dilation)
        super().__init__(
            in_channel, out_channels, kernel_size, stride=stride, padding=padding,
            dilation=dilation, groups=groups, bias=bias)
        self.same_pad = is_dynamic
        self.eps = eps

    def forward(self, x):
        # Apply dynamic (input-size dependent) TF 'SAME' padding when static padding can't be used.
        if self.same_pad:
            x = pad_same(x, self.kernel_size, self.stride, self.dilation)
        weight = F.batch_norm(
            self.weight.reshape(1, self.out_channels, -1), None, None,
            training=True, momentum=0., eps=self.eps).reshape_as(self.weight)
        x = F.conv2d(x, weight, self.bias, self.stride, self.padding, self.dilation, self.groups)
        return x


class ScaledStdConv2d(nn.Conv2d):
    """Conv2d layer with Scaled Weight Standardization.

    Paper: `Characterizing signal propagation to close the performance gap in unnormalized ResNets` -
        https://arxiv.org/abs/2101.08692

    NOTE: the operations used in this impl differ slightly from the DeepMind Haiku impl. The impact is minor.
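
    Gain/scale sketch (illustrative addition, not part of the original docstring): each output
    channel gets a learnable gain, and the fixed scale is gamma / sqrt(fan-in).

        >>> from timm.layers.std_conv import ScaledStdConv2d
        >>> conv = ScaledStdConv2d(8, 16, kernel_size=3)
        >>> tuple(conv.gain.shape)
        (16, 1, 1, 1)
        >>> round(conv.scale, 4)  # 1 / sqrt(8 * 3 * 3)
        0.1179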
    """

    def __init__(
            self, in_channels, out_channels, kernel_size, stride=1, padding=None,
            dilation=1, groups=1, bias=True, gamma=1.0, eps=1e-6, gain_init=1.0):
        if padding is None:
            padding = get_padding(kernel_size, stride, dilation)
        super().__init__(
            in_channels, out_channels, kernel_size, stride=stride, padding=padding,
            dilation=dilation, groups=groups, bias=bias)
        # Learnable per-output-channel gain applied on top of the standardized weight.
        self.gain = nn.Parameter(torch.full((self.out_channels, 1, 1, 1), gain_init))
        self.scale = gamma * self.weight[0].numel() ** -0.5  # gamma * 1 / sqrt(fan-in)
        self.eps = eps

    def forward(self, x):
        weight = F.batch_norm(
            self.weight.reshape(1, self.out_channels, -1), None, None,
            weight=(self.gain * self.scale).view(-1),
            training=True, momentum=0., eps=self.eps).reshape_as(self.weight)
        return F.conv2d(x, weight, self.bias, self.stride, self.padding, self.dilation, self.groups)


class ScaledStdConv2dSame(nn.Conv2d):
    """Conv2d layer with Scaled Weight Standardization and Tensorflow-like SAME padding support

    Paper: `Characterizing signal propagation to close the performance gap in unnormalized ResNets` -
        https://arxiv.org/abs/2101.08692

    NOTE: the operations used in this impl differ slightly from the DeepMind Haiku impl. The impact is minor.
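
    Usage sketch (illustrative addition, not part of the original docstring): 'SAME' padding
    with stride > 1 is resolved dynamically from the input size at forward time.

        >>> import torch
        >>> from timm.layers.std_conv import ScaledStdConv2dSame
        >>> conv = ScaledStdConv2dSame(16, 32, kernel_size=3, stride=2)
        >>> conv(torch.randn(1, 16, 7, 7)).shape
        torch.Size([1, 32, 4, 4])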
    """

    def __init__(
            self, in_channels, out_channels, kernel_size, stride=1, padding='SAME',
            dilation=1, groups=1, bias=True, gamma=1.0, eps=1e-6, gain_init=1.0):
        padding, is_dynamic = get_padding_value(padding, kernel_size, stride=stride, dilation=dilation)
        super().__init__(
            in_channels, out_channels, kernel_size, stride=stride, padding=padding,
            dilation=dilation, groups=groups, bias=bias)
        self.gain = nn.Parameter(torch.full((self.out_channels, 1, 1, 1), gain_init))
        self.scale = gamma * self.weight[0].numel() ** -0.5  # gamma * 1 / sqrt(fan-in)
        self.same_pad = is_dynamic
        self.eps = eps

    def forward(self, x):
        if self.same_pad:
            x = pad_same(x, self.kernel_size, self.stride, self.dilation)
        weight = F.batch_norm(
            self.weight.reshape(1, self.out_channels, -1), None, None,
            weight=(self.gain * self.scale).view(-1),
            training=True, momentum=0., eps=self.eps).reshape_as(self.weight)
        return F.conv2d(x, weight, self.bias, self.stride, self.padding, self.dilation, self.groups)
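

# Illustrative self-check (an addition for exposition, not part of the original timm module).
# It verifies that the F.batch_norm trick used above matches explicit per-filter weight
# standardization. Run as `python -m timm.layers.std_conv` so the relative import resolves.
if __name__ == '__main__':
    torch.manual_seed(0)
    _conv = StdConv2d(8, 16, kernel_size=3)
    _w = _conv.weight
    _manual = (_w - _w.mean(dim=(1, 2, 3), keepdim=True)) / (
        _w.var(dim=(1, 2, 3), unbiased=False, keepdim=True) + _conv.eps).sqrt()
    _via_bn = F.batch_norm(
        _w.reshape(1, _conv.out_channels, -1), None, None,
        training=True, momentum=0., eps=_conv.eps).reshape_as(_w)
    # Expected to be ~0 up to float32 rounding.
    print('max abs diff vs explicit standardization:', (_manual - _via_bn).abs().max().item())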