o
    پiz"                     @   s   d Z ddlmZ ddlmZ ddlmZ ddlmZ G dd dej	Z
G d	d
 d
ej	ZeeejddZG dd dej	ZG dd dej	ZG dd dej	ZG dd dej	ZdS )zl MLP module w/ dropout and configurable activation layer

Hacked together by / Copyright 2020 Ross Wightman
    )partial)nn   )GlobalResponseNorm)	to_2tuplec                       s:   e Zd ZdZddejddddf fdd	Zdd	 Z  ZS )
Mlpz MLP as used in Vision Transformer, MLP-Mixer and related networks

    NOTE: When use_conv=True, expects 2D NCHW tensors, otherwise N*C expected.
    NT        Fc	                    s   t    |p|}|p|}t|}t|}	|rttjddntj}
|
|||d d| _| | _t	|	d | _
|d ur?||nt | _|
|||d d| _t	|	d | _d S )Nr   kernel_sizer   bias)super__init__r   r   r   Conv2dLinearfc1actDropoutdrop1Identitynormfc2drop2)selfin_featureshidden_featuresout_features	act_layer
norm_layerr   dropuse_conv
drop_probslinear_layer	__class__ C/home/ubuntu/.local/lib/python3.10/site-packages/timm/layers/mlp.pyr      s   
zMlp.__init__c                 C   @   |  |}| |}| |}| |}| |}| |}|S N)r   r   r   r   r   r   r   xr%   r%   r&   forward+      





zMlp.forward	__name__
__module____qualname____doc__r   GELUr   r+   __classcell__r%   r%   r#   r&   r      s    r   c                       sD   e Zd ZdZddejdddddf fdd	Zdd	 Zd
d Z  Z	S )GluMlpz MLP w/ GLU style gating
    See: https://arxiv.org/abs/1612.08083, https://arxiv.org/abs/2002.05202

    NOTE: When use_conv=True, expects 2D NCHW tensors, otherwise N*C expected.
    NTr   Fc
                    s   t    |p|}|p|}|d dksJ t|}t|}
|r&ttjddntj}|r-dnd| _|	| _||||d d| _	| | _
t|
d | _|d urS||d nt | _||d ||d d| _t|
d | _d S )N   r   r   r	   r   )r   r   r   r   r   r   r   	chunk_dim	gate_lastr   r   r   r   r   r   r   r   )r   r   r   r   r   r   r   r   r    r8   r!   r"   r#   r%   r&   r   ;   s   
zGluMlp.__init__c                 C   s`   | j jd urtj| j j| j jjd d d   tjj| j j| j jjd d d  dd d S )Nr   r5   ư>std)r   r   r   initones_shapenormal_weightr   r%   r%   r&   init_weightsX   s   &.zGluMlp.init_weightsc                 C   sl   |  |}|jd| jd\}}| jr|| | n| || }| |}| |}| |}| |}|S )Nr5   )dim)	r   chunkr7   r8   r   r   r   r   r   )r   r*   x1x2r%   r%   r&   r+   ^   s   
"



zGluMlp.forward)
r.   r/   r0   r1   r   Sigmoidr   rB   r+   r3   r%   r%   r#   r&   r4   5   s    r4   F)r   r8   c                       s@   e Zd ZdZddejdddf fdd	Zdd Zd	d
 Z  Z	S )SwiGLUz SwiGLU
    NOTE: GluMLP above can implement SwiGLU, but this impl has split fc1 and
    better matches some other common impl which makes mapping checkpoints simpler.
    NTr   c           	         s   t    |p|}|p|}t|}t|}tj|||d d| _tj|||d d| _| | _t|d | _	|d ur?||nt
 | _tj|||d d| _t|d | _d S )Nr   r   r   )r   r   r   r   r   fc1_gfc1_xr   r   r   r   r   r   r   )	r   r   r   r   r   r   r   r   r!   r#   r%   r&   r   q   s   

zSwiGLU.__init__c                 C   s4   | j jd urtj| j j tjj| j jdd d S )Nr9   r:   )rI   r   r   r<   r=   r?   r@   rA   r%   r%   r&   rB      s   zSwiGLU.init_weightsc                 C   sN   |  |}| |}| || }| |}| |}| |}| |}|S r(   )rI   rJ   r   r   r   r   r   )r   r*   x_gater%   r%   r&   r+      s   





zSwiGLU.forward)
r.   r/   r0   r1   r   SiLUr   rB   r+   r3   r%   r%   r#   r&   rH   l   s    rH   c                       s:   e Zd ZdZddejddddf fdd	Zdd Z  ZS )	GatedMlpz MLP as used in gMLP
    NTr   c	           
         s   t    |p|}|p|}t|}t|}	tj|||d d| _| | _t|	d | _|d urB|d dks8J ||| _	|d }nt
 | _	|d urO||nt
 | _tj|||d d| _t|	d | _d S )Nr   r   r5   r   )r   r   r   r   r   r   r   r   r   gater   r   r   r   )
r   r   r   r   r   r   
gate_layerr   r   r!   r#   r%   r&   r      s    



zGatedMlp.__init__c                 C   sJ   |  |}| |}| |}| |}| |}| |}| |}|S r(   )r   r   r   rN   r   r   r   r)   r%   r%   r&   r+      s   






zGatedMlp.forwardr-   r%   r%   r#   r&   rM      s    rM   c                       s8   e Zd ZdZddejdddf fdd	Zdd Z  ZS )	ConvMlpzG MLP using 1x1 convs that keeps spatial dims (for 2D NCHW tensors)
    NTr   c                    s   t    |p|}|p|}t|}tj||d|d d| _|r#||nt | _| | _t	|| _
tj||d|d d| _d S )Nr   r   )r
   r   )r   r   r   r   r   r   r   r   r   r   r   r   )r   r   r   r   r   r   r   r   r#   r%   r&   r      s   

zConvMlp.__init__c                 C   s6   |  |}| |}| |}| |}| |}|S r(   )r   r   r   r   r   r)   r%   r%   r&   r+      s   




zConvMlp.forward)	r.   r/   r0   r1   r   ReLUr   r+   r3   r%   r%   r#   r&   rP      s    rP   c                       s8   e Zd ZdZddejdddf fdd	Zdd	 Z  ZS )
GlobalResponseNormMlpz MLP w/ Global Response Norm (see grn.py), nn.Linear or 1x1 Conv2d

    NOTE: Intended for '2D' NCHW (use_conv=True) or NHWC (use_conv=False, channels-last) tensor layouts
    NTr   Fc           
         s   t    |p|}|p|}t|}t|}|rttjddntj}	|	|||d d| _| | _t	|d | _
t|| d| _|	|||d d| _t	|d | _d S )Nr   r	   r   r   )channels_last)r   r   r   r   r   r   r   r   r   r   r   r   grnr   r   )
r   r   r   r   r   r   r   r    r!   r"   r#   r%   r&   r      s   

zGlobalResponseNormMlp.__init__c                 C   r'   r(   )r   r   r   rT   r   r   r)   r%   r%   r&   r+     r,   zGlobalResponseNormMlp.forwardr-   r%   r%   r#   r&   rR      s    rR   N)r1   	functoolsr   torchr   rT   r   helpersr   Moduler   r4   rL   SwiGLUPackedrH   rM   rP   rR   r%   r%   r%   r&   <module>   s    (4.,!