o
    ©Ì³i½  ã                   @   s6   d dl mZ d dlZd dlmZ G dd„ dejƒZdS )é    )ÚOptionalN)Únnc                	       s`   e Zd ZdZde ¡ dœdejdejdeej dejf‡ fdd	„Zd
e	j
de	j
fdd„Z‡  ZS )ÚFeedForwardaë  This class implements the feed-forward network derived from Llama2.

    Args:
        gate_proj (nn.Module): Projection from input dim to hidden dim, fed through activation
            and multiplied by up_proj.
        down_proj (nn.Module): Final projection to output dim.
        up_proj (Optional[nn.Module]): Projection from input dim to hidden dim, multiplied by
            activation(gate_proj).
        activation (nn.Module): Activation function to use. Default is nn.SiLU().
    N)Úup_projÚ
activationÚ	gate_projÚ	down_projr   r   c                   s&   t ƒ  ¡  || _|| _|| _|| _d S )N)ÚsuperÚ__init__Úw1Úw2Úw3r   )Úselfr   r   r   r   ©Ú	__class__© úR/home/ubuntu/.local/lib/python3.10/site-packages/torchtune/modules/feed_forward.pyr
      s
   

zFeedForward.__init__ÚxÚreturnc                 C   s6   |   |  |¡¡}| jdur||  |¡ }|  |¡}|S )ad  
        Args:
            x (torch.Tensor): input tensor with shape ``(..., in_dim)``, where ``in_dim`` is the
                input dimension of both ``gate_proj`` and ``up_proj``.

        Returns:
            torch.Tensor: output tensor with shape ``(..., out_dim)``, where ``out_dim`` is the                 output dimension of ``down_proj``.
        N)r   r   r   r   )r   r   Úhr   r   r   Úforward(   s
   


zFeedForward.forward)Ú__name__Ú
__module__Ú__qualname__Ú__doc__r   ÚSiLUÚModuler   r
   ÚtorchÚTensorr   Ú__classcell__r   r   r   r   r      s    úýüûúr   )Útypingr   r   r   r   r   r   r   r   r   Ú<module>   s   