o
     ic                     @   s  d dl Z d dlZd dlmZ d dlm  mZ ejjdd Z	ejjdd Z
G dd dejjZejZejjdd	 Zejjd
d ZG dd dejjZejZejjdd Zejjdd Zejjdd ZdZdZejjeZejjjeddZG dd dejjZejZ dS )    Nc                 C   s<   ||  }|d dt d| dd| |     j| jdS N      ?g      ? e3E?   Hm?dtypetorchtanhtor   )ybiasx r   X/home/ubuntu/.local/lib/python3.10/site-packages/xformers/_flash_attn/ops/activations.py	bias_gelu   s   4r   c                 C   s|   || }t d| dd| |   }d| d||  dd| |    dd|   }||  }|j|jd|jd|jdfS )	z5Assume that y has shape (B, D) and bias has shape (D)r   r   r   r   6vf?r   r   )dimr   )r
   r   r   r   sum)gr   r   r   tanh_outffgrad_yr   r   r   bias_gelu_back   s   $r   c                   @   $   e Zd Zedd Zedd ZdS )GeLUFunctionc                 C      |  || t||S N)save_for_backwardr   )ctxinputr   r   r   r   forward&   s   
zGeLUFunction.forwardc                 C   s   | j \}}t|||}||fS r   )saved_tensorsr   )r    grad_outputr!   r   tmpr   r   r   backward,   s   
zGeLUFunction.backwardN__name__
__module____qualname__staticmethodr"   r&   r   r   r   r   r   %   
    
r   c                 C   s4   | d dt d|  dd|  |      j| jdS r   r	   )r   r   r   r   gelu_fwd8   s   4r-   c                 C   s`   t d| dd| |   }d| d||  dd| |    dd|   }||  j|jdS )Nr   r   r   r   r   r   r	   )r   r   r   r   r   r   r   gelu_bwd@   s
   $r.   c                   @   r   )FastGeLUFunctionc                 C   s   |  | t|S r   )r   r-   )r    r!   r   r   r   r"   K   s   
zFastGeLUFunction.forwardc                 C   s   | j \}t||}|S r   )r#   r.   )r    r$   r!   r%   r   r   r   r&   Q   s   
zFastGeLUFunction.backwardNr'   r   r   r   r   r/   J   r,   r/   c                 C   s   t |dk| dj|jdS )Nr   g        r   )r
   wherer   r   r   r   r   r   r   relu_bwd[      r2   c                 C   s   t | }|| j| jdS )Nr   Frelur   r   )r   rr   r   r   
sqrelu_fwd`   s   
r8   c                 C   s   d|  t | j|jdS )Ng       @r   r4   r1   r   r   r   
sqrelu_bwdf   r3   r9   zn
template <typename T> T swiglu_fwd(T x, T y) {
    return float(x) * float(y) / (1.0f + ::exp(-float(x)));
}
z
template <typename T> void swiglu_bwd(T x, T y, T g, T& dx, T& dy) {
    float x_sigmoid = 1.0f / (1.0f + ::exp(-float(x)));
    dx = x_sigmoid * (1 + float(x) * (1.0f - x_sigmoid)) * float(g) * float(y);
    dy = float(x) * x_sigmoid * float(g);
}
   )num_outputsc                   @   r   )SwiGLUFunctionc                 C   r   r   )r   
swiglu_fwd)r    r   r   r   r   r   r"   }   s   
zSwiGLUFunction.forwardc                 C   s   | j \}}t|||S r   )r#   
swiglu_bwd)r    doutr   r   r   r   r   r&      s   
zSwiGLUFunction.backwardNr'   r   r   r   r   r<   {   s
    
r<   )!mathr
   torch.nnnntorch.nn.functional
functionalr5   jitscriptr   r   autogradFunctionr   applybias_gelu_implr-   r.   r/   fast_gelu_implr2   r8   r9   swiglu_fwd_codestringswiglu_bwd_codestringcuda	jiterator_create_jit_fnr=   _create_multi_output_jit_fnr>   r<   swiglur   r   r   r   <module>   s8   	



	



