o
     i                  	   @   s   d dl mZ d dlZd dlmZ ddlmZ ddeej defd	d
Z	ddejdejdeej defddZ	G dd dejj
ZdS )    )OptionalN)nn   )_is_triton_availableư>weightepsc                 C   sF   t  sJ ddlm} t r| js|dur|jrtd|| ||S )a)  
    RMS Normalization along the last dimension.

    This is similar to torch.nn.functional.normalize but with eps being added
    instead of max.

    Expects x contiguous of shape (..., dim), and returns normalized data
    of the same shape. For each dim-length vector x, the result has

        x / sqrt( x*x.sum() + eps)

    If weights are included, they are a contiguous parameter of length dim
    which multiplies the result.

    This functionality is experimental. Its API might be changed without warnings.
    Use it at your own risk.
       )_rms_norm_forwardNGradients not supported.)r   _triton.rmsnorm_kernelsr
   torchis_grad_enabledrequires_grad
ValueError)xr   r   r
    r   H/home/ubuntu/.local/lib/python3.10/site-packages/xformers/ops/rmsnorm.pyrms_norm   s   
r   r   yc                 C   sN   t  r| js|js|dur|jrtdt sJ ddlm} || |||S )a=  
    An addition fused with rms_norm.

        z = rms_norm_add(x, y, weight, eps)

    is equivalent to

        x += y
        z = rms_norm(x, weight, eps)

    where x, y and z are all contiguous.

    This functionality is experimental. Its API might be changed without warnings.
    Use it at your own risk.
    Nr   r	   )_rms_norm_add_forward)r   r   r   r   r   r   r   )r   r   r   r   r   r   r   r   rms_norm_add*   s   
r   c                       sV   e Zd ZdZddededef fddZd	ej	fd
dZ
d	ej	dej	fddZ  ZS )RMSNorma(  
    RMS Normalization layer along the last dimension.

    This is similar to torch.nn.functional.normalize but with eps being added
    instead of max.

    Expects contiguous input of shape (..., dim), and returns normalized data
    of the same shape. For each dim-length vector x, the result has

        x / sqrt( x*x.sum() + eps)

    If weights are included, they are a parameter of length dim which multiplies
    the result.

    This functionality is experimental. Its API might be changed without warnings.
    Use it at your own risk.
    Tr   diminclude_weightr   c                    s4   t    || _|rtt|| _d S d | _d S N)super__init__r   r   	Parameterr   onesr   )selfr   r   r   	__class__r   r   r   [   s
   

zRMSNorm.__init__r   c                 C   s   t || j| jS r   )r   r   r   )r    r   r   r   r   forwardc   s   zRMSNorm.forwardr   c                 C   s   t ||| j| jS )z
        An addition fused with forward.

            z = layer.increment_and_forward_(x, y)

        is equivalent to

            x += y
            z = layer(x)
        )r   r   r   )r    r   r   r   r   r   increment_and_forward_f   s   zRMSNorm.increment_and_forward_)Tr   )__name__
__module____qualname____doc__intboolfloatr   r   Tensorr#   r$   __classcell__r   r   r!   r   r   H   s
    r   )r   )typingr   r   r    r   r,   r+   r   r   Moduler   r   r   r   r   <module>   s    
