o
    Gio                     @   s   d dl Z d dlm  mZ d dl mZ ddlmZ ddlmZm	Z	 e r(d dl
Z
ejejejejejdZdedejfd	d
ZG dd dejZG dd dejZG dd dejZG dd dejZG dd dejZG dd dejZdS )    N)nn   )	deprecate)is_torch_npu_availableis_torch_version)swishsilumishgelureluact_fnreturnc                 C   s6   |   } | tv rt|   S td|  dtt  )zHelper function to get activation function from string.

    Args:
        act_fn (str): Name of activation function.

    Returns:
        nn.Module: Activation function.
    zactivation function z not found in ACT2FN mapping )lowerACT2CLS
ValueErrorlistkeys)r    r   P/home/ubuntu/.local/lib/python3.10/site-packages/diffusers/models/activations.pyget_activation$   s   

r   c                       s6   e Zd ZdZ fddZdejdejfddZ  ZS )FP32SiLUzH
    SiLU activation function with input upcasted to torch.float32.
    c                    s   t    d S N)super__init__)self	__class__r   r   r   :   s   zFP32SiLU.__init__inputsr   c                 C   s   t j| dd|jS )NF)inplace)Fr   floattodtype)r   r   r   r   r   forward=   s   zFP32SiLU.forward)	__name__
__module____qualname____doc__r   torchTensorr#   __classcell__r   r   r   r   r   5   s    r   c                	       sR   e Zd ZdZddedededef fdd	Zd
ej	dej	fddZ
dd Z  ZS )GELUa  
    GELU activation function with tanh approximation support with `approximate="tanh"`.

    Parameters:
        dim_in (`int`): The number of channels in the input.
        dim_out (`int`): The number of channels in the output.
        approximate (`str`, *optional*, defaults to `"none"`): If `"tanh"`, use tanh approximation.
        bias (`bool`, defaults to True): Whether to use a bias in the linear layer.
    noneTdim_indim_outapproximatebiasc                    s&   t    tj|||d| _|| _d S Nr0   )r   r   r   Linearprojr/   )r   r-   r.   r/   r0   r   r   r   r   L   s   

zGELU.__init__gater   c                 C   sJ   |j jdkrtddrtj|jtjd| jdj|j	dS tj|| jdS )Nmps<2.0.0r"   )r/   )
devicetyper   r   r
   r!   r(   float32r/   r"   r   r5   r   r   r   r
   Q   s   $z	GELU.geluc                 C   s   |  |}| |}|S r   )r4   r
   r   hidden_statesr   r   r   r#   W   s   

zGELU.forward)r,   T)r$   r%   r&   r'   intstrboolr   r(   r)   r
   r#   r*   r   r   r   r   r+   A   s
     
r+   c                       sN   e Zd ZdZddededef fddZdejd	ejfd
dZ	dd Z
  ZS )GEGLUaN  
    A [variant](https://huggingface.co/papers/2002.05202) of the gated linear unit activation function.

    Parameters:
        dim_in (`int`): The number of channels in the input.
        dim_out (`int`): The number of channels in the output.
        bias (`bool`, defaults to True): Whether to use a bias in the linear layer.
    Tr-   r.   r0   c                    s$   t    tj||d |d| _d S Nr   r2   r   r   r   r3   r4   r   r-   r.   r0   r   r   r   r   g   s   
zGEGLU.__init__r5   r   c                 C   s>   |j jdkrtddrt|jtjdj|jdS t|S )Nr6   r7   r8   r9   )	r:   r;   r   r   r
   r!   r(   r<   r"   r=   r   r   r   r
   k   s   
z
GEGLU.geluc                 O   sp   t |dks|dd d urd}tdd| | |}t r(tj|dddd S |jddd	\}}|| | S )
Nr   scalezThe `scale` argument is deprecated and will be ignored. Please remove it, as passing it will raise an error in the future. `scale` should directly be passed while calling the underlying pipeline component i.e., via `cross_attention_kwargs`.z1.0.0   )dimr/   r   rJ   )	lengetr   r4   r   	torch_npu	npu_gegluchunkr
   )r   r?   argskwargsdeprecation_messager5   r   r   r   r#   q   s   
zGEGLU.forwardT)r$   r%   r&   r'   r@   rB   r   r(   r)   r
   r#   r*   r   r   r   r   rC   ]   s
    	rC   c                       s8   e Zd ZdZd
dededef fddZdd	 Z  ZS )SwiGLUa  
    A [variant](https://huggingface.co/papers/2002.05202) of the gated linear unit activation function. It's similar to
    `GEGLU` but uses SiLU / Swish instead of GeLU.

    Parameters:
        dim_in (`int`): The number of channels in the input.
        dim_out (`int`): The number of channels in the output.
        bias (`bool`, defaults to True): Whether to use a bias in the linear layer.
    Tr-   r.   r0   c                    s.   t    tj||d |d| _t | _d S rD   )r   r   r   r3   r4   SiLU
activationrF   r   r   r   r      s   
zSwiGLU.__init__c                 C   s*   |  |}|jddd\}}|| | S )Nr   rH   rK   )r4   rP   rW   )r   r?   r5   r   r   r   r#      s   
zSwiGLU.forwardrT   )	r$   r%   r&   r'   r@   rB   r   r#   r*   r   r   r   r   rU   ~   s    
rU   c                       sF   e Zd ZdZddededef fddZdejd	ejfd
dZ	  Z
S )ApproximateGELUa  
    The approximate form of the Gaussian Error Linear Unit (GELU). For more details, see section 2 of this
    [paper](https://huggingface.co/papers/1606.08415).

    Parameters:
        dim_in (`int`): The number of channels in the input.
        dim_out (`int`): The number of channels in the output.
        bias (`bool`, defaults to True): Whether to use a bias in the linear layer.
    Tr-   r.   r0   c                    s    t    tj|||d| _d S r1   rE   rF   r   r   r   r      s   
zApproximateGELU.__init__xr   c                 C   s   |  |}|td|  S )NgZd;?)r4   r(   sigmoid)r   rY   r   r   r   r#      s   
zApproximateGELU.forwardrT   )r$   r%   r&   r'   r@   rB   r   r(   r)   r#   r*   r   r   r   r   rX      s    
rX   c                	       s8   e Zd Zddedededef fddZd	d
 Z  ZS )LinearActivationTr   r-   r.   r0   rW   c                    s*   t    tj|||d| _t|| _d S r1   )r   r   r   r3   r4   r   rW   )r   r-   r.   r0   rW   r   r   r   r      s   
zLinearActivation.__init__c                 C   s   |  |}| |S r   )r4   rW   r>   r   r   r   r#      s   

zLinearActivation.forward)Tr   )	r$   r%   r&   r@   rB   rA   r   r#   r*   r   r   r   r   r[      s     r[   )r(   torch.nn.functionalr   
functionalr   utilsr   utils.import_utilsr   r   rN   rV   Mishr+   ReLUr   rA   Moduler   r   rC   rU   rX   r[   r   r   r   r   <module>   s(   	!