o
     i=                  	   @   s   d dl mZ d dlmZ d dlmZmZmZ d dlZd dl	m
Z
 d dlmZ G dd deeZG dd	 d	eeZd
efddZG dd dZG dd de
jeZG dd de
jeZG dd de
jeZedddgZdededeee ee f fddZdS )    )
namedtuple)Enum)ListOptionalTupleN)deprecated_functionc                   @   s   e Zd ZdZdZdZdZdS )ResidualNormStylezSupport different residual path and norm styles.
    See "On Layer Normalization in the Transformer Architecture",
    Xiong et al., https://arxiv.org/pdf/2002.04745v1.pdf
    prepostdeepnormN)__name__
__module____qualname____doc__PrePostDeepNorm r   r   P/home/ubuntu/.local/lib/python3.10/site-packages/xformers/components/residual.pyr      s
    r   c                   @   s   e Zd ZdZdZdS )NormalizationType	layernormskipN)r   r   r   	LayerNormSkipr   r   r   r   r      s    r   normalization_typec                 C   s(   G dd dt j}tjt jtj|i|  S )Nc                       s.   e Zd Zd fddZdejfddZ  ZS )	z%get_normalization_layer.<locals>.SkipreturnNc                    s   t    t|  d S N)super__init__r   )self___	__class__r   r   r   %   s   
z.get_normalization_layer.<locals>.Skip.__init__xc                 [   s   |S r   r   )r   r$   r    r   r   r   forward)   s   z-get_normalization_layer.<locals>.Skip.forward)r   N)r   r   r   r   torchTensorr%   __classcell__r   r   r"   r   r   $   s    r   )nnModuler   r   r   )r   r   r   r   r   get_normalization_layer#   s   	r+   c                   @   s   e Zd ZdZdS )RequiresWrappedInputszqUsed to mark, through inheritance,
    the fact that this class will require inputs to be passed as a single listN)r   r   r   r   r   r   r   r   r,   2   s    r,   c                       sF   e Zd ZdZd
dejdee f fddZde	e
j fdd	Z  ZS )Residuala  
    Object-oriented handling of the residual path

    This supports scaling of the residual path, as proposed by DeepNet_
    .. _DeepNet: https://arxiv.org/pdf/2203.00555v1.pdf

    .. Note: the wrapped layers must accept all the inputs as a single list
    Nlayerscalec                    s.   t    t|  || _|| _t|t| _d S r   )r   r   r   r.   r/   
isinstancer,   wrap_inputs)r   r.   r/   r"   r   r   r   D   s
   
zResidual.__init__inputsc                 K   sT   | j d ur|d | j  }n|d }| jr || jdd|i| S || j|i | S )Nr   r2   r   )r/   r1   r.   )r   r2   kwargsresiduer   r   r   r%   M   s   
zResidual.forwardr   )r   r   r   r   r)   r*   r   floatr   r   r&   r'   r%   r(   r   r   r"   r   r-   :   s    		r-   c                	       L   e Zd ZdZ	ddedejdedef fddZ	d	e
ej fd
dZ  ZS )PreNormzvAdds a normalization before computing attention

    ..Note: If a list of inputs is passed, all of them get normalizedTd_normsublayernormalization
use_tritonc                    6   t    t|  t||| _|| _t|t| _d S r   	r   r   r   r+   normr9   r0   r,   r1   r   r8   r9   r:   r;   r"   r   r   r   _   s
   
zPreNorm.__init__r2   c                    s   t |dksJ dd |D }||d t |kr+ |d fdd|D }n	 fdd|D } jrA jdd|i|S  j|i |S )Nr   c                 S   s   g | ]}t |qS r   )id).0r$   r   r   r   
<listcomp>r   s    z#PreNorm.forward.<locals>.<listcomp>c                    s   g | ]} qS r   r   )rA   r    )x_normr   r   rB   v   s    c                    s   g | ]}  |qS r   )r>   )rA   x_)r   r   r   rB   y   s    r2   r   )lencountr>   r1   r9   )r   r2   r3   idsinputs_normedr   )r   rC   r   r%   n   s   zPreNorm.forwardTr   r   r   r   intr)   r*   r   boolr   r   r&   r'   r%   r(   r   r   r"   r   r7   Z   s    	r7   c                	       r6   )PostNormz(Adds LayerNorm after computing attentionTr8   r9   r:   r;   c                    r<   r   r=   r?   r"   r   r   r      s
   
zPostNorm.__init__r2   c                 K   s6   | j r| jdd|i|}n| j|i |}| |S )Nr2   r   )r1   r9   r>   )r   r2   r3   r$   r   r   r   r%      s   
zPostNorm.forwardrI   rJ   r   r   r"   r   rM      s    rM   DeepNormCoefficientsalphabetaencoder_layersdecoder_layersr   c                 C   s   | }|}|dkrt d| d d| d ddfS | dkr,dt d| d d| d dfS t d|d	 | d
  d|d	 | d  d}t d| d d| d d}||fS )z
    See DeepNet_.

    Returns alpha and beta depending on the number of encoder and decoder layers,
    first tuple is for the encoder and second for the decoder

    .. _DeepNet: https://arxiv.org/pdf/2203.00555v1.pdf
    r      g      ?   g      п)rO   rP   NgQ?   g      ?gףp=
?g            )rN   )rQ   rR   NMencoder_coeffsdecoder_coeffsr   r   r   get_deepnorm_coefficients   s    $r\   )collectionsr   enumr   typingr   r   r   r&   torch.nnr)   xformers._deprecation_warningr   strr   r   r+   r,   r*   r-   r7   rM   rN   rK   r\   r   r   r   r   <module>   s*    '