o
     iq                     @   s   d dl Z d dlmZ d dlmZ d dlmZmZmZm	Z	m
Z
 G dd de jjZdd ZG d	d
 d
e jjZdd ZG dd de jjZdS )    N)_cast_if_autocast_enabled)AttnMaskType)scaled_masked_softmax_backwardscaled_masked_softmax_forward)scaled_masked_softmax_get_batch_per_block+scaled_upper_triang_masked_softmax_backward*scaled_upper_triang_masked_softmax_forwardc                   @   s(   e Zd ZdZedd Zedd ZdS )ScaledUpperTriangMaskedSoftmaxz
    Fused operation which performs following three operations in sequence
    1. Scale the tensor.
    2. Apply upper triangular mask (typically used in gpt models).
    3. Perform softmax.
    c                 C   s*   t |g}t||d }| || |S Nr   )torchtensorr   save_for_backward)ctxinputsscalescale_tsoftmax_results r   V/home/ubuntu/.local/lib/python3.10/site-packages/xformers/_flash_attn/fused_softmax.pyforward'   s   z&ScaledUpperTriangMaskedSoftmax.forwardc                 C   s"   | j \}}t|||d }|d fS r
   )saved_tensorsr   r   output_gradsr   r   input_gradsr   r   r   backward.   s
   

z'ScaledUpperTriangMaskedSoftmax.backwardN)__name__
__module____qualname____doc__staticmethodr   r   r   r   r   r   r	      s    
r	   c           	      C   s   |   \}}}}||ksJ d| d||} t| |}tjjjdd tj| }W d    n1 s4w   Y  |||||S )Nz&causal mask is only for self attentionFenabled)	sizeviewr   r   cudaampautocastr	   apply)	r   _r   bnpsqskargsprobsr   r   r   "scaled_upper_triang_masked_softmax7   s   
r0   c                   @   s$   e Zd Zedd Zedd ZdS )ScaledMaskedSoftmaxc                 C   s,   t |g}t|||d }| || |S r
   )r   r   r   r   )r   r   maskr   r   r   r   r   r   r   J   s   zScaledMaskedSoftmax.forwardc                 C   s$   | j \}}t|||d }|d d fS r
   )r   r   r   r   r   r   r   Q   s   

zScaledMaskedSoftmax.backwardN)r   r   r   r   r   r   r   r   r   r   r1   I   s
    
r1   c                 C   sJ   t | ||}tjjjdd tj| W  d    S 1 sw   Y  d S )NFr!   )r   r   r%   r&   r'   r1   r(   )r   r2   r   r.   r   r   r   scaled_masked_softmaxX   s   $r3   c                       sL   e Zd ZdZ fddZdd Zdd Zdd	 Zd
d Ze	dd Z
  ZS )FusedScaleMaskSoftmaxa  
    fused operation: scaling + mask + softmax

    Arguments:
        input_in_fp16: flag to indicate if input in fp16 data format.
        input_in_bf16: flag to indicate if input in bf16 data format.
        attn_mask_type: attention mask type (pad or causal)
        scaled_masked_softmax_fusion: flag to indicate user want to use softmax fusion
        mask_func: mask function to be applied.
        softmax_in_fp32: if true, softmax in performed at fp32 precision.
        scale: scaling factor used in input tensor scaling.
    c                    s   t    || _|| _| jr| jrtd| jp| j| _|| _|| _|| _|| _	|| _
| j
d u s6|s6td| jrS| jtjkrDt| _d S | jtjkrOt| _d S tdd S )Nz;both fp16 and bf16 flags cannot be active at the same time.z%softmax should be in fp32 when scaledzInvalid attn_mask_type.)super__init__input_in_fp16input_in_bf16RuntimeErrorinput_in_float16attn_mask_typescaled_masked_softmax_fusion	mask_funcsoftmax_in_fp32r   r   causalr0   fused_softmax_funcpaddingr3   
ValueError)selfr7   r8   r;   r<   r=   r>   r   	__class__r   r   r6   m   s(   



zFusedScaleMaskSoftmax.__init__c                 C   s>   |  dksJ | j|g| R  r| ||S | ||S )N   )dimis_kernel_availabler#   forward_fused_softmaxforward_torch_softmax)rC   inputr2   r   r   r   r      s   zFusedScaleMaskSoftmax.forwardc                 C   s   || }| j rf| jrf| jtjks| jtjkrf|d urfd|  k r$dkrfn dS |d dkrf|d dkrf|d dkrfd|  krCdkrfn dS | ||||}| jtjkr^|| dkr\dS dS || dkrfdS dS )N   i    rF   r   TF)r<   r:   r;   r   r?   rA   get_batch_per_block)rC   r2   r*   r+   r,   r-   attn_batchesbatch_per_blockr   r   r   rH      s,   	z)FusedScaleMaskSoftmax.is_kernel_availablec                 C   s"   | j d ur| j nd}| |||S )Ng      ?)r   r@   )rC   rK   r2   r   r   r   r   rI      s   z+FusedScaleMaskSoftmax.forward_fused_softmaxc                 C   s|   | j r
| jr
| }| jd ur|| j }|d ur| ||n|}tjjdd|}| j r<| jr<| jr8|	 }|S |
 }|S )Nr    )rG   )r:   r>   floatr   r=   r   nnSoftmaxr7   halfbfloat16)rC   rK   r2   mask_outputr/   r   r   r   rJ      s   

z+FusedScaleMaskSoftmax.forward_torch_softmaxc                 C   s   t | |||S )N)r   )r,   r-   r*   r+   r   r   r   rM      s   z)FusedScaleMaskSoftmax.get_batch_per_block)r   r   r   r   r6   r   rH   rI   rJ   r   rM   __classcell__r   r   rD   r   r4   _   s    !	r4   )r   apex._autocast_utilsr   apex.transformer.enumsr   fused_softmax_libr   r   r   r   r   autogradFunctionr	   r0   r1   r3   rQ   Moduler4   r   r   r   r   <module>   s   	