o
    ۷iB                  	   @   s   d dl mZ d dlZd dlmZ d dlmZmZmZ ee	Z
ed Z		ddejdejd	ejdB d
efddZG dd deZG dd deZdS )    )LiteralN)init_logger)AttentionBackendAttentionImplAttentionMetadata)broadcast_kfull_qkr   querykey	attn_mask	mask_modec                 C   s   |durt |dkrd}|durk|jdkrk|jd | jd krk|jd |jd krk|jd | jd |jd }}}|t j}|dkrV|d|||d }|S |dkrd|dd}|S t	d| |S )z
    Reshape Attention Mask
    2D [batch_size, seq_len_k] ->
      - broadcast_k: [batch_size, 1, 1, seq_len_k]
      - full_qk: [batch_size, 1, seq_len_q, seq_len_k]
    Nr         r   r   zUnsupported SDPA mask mode: )
torchallndimshapetobool	unsqueezeexpand
contiguous
ValueError)r	   r
   r   r   BSqSkv r   a/home/ubuntu/vllm_env/lib/python3.10/site-packages/vllm_omni/diffusion/attention/backends/sdpa.py_maybe_reshape_attn_mask   s    
"r   c                   @   sj   e Zd ZU dZeed< edefddZede	e
 fddZedefdd	Zeded
 fddZdS )SDPABackendTaccept_output_bufferreturnc                 C      dS )NTr   )clsr   r   r   supports_attention_mask=      z#SDPABackend.supports_attention_maskc                   C   s   dd t dD S )Nc                 S   s   g | ]}|qS r   r   .0xr   r   r   
<listcomp>C   s    z8SDPABackend.get_supported_head_sizes.<locals>.<listcomp>i   )ranger   r   r   r   get_supported_head_sizesA   s   z$SDPABackend.get_supported_head_sizesc                   C   r"   )NSDPAr   r   r   r   r   get_nameE   r%   zSDPABackend.get_nameSDPAImplc                   C   s   t S N)r.   r   r   r   r   get_impl_clsI   r%   zSDPABackend.get_impl_clsN)__name__
__module____qualname__r    r   __annotations__classmethodr$   staticmethodlistintr+   strr-   typer0   r   r   r   r   r   :   s   
 r   c                   @   s*  e Zd Z			ddedededededB d	ed
dfddZ		ddej	dej	dej	de
dB ded
ej	fddZ	ddej	dej	dej	de
dB d
ej	f
ddZ	ddej	dej	dej	de
dB d
ej	f
ddZ	ddej	dej	dej	de
dB d
ej	f
ddZ	ddej	dej	dej	de
dB d
ej	f
ddZdS ) r.   FN 	num_heads	head_sizesoftmax_scalecausalnum_kv_headsprefixr!   c                 K   s   || _ || _d S r/   )r?   r>   )selfr<   r=   r>   r?   r@   rA   extra_impl_argsr   r   r   __init__O   s   

zSDPAImpl.__init__r   r	   r
   valueattn_metadatar   c           	   	   C   sh   d }|rt |||j|d}dd |||fD \}}}tjjj||||d| j| jd}|dddd	}|S )
Nr   c                 s   s     | ]}| d dddV  qdS )r   r   r      N)permuter&   r   r   r   	<genexpr>j   s    z)SDPAImpl._forward_impl.<locals>.<genexpr>g        )r   	dropout_p	is_causalscaler   r   r   rH   )	r   r   r   nn
functionalscaled_dot_product_attentionr?   r>   rI   )	rB   r	   r
   rE   rF   r   attention_maskoutputoutr   r   r   _forward_impl\   s   
	zSDPAImpl._forward_implc                 C      | j ||||ddS Nr   rG   rT   rB   r	   r
   rE   rF   r   r   r   forward_cudaw      zSDPAImpl.forward_cudac                 C   rU   rV   rW   rX   r   r   r   forward_xpu   rZ   zSDPAImpl.forward_xpuc                 C   rU   rV   rW   rX   r   r   r   forward_hip   rZ   zSDPAImpl.forward_hipc                 C   rU   )Nr   rG   rW   rX   r   r   r   forward_npu   rZ   zSDPAImpl.forward_npu)FNr;   Nr   r/   )r1   r2   r3   r8   floatr   r9   rD   r   Tensorr   SDPAMaskModerT   rY   r[   r\   r]   r   r   r   r   r.   N   s    	

 


r.   r^   )typingr   r   vllm.loggerr   /vllm_omni.diffusion.attention.backends.abstractr   r   r   r1   loggerra   r`   r   r   r.   r   r   r   r   <module>   s&   
%