o
    ۷i                     @  s   d dl mZ d dlmZ d dlmZ d dlZd dlmZ d dl	m
Z
mZ d dlmZ d dlmZ d d	lmZ d d
lmZ erDd dlmZ edddG dd deZG dd dZdS )    )annotations)	dataclass)TYPE_CHECKINGN)init_logger)HAS_FA3HAS_FLASH_ATTN)AttnType)ParallelAttentionContext) SequenceParallelGroupCoordinator)get_forward_context)AttentionMetadataT)frozenslotsc                   @  s   e Zd ZdZdS )_RingCtxz9Per-forward context for Ring sequence-parallel attention.N)__name__
__module____qualname____doc__ r   r   a/home/ubuntu/vllm_env/lib/python3.10/site-packages/vllm_omni/diffusion/attention/parallel/ring.pyr      s    r   c                   @  s^   e Zd ZdZ	d$d%d	d
Zed&ddZed'ddZd(ddZd)ddZ			d*d+d"d#Z
dS ),RingParallelAttentiona  Ring sequence-parallel strategy.

    This strategy prepares inputs for Ring Attention.
    Key responsibilities:
    - Concatenate joint_query (Text) to query (Image) if present.
    - Keep joint_key/value separate in metadata for the Ring kernel to handle as static prefix.
    Nsp_groupr
   attn_backend_pref
str | NonereturnNonec                 C  s   || _ || _d S N)	_sp_groupr   )selfr   r   r   r   r   __init__.   s   
zRingParallelAttention.__init__boolc                 C     dS )NTr   r   r   r   r   enabled6      zRingParallelAttention.enabledstrc                 C  r!   )Nringr   r"   r   r   r   name:   r$   zRingParallelAttention.namequerytorch.Tensorkeyvalueattn_metadataAttentionMetadata | Nonec           	      C  s   d }d}|d ur|j }|j}|d ur9ddg}||vr"td| d|dkr0tj||gdd}n	tj||gdd}t| jd}|||||fS )Nfrontrearzjoint_strategy: z not supported.   )dim)r'   )joint_queryjoint_strategy
ValueErrortorchcatr   r'   )	r   r(   r*   r+   r,   joint_tensor_queryr3   supported_joint_strategyctxr   r   r   pre_attention>   s   z#RingParallelAttention.pre_attentionattn_outputr9   ParallelAttentionContext | Nonec                 C  s   |S r   r   )r   r;   r9   r   r   r   post_attention]   r$   z$RingParallelAttention.post_attentionFsoftmax_scalefloat | Nonecausalc                 C  s>  |du r|j d d }| j}|du r*zt j}t|dd}W n ty)   d}Y nw |jtjkr3d}nt	sFt
sF|dkrDtt}	|	d d}d\}
}d}|dur^|j}
|j}|jdur^|j}|dksf|d	kr|d
dlm} ||||||| jjd|
||d
S d
dlm} t	rtjntj}||||d||dddd| jj||
||dS )z%Run the actual Ring Attention kernel.Ng      attention_backendsdpaz@Flash Attention (FA2/FA3) is not available! Force enabling SDPA.)NNr.   r5   r   )ring_pytorch_attn_func	efficient)r>   r@   groupop_typejoint_tensor_keyjoint_tensor_valuer3   )ring_flash_attn_funcg        )rA   rA   F)	dropout_pr>   r@   window_sizesoftcapalibi_slopesdeterministicrF   	attn_typerH   rI   r3   )shaper   r   omni_diffusion_configgetattr	Exceptiondtyper5   float32r   r   r   r   warning_once	joint_keyjoint_valuer3   8vllm_omni.diffusion.attention.backends.ring_pytorch_attnrD   r   
ring_group6vllm_omni.diffusion.attention.backends.ring_flash_attnrJ   r   FA3FA)r   r(   r*   r+   r,   r>   r@   backend_prefconfigloggerrX   rY   r3   rD   rJ   rP   r   r   r   run_attentiona   sr   


z#RingParallelAttention.run_attentionr   )r   r
   r   r   r   r   )r   r    )r   r%   )r(   r)   r*   r)   r+   r)   r,   r-   )r;   r)   r9   r<   r   r)   )NF)r(   r)   r*   r)   r+   r)   r,   r-   r>   r?   r@   r    r   r)   )r   r   r   r   r   propertyr#   r'   r:   r=   rb   r   r   r   r   r   %   s    


r   )
__future__r   dataclassesr   typingr   r5   vllm.loggerr   8vllm_omni.diffusion.attention.backends.ring.ring_globalsr   r   9vllm_omni.diffusion.attention.backends.ring.ring_selectorr   +vllm_omni.diffusion.attention.parallel.baser	   1vllm_omni.diffusion.distributed.group_coordinatorr
   #vllm_omni.diffusion.forward_contextr   /vllm_omni.diffusion.attention.backends.abstractr   r   r   r   r   r   r   <module>   s   
