o
    ۷i&)                     @  s   d dl mZ d dlmZ d dlZd dlmZ d dlm	Z	 d dl
mZ d dlmZ d dlmZ eddd	G d
d deZG dd dZdS )    )annotations)	dataclassN)AttentionMetadata)ParallelAttentionContext)SeqAllToAll4D) SequenceParallelGroupCoordinatorT)frozenslotsc                   @  sJ   e Zd ZU dZded< ded< ded< ded< d	Zded
< dZded< dS )_UlyssesCtxz<Per-forward context for Ulysses sequence-parallel attention.zdist.ProcessGroup
ulysses_pgintscatter_idx
gather_idxbooluse_syncr   	joint_lenfrontstrjoint_strategyN)__name__
__module____qualname____doc____annotations__r   r    r   r   d/home/ubuntu/vllm_env/lib/python3.10/site-packages/vllm_omni/diffusion/attention/parallel/ulysses.pyr
      s   
 r
   c                   @  sJ   e Zd ZdZd ddZed!ddZed"ddZd#ddZd$ddZ	dS )%UlyssesParallelAttentiona  Ulysses sequence-parallel strategy (all-to-all over seq/head dims).

    This preserves the semantics previously implemented in
    `Attention._forward_ulysses`:
    - If `AttentionMetadata.joint_*` is provided, joint_query/key/value are
      concatenated *after* all-to-all.
    - joint_key/value are assumed to be replicated across SP ranks and are sliced
      by ulysses head rank before concatenation.
    sp_groupr   r   r   r   r   r   returnNonec                 C  s$   || _ |j| _|| _|| _|| _d S )N)	_sp_groupulysses_group_ulysses_pg_scatter_idx_gather_idx	_use_sync)selfr   r   r   r   r   r   r   __init__(   s
   
z!UlyssesParallelAttention.__init__c                 C     dS )NTr   r&   r   r   r   enabled5      z UlyssesParallelAttention.enabledr   c                 C  r(   )Nulyssesr   r)   r   r   r   name9   r+   zUlyssesParallelAttention.namequerytorch.Tensorkeyvalueattn_metadataAttentionMetadata | Nonec              	   C  s  d  } }}d}d}	|d ur|j }|j}|j}|j}d}
|d urb|d urb|d urbddg}||vr:td| d| | jj}| jj}|jd | }|d|| ||d	  d d f }|jd	 }	d
}
n|d u ro|d u ro|d u rontd|
r|jd | }|d|| ||d	  d d f }|d|| ||d	  d d f }|d ur||_||_t	
| j|| j| j| j}t	
| j|| j| j| j}t	
| j|| j| j| j}|
r|dkrtj||gd	d}n	tj||gd	d}| jjd	k}|
r|s|dkrtj||gd	d}tj||gd	d}ntj||gd	d}tj||gd	d}t| j| j| j| j| j|	|d}|d ur|
r|jd u rF|jd u rFd |_n[|jd u rftj|jd |jd	 |jjd	  gtj|jd|_n|jd u rtj|jd |jd	 |jjd	  gtj|jd|_|dkrtj|j|jgd	dn
tj|j|jgd	d|_|jd ur|jjd	 |jd	 ksJ d|jjd	  d|jd	  |j  |_|||||fS )Nr   r   Frearzjoint_strategy: z* not supported. supported joint strategy: .   TzRjoint_query, joint_key, and joint_value should be None or not None simultaneously.dim)r-   r   r   r   r   r   r   )dtypedevicezattn_mask length: z != query length: )joint_query	joint_keyjoint_valuer   
ValueErrorr    ulysses_world_sizeulysses_rankshaper   applyr"   r#   r$   r%   torchcatring_world_sizer
   r-   joint_attn_mask	attn_maskonesr   r:   
contiguous)r&   r.   r0   r1   r2   joint_tensor_queryjoint_tensor_keyjoint_tensor_valuer   r   is_jointsupported_joint_strategyr?   r@   attn_heads_per_ulysses_rankattn_heads_per_ulysses_rank_kvuse_ringctxr   r   r   pre_attention=   s   





z&UlyssesParallelAttention.pre_attentionattn_outputrR   ParallelAttentionContext | Nonec                   s4  t |tsJ dt||jdkr|j}|jdkr0|d d d |f  |d d |d f }n|d d d | f }|d d | d f  t|j||j|j	|j
}    fddtt|jD }tj| |jd tj|dd |jdkrtj |gd	dS tj| gd	dS t|j||j|j	|j
S )
NzUnexpected ctx type: r   r   c                   s   g | ]}t  qS r   )rC   
zeros_like).0_output_jointr   r   
<listcomp>   s    z;UlyssesParallelAttention.post_attention.<locals>.<listcomp>)group   r7   r6   )
isinstancer
   typer   r   r   rB   r   r   r   r   rI   rangedistget_world_size
all_gatherrC   rD   )r&   rT   rR   r   
output_imggathered_jointr   rY   r   post_attention   s"   


z'UlyssesParallelAttention.post_attentionN)
r   r   r   r   r   r   r   r   r   r   )r   r   )r   r   )r.   r/   r0   r/   r1   r/   r2   r3   )rT   r/   rR   rU   r   r/   )
r   r   r   r   r'   propertyr*   r-   rS   rf   r   r   r   r   r      s    


 
r   )
__future__r   dataclassesr   rC   torch.distributeddistributedra   /vllm_omni.diffusion.attention.backends.abstractr   +vllm_omni.diffusion.attention.parallel.baser   $vllm_omni.diffusion.distributed.commr   1vllm_omni.diffusion.distributed.group_coordinatorr   r
   r   r   r   r   r   <module>   s   
