o
    پi                     @   s  d dl Z d dlmZ d dl mZ d dlmZmZ d dlmZ eddddej	de jfdd	Z
eddddej	d
e je ddfddZeddddej	d
e je ddfddZejdej	deeB ded
eddf
ddZejdej	deeB ded
e je ddf
ddZdS )    N)Int32)Tdsl_user_op)llvmlociplock_ptrreturnc             	   C   s>   | j ||d }tjt |gddddtjjd}t	|S )Nr   z#ld.global.acquire.gpu.b32 $0, [$1];z=r,lTFhas_side_effectsis_align_stackasm_dialect)
tointir_valuer   
inline_asmr   i32
AsmDialectAD_ATTcutlassr   )r	   r   r   lock_ptr_i64state r   R/home/ubuntu/.local/lib/python3.10/site-packages/flash_attn_origin/cute/barrier.py
ld_acquire   s   
	r   valc             	   C   D   | j ||d }tjd |t|j||dgddddtjjd d S )Nr   z(red.relaxed.gpu.global.add.s32 [$0], $1;l,rTFr   r   r   r   r   r   r   r   r	   r   r   r   r   r   r   r   red_relaxed      
r    c             	   C   r   )Nr   z(red.release.gpu.global.add.s32 [$0], $1;r   TFr   r   r   r   r   r   red_release'   r!   r"   
thread_idxflag_offsetc                 C   s<   | | }|dkrt d}||krt|}||ksd S d S d S Nr   )r   r   )r	   r#   r$   r   flag_ptrread_valr   r   r   wait_eq7   s   r(   c                 C   s"   | | }|dkrt || d S d S r%   )r"   )r	   r#   r$   r   r&   r   r   r   
arrive_inc@   s   r)   )r   cutlass.cutecuter   cutlass.cutlass_dslr   r   cutlass._mlir.dialectsr   Pointerr   	Constexprr    r"   jitintr(   r)   r   r   r   r   <module>   sN     &