o
    پi                  	   @   sj   d dl Z d dlZd dlmZ ejddejdedejfddZejdejdejd	ej	dejfd
dZ
dS )    N    valreduce_sizereturnc                 C   s@   t t|}t|D ]}| tjj| d|| d > d } q| S )N   )offset)intmathlog2rangecutearchshuffle_sync_down)r   r   itersi r   e/home/ubuntu/.local/lib/python3.10/site-packages/sglang/jit_kernel/diffusion/cutedsl/common/reduce.pywarp_reduce_sum   s   "r   	num_warpstidxc                 C   s   t j }|t j|d }|d? }|d@ }|dkr| ||< tj  |dkr@||k r/|| nt d} t| } |dkr@| ||< tj  || } | S )Nr         r   )	cutlassutilsSmemAllocatorallocate_tensorFloat32r   r   sync_threadsr   )r   r   r   smemaccwarp_idlane_idr   r   r   cta_reduce_sum   s   


r"   )r   )r	   r   cutlass.cuter   jitNumericr   r   	ConstexprInt32r"   r   r   r   r   <module>   s    