o
    پi                     @   s   d dl mZmZ d dlZd dlZd dlmZ d dlm	Z	 ej
dgddejdejdejd	ejd
ejdejdejdejfddZdejdejdejdejdejdeej deejejf fddZeZdS )    )OptionalTupleN)prepare_chunk_indicesT)do_not_specializeHHgKVBTBKBV	IS_VARLENc           +   	   C   sj  t dt d}}||
 ||
 }}|rQt ||d  t jt ||d  d t j}}t || t jt || d t j}}|| }	n||	 ||	 |	 }}t |||
  | |	f|
f|| f|fd}t |||
 |  |	f|
f|| f|fd}t |||
 | |  |	|f|
| df|| df||fd}t j|dd}t j|dd}t t j|dd}tt ||D ]l}t |||
 | |  |	|f|
| df|| || f||fd} t |||
 | |  |	|f|
| df|| || f||fd}!t j| dd}"|"|d d d f  |"j	}#t j
||#dd	}$t j|!|$|!j	jdd qtt ||D ]w}%t | || ||
|   |  |	|f|| df|| |%| f||fd}&t |||
 | |  |	|f|
| df|| |%| f||fd}'t j|&dd}(|(|d d d f  |d d d f  |(j	})t 
||)}*t j|'|*|'j	jdd q;d S )
Nr         )r   )r   r   )boundary_check)r   r   F)
allow_tf32)tl
program_idloadtoint32make_block_ptrexprangecdivdtypedotstore
element_ty)+kvbetawuAg
cu_seqlenschunk_indicesr   r   r   r	   r
   r   r   r   r   i_ti_bhi_bi_hi_nboseosp_betap_gp_Ab_betab_Ab_gi_vp_vp_ub_vb_vbb_ui_kp_kp_wb_kb_kbb_w rB   [/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/layers/attention/fla/wy_fast.pyrecompute_w_u_fwd_kernel   s   

$,4



,rD   r    r!   r"   g_cumsumr%   r'   returnc                 C   s"  g | j |j d R \}}}}	}
|j d }|j d }|d ur#t||nd }|d u r/t||nt|}d}d}t|}| ||||	}t||| f di d| d|d|d|d|d	|d
|d|d|d|d|d|d|	d|
d|d|d|d|d udddd ||fS )N@   r    r!   r"   r#   r$   r%   r&   r'   r(   r   r   r   r	   r
   r   r   r   r   	num_warps   
num_stages   rB   )	shaper   tritonr   lentorch
empty_like	new_emptyrD   )r    r!   r"   rE   r%   r'   Br   r   r	   r
   r   r   r(   NTr   r   r$   r#   rB   rB   rC   recompute_w_u_fwdo   sh    


	
rV   )typingr   r   rQ   rO   triton.languagelanguager   %sglang.srt.layers.attention.fla.indexr   jit	constexprrD   Tensor
LongTensorrV   fwd_recompute_w_urB   rB   rB   rC   <module>   sN   X
-