o
    پi2                     @   s  d dl Z d dlZd dlmZ d dlmZ dZeeZ	e Z
e
r&d dlmZ ejdejfddZd4d	ed
efddZej	d4dejdejdejfddZejdejdejdejdejdejdejdejdejfddZde jde jde jfddZejd ejd!ejfd"d#Zd$d% Zd&e jd
e jfd'd(Zd)e jd*e jde jde jd+e jd,e jd-ed.ed/ed
ee je je jf fd0d1Zd2d3 ZdS )5    N)is_cudai   )concat_mla_absorb_qreq_to_token_ptr_stridec                 C   s   d}t jdd}t || }	t || }
d}d}|r)t || t j}|}|t || t j7 }t || |}t|D ]1}t d|t j||  }||| k }t j| |	|  | | |d}t j	||
 | ||d qBd S )N   r   axismask)
tl
program_idloadtoint32cdivrangearangeint64store)req_to_token_ptrreq_pool_indices_ptrpage_kernel_lens_ptr	kv_indptrkv_start_idxkv_indices_ptrr   
BLOCK_SIZEpidreq_pool_indexkv_indices_offsetkv_startkv_endnum_loopioffsetr	   data r$   U/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/layers/attention/utils.py#create_flashinfer_kv_indices_triton   s4   
r&   @   	page_sizereturnc                 C   s   t |  }|S )N)_FLASHMLA_CREATE_KV_BLOCK_SIZE)r(   num_page_per_blockr$   r$   r%   get_num_page_per_block_flashmla7   s   r,   kv_indices_ptr_stride
PAGED_SIZEc                 C   s  t | }tjdd}	t||	 }
d}d}|r$t||	 tj}|}|t||	 tj7 }t|| |}t|| t }t|D ]E}td|tj	||  | }td|||  }||| k }||k }tj| |
|  | | |d}tj
||	|  | || |d qEd S )Nr   r   r   )$FLASHMLA_CREATE_KV_BLOCK_SIZE_TRITONr
   r   r   r   r   r   r   r   r   r   )r   r   r   r   r   r   r-   r.   NUM_PAGE_PER_BLOCKr   r   r   r   	num_pagednum_pages_loopr!   paged_offsetpaged_offset_outr	   mask_outr#   r$   r$   r%   !create_flashmla_kv_indices_triton<   sF   r6   head_cnt	k_stride0	k_stride1nope_stride0nope_stride1rope_stride0nope_dimrope_dimc                 C   s   t d}t d|}| ||  |d d d f |  }t d|	}|||  |d d d f |  |d d d f  }||d d d f  }t |}t || t d|
}|||  |d d d f  }||	 |d d d f  }t |}t || d S )Nr   )r
   r   r   r   r   )k_ptr
k_nope_ptr
k_rope_ptrr7   r8   r9   r:   r;   r<   r=   r>   pid_loc
head_range
k_head_ptr	nope_offssrc_nope_ptrdst_nope_ptrsrc_nope	rope_offssrc_rope_ptrdst_rope_ptrsrc_roper$   r$   r%   concat_and_cast_mha_k_kernelr   s(   
 

rM   kk_nopek_ropec                 C   sz  t | jdkrt |jdkrt |jdks%J d| jd|jd|j| jd |jd kr9| jd |jd ksIJ d| jd|jd|j| jd |jd krZd|jd ksjJ d| jd|jd|j| jd |jd |jd  ksJ d| jd|jd|j|jd }|jd }| jd f}t| | ||| jd | d| d|d|d|d|| d S )	N   z$shape should be 3d, but got k.shape=z, k_nope.shape=z, k_rope.shape=r   zinvalid shape, got k.shape=   )lenshaperM   stride)rN   rO   rP   r=   r>   gridr$   r$   r%   concat_and_cast_mha_k_triton   s8   ,*$ 

rX   BLOCK_MBLOCK_Dc	                 C   sZ  t d}	t d}
t ||	 }t ||	 }|
| t d| }t d|}||k }||k }|| }| |d d d f |  |d d d f  }||	| |  |d d d f |  |d d d f  }t j||d d d f |d d d f |k @ dd}t j|||d d d f |d d d f |k @ d t ddkr||	|  | }t j|||d d S d S )Nr   rR   g        )r	   otherr      )r
   r   r   r   r   )	input_ptroffsets_ptrlengths_ptr
output_ptrmask_ptrmax_len
hidden_dimrY   rZ   bmr"   lengthseq_idshid_idsseq_maskvalid_tokenin_tokenin_ptrout_ptrvaluesmask_out_ptrr$   r$   r%   pad_sequence_with_mask_kernel   s@   

(
""rp   c                 C   s   |j d }| j d }tj|||f| j| jd}tj|| | jtjd}t|}t|}	|t	||	df}
t
|
 | |||||||	|d	 |||fS )Nr   rR   )devicedtype)rY   rZ   )rU   torchzerosrq   rr   emptybooltritonnext_power_of_2r   rp   )	input_emboffsetslengthsrb   Brc   output	attn_maskrZ   rY   rW   r$   r$   r%   pad_sequence_with_mask   s<   





r   tensorc                    s   |    |  |  }t fddt|d D }|s | S dg| }d|d< t|d ddD ]}||d   |d   ||< q1|  |S )zD
    Adjust degenerate strides for a tensor, make it canonical.
    c                 3   s0    | ]} | d ko| |d   kV  qdS )rR   Nr$   ).0r!   sizesstridesr$   r%   	<genexpr>0  s     
z&canonicalize_stride.<locals>.<genexpr>rR   r   rS   r\   )sizerV   dimanyr   
as_strided)r   ndimneed_fixnew_stridesr!   r$   r   r%   canonicalize_stride(  s   

r   q_nopeq_ropepos_idscos_sin_cacheis_neoxkv_lora_rankqk_rope_head_dimc	                 C   s   dd l }		 tj}
|jd |jd }}|j|||| |
d}|j|j|
d}|j|j|
d}|	jj||| |||||
|d|d f ||dd |f |ddd |||fS )Nr   rR   )rr   .g      ?)r   rP   r   rO   r   r   r   quantize_dtype
q_rope_out
k_rope_out
q_nope_out
k_nope_outquant_scale_qquant_scale_kv)flashinfer.ropers   float8_e4m3fnrU   	new_emptyropemla_rope_quantize_fp8)r   r   rO   rP   r   r   r   r   r   
flashinfer
attn_dtypeq_len	num_headsq_outr   r   r$   r$   r%   mla_quantize_and_rope_for_fp8D  s:   
r   c                 C   s<   t r| jd dkr|jd dkrt| |S tj| |gddS )NrS   r   r'   )r   )_is_cudarU   r   rs   cat)r   r   r$   r$   r%   concat_mla_absorb_q_general  s    
r   )r'   )rs   rw   triton.languagelanguager
   sglang.srt.utilsr   r*   	constexprr/   r   
sgl_kernelr   jitr&   intr,   r6   rM   TensorrX   rp   r   r   rv   tupler   r   r$   r$   r$   r%   <module>   s    
&	5	
&
&	53	

U