o
    پi                     @   s  d dl mZmZ d dlZ		ddejdejdejdejdeej deej d	eejejf fd
dZ		ddejdejdejdejdeej deej d	eejejf fddZ	ddejdejdejdejdejdejdeded	ejfddZ		 	d dedededed	ef
ddZ
dS )!    )OptionalTupleNv_as_av_bs_bv_mergeds_mergedreturnc                 C   ^   | tj}| tj}|d u rt| }|d u rt|}tjjj| ||||| ||fS N)totorchfloat32
empty_likeops
sgl_kernelmerge_statedefaultr   r   r   r   r   r	    r   H/home/ubuntu/.local/lib/python3.10/site-packages/sgl_kernel/attention.pyr      s   

r   c                 C   r   r   )r   r   r   r   r   r   merge_state_v2r   r   r   r   r   r      s   

r      q_nopeq_pekv_c_and_k_pe_cacheseq_lens
page_table	workspacesm_scalenum_kv_splitsc                 C   sL  | j dksJ d| j  |j dksJ d|j  |j dks'J d|j  | j\}}	}
|j\}}}||kr;|	|ks=J |j\}}}d}d}|
|ksMJ ||ksSJ ||| ks[J d}|	|kskJ d| d	|	 |	|k r| |||
f}| |d d d |	f< |} ||||f}||d d d |	f< |}t|jd
ksJ |j\}}||ksJ |dksJ d| |d|  dksJ | jtjtjfv sJ d| j d| j|j  kr|jksJ  J |jtjksJ d|j d|jtjksJ d|j d| |||f}tj	j
j|| |||||||	 |d d d |	f  S )N   z$q_nope must be a 3D tensor, but got z"q_pe must be a 3D tensor, but got z1kv_c_and_k_pe_cache must be a 3D tensor, but got i   @      zH must be <= z
, but got    r   z&block num must be greater than 0, got z.q_nope.dtype needs to be fp16 or bf16 but got .z)seq_lens.dtype needs to be int32 but got z+page_table.dtype needs to be int32 but got )ndimshape	new_emptylendtyper   float16bfloat16int32r   r   cutlass_mla_decoder   
contiguous)r   r   r   r   r   r   r    r!   B_qHD_q_nopeB_q_2H_2D_q_pe_	PAGE_SIZED_ckvD_latentD_rope	MAX_HEADSq_nope_paddedq_pe_paddedB_block_table	block_numoutr   r   r   r/   0   sl   


"
r/   max_seq_lennum_batchessm_countc                 C   sB   | dksJ d|  |dksJ d| t jjj| |||S )Nr   z(max_seq_len must be greater than 0, got z(num_batches must be greater than 0, got )r   r   r   cutlass_mla_get_workspace_sizer   )rB   rC   rD   r!   r   r   r   rE   z   s
   
rE   )NN)r   )r   r   )typingr   r   r   Tensorr   r   floatintr/   rE   r   r   r   r   <module>   s    

	
M