o
    پi                     @   s   d dl mZ d dlZddlmZmZmZmZ ddlm	Z	m
Z
mZmZ dZdejdejd	ejd
ejfddZ	ddejdejdejdejdeej f
ddZdejdejfddZdejdejdejfddZdS )    )OptionalN   )merge_state_in_place_kernelmerge_state_kernelmerge_states_kernel#variable_length_merge_states_kernel)check_device	check_dimcheck_inputcheck_shape	   v_as_av_bs_bc           
         s8  t |  t | t | t | t| |||gtgd td|  td| td| td| t| | t|| | d|dksEJ | d|dksQJ |tj}|tj}| d | d}| d}t	| |j
}t |f|j
}|}|}	t fdd | |||||||||	d
 ||fS )	Nmajor      r   r   c                    s    fS N )metaseq_lenr   M/home/ubuntu/.local/lib/python3.10/site-packages/flashinfer/triton/cascade.py<lambda>*   s    zmerge_state.<locals>.<lambda>bdxbdy)r
   r   EXPECT_HOPPERr	   r   sizetotorchfloat32
empty_likedeviceemptyr   )
r   r   r   r   	num_headshead_dimv_mergeds_mergedr   r   r   r   r   merge_state   s4   








r+   vsv_others_othermaskc           
      C   sF  t |  t | t | t | t| |||gtgd td|  td| td| td| t| | t|| | d|dksEJ | d|dksQJ |jtjksYJ |jtjksaJ |d ur~td| | d|dksvJ |j	| j	ks~J | d}| d}| d}|}|}	t
|f | ||||||||	d	 d S )Nr   r   r   r   r   r   )r
   r   r   r	   r   r    dtyper"   r#   r%   r   )
r,   r-   r.   r/   r0   r   r'   r(   r   r   r   r   r   merge_state_in_place1   s6   










r2   c           
      C   s  t |  t | t| |gtgd td|  td| | d|dks'J | d|dks3J | d|dks?J | d}| d}| d}| d}|tj}tj|||f| j	| j
d}tj||f|j	|j
d}|}|}	t|f | ||||||||	d	 ||fS )	Nr      r   r   r   r   r1   r%   r   )r
   r   r   r	   r    r!   r"   r#   r&   r1   r%   r   )
r,   r-   r   num_index_setsr'   r(   r)   r*   r   r   r   r   r   merge_statesV   s>   





r6   indptrc           
      C   s  t |  t | t| |gtgd td|  td| | d|dks'J | d|dks3J |dd }| d}| d}|tj}|tj}tj	|||f| j
| jd}tj	||f|j
|jd}|}|}	t|f | ||||||||	d	 ||fS )Nr   r   r   r   r   r4   r   )r
   r   r   r	   r    r!   r"   r#   int32r&   r1   r%   r   )
r,   r-   r7   r   r'   r(   r)   r*   r   r   r   r   r   variable_length_merge_statesy   s<   



r9   r   )typingr   r"   kernels.cascader   r   r   r   utilsr   r	   r
   r   r   Tensorr+   r2   r6   r9   r   r   r   r   <module>   sD    
&
%#