o
    پi                     @   sh   d dl mZmZ d dlmZ d dlZd dlmZ d dlmZm	Z	 d dl
mZ eddG dd	 d	ZdS )
    )TupleOptional)	dataclassN)Int32
const_expr)SeqlenInfoQKT)frozenc                   @   s(  e Zd ZU eje ed< eje ed< eje ed< dZeje ed< dZ	eje ed< dZ
ee ed< dZee ed	< d
Zeje ed< ej		
ddededejdejdeeef f
ddZejdededeeef fddZejdedededefddZejdedededefddZdS )	BlockInfotile_mtile_n	is_causalFis_localis_split_kvNwindow_size_leftwindow_size_right   qhead_per_kvhead_packgqar   seqlen_infom_block	split_idx
num_splitsreturnc                 C   sL  t |j| j}t| jp| jo| jd urH|d | j }t| j	dkr*t || j	}||j |j
 }t| jr9|n|| j }t|t || j}d}	t| joR| jd ur{|| j }
t| j	dkre|
| j	 }
|
|j |j
 }|| j }t|| j d}	t| jr||	krtdn	||	 | d | }|	||  }	t|	| |}|	|fS )Nr   r   )cuteceil_divseqlen_kr   r   r   r   r   r
   r   seqlen_qminr   cutlassmaxr   r   )selfr   r   r   r   n_block_max	m_idx_maxn_idxn_idx_rightn_block_min	m_idx_min
n_idx_leftnum_n_blocks_per_split r(   U/home/ubuntu/.local/lib/python3.10/site-packages/flash_attn_origin/cute/block_info.pyget_n_block_min_max   s0   



zBlockInfo.get_n_block_min_maxn_blockc           
      C   s   t |j| j}d}t| jp| jo| jd ur8|| j }||j |j	 }t| jr+|n|| j }t
||| j }t| jo@| jd ur`|d | j }||j |j	 }|| j }	t|t |	| j}||fS )Nr   r   )r   r   r   r
   r   r   r   r   r   r   r   r   r   )
r   r   r+   m_block_maxm_block_min	n_idx_minm_idxm_idx_right	n_idx_max
m_idx_leftr(   r(   r)   get_m_block_min_max9   s   

zBlockInfo.get_m_block_min_maxr$   c                 C   sh   || j  }t| jdkr|| j }||j |j }t| j p"| jdu r&|n|| j }t||| j	 S )zZIf we have separate iterations with causal or local masking at the start, where do we stopr   N)
r
   r   r   r   r   r   r   r   r   r   )r   r   r   r$   r%   r"   r#   r(   r(   r)   !get_n_block_min_causal_local_maskI   s   

z+BlockInfo.get_n_block_min_causal_local_maskc                 C   st   t | j p	| jdu r|S |d | j }t | jdkr"t|| j}||j |j }|| j }t	
|t|| jS )zhIf we have separate iterations with local masking at the end, where do we stop the non-masked iterationsNr   )r   r   r   r
   r   r   r   r   r   r   r   r   )r   r   r   r$   r!   r"   r&   r(   r(   r)   !get_n_block_min_before_local_mask\   s   
z+BlockInfo.get_n_block_min_before_local_mask)r   r   )__name__
__module____qualname__r   	Constexprint__annotations__boolr   r   r   r   r   r   r   r   jitr   r   r*   r3   r4   r5   r(   r(   r(   r)   r	      sZ   
 
! r	   )typingr   r   dataclassesr   r   cutlass.cuter   r   r   "flash_attn_origin.cute.seqlen_infor   r	   r(   r(   r(   r)   <module>   s   