o
    پi                     @   s   d dl mZ d dlmZ d dlZd dlmZ d dlmZ d dlm	Z	m
Z
 d dlmZ d dlmZ d dlmZ eG d	d
 d
eZejddddejddfddZdS )    )Type)	dataclassN)cpasync)Int32
const_expr)utils)
ParamsBase)FastDivmodDivisorc                   @   st  e Zd ZU ejed< ejed< ejed< eed< eed< eed< eed< eed< ej	e ed	< ej	e ed
< ej	e ed< ej	e ed< eed< eed< ej
ed< ej
ed< ejed< ejed< ejed< ejed< edejdejdejdededededededej	e d
ej	e dej	e d	ej	e deej fddZejdefddZejdedejdefdd Zd!S )"PagedKVManager
mPageTablemK_pagedmV_paged
thread_idxpage_size_divmodseqlen_k	leftpad_kn_block_sizenum_threadshead_dim_paddedhead_dim_v_paddedgmem_threads_per_rowpage_entry_per_threadasync_copy_elemsgmem_tiled_copy_KVgmem_thr_copy_KVtPrPagetPrPageOffsettKpKtVpVbidbbidhdtypec                 C   sV  d}d}||j  }tjtjtjjd||d}tj|| |fdd}td|f}t	|||}|
|}|	| | }t|ft}t|ft}| |d f } |d d |d f }|d d |d f }t|	|
f}||}tj||jd d}t|
|kr~|}nt|	|f}||}tj||jd	 d}t| |||||||	||
||||||||||S )
N      )
cache_mode)num_bits_per_copy)   r   )orderr&   )limitr   )widthcutemake_copy_atomr   	CopyG2SOpLoadCacheModeGLOBALmake_ordered_layoutmake_layoutmake_tiled_copy_tv	get_slicemake_rmem_tensorr   make_identity_tensorpartition_Sr   predicate_kshaper   r
   )r   r   r   r   r   r    r   r   r   r   r   r   r   r!   universal_copy_bitsr   r   atom_async_copy
thr_layout
val_layoutr   r   r   r   r   cKtKcKr   r   cVtVcV r@   S/home/ubuntu/.local/lib/python3.10/site-packages/flash_attn_origin/cute/paged_kv.pycreate(   sf   




zPagedKVManager.createn_blockc           	      C   s   t j| jddD ]F}|| j | j | j }|| j | }t|| j | j	\}}|d | j | jkp5|| jk o:|| j
k }|rB| j| nd}|| j|< || j|< qd S )Nr&   unrollr   )cutlassranger   r   r   r   r   divmodr   r   r   r   r   r   )	selfrC   irowrow_idxpage_idxpage_offsetis_validpager@   r@   rA   load_page_tables   s   
zPagedKVManager.load_page_tablesXK_or_Vc              
   C   s  |dv sJ t |jt j|jd d |jd d |jd ff|jd d |jd d |jd ffd}t|dkrIt |jt j|jddgd}t|dkrR| j	n| j
}t | j|f}| j|}| j|}|dkru| j|| j  nd}	tjt j|dgdddD ]u}
|d|
df d |	k }| j|
 }| j|
 }t|d	kr| j|d |f n| jd ||f }t || jf}|rtjt j|dgdddD ]}|dd|f d | j }t | j|d |f |d |
|f  qqt|dkrt|d |
d f d qd S )
N)KVr   r&      )striderU   )moderD   rT   )r*   make_tensoriteratorr0   r7   rW   r   selectlayoutr   r   r4   r   r   partition_Dr5   r   rF   rG   sizer   r   r   r   tiled_divider   copyr   fill_swizzled)rI   rC   rR   rS   sX_pihead_dimcXtXsXtXcXseqlenk_row_limitmshould_loadrP   rN   mX_paged_curmX_paged_cur_copykkir@   r@   rA   load_KV   sJ   $$



zPagedKVManager.load_KVN)__name__
__module____qualname__r*   Tensor__annotations__r   r	   rF   	Constexpr	TiledCopystaticmethodr   NumericrB   jitrQ   strrn   r@   r@   r@   rA   r
      sn   
 








	
Jr
   )locipvaluereturnc                C   s(   t | | j}|| t ||  dS )zFill tensor with a constant value.

    Fills all elements of the tensor with the specified value, assuming static size
    and supported memory space.
    N)r*   make_rmem_tensor_likeelement_typefillautovec_copy)tensorr|   rz   r{   rTmpr@   r@   rA   ra      s   
ra   )typingr   dataclassesr   rF   cutlass.cuter*   cutlass.cute.nvgpur   r   r   flash_attn_origin.cuter   %flash_attn_origin.cute.cute_dsl_utilsr   r	   r
   dsl_user_oprw   ra   r@   r@   r@   rA   <module>   s     %"