o
    پi                     @   s  d dl mZmZmZ d dlZd dlmZ d dlmZmZm	Z	m
Z
 d dlmZ d dlmZmZ d dlmZ d dlm  mZ ej		 	d dejd	ejd
ejdejdeje deje deje ddfddZ				d!dejdejd
ejdejdee dee dededejfddZ				d!dejd	ejd
ejdejde	dee dee dededdfddZe	d"ddddee dedejdee deej ej!f f
ddZ"dS )#    )TypeUnionOptionalN)Int32Float32Boolean
const_expr)	warpgroup)Numericdsl_user_op)
LayoutEnumF	tiled_mmaacctCrAtCrB	zero_initwg_waitswap_ABreturnc           	   
   C   s   t |rt| |||||dd d S t  t| j}|tjj	|  t
t|jd D ]}t|||d d |f |d d |f | |tjj	d q/t  t |dkr^t| d S d S )NF)r   r   r      Tr   )r   gemmr	   fencecutemake_mma_atomopsetField
ACCUMULATEcutlassrange_constexprsizeshapecommit_group
wait_group)	r   r   r   r   r   r   r   mma_atomk r&   Y/home/ubuntu/.local/lib/python3.10/site-packages/flash_attn_origin/cute/hopper_helpers.pyr      s   &r   r!   A_idxB_idxc              
   C   s   t |rt| |d d d |||||ddS t| |t}t |d u r&|n|d d d |f }	t |d u r6|n|d d d |f }
t| ||	|
d|d |S )Nr(   Fr   Tr   r   )r   gemm_zero_initr   make_fragmentpartition_shape_Cr   r   )r   r!   r   r   r)   r*   r   r   r   rArBr&   r&   r'   r-   '   s   
  r-   c	                 C   s~   t |rt| |||||||dd	 d S t |d u r|n|d d d |f }	t |d u r+|n|d d d |f }
t| ||	|
||d d S )NFr+   r,   )r   
gemm_w_idxr   )r   r   r   r   r   r)   r*   r   r   r0   r1   r&   r&   r'   r2   =   s
     r2   )locipdtypelayoutstagec          
      C   s   |  r|d n|d }tt|| || }t| rdnd}tj|t|d ur0t	||n|t|d ur9|n|d d d}	|	S )N   r   )r8   r   r   )r   r8   r   r   )order)
is_n_major_cr	   make_smem_layout_atomsm90_utils_ogget_smem_layout_atomr   is_m_major_cr   tile_to_shapeappend)
r5   r6   r!   r7   r3   r4   major_mode_sizesmem_layout_atomr9   smem_layout_stagedr&   r&   r'   make_smem_layoutP   s   
rD   )Fr   F)NNr(   F)N)#typingr   r   r   r   cutlass.cuter   r   r   r   r   cutlass.cute.nvgpur	   cutlass.cutlass_dslr
   r   cutlass.utilsr   cutlass.utils.hopper_helpersutilshopper_helpersr<   jitTiledMmaTensor	Constexprboolintr   Shaper-   r2   LayoutComposedLayoutrD   r&   r&   r&   r'   <module>   s   		
	

