o
    c۷i#                     @   s   d dl mZmZmZ d dlZd dlmZ d dlm  m	Z
 d dlmZ d dlmZmZ d dlmZmZmZmZ d dlmZ e		d/ddddee d	ed
ejdee dee deejejf fddZeZeddddejdejdejdededejfddZej 		 	d0dej!dejdejdejdej"e dej"e dej"e ddfdd Z#			!	d1dej!d"ej$dejdejd#ee d$ee dededejfd%d&Z%			!	d1dej!dejdejdejded#ee d$ee dededdfd'd(Z&	d2d)ej'd*ej$d+eej d,eej def
d-d.Z(dS )3    )TypeUnionOptionalN)	warpgroup)Numericdsl_user_op)Float32Int32Boolean
const_expr)
LayoutEnumlocipdtypelayouttilestagemajor_mode_sizereturnc                C   s   t jt j|||d||d}t|d u r | r|d n|d }tt|| || }t|	 r3dnd}	t j
|t|d urDt ||n|t|d urM|	n|	d d d}
|
S )Nr      r   )r   r      )r   r   r   r   )order)cuteproduct_eachshaper   is_n_major_cr   make_smem_layout_atomsm90_utils_ogget_smem_layout_atomis_m_major_ctile_to_shapeappend)r   r   r   r   r   r   r   r   smem_layout_atomr   smem_layout_staged r%   F/home/ubuntu/vllm_env/lib/python3.10/site-packages/quack/sm90_utils.pymake_smem_layout   s   r'   cTepi_tile
tiled_copytidxreference_srcc          	      C   s>   | |}t| |}t|r|j|||dS |j|||dS )Nr   )	get_slicer   flat_divider   partition_Spartition_D)	r(   r)   r*   r+   r,   r   r   thr_copycT_epir%   r%   r&   partition_for_epilogue-   s
   
r3   F	tiled_mmaacctCrAtCrB	zero_initwg_waitswap_ABc           	   
   C   s   t |rt| |||||dd d S t  t| j}|tjj	|  t
t|jd D ]}t|||d d |f |d d |f | |tjj	d q/t  t |dkr^t| d S d S )NF)r8   r9   r:   r   Tr   )r   gemmr   fencer   make_mma_atomopsetField
ACCUMULATEcutlassrange_constexprsizer   commit_group
wait_group)	r4   r5   r6   r7   r8   r9   r:   mma_atomkr%   r%   r&   r;   A   s   &r;   r   A_idxB_idxc              
   C   s   t |rt| |d d d |||||ddS t| |t}t |d u r&|n|d d d |f }	t |d u r6|n|d d d |f }
t| ||	|
d|d |S )NrI   Fr:   Tr8   r9   )r   gemm_zero_initr   make_rmem_tensorpartition_shape_Cr   r;   )r4   r   r6   r7   rJ   rK   r9   r:   r5   rArBr%   r%   r&   rN   \   s   
  rN   c	                 C   s~   t |rt| |||||||dd	 d S t |d u r|n|d d d |f }	t |d u r+|n|d d d |f }
t| ||	|
||d d S )NFrL   rM   )r   
gemm_w_idxr;   )r4   r5   r6   r7   r8   rJ   rK   r9   r:   rQ   rR   r%   r%   r&   rS   r   s
     rS   thr_mma	shape_mnksAsBc           	      C   s  | j jtjjk}t| rKt| |d d t	}t| r.|d us%J | 
| |}n| 
| |d |d f}|d usBJ | | |}n?t| |d |d ft	}t| rn|d useJ | 
| |}n| 
| |d |d f}|d usJ | | |}|||fS )Nr   r   r   )r>   a_srcr   OperandSourceRMEMr   r   rO   rP   r   make_fragment_Apartition_Apartition_shape_Amake_fragment_Bpartition_B)	rT   rU   rV   rW   r:   is_rsr5   r6   r7   r%   r%   r&   partition_fragment_ABC   s&   



ra   )NN)Fr   F)NNrI   F)F))typingr   r   r   rB   cutlass.cuter   cutlass.utils.hopper_helpersutilshopper_helpersr   cutlass.cute.nvgpur   cutlass.cutlass_dslr   r   r   r	   r
   r   cutlass.utilsr   TileintLayoutComposedLayoutr'   make_smem_layout_epiTensor	TiledCopyboolr3   jitTiledMma	Constexprr;   ShaperN   rS   ThrMmara   r%   r%   r%   r&   <module>   s   				
	

