o
    c۷i                     @   sz   d dl mZ d dlmZ d dlZd dlmZ d dlmZm	Z	m
Z
 d dlmZmZ d dlmZ eddG d	d
 d
eZdS )    )Tuple)	dataclassN)Boolean
const_exprInt32)TensorMapUpdateModeTensorMapManager)llvmT)frozenc                   @   s   e Zd ZdZejdejdejdeddfddZ	ejd	e
ejd
f de
ejd
f de
ejd
f dede
ejd
f ddfddZejde
ejd
f dede
ejd
f de
ed
f deje
ed
f  ddfddZdS )TensorMapManagerSm90aI  
    We have to subclass cutlass.utils.TensorMapManager bc it takes in warp_id and only
    perform the operation if warp_id matches the current warp.
    But for Hopper pingpong gemm we want to call it with warp_id 0 and 4.
    So we take in a boolean `is_manager_warp` to determine whether to perform the operation or not.
    	copy_atomdst_ptris_manager_warpreturnNc                 C   sL   |rt j  t jj|| W d    n1 sw   Y  t j  d S )N)cutearch	elect_onenvgpucpasynccopy_tensormap	sync_warp)selfr   r   r    r   M/home/ubuntu/vllm_env/lib/python3.10/site-packages/quack/tensormap_manager.pyinit_tensormap_from_atom   s   
z-TensorMapManagerSm90.init_tensormap_from_atomtensor_gmem.tma_copy_atomtensormap_gmem_ptrtensormap_smem_ptrc           
      C   s  |rt | jtjkrt|||D ]\}}}tjj||| qtj	
  tj	  tj	jddd W d    n1 s<w   Y  tj	  t | jtjkrbt||D ]\}	}tjj|	| qSd S t|||D ]\}}}	tjj|||	 qhtj	  tjj  d S d S )Nr   Tread)r   tensormap_update_moder   SMEMzipr   r   r   update_tma_descriptorr   r   cp_async_bulk_commit_groupcp_async_bulk_wait_groupr   cp_fence_tma_desc_releasefence_tma_desc_release)
r   r   r   r   r   r   r   tensorsmem_ptrgmem_ptrr   r   r   update_tensormap    s,   



z%TensorMapManagerSm90.update_tensormapshapesordersc              
   C   s  |rt | jtjkr9t|||D ](\}}}|  }	tjd |	t	| t	| gd| ddddtj
jd qtj  tj  tjjddd W d    n1 sVw   Y  tj  t | jtjkr|t||D ]\}
}tjj|
| qmd S t|t|  krt|ksJ  J t|||D ](\}
}}|
  }tjd |t	| t	| gd	| d
dddtj
jd qtj  tjj  d S d S )Nz{
	.reg .b64 smem_ptr_i64;
	cvt.u64.u32 smem_ptr_i64, $0;
	tensormap.replace.tile.global_dim.shared::cta.b1024.b32 [smem_ptr_i64], z	, $1;
	}
zr,rTF)has_side_effectsis_align_stackasm_dialectr   r   z9tensormap.replace.tile.global_dim.global.b1024.b32 [$0], z, $1;zl,r)r   r!   r   r"   r#   tointir_valuer	   
inline_asmr   
AsmDialectAD_ATTr   r   r   r%   r&   r   r   r   r'   lenr(   )r   r   r   r   r-   r.   r*   shapeordersmem_ptr_i32r+   gmem_ptr_i64r   r   r   update_tensormap_shapeA   sN   


(

	z+TensorMapManagerSm90.update_tensormap_shape)__name__
__module____qualname____doc__r   jitCopyAtomPointerr   r   r   Tensorr,   r   cutlass	Constexprintr<   r   r   r   r   r      sP    	 
r   )typingr   dataclassesr   rE   cutlass.cuter   cutlass.cutlass_dslr   r   r   cutlass.utilsr   r   cutlass._mlir.dialectsr	   r   r   r   r   r   <module>   s   