o
    پim                  #   @   sn  d dl mZmZ d dlmZ d dlmZ d dlZd dlm	Z	 d dlm
Z
mZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZmZ d dlmZ d dlZG dd deZG dd deeZG dd deeZdej j!ej j"ej j#dZ$					d(dedededededee dee% de&de&de&de&d e'd!e'd"e&d#ee d$ee d%df"d&d'Z(i e(_)dS ))    )OptionalTuple)partial)TensorN)Float32
const_expr)GemmSm90)	GemmSm100)GemmDefaultEpiMixin)GemmActMixin)get_device_capacityget_max_active_clusters)GemmWrapperBasec                   @   sX   e Zd ZejZejZej	d
dedeej	df dej	de
ej	 de
ej	 f
dd	ZdS )GemmDActMixinNparamsepi_loop_tensors.tRS_rDtRS_rCreturnc           	      C   sj  |d usJ t j| |||d d t|| j}|| | j t|j	d urt
|jj| j}t| jdk rWtjt|ddD ]}|	|| || \||< ||< qCnJtjt|d ddD ]:}|	|d|  |d| d  f|d|  |d| d  f\\|d| < |d| d < \|d| < |d| d < qcn|}t|| j}|| | j |S )N)r   d   T)unroll_full      )r
   epi_visit_subtilecutemake_fragment_like	acc_dtypestoreloadtor   act_fnmake_fragmentlayoutshapearchcutlassrangesizepostact_dtype)	selfr   r   r   r   
tRS_rC_acctRS_rPostActitRS_rPostAct_out r.   C/home/ubuntu/.local/lib/python3.10/site-packages/quack/gemm_dact.pyr      s.   "	zGemmDActMixin.epi_visit_subtile)N)__name__
__module____qualname__r   EpilogueArgumentsEpilogueParamsr   jitr   r   r   r   r.   r.   r.   r/   r      s     r   c                   @      e Zd ZdS )GemmDActSm90Nr0   r1   r2   r.   r.   r.   r/   r7   ?       r7   c                   @   r6   )GemmDActSm100Nr8   r.   r.   r.   r/   r:   C   r9   r:   )Nrelurelu_sqgelu_tanh_approxT   ABOutPreActPostActtile_count_semaphore
activationtile_Mtile_N	cluster_M	cluster_Npingpong
persistentmax_swizzle_sizecu_seqlens_mA_idxr   c           %      C   s  |d ur6|s
J d|  ddksJ d| ddks J d| ddks+J d| ddks6J d|d u}|rL|d usDJ d|
dksLJ d	|tv sWJ d
| tj| |||d|i||d\}}}}}tj||d ud t| dddddd}t|| t| j}|d dv sJ d|d dkrt	nt
}t}||f}|	|
df}||d j|d j||d j|d j|d jstd|rt|	|
 nd}t|| t| }||d j|}tj|||d}t|d |||||j|} t }!tj|||||||d u|||d u|d udd}"tj}#|"|#vrQ|d dkr*t|||d}|||d j|||d}$t|$|d j|d j|d j|d j||| |!	|#|"< |#|" |d j|d j|d j|d j||| |! d S ) Nz!varlen_m requires persistent=Truer   z!varlen_m requires A to be k-majorz#varlen_m requires Out to be n-majorz&varlen_m requires PreAct to be n-majorz'varlen_m requires PostAct to be n-majorz9gather_A requires varlen (cu_seqlens_m must be specified)zgather_A requires cluster_N=1zUnsupported activation rC   )additional_tensorsrM   rN   )varlen_m)mkl)nrS   rT   )rR   rU   rT   )r?   r@   DCrC   r   )	   
   z!Only SM90 and SM100 are supportedrX   r?   r@   rV   z;Skipping due to unsupported combination of types and majors)rL   )r?   r@   rV   rC   rW   )key_tensor_names)rJ   is_persistent)gather_ArW   )stridedact_fn_mapr   validate_and_prepare_tensorspermute_tensorsextract_dtypesdetermine_major_ordersr   devicer:   r7   r   is_valid_dtypesdtypemajor	TypeErrorr   create_cute_tensorsr3   cute_tensorcreate_scheduler_argscreate_varlen_argsnum_epi_tensormapscutlass_torchcurrent_streamget_compile_key	gemm_dactcompile_cacher   r   compile)%r?   r@   rA   rB   rC   rD   rE   rF   rG   rH   rI   rJ   rK   rL   rM   rN   r\   LMKNtensor_infosmajor_configsdevice_capacityGemmClsr   tile_shape_mncluster_shape_mnkmax_active_clustersr    epi_argsscheduler_argsvarlen_argsrn   compile_keycachegemmr.   r.   r/   rp   O   s   	



rp   )TTr>   NN)*typingr   r   	functoolsr   torchr   r%   cutlass.cuter   r   r   cutlass.torchrm   quack.gemm_sm90r   quack.gemm_sm100r	   quack.gemm_default_epir
   quack.gemm_actr   quack.cute_dsl_utilsr   r   quack.gemm_wrapper_utilsr   quack.activationquackr   r7   r:   rE   dreludrelu_sqdgelu_tanh_approxr^   strintboolrp   rq   r.   r.   r.   r/   <module>   s   *	

 
	