o
    پiz                     @   s  d dl mZmZmZ d dlZd dlmZ deej dedej	fddZ
ej				dd	ejd
ejdejdejdejdejdejdejdee deje deje deje ddfddZej	dd	ejd
ejdejdejdejdejdee ddfddZdS )    )TypeCallableOptionalNdtypek_dimreturnc              	   C   s   t | jd }t || }t |d dkrdn|d dkr!dn	|d dkr)dnd| }|dkr4dn|dkr:dn|dkr@d	nd
}|dkrHd	n|d	krNdnd}tt|||dtjt |d dkrfdnd|fddS )N      r   @                      )r   r   )order)cutlass
const_exprwidthcutemake_composed_layoutmake_swizzlemake_ordered_layout)r   r   
dtype_bytebytes_per_rowsmem_k_block_sizeswizzle_bitsswizzle_base r   Y/home/ubuntu/.local/lib/python3.10/site-packages/flash_attn_origin/cute/ampere_helpers.pyget_smem_layout_atom   s,   "
r    F	tiled_mmaacctCrAtCrBtCsAtCsBsmem_thr_copy_Asmem_thr_copy_Bhook_fn	A_in_regs	B_in_regsswap_ABc                 C   sl  t |rt| |||||||||
|	dd d S ||}||}t |	 r2t||d |d  t |
 rCt||d |d  t t|jd D ]e}|t|jd d k rt |	 rwt||d d |d f |d d |d f  t |
 rt||d d |d f |d d |d f  t| ||d d |f |d d |f | t |dko|d ur|  qNd S )NF)r*   r+   r,   NNr   r   r   r   )	r   r   gemmretiler   copyrange_constexprsizeshape)r!   r"   r#   r$   r%   r&   r'   r(   r)   r*   r+   r,   tCrA_copy_viewtCrB_copy_viewkr   r   r   r.   "   sH   



""&r.   c           	   
   C   s   | |}t||d |d  tt|jd D ]G}t|t|jd d k rAt||d d |d f |d d |d f  t| ||d d |f |d d |f | t|dko]|d urb|  qd S )Nr-   r   r   r   )	r/   r   r0   r   r1   r2   r3   r   r.   )	r!   r"   r#   r$   r&   r(   r)   r5   r6   r   r   r   gemm_rsV   s   

*&r7   )NFFF)N)typingr   r   r   r   cutlass.cuter   NumericintComposedLayoutr    jitTiledMmaTensor	TiledCopy	Constexprboolr.   r7   r   r   r   r   <module>   sn   
	
3