o
    پi=                     @   s   d dl mZ d dlmZ d dlZd dlmZ d dlmZmZm	Z	 d dl
mZ d dlmZmZ d dlmZ eG dd	 d	eZG d
d dZdS )    )Optional)	dataclassN)Int32Boolean
const_expr)
LayoutEnum)ArgumentsBase
ParamsBase)TensorMapManagerSm90c                   @   sV   e Zd ZU dZeej ed< dZeej ed< dZ	eej ed< dZ
eej ed< dS )VarlenArgumentsNmCuSeqlensMmCuSeqlensKmTensormapsmAIdx)__name__
__module____qualname__r   r   cuteTensor__annotations__r   r   r    r   r   F/home/ubuntu/.local/lib/python3.10/site-packages/quack/varlen_utils.pyr      s
   
 r   c                   @   s  e Zd ZdZeG dd deZededfdddded	e	e
jj d
e	ej de	ej de	ej dee	ej  dedededefddZeddddedefddZeej		dTddddededededededeeB dd fddZd edefd!d"Zd edefd#d$Zd%ejd edejfd&d'Zd edejfd(d)Zd*ejd edejfd+d,Zd-ejd edejfd.d/Z	dUd0e	ej d1ejd2eeB ddfd3d4Z 	dUd5e	ej d6eej d2eeB ddfd7d8Z!dVd9d:Z"ej	dUd ed;e#d<e#d2eeB ddf
d=d>Z$ej	dUd ed?e#d@ee dAee d2eeB ddfdBdCZ%ejdUd2eeB ddfdDdEZ&ejdUd2eeB ddfdFdGZ'de	ej fdHdIZ(de	ej fdJdKZ)de	ej fdLdMZ*dee	ej  fdNdOZ+dPdQ Z,dRdS Z-dS )WVarlenManager   c                   @   sz   e Zd ZU dZeej ed< dZeej ed< dZ	eej ed< dZ
eej ed< eejddddedd fd	d
ZdS )zVarlenManager.ParamsNcu_seqlens_mcu_seqlens_k
tensormapsr   locipargsreturnc                C   s   t j| j| j| j| jdS )N)r   r   r   r   )r   Paramsr   r   r   r   r    r   r   r   r   r   create"   s   zVarlenManager.Params.create)r   r   r   r   r   r   r   r   r   r   r   staticmethodjitr   r$   r   r   r   r   r"      s   
 "r"   TNr   paramstensormap_managertensormap_a_ptrtensormap_b_ptrtensormap_d_ptrtensormap_epi_ptrslen_m_staticlen_k_staticlast_batch_idxis_group_changedc                C   s|   || _ || _|| _|| _|| _|| _|| _|| _|	| _|
| _	t
|jd u| _t
|jd u| _t
|jd u| _|| _|| _d S N)r(   r)   _tensormap_a_ptr_tensormap_b_ptr_tensormap_d_ptr_tensormap_epi_ptrs_len_m_static_len_k_static_last_batch_idx_is_group_changedr   r   varlen_mr   varlen_kr   gather_A_loc_ip)selfr(   r)   r*   r+   r,   r-   r.   r/   r0   r1   r   r   r   r   r   __init__,   s   
zVarlenManager.__init__r    r!   c                C   s.   | j d ur| jd urJ dtjj| ||dS )Nz(Only support either varlen_m or varlen_kr   )r   r   r   r"   r$   r#   r   r   r   to_underlying_argumentsL   s   z%VarlenManager.to_underlying_argumentsFr   has_Dnum_epi_tensormapspingpongwarp_idxc             
      sL  d d\}	}
}d g| }t  jd u}t  jd u}t |p|rttjjjtj	t
t
j t
j t |rnt rB|d nd}|t |r^ j|d f j}s[dnd7  fddt|D }n,|srJ t  jd u}t | r jdd f j}	 j|sdndd f j}
t |	|
||||dS )	N)NNN   r         c              	      s4   g | ]}  j|sd nd  df jqS )rH   rI   N)get_tensormap_ptrr   iterator).0ir(   rE   tensormap_epi_offsetr)   tensormap_workspace_idxr   r   
<listcomp>t   s    z(VarlenManager.create.<locals>.<listcomp>)r.   r/   )r   r   r   r
   cutlassutilsTensorMapUpdateModeGMEMr   bytes_per_tensormapr   make_layoutarchgrid_dim	block_idxrJ   r   rK   ranger   )r(   rC   rD   r.   r/   rE   rF   r   r   r*   r+   r,   r-   r;   r<   tensormap_d_idxr=   r   rN   r   r$   S   sV   


zVarlenManager.create	batch_idxc                 C   ,   t | jr| jj|d  | jj|  S | jS NrH   )r   r;   r(   r   r7   r@   r]   r   r   r   len_m      
zVarlenManager.len_mc                 C   r^   r_   )r   r<   r(   r   r8   r`   r   r   r   len_k   rb   zVarlenManager.len_kmA_mklc                 C   s`   | j }t| jrt|j| df|}|S t| jr'td|j| f|}|S |d d |f }|S Nr   )r(   r   r;   r   domain_offsetr   r<   r   )r@   rd   r]   r(   mA_mkr   r   r   offset_batch_A      

zVarlenManager.offset_batch_Ac                 C   s`   | j }t| jrt|j| f|j}|S t| jr't|j| f|j}|S |jd |f }|S r2   )	r(   r   r;   r   rf   r   r   r<   r   )r@   r]   r(   mAIdx_mkr   r   r   offset_batch_AIdx   ri   zVarlenManager.offset_batch_AIdxmB_nklc                 C   s<   | j }t| jrtd|j| f|}|S |d d |f }|S re   )r(   r   r<   r   rf   r   )r@   rl   r]   r(   mB_nkr   r   r   offset_batch_B      
zVarlenManager.offset_batch_BmD_mnlc                 C   s<   | j }t| jrt|j| df|}|S |d d |f }|S re   )r(   r   r;   r   rf   r   )r@   rp   r]   r(   mD_mnr   r   r   offset_batch_epi   ro   zVarlenManager.offset_batch_epi
tma_atom_a
tma_atom_bis_manager_warpc                 C   sB   t | jrt | j r| j|| j| | j|| j| d S d S r2   )r   r<   r=   r)   init_tensormap_from_atomr3   r4   )r@   rs   rt   ru   r   r   r   init_tensormap_AB   s   
zVarlenManager.init_tensormap_AB
tma_atom_dtma_atoms_epic                 C   sX   t | jr(t | jd ur| j|| j| t|| jD ]\}}| j||| qd S d S r2   )r   r;   r5   r)   rv   zipr6   )r@   rx   ry   ru   tma_atomtensormap_epi_ptrr   r   r   init_tensormap_epi   s   
z VarlenManager.init_tensormap_epic                 C   s   | j   d S r2   )r)   fence_tensormap_initialization)r@   r   r   r   fence_tensormap_init   s   z"VarlenManager.fence_tensormap_inita_layoutb_layoutc           	      C   s   t | jrat|| jk| _|| _| jrc| jj}| jg}||d  g}t |tj	kr+dndg}t | j
 rT|d| j |d||d   |dt |tj	krQdnd | jj||||d d d S d S d S )NrH   r   ru   shapesorderstensormap_smem_ptr)r   r<   r   r9   r:   r(   r   r4   r   	ROW_MAJORr=   insertr3   r)   update_tensormap_shape)	r@   r]   r   r   ru   r   tensormap_ptrsr   r   r   r   r   update_tensormap_AB   s*   

z!VarlenManager.update_tensormap_ABd_layout
epi_shapes
epi_ordersc                 C   s   t | jrgt|| jk| _|| _|d urt | rdndnd }| jri| jj}g g g }}	}
t | jd urJ|	| j |		||d   |
	| |
| j |	
| |

| | jj|||	|
d d d S d S d S )Nr   rH   r   )r   r;   r   r9   r:   is_m_major_cr(   r   r5   appendextendr6   r)   r   )r@   r]   r   r   r   ru   order_dr   r   r   r   r   r   r   update_tensormap_epi  s0   
	



z"VarlenManager.update_tensormap_epic                 C   sL   t | jr | jr"|r$t | j r| j| j | j| j d S d S d S d S r2   )r   r<   r:   r=   r)   fence_tensormap_updater3   r4   )r@   ru   r   r   r   fence_tensormap_update_AB(  s   

z'VarlenManager.fence_tensormap_update_ABc                 C   sd   t | jr*| jr,|r.t | jd ur| j| j | jD ]}t |d ur)| j| qd S d S d S d S r2   )r   r;   r:   r5   r)   r   r6   )r@   ru   r|   r   r   r   fence_tensormap_update_epi0  s   


z(VarlenManager.fence_tensormap_update_epic                 C   0   d }t | jo
| jd ur| j| jtjj}|S r2   )r   r<   r3   r)   rJ   r   AddressSpacegeneric)r@   tma_desc_a_ptrr   r   r   get_tma_desc_a_ptr:     
z VarlenManager.get_tma_desc_a_ptrc                 C   s&   d }t | jr| j| jtjj}|S r2   )r   r<   r)   rJ   r4   r   r   r   )r@   tma_desc_b_ptrr   r   r   get_tma_desc_b_ptrB  s   

z VarlenManager.get_tma_desc_b_ptrc                 C   r   r2   )r   r;   r5   r)   rJ   r   r   r   )r@   tma_desc_d_ptrr   r   r   get_tma_desc_d_ptrJ  r   z VarlenManager.get_tma_desc_d_ptrc                 C   sT   d gt | j }t| jr(t| jD ]\}}t|d ur'| j|tjj	||< q|S r2   )
lenr6   r   r;   	enumerater)   rJ   r   r   r   )r@   tma_desc_epi_ptrsrM   r|   r   r   r   get_tma_desc_epi_ptrsR  s   
z#VarlenManager.get_tma_desc_epi_ptrsc              
   C   sd   g g }| _ | j| j| j| j| j| j| j| j| j	| j
f
D ]}t|}||7 }| j t| q|S r2   )_values_posr(   r)   r3   r4   r5   r6   r7   r8   r9   r:   rR   extract_mlir_valuesr   r   )r@   valuesobj
obj_valuesr   r   r   __extract_mlir_values__\  s    
z%VarlenManager.__extract_mlir_values__c                 C   s~   g }t | j| j| j| j| j| j| j| j| j	| j
g
| jD ]\}}|t||d |  ||d  }q| jt|d| jiS )Nr   )rz   r(   r)   r3   r4   r5   r6   r7   r8   r9   r:   r   r   rR   new_from_mlir_values	__class__tupler>   )r@   r   obj_listr   n_itemsr   r   r   __new_from_mlir_values__o  s$   z&VarlenManager.__new_from_mlir_values__)Fr   )T)r!   N).r   r   r   rV   r   r	   r"   r   r   r   rR   rS   TensorMapManagerr   PointerlistrA   r%   r   rB   r&   boolintr$   ra   rc   r   rh   rk   rn   rr   CopyAtomrw   r}   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r      s    
	

 	@




#	
r   )typingr   dataclassesr   rR   cutlass.cuter   r   r   r   cutlass.utilsr   quack.cute_dsl_utilsr   r	   quack.tensormap_managerr
   r   r   r   r   r   r   <module>   s   