o
    پi6                     @   sv   d dl Z d dlmZ d dlmZ ddlmZmZmZm	Z	m
Z
 edefddZd	ed
efddZd	ed
efddZdS )    N)contextmanager)Path   )InputLayoutencode_nameenumerate_hmma_flash_kernelsenumerate_qmma_flash_kernelsgenerate_filespathc              	   c   s8    t  }t |  zdV  W t | dS t | w )z8Context manager to temporarily change working directory.N)osgetcwdchdir)r
   original_dir r   e/home/ubuntu/.local/lib/python3.10/site-packages/flashinfer/jit/attention/fmha_v2/generate_kernels.pyworking_directory   s   
r   
src_targetgen_dirc                 C   s`   |j ddd |d }| s| r|  |j| dd |d j dd |d j dd dS )	zHSetup output directory with symlinks to TensorRT-LLM source directories.T)parentsexist_oksrc)target_is_directory	generated)r   binN)mkdir
is_symlinkexistsunlink
symlink_to)r   r   src_linkr   r   r   _setup_output_directory   s   r    c           	         s$  t | | t| g }t|dddd t|dddgd t|dddgdd g }d	d
 }|D ]>  j} j} j}||rG fdd|D n g||rWfdd|D n||rffdd|D n| q/dd |D }dd |D }t| W d    d S 1 sw   Y  d S )Nx   bf16   )smdtypehead_size_v	e4m3_fp32   )r$   r%   
head_sizes)r$   r%   r)   output_dtypec                 S   s   t | ttfS )N)
isinstancelisttuple)xr   r   r   <lambda>7   s    z#enumerate_kernels.<locals>.<lambda>c                    s   g | ]} j |d qS ))seq_len_replace).0s)kspecr   r   
<listcomp>=   s    z%enumerate_kernels.<locals>.<listcomp>c                    "   g | ]} D ]}|j |d qqS ))	head_sizer1   )r3   dtmp_kstmp_expr   r   r6   B      " c                    r7   ))r%   r1   )r3   dtr:   r;   r   r   r6   G   r=   c                 S   s   g | ]
}|j |jkr|qS r   )r$   sm_mmar3   r5   r   r   r   r6   N   s    c                 S   s  g | ]\}|j d kr0|jdv r0|jdkr0|jdkr0|jdu r0|jdkr0|js0|jr0|jt	j
ksH|j dkrN|jdv rN|jdkrN|jrN|jdkrN|jsN|jrH|j dkry|jdv ry|jd kry|jdkry|jdu ry|jdkry|jsy|jry|jt	j
ksH|j d	v r|jd
v r|jdkr|jdkr|jt	jkr|jdu r|jdkr|js|jr|js|jsH|j d	v r|jdv r|jdkr|jdkr|jt	j
kr|jdu r|jdkr|js|jr|jr|jr|js|jr|jrH|j dkr|jdv r|jdkr|jdv r|js|jr|jr|jt	jkr|js|jrH|j dkr_|jdv r_|jdv r_|jdv r_|jdkr_|js_|jr_|js_|jt	jkr_|jdksW|jdksW|js|gt|R qS )P   )fp16r"   	fp16_fp32e4m3r'      r   N   Z   )rB   r"   rC   d   )rG   rH   r!   )r"   r'   i@  i   )r"   rD   r'   r(   r#   )rA   r#   ))@   rI   rE   Y   ))rI       rK   )rB   r"   )r$   r%   r8   r&   sage_block_sizesversion	cross_mhaflash_attentioninput_layoutr   SEPARATE_Q_K_Vldgsts_q
Q_PAGED_KVwarp_specializationtiledalibienable_attn_logit_softcapping
PACKED_QKVr*   r   r@   r   r   r   r6   P   s    


























_)	r    r   r   r   r0   r8   r%   extendr	   )	r   r   specsspecs_expanded	list_liketmp_stmp_d	tmp_dtypespecs_namesr   )r5   r<   r   enumerate_kernels(   sD   


f "ra   )r   
contextlibr   pathlibr   generator_utilsr   r   r   r   r	   r   r    ra   r   r   r   r   <module>   s    	
