o
    ˜à·iÿ  ã                   @   sÔ  d dl Z d dlmZ d dlmZ eeƒZe ¡ r,zd dlZ	dZ
W n ey+   dZ
Y nw dZ
e ¡ rFzd dlZ	dZW n eyE   dZY nw dZdeeedB f fdd„ZdeeedB f fd	d
„ZdeeedB f fdd„Zdd„ Zeƒ d  r‹d dlmZmZmZmZmZmZmZ nG dd„ dƒZeZeZeZeZeZeZde jdededee je jf fdd„Z				d#de jde jde jde jdede jde jde dB dede jdB d e jdB dee je jf fd!d"„Z!dS )$é    N)Úinit_logger)Úcurrent_platformTFÚreturnc                   C   s   t sdS tsdS dS )N)Fz¤vllm._flashmla_C is not available, likely was not compiled due to insufficient nvcc version or a supported arch was not in the list of target arches to compile for.)FzZvllm._flashmla_extension_C is not available, likely was not compiled due to a build error.©TN)Ú_flashmla_C_AVAILABLEÚ_flashmla_extension_C_AVAILABLE© r   r   úT/home/ubuntu/vllm_env/lib/python3.10/site-packages/vllm/v1/attention/ops/flashmla.pyÚ_is_flashmla_available!   s
   r
   c                  C   s(   t ƒ \} }| sd|fS t d¡sdS dS )úC
    Return: is_supported_flag, unsupported_reason (optional).
    FéZ   )Fz3FlashMLA Dense is only supported on Hopper devices.r   ©r
   r   Úis_device_capability_family©Úis_availableÚmaybe_reasonr   r   r	   Úis_flashmla_dense_supported3   s   

r   c                  C   s2   t ƒ \} }| sd|fS t d¡st d¡sdS dS )r   Fr   éd   )FzBFlashMLA Sparse is only supported on Hopper and Blackwell devices.r   r   r   r   r   r	   Úis_flashmla_sparse_supported?   s   
ÿþr   c                  O   s   t ƒ \}}t|p	dƒ‚)NzFlashMLA is not available)r
   ÚRuntimeError)Ú_argsÚ_kwargsÚ_Úreasonr   r   r	   Ú_raise_flashmla_unavailableQ   s   
r   )ÚFlashMLASchedMetaÚflash_attn_varlen_funcÚflash_attn_varlen_kvpacked_funcÚ flash_attn_varlen_qkvpacked_funcÚflash_mla_sparse_fwdÚflash_mla_with_kvcacheÚget_mla_metadatac                   @   s   e Zd ZdS )r   N)Ú__name__Ú
__module__Ú__qualname__r   r   r   r	   r   b   s    r   Úcache_seqlensÚnum_q_tokens_per_head_kÚnum_heads_kc                 C   s"   t ƒ d stƒ  tjj | ||¡S )Nr   )r
   r   ÚtorchÚopsÚ_flashmla_extension_CÚ#get_mla_decoding_metadata_dense_fp8)r%   r&   r'   r   r   r	   Úget_mla_metadata_dense_fp8m   s   
ýr,   ÚqÚk_cacheÚblock_tableÚ
head_dim_vÚtile_scheduler_metadataÚ
num_splitsÚsoftmax_scaleÚcausalÚ	descale_qÚ	descale_kc                 C   sT   t ƒ d stƒ  |d u r| jd d }tjj | |||||||||	|
¡\}}||fS )Nr   éÿÿÿÿg      à¿)r
   r   Úshaper(   r)   r*   Úfwd_kvcache_mla_fp8)r-   r.   r/   r%   r0   r1   r2   r3   r4   r5   r6   ÚoutÚsoftmax_lser   r   r	   Úflash_mla_with_kvcache_fp8{   s$   
õr<   )NFNN)"r(   Úvllm.loggerr   Úvllm.platformsr   r"   ÚloggerÚis_cudaÚvllm._flashmla_CÚvllmr   ÚImportErrorÚvllm._flashmla_extension_Cr   ÚtupleÚboolÚstrr
   r   r   r   Ú-vllm.third_party.flashmla.flash_mla_interfacer   r   r   r   r   r    r!   ÚTensorÚintr,   Úfloatr<   r   r   r   r	   Ú<module>   sŽ   ÿÿ
&ÿþý
üõÿþýüûúùø	÷
öõô