o
    ˜à·ig  ã                	   @   sF  d dl mZ d dlmZ d dlmZ eeƒZdZe 	¡ r+d dl
mZ d dlmZmZ nQe ¡ rEd dlmZ d dlmZ ejZejZejZn7e ¡ r|z
d d	lmZ d
ZW n eyg   dededefdd„ZY nw dededdfdd„Zd dlmZ ejZddededB fdd„Zdefdd„Zdefdd„Zdd„ Zdefdd„ZdS )é    )ÚAny)Úinit_logger©Úcurrent_platformF)Úreshape_and_cache_flash)Úflash_attn_varlen_funcÚget_scheduler_metadata)Ú_custom_ops)Úxpu_ops)r   TÚargsÚkwargsÚreturnc                  O   s   t dƒ‚)Nz\ROCm platform requires upstream flash-attn to be installed. Please install flash-attn first.)ÚImportError©r   r   © r   úY/home/ubuntu/vllm_env/lib/python3.10/site-packages/vllm/v1/attention/backends/fa_utils.pyr   '   s   ÿr   Nc                  O   s   d S )Nr   r   r   r   r   r   .   s   r   Úrequires_alibic              	   C   s  ddl m} | ¡ rdS | ¡ rd S ziddlm}m} | ¡ }|d us%J ‚|jdkr0|dƒr0dnd}ddl	m
} |ƒ }|d urI|jjd urI|jj}|jdkrY|dkrYt d	¡ d}| rf|dkrft d
¡ d}||ƒsst d|||ƒ¡ ||ƒsyJ ‚|W S  ttfy‡   Y d S w )Nr   r   é   )Úfa_version_unsupported_reasonÚis_fa_version_supportedé	   é   )Úget_current_vllm_config_or_noneé
   zJCannot use FA version 3 on Blackwell platform, defaulting to FA version 2.z?Cannot use FA version 3 with ALiBi, defaulting to FA version 2.z3Cannot use FA version %d is not supported due to %s)Úvllm.platformsr   Úis_xpuÚis_rocmÚ)vllm.vllm_flash_attn.flash_attn_interfacer   r   Úget_device_capabilityÚmajorÚvllm.configr   Úattention_configÚflash_attn_versionÚloggerÚwarning_onceÚerrorr   ÚAssertionError)r   r   r   r   Údevice_capabilityÚ
fa_versionr   Úvllm_configr   r   r   Úget_flash_attn_version7   sJ   ÿÿÿýÿr*   c                   C   s   t ƒ dko	t d¡S )Nr   éZ   )r*   r   Úis_device_capability_familyr   r   r   r   Úflash_attn_supports_fp8t   s   
þr-   c                   C   s   t  ¡ rdS tƒ dkS )NTr   )r   r   r*   r   r   r   r   Úflash_attn_supports_sinks{   s   
r.   c               	   C   sR   ddl m}  |  ¡ r'zddlm} |dƒo|  d¡W S  ttfy&   Y dS w dS )Nr   r   )r   r   r+   F)r   r   Úis_cudar   r   r,   r   r&   )r   r   r   r   r   Úflash_attn_supports_mla‚   s   ÿþþr0   c                   C   s$   t  ¡ st  ¡ r
dS t  ¡ rtS dS )aÑ  Check if flash_attn_varlen_func is available.

    This function determines whether the flash_attn_varlen_func imported at module
    level is a working implementation or a stub.

    Platform-specific sources:
    - CUDA: vllm.vllm_flash_attn.flash_attn_varlen_func
    - XPU: xpu_ops.flash_attn_varlen_func
    - ROCm: upstream flash_attn.flash_attn_varlen_func (if available)

    Note: This is separate from the AITER flash attention backend (rocm_aiter_fa.py)
    which uses rocm_aiter_ops.flash_attn_varlen_func. The condition to use AITER is
    handled separately via _aiter_ops.is_aiter_found_and_supported().

    Returns:
        bool: True if a working flash_attn_varlen_func implementation is available.
    TF)r   r/   r   r   Ú_ROCM_FLASH_ATTN_AVAILABLEr   r   r   r   Ú#is_flash_attn_varlen_func_available“   s
   r2   )F)Útypingr   Úvllm.loggerr   r   r   Ú__name__r#   r1   r/   Úvllm._custom_opsr   Úvllm.vllm_flash_attnr   r   r   Úvllmr	   ÚopsÚvllm._xpu_opsr
   r   Ú
flash_attnr   ÚboolÚintr*   r-   r.   r0   r2   r   r   r   r   Ú<module>   s:   þ	=