o
    .i%                     @   s"  U d Z ddlmZ ddlmZmZ ddlmZmZ ddl	m
Z
 ddlmZ er-ddlmZ e
eZG dd	 d	eZG d
d deedZG dd deedZejjejjejjejjejjejjdZi Zeeef ed< i Z eeef ed< 		ddeeB dedB de!dee"ge"f fddZ#dS )zAttention backend registry    )Callable)EnumEnumMeta)TYPE_CHECKINGcast)init_logger)resolve_obj_by_qualname)AttentionBackendc                       s&   e Zd ZdZdef fddZ  ZS )_AttentionBackendEnumMetazDMetaclass for AttentionBackendEnum to provide better error messages.namec                    sN   zt  |W S  ty&   td| j }d|}td| d| dw )z0Get backend by name with helpful error messages.zdict[str, Enum]z, zUnknown attention backend: 'z'. Valid options are: N)super__getitem__KeyErrorr   __members__keysjoin
ValueError)clsr   membersvalid_backends	__class__ `/home/ubuntu/veenaModal/venv/lib/python3.10/site-packages/vllm/v1/attention/backends/registry.pyr      s   
z%_AttentionBackendEnumMeta.__getitem__)__name__
__module____qualname____doc__strr   __classcell__r   r   r   r   r
      s    r
   c                   @   s   e Zd ZdZdZdZdZdZdZdZ	dZ
d	Zd
ZdZdZdZdZdZdZdZdZdZdZdZdZdZdZd%dedefddZd&dd Zdefd!d"Zd'd#d$Z dS )(AttentionBackendEnuma  Enumeration of all supported attention backends.

    The enum value is the default class path, but this can be overridden
    at runtime using register_backend().

    To get the actual backend class (respecting overrides), use:
        backend.get_class()
    z;vllm.v1.attention.backends.flash_attn.FlashAttentionBackendzHvllm.v1.attention.backends.flash_attn_diffkv.FlashAttentionDiffKVBackendz=vllm.v1.attention.backends.triton_attn.TritonAttentionBackendz9vllm.v1.attention.backends.rocm_attn.RocmAttentionBackendz=vllm.v1.attention.backends.mla.rocm_aiter_mla.AiterMLABackendzEvllm.v1.attention.backends.mla.aiter_triton_mla.AiterTritonMLABackendzCvllm.v1.attention.backends.rocm_aiter_fa.AiterFlashAttentionBackendzNvllm.v1.attention.backends.mla.rocm_aiter_mla_sparse.ROCMAiterMLASparseBackend z7vllm.v1.attention.backends.flashinfer.FlashInferBackendzBvllm.v1.attention.backends.mla.flashinfer_mla.FlashInferMLABackendz:vllm.v1.attention.backends.mla.triton_mla.TritonMLABackendz<vllm.v1.attention.backends.mla.cutlass_mla.CutlassMLABackendz7vllm.v1.attention.backends.mla.flashmla.FlashMLABackendzDvllm.v1.attention.backends.mla.flashmla_sparse.FlashMLASparseBackendz@vllm.v1.attention.backends.mla.flashattn_mla.FlashAttnMLABackendz4vllm.v1.attention.backends.ipex.IpexAttentionBackendz:vllm.v1.attention.backends.no_attention.NoAttentionBackendz>vllm.v1.attention.backends.flex_attention.FlexAttentionBackendz9vllm.v1.attention.backends.tree_attn.TreeAttentionBackendzSvllm.v1.attention.backends.rocm_aiter_unified_attn.RocmAiterUnifiedAttentionBackendz7vllm.v1.attention.backends.cpu_attn.CPUAttentionBackendNTinclude_classnamereturnc                 C   D   t | | j}|std| j d| j d|s |ddd }|S zGet the class path for this backend (respects overrides).

        Returns:
            The fully qualified class path string

        Raises:
            ValueError: If Backend.CUSTOM is used without being registered
        zBackend z= must be registered before use. Use register_backend(Backend.z, 'your.module.YourClass').   r   )_ATTN_OVERRIDESgetvaluer   r   rsplitselfr"   pathr   r   r   get_pathU      	
zAttentionBackendEnum.get_pathtype[AttentionBackend]c                 C      t |  S a  Get the backend class (respects overrides).

        Returns:
            The backend class

        Raises:
            ImportError: If the backend class cannot be imported
            ValueError: If Backend.CUSTOM is used without being registered
        r   r/   r-   r   r   r   	get_classh      
zAttentionBackendEnum.get_classc                 C      | t v S zCheck if this backend has been overridden.

        Returns:
            True if the backend has a registered override
        )r(   r5   r   r   r   is_overriddent      z"AttentionBackendEnum.is_overriddenc                 C      t | d dS z>Clear any override for this backend, reverting to the default.N)r(   popr5   r   r   r   clear_override|      z#AttentionBackendEnum.clear_overrideTr#   r1   r#   N)!r   r   r   r   
FLASH_ATTNFLASH_ATTN_DIFFKVTRITON_ATTN	ROCM_ATTNROCM_AITER_MLAROCM_AITER_TRITON_MLAROCM_AITER_FAROCM_AITER_MLA_SPARSE
TORCH_SDPA
FLASHINFERFLASHINFER_MLA
TRITON_MLACUTLASS_MLAFLASHMLAFLASHMLA_SPARSEFLASH_ATTN_MLAIPEXNO_ATTENTIONFLEX_ATTENTION	TREE_ATTNROCM_AITER_UNIFIED_ATTNCPU_ATTNCUSTOMboolr   r/   r6   r:   r?   r   r   r   r   r    "   sH    	
r    )	metaclassc                   @   s^   e Zd ZdZdZdZdZdZdZdZ	dd	e
d
efddZdddZd
e
fddZdddZdS )MambaAttentionBackendEnuma  Enumeration of all supported mamba attention backends.

    The enum value is the default class path, but this can be overridden
    at runtime using register_backend().

    To get the actual backend class (respecting overrides), use:
        backend.get_class()
    z=vllm.v1.attention.backends.mamba1_attn.Mamba1AttentionBackendz=vllm.v1.attention.backends.mamba2_attn.Mamba2AttentionBackendzDvllm.v1.attention.backends.short_conv_attn.ShortConvAttentionBackendz=vllm.v1.attention.backends.linear_attn.LinearAttentionBackendz7vllm.v1.attention.backends.gdn_attn.GDNAttentionBackendNTr"   r#   c                 C   r$   r%   )_MAMBA_ATTN_OVERRIDESr)   r*   r   r   r+   r,   r   r   r   r/      r0   z"MambaAttentionBackendEnum.get_pathr1   c                 C   r2   r3   r4   r5   r   r   r   r6      r7   z#MambaAttentionBackendEnum.get_classc                 C   r8   r9   )r^   r5   r   r   r   r:      r;   z'MambaAttentionBackendEnum.is_overriddenc                 C   r<   r=   )r^   r>   r5   r   r   r   r?      r@   z(MambaAttentionBackendEnum.clear_overriderA   rB   rC   )r   r   r   r   MAMBA1MAMBA2
SHORT_CONVLINEARGDN_ATTNrZ   r[   r   r/   r6   r:   r?   r   r   r   r   r]      s    	
r]   )mamba1mamba2
short_convlinear_attentiongdn_attentioncustomr(   r^   NFbackend
class_pathis_mambar#   c                    sH   dt dt f fdd}|dur"r|t < dd S |t < dd S |S )a  Register or override a backend implementation.

    Args:
        backend: The AttentionBackendEnum member to register
        class_path: Optional class path. If not provided and used as
            decorator, will be auto-generated from the class.

    Returns:
        Decorator function if class_path is None, otherwise a no-op

    Examples:
        # Override an existing attention backend
        @register_backend(AttentionBackendEnum.FLASH_ATTN)
        class MyCustomFlashAttn:
            ...

        # Override an existing mamba attention backend
        @register_backend(MambaAttentionBackendEnum.LINEAR, is_mamba=True)
        class MyCustomMambaAttn:
            ...

        # Register a custom third-party attention backend
        @register_backend(AttentionBackendEnum.CUSTOM)
        class MyCustomBackend:
            ...

        # Direct registration
        register_backend(
            AttentionBackendEnum.CUSTOM,
            "my.module.MyCustomBackend"
        )
    r   r#   c                    s8   r| j  d| j t < | S | j  d| j t < | S )Nr&   )r   r   r^   r(   )r   rj   rl   r   r   	decorator   s
   z#register_backend.<locals>.decoratorNc                 S   s   | S )Nr   )xr   r   r   <lambda>   s    z"register_backend.<locals>.<lambda>)typer^   r(   )rj   rk   rl   rn   r   rm   r   register_backend   s   &rr   )NF)$r   collections.abcr   enumr   r   typingr   r   vllm.loggerr   vllm.utils.import_utilsr   vllm.v1.attention.backendr	   r   loggerr
   r    r]   r_   r   r`   ra   rb   rc   rZ   MAMBA_TYPE_TO_BACKEND_MAPr(   dictr   __annotations__r^   r[   rq   rr   r   r   r   r   <module>   s@   _@
