o
    ii	                     @   sJ   d dl mZmZ d dlmZ d dlmZ d dlmZ eG dd dZ	dS )    )AnyLiteral)field_validator)config)AttentionBackendEnumc                   @   s   e Zd ZU dZdZedB ed< 	 dZed dB ed< 	 dZ	e
ed< 	 dZeed	< 	 dZe
ed
< 	 dZe
ed< 	 dZe
dB ed< 	 dZe
ed< 	 dZe
ed< 	 defddZedddededefddZdS )AttentionConfigz/Configuration for attention mechanisms in vLLM.Nbackend)      flash_attn_versionFuse_prefill_decode_attention    (flash_attn_max_num_splits_for_cuda_graphuse_cudnn_prefillT"use_trtllm_ragged_deepseek_prefilluse_trtllm_attentiondisable_flashinfer_prefill!disable_flashinfer_q_quantizationreturnc                 C   s&   ddl m}m} g }|| |}||S )a$  
        Provide a hash that uniquely identifies all the configs
        that affect the structure of the computation
        graph from input ids/embeddings to the final hidden states,
        excluding anything before input ids/embeddings and after
        the final hidden states.
        r   )get_hash_factorshash_factors)vllm.config.utilsr   r   )selfr   r   ignored_factorsfactors r   K/home/ubuntu/vllm_env/lib/python3.10/site-packages/vllm/config/attention.pycompute_hash.   s   
zAttentionConfig.compute_hashbefore)modevaluec                 C   s   t |trt|  S |S )z6Enable parsing of the `backend` enum type from string.)
isinstancestrr   upper)clsr    r   r   r   validate_backend_before<   s   
z'AttentionConfig.validate_backend_before)__name__
__module____qualname____doc__r   r   __annotations__r   r   r   boolr   intr   r   r   r   r   r"   r   r   classmethodr   r%   r   r   r   r   r      s0   
 
r   N)
typingr   r   pydanticr   r   r   #vllm.v1.attention.backends.registryr   r   r   r   r   r   <module>   s   