o
    .i]                     @   s"  d dl mZ d dlmZmZmZ d dlZd dlmZ d dl	m
Z
 d dlmZ d dlmZmZ d dlmZmZ e
eZG d	d
 d
eZ					d dedejdedB dedB dedededededB dee fddZededee fddZdedee fddZededee fddZdS )!    )cache)
NamedTuplecastget_argsN)
CacheDType)init_logger)resolve_obj_by_qualname)AttentionBackendAttentionType)MAMBA_TYPE_TO_BACKEND_MAPMambaAttentionBackendEnumc                   @   s~   e Zd ZU eed< ejed< edB ed< edB ed< dZe	ed< dZ
e	ed< dZe	ed	< dZe	ed
< ejZeed< dd ZdS )AttentionSelectorConfig	head_sizedtypeNkv_cache_dtype
block_sizeFuse_mlahas_sink
use_sparseuse_mm_prefix	attn_typec                 C   sN   d| j  d| j d| j d| j d| j d| j d| j d| j d	| j d
S )Nz"AttentionSelectorConfig(head_size=z, dtype=z, kv_cache_dtype=z, block_size=z
, use_mla=z, has_sink=z, use_sparse=z, use_mm_prefix=z, attn_type=)	r   r   r   r   r   r   r   r   r   )self r   W/home/ubuntu/veenaModal/venv/lib/python3.10/site-packages/vllm/v1/attention/selector.py__repr__    s$   
z AttentionSelectorConfig.__repr__)__name__
__module____qualname__int__annotations__torchr   r   r   boolr   r   r   r
   DECODERr   strr   r   r   r   r   r      s   
 
r   Fr   r   r   r   r   r   r   r   r   returnc	                 C   s|   |durt t}	||	v sJ d| d|	 ddlm}
 |
 }|jj}t| |ttdB |||||||p5tj	d	}t
||dS )z=Selects which attention backend to use and lazily imports it.NzInvalid kv_cache_dtype: z. Valid values are: r   )get_current_vllm_configr   )backendattn_selector_config)r   r   vllm.configr'   attention_configr(   r   r   r
   r$   _cached_get_attn_backend)r   r   r   r   r   r   r   r   r   valid_cache_dtypesr'   vllm_configbackend_enumr)   r   r   r   get_attn_backend.   s2   
r0   r)   c                 C   sp   ddl m} |j| |d}|std|j t|} |  }|d ur6ddlm} || t	
d||   | S )Nr   )current_platform)r)   zInvalid attention backend for )set_kv_cache_layoutz(Using %s KV cache layout for %s backend.)vllm.platformsr1   get_attn_backend_cls
ValueErrordevice_namer   get_required_kv_cache_layout vllm.v1.attention.backends.utilsr2   loggerinfoget_name)r(   r)   r1   attention_clsrequired_layoutr2   r   r   r   r,   Y   s(   
r,   
mamba_typec                 C   s   t | S )zASelect which mamba attention backend to use and lazily import it.)_cached_get_mamba_attn_backend)r>   r   r   r   get_mamba_attn_backendy   s   r@   c              
   C   sp   | rt | ts	J d }z
t|  }t| }W n ty1 } ztd| dttj  |d }~ww |	 }|S )Nz'Invalid mamba attention backend type: 'z'. Valid backends are: )

isinstancer%   r   r   KeyErrorr5   list__members__keys	get_class)r>   selected_backendbackend_nameemamba_attn_backendr   r   r   r?      s"   r?   )FFFFN)	functoolsr   typingr   r   r   r"   vllm.config.cacher   vllm.loggerr   vllm.utils.import_utilsr   vllm.v1.attention.backendr	   r
   #vllm.v1.attention.backends.registryr   r   r   r9   r   r    r   r%   r#   typer0   r,   r@   r?   r   r   r   r   <module>   sj   	

+
