o
    
۾i                     @   s  d dl mZ d dlmZmZ d dlZd dlmZmZ d dl	m
Z
 d dlmZmZmZ d dlmZmZmZ d dlmZ d	ed
eeef fddZded	edejfddZdedejfddZdedeeejf deeef d
eeejf fddZdeej deeef dedeeef dejd
eeejf fddZdejded
eeejf fddZdee de d e d!ejd"ejd#ejd$e d%eej dejded
eeef fd&d'Z!dS )(    )Sequence)AnycastN)
VllmConfigget_layers_from_vllm_config)AttentionLayerBase)AttentionBackendAttentionMetadataBuilderCommonAttentionMetadata)AttentionSpecKVCacheConfigKVCacheSpec)bind_kv_cachevllm_configreturnc                 C   sH   i }t tt t}t| |}| D ]\}}||  }r!|||< q|S N)r   typer   r   r   itemsget_kv_cache_spec)r   kv_cache_spec
layer_typeattn_layers
layer_nameattn_modulespec r   Q/home/ubuntu/.local/lib/python3.10/site-packages/vllm/v1/worker/gpu/attn_utils.pyr      s   
r   kv_cache_configdevicec                 C   s   i }g }d }| j D ]J}|j}tt|}ttt t}	t||	|}
|
| 	 }|D ]}|||< q)|
 |j|||}|| | dkrS|d u rN| }q	|| q	||fS )N
FLASHINFER)kv_cache_groupslayer_namesnextiterr   r   r   r   r   get_attn_backendget_builder_clsr   appendget_name_get_workspace_bufferset_workspace_buffer)r   r   r   attn_backendsattn_metadata_buildersflashinfer_workspacekv_cache_group_specr!   any_layer_namer   r   attn_backendr   attn_metadata_builderr   r   r   init_attn_backend"   s*   





r1   c                 C   s|   i }| j D ]}tj|jtj|d}|jD ]}|||< qqt }| jD ]}|jD ]}|	| q'q"|t|
 ks<J d|S )N)dtyper   z)Some layers are not correctly initialized)kv_cache_tensorstorchzerossizeint8	shared_bysetr    r!   addkeys)r   r   kv_cache_raw_tensorskv_cache_tensortensorr   r!   groupr   r   r   _allocate_kv_cache?   s   




r@   r<   r*   c                    s  i }| j D ]}|j}t|tsJ |jD ]w}|| }| |j dks%J | |j }|| }	|	||j|j	|j
 z|	 tt ksJJ W n ttfy^   ttt Y nw t fddD  fddttD }
|j}||}| }|j|
 ||< qq|S )Nr   c                 3   s    | ]} | V  qd S r   r   .0i)kv_cache_shaper   r   	<genexpr>m   s    z$_reshape_kv_cache.<locals>.<genexpr>c                    s   g | ]}  |qS r   )indexrA   )kv_cache_stride_orderr   r   
<listcomp>n   s    z%_reshape_kv_cache.<locals>.<listcomp>)r    r   
isinstancer   r!   numelpage_size_bytesget_kv_cache_shape
block_sizenum_kv_heads	head_sizeget_kv_cache_stride_orderlenAttributeErrorNotImplementedErrortupleranger2   viewpermute)r   r<   r*   	kv_cachesr-   r   r   
raw_tensor
num_blocksr/   	inv_orderr2   r   )rD   rG   r   _reshape_kv_cacheP   s>   





r\   runner_kv_cachesforward_contextc                 C   s&   t ||}t|||}t|||  |S r   )r@   r\   r   )r]   r^   r   r*   r   r<   rX   r   r   r   init_kv_cachez   s   
r_   slot_mappingsc                 C   s6   i }|j }t| |D ]\}}|jD ]}|||< qq
|S r   )r    zipr!   )r`   r   slot_mappings_by_layerr    slot_mappingkv_cache_groupr   r   r   r   build_slot_mappings_by_layer   s   

re   r+   num_reqs
num_tokensquery_start_loc_gpuquery_start_loc_cpuseq_lensmax_seq_lenblock_tablesc
                 C   s   t | }
|d | }i }|	j}t|D ]/\}}|| }|| }t|||||||
||dd
}| | }|jd|d}|jD ]}|||< q=q|S )NT)
query_start_locri   rj   rk   rf   num_actual_tokensmax_query_lenblock_table_tensorrc   causalr   )common_prefix_lencommon_attn_metadata)intmaxr    	enumerater
   buildr!   )r+   rf   rg   rh   ri   rj   rk   rl   r`   r   ro   attn_metadatar    rC   r   block_tablerc   rs   r0   metadatar   r   r   r   build_attn_metadata   s6   

r{   )"collections.abcr   typingr   r   r4   vllm.configr   r   /vllm.model_executor.layers.attention_layer_baser   vllm.v1.attention.backendr   r	   r
   vllm.v1.kv_cache_interfacer   r   r   vllm.v1.worker.utilsr   dictstrr   r   r1   r@   Tensorr\   listr_   re   rt   r{   r   r   r   r   <module>   s   


*



	

