o
    i                     @   sP   d dl Z d dlmZ e rd dlmZ n
e rd dlm	Z G dd dZ
dS )    N)current_platform)_custom_ops)xpu_opsc                   @   sx   e Zd Zedejdededeejejf fddZedejdejd	ejd
ejdejde	dejdejddfddZ
dS )PagedAttentionkv_cachenum_kv_heads	head_sizereturnc                 C   sT   d|    }| jd }| d }||||| d|}| d }||||d}||fS )N      r   )element_sizeshapeview)r   r   r   x
num_blocks	key_cachevalue_cache r   V/home/ubuntu/vllm_env/lib/python3.10/site-packages/vllm/v1/attention/ops/paged_attn.pysplit_kv_cache   s   
zPagedAttention.split_kv_cachekeyvaluer   r   slot_mappingkv_cache_dtypek_scalev_scaleNc              
   C   s    t | |||| ||| d S )N)opsreshape_and_cacheflatten)r   r   r   r   r   r   r   r   r   r   r   write_to_paged_cache   s   z#PagedAttention.write_to_paged_cache)__name__
__module____qualname__staticmethodtorchTensorinttupler   strr    r   r   r   r   r      s>    	r   )r%   vllm.platformsr   is_cuda_alikevllmr   r   is_xpuvllm._xpu_opsr   r   r   r   r   r   <module>   s   