o
    .i                     @   s   d dl mZ d dlZd dlmZ d dlmZ d dlmZ d dl	m
Z
 d dlmZmZ d dlmZ d d	lmZ d d
lmZ d dlmZmZ d dlmZ d dlmZ d dlmZ G dd deZdS )    )IteratorN)
VllmConfig)current_platform)AttentionBackend)KVCacheConfig)LoadStoreSpecOffloadingManager)ARCOffloadingManager)
CPUBackend)LRUOffloadingManager)CPULoadStoreSpecGPULoadStoreSpec)OffloadingSpec)CpuGpuOffloadingHandlers)OffloadingHandlerc                
       sv   e Zd Zdedef fddZdefddZdee	e
jf d	ee	ee f deeee ee ef  fd
dZ  ZS )CPUOffloadingSpecvllm_configkv_cache_configc                    s   t  || | jd}|std|d usJ dd |jD }t|dks)J | }|t|j |j	j
 }|| j| j  }|dkrJt|| nd| _d | _d | _| jdd| _d S )	Ncpu_bytes_to_usez?cpu_bytes_to_use must be specified in kv_connector_extra_configc                 S   s   h | ]}|j jqS  )kv_cache_specpage_size_bytes).0kv_cache_groupr   r   S/home/ubuntu/veenaModal/venv/lib/python3.10/site-packages/vllm/v1/kv_offload/cpu.py	<setcomp>!   s    z-CPUOffloadingSpec.__init__.<locals>.<setcomp>   r   eviction_policylru)super__init__extra_configget	Exceptionkv_cache_groupslenpopkv_cache_tensorsparallel_config
world_sizeoffloaded_block_sizegpu_block_sizeint
num_blocks_manager	_handlersr   )selfr   r   r   
page_sizesr   kv_bytes_per_blockkv_bytes_per_offloaded_block	__class__r   r   r       s8   
zCPUOffloadingSpec.__init__returnc                 C   s   | j s=| jj}|d uo|j}t| j| jd}| jdkr%t||d| _ | j S | jdkr4t	||d| _ | j S t
d| j d| j S )N)
block_sizer-   r   )backendenable_eventsarczUnknown eviction policy: z. Supported policies: lru, arc)r.   r   kv_events_configenable_kv_cache_eventsr
   r*   r-   r   r   r	   
ValueError)r0   r;   r9   r8   r   r   r   get_manager>   s*   

zCPUOffloadingSpec.get_manager	kv_cachesattn_backendsc                 c   sd    | j st stdt|| j| j| j|d| _ | j d us J tt	| j j
fV  t	t| j jfV  d S )Nz=CPU Offloading is currently only supported on CUDA-alike GPUs)r@   r+   cpu_block_sizenum_cpu_blocks
gpu_caches)r/   r   is_cuda_aliker#   r   r+   r*   r-   r   r   gpu_to_cpu_handlercpu_to_gpu_handler)r0   r?   r@   r   r   r   get_handlersX   s    zCPUOffloadingSpec.get_handlers)__name__
__module____qualname__r   r   r    r   r>   dictstrtorchTensortyper   r   tupler   r   rG   __classcell__r   r   r4   r   r      s    (r   )collections.abcr   rM   vllm.configr   vllm.platformsr   vllm.v1.attention.backendr   vllm.v1.kv_cache_interfacer   vllm.v1.kv_offload.abstractr   r   vllm.v1.kv_offload.arc_managerr	   vllm.v1.kv_offload.backends.cpur
   vllm.v1.kv_offload.lru_managerr   vllm.v1.kv_offload.mediumsr   r   vllm.v1.kv_offload.specr   !vllm.v1.kv_offload.worker.cpu_gpur    vllm.v1.kv_offload.worker.workerr   r   r   r   r   r   <module>   s   