o
    پi|"                     @  s   d dl mZ d dlZd dlZd dlZd dlmZ d dlZd dlm	Z	 d dl
mZ d dlmZ d dlmZmZmZ d dlmZmZ d d	lmZ erNd d
lmZ eeZG dd dZdS )    )annotationsN)TYPE_CHECKING)HiCacheController)BaseTokenToKVPoolAllocator)BasePrefixCache)MHATokenToKVPoolMLATokenToKVPoolReqToTokenPool)MHATokenToKVPoolHostMLATokenToKVPoolHost)
ServerArgs)Reqc                   @  sX   e Zd ZdZd%ddZd&ddZdd Zdd Zd'ddZdd Z	dd  Z
d(d"d#Zd$S ))DecodeKVCacheOffloadManagerz@Manage decode-side KV cache offloading lifecycle and operations.req_to_token_poolr	   token_to_kv_pool_allocatorr   tp_grouptorch.distributed.ProcessGroup
tree_cacher   server_argsr   returnNonec                 C  s   || _ || _|j| _|| _d| _|| _| j }t|tr+t	||j
|j| j|j| _nt|tr>t||j
|j| j|j| _ntd|| _tjj| jd| _t| j| j| j||jt |j|j|jd	| _i | _i | _t !d d S )Nr   z,Unsupported KV cache type for decode offload)group)	r   mem_pool_host	page_sizer   
io_backendload_cache_eventstorage_backend
model_namestorage_backend_extra_configz'Enable offload kv cache for decode side)"r   r   r   r   request_counterr   get_kvcache
isinstancer   r
   hicache_ratiohicache_sizehicache_mem_layoutdecode_host_mem_poolr   r   
ValueErrorr   torchdistributedget_world_sizetp_world_sizer   hicache_io_backend	threadingEventhicache_storage_backendserved_model_name$hicache_storage_backend_extra_configcache_controllerongoing_offloadongoing_backuploggerinfo)selfr   r   r   r   r   kv_cache r8   l/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/disaggregation/decode_kvcache_offload_manager.py__init__!   sP   


z$DecodeKVCacheOffloadManager.__init__boolc                 C  sT  | j du s
| jdu rdS |jdkst|jdkrdS | jj|j }| dks-| dkr/dS |j	|jdd  }t|j	| j
 | j
 }t|| }|| j
 | j
 }|dkrXdS ||| }}||| }	||| }
|dkry| j|d|  |  jd7  _| j}| j j|
 |d}|du rtd|j  dS |||	t |f| j|< dS )	z-Offload incremental KV cache for decode side.NFr      )device_indicesnode_idz#Not enough host memory for request T)r1   r%   req_pool_idxlen
output_idsr   req_to_tokendimnumelorigin_input_idsr   r   freer   writelongr4   errorridtimer2   )r6   reqtoken_indices
all_tokensprefill_offloaded_lenincremental_lenincremental_aligned_lenstartendincremental_tokensincremental_indicesack_idhost_indicesr8   r8   r9   offload_kv_cacheV   sJ   
z,DecodeKVCacheOffloadManager.offload_kv_cachec                 C  st   | j }tjt|j|j gtjd}| jdkr%tj	j
|tj	jj| jd tt| \}}| | | | dS )zRCheck the progress of offload from device to host and backup from host to storage.)dtyper=   )opr   N)r1   r'   tensorrA   ack_write_queueack_backup_queueqsizeintr*   r(   
all_reduceReduceOpMINr   maptolist_check_offload_progress_check_backup_progress)r6   ccqsizesn_writen_backupr8   r8   r9   check_offload_progress   s   

z2DecodeKVCacheOffloadManager.check_offload_progressc                 C  sz   |dkr;| j jd\}}}|  |D ]}| j|\}}}}	}
| ||
 | ||||	|
 q|d8 }|dksdS dS )z2Check the progress of offload from device to host.r   r=   N)r1   r]   popsynchronizer2   _release_finished_req_trigger_backup)r6   finish_count_finish_eventack_listrW   rM   rX   rU   
start_timerP   r8   r8   r9   rf      s*   
z3DecodeKVCacheOffloadManager._check_offload_progressrM   r   rP   r`   c                 C  sP   |  }| jj|j||f }| j| | j| | j jt|j	8  _d S )N)
pop_committed_kv_cacher   rC   r@   r   rG   r   protected_size_rA   prefix_indices)r6   rM   rP   kv_committed_len
kv_indicesr8   r8   r9   ro      s   z1DecodeKVCacheOffloadManager._release_finished_reqc              
   C  sn   t |D ]0}| jj }|j}| j|\}}}| j| t	
d| dt| dt | dd qdS )z2Check the progress of backup from host to storage.zFinished backup request z, free host memory, len:z, cost time:z.2fz	 seconds.N)ranger1   r^   getidr3   rm   r%   rG   r4   r5   rA   rL   )r6   rq   rr   storage_operationrW   req_idrX   ru   r8   r8   r9   rg      s   $z2DecodeKVCacheOffloadManager._check_backup_progressc           
      C  s\   |  |jd| }|dkr|d nd}|  ||}| jj|||d}	|j||f| j|	< dS )z*Trigger async backup from host to storage.Nr   r<    )
hash_value)_compute_prefix_hashrF   r1   write_storagerK   r3   )
r6   rM   rX   rU   ru   rP   prefill_hasheslast_prefill_hashpage_hashesrW   r8   r8   r9   rp      s   z+DecodeKVCacheOffloadManager._trigger_backupr   c                 C  sN   g }|}t dt|| jD ]}|||| j  }| j||}|| q|S )Nr   )r{   rA   r   r1   get_hash_strappend)r6   tokens
prior_hashr   	last_hashoffsetpage_tokensr8   r8   r9   r      s   z0DecodeKVCacheOffloadManager._compute_prefix_hashN)r   r	   r   r   r   r   r   r   r   r   r   r   )r   r;   )rM   r   rP   r`   )r   )__name__
__module____qualname____doc__r:   rY   rl   rf   ro   rg   rp   r   r8   r8   r8   r9   r      s    

58
r   )
__future__r   loggingr,   rL   typingr   r'   $sglang.srt.managers.cache_controllerr   sglang.srt.mem_cache.allocatorr   &sglang.srt.mem_cache.base_prefix_cacher    sglang.srt.mem_cache.memory_poolr   r   r	   %sglang.srt.mem_cache.memory_pool_hostr
   r   sglang.srt.server_argsr   "sglang.srt.managers.schedule_batchr   	getLoggerr   r4   r   r8   r8   r8   r9   <module>   s     
