o
    پi                     @  sN  d dl mZ 	 d dlZd dlZd dlZd dlmZmZmZ d dl	m
Z
mZmZmZ d dlZd dlmZmZ e
rCd dlmZ d dlmZ d dlmZmZ d d	lmZmZmZmZ d d
lmZ d dl m!Z! e"e#Z$e! Z%G dd dZ&G dd dZ'G dd dZ(G dd deZ)G dd dZ*G dd dZ+G dd de+Z,G dd dZ-dS )    )annotationsN)EmptyFullQueue)TYPE_CHECKINGList
NamedTupleOptional)HiCacheStorageConfigHiCacheStorageExtraInfo)BaseTokenToKVPoolAllocator)HostKVCache)get_tensor_model_parallel_rank$get_tensor_model_parallel_world_size)get_attention_dp_rankget_attention_tp_rankget_attention_tp_sizeis_dp_attention_enabled)MLATokenToKVPool)get_device_modulec                   @  s6   e Zd ZdddZdddZddd	Zed
d ZdS )LayerLoadingEvent
num_layersintc                 C  s(   || _ dd t|D | _t | _d S )Nc                 S  s   g | ]}t  qS  )device_moduleEvent.0_r   r   X/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/managers/cache_controller.py
<listcomp>6       z.LayerLoadingEvent.__init__.<locals>.<listcomp>)_num_layersrangeload_eventsr   r   start_eventselfr   r   r   r   __init__4   s   zLayerLoadingEvent.__init__layer_indexc                 C  s0   d|  kr| j k sJ  J | j|   d S Nr   )r"   r$   recordr'   r)   r   r   r   complete9   s   zLayerLoadingEvent.completec                 C  s   t  | j|  d S N)r   current_stream
wait_eventr$   r,   r   r   r   wait=   s   zLayerLoadingEvent.waitc                 C  s
   | j d S N)r$   r'   r   r   r   finish_event@   s   
zLayerLoadingEvent.finish_eventNr   r   )r)   r   )__name__
__module____qualname__r(   r-   r1   propertyr5   r   r   r   r   r   3   s    


r   c                   @  s:   e Zd ZdddZdd Zddd	ZdddZdd ZdS )LayerDoneCounterr   r   c                   s6    | _ d| _ fddt| jD | _d| _d| _d S )N   c                   s   g | ]}t  qS r   )r   r   r   r   r   r    J   r!   z-LayerDoneCounter.__init__.<locals>.<listcomp>r3   )r   num_countersr#   eventsproducer_indexconsumer_indexr&   r   r=   r   r(   F   s
   
zLayerDoneCounter.__init__c                 C  s2   | j d | j | _ | j| j  j sJ d| j S )N   z:Producer finish event should be ready before being reused.)r@   r>   r?   r5   queryr4   r   r   r   update_producerN   s   z LayerDoneCounter.update_producerindexc                 C  s
   || _ d S r.   )rA   )r'   rE   r   r   r   set_consumerW      
zLayerDoneCounter.set_consumer	thresholdc                 C  s$   | j dk rd S | j| j  | d S r*   )rA   r?   r1   )r'   rH   r   r   r   
wait_untilZ   s   
zLayerDoneCounter.wait_untilc                 C  s   d| _ d| _d S r2   )r@   rA   r4   r   r   r   reset_   s   
zLayerDoneCounter.resetNr6   )rE   r   )rH   r   )r7   r8   r9   r(   rD   rF   rI   rJ   r   r   r   r   r;   E   s    

	
r;   c                   @  s6   e Zd ZdZ	ddd
dZedddZdddZdS )CacheOperationr   Nhost_indicestorch.Tensordevice_indicesnode_idr   priorityOptional[int]c                 C  sN   || _ || _|g| _d | _tj| _t jd7  _|d ur!|| _d S | j| _d S )NrB   )rL   rN   node_idsdatarK   counteridrP   )r'   rL   rN   rO   rP   r   r   r   r(   h   s   zCacheOperation.__init__opsList[CacheOperation]returnc                 C  s   t | dksJ t | dkr| d S tdd | D }tdd | D }g }tdd | D }| D ]}||j q3t||d|}||_|S )	Nr   rB   c                 S     g | ]}|j qS r   )rL   r   opr   r   r   r           z,CacheOperation.merge_ops.<locals>.<listcomp>c                 S  rY   r   )rN   rZ   r   r   r   r       r\   c                 s  s    | ]}|j V  qd S r.   rP   rZ   r   r   r   	<genexpr>   s    z+CacheOperation.merge_ops.<locals>.<genexpr>r3   )lentorchcatminextendrR   rK   )rV   rL   rN   rR   rP   r[   	merged_opr   r   r   	merge_opsy   s   zCacheOperation.merge_opsotherc                 C     | j |j k S r.   r]   r'   rf   r   r   r   __lt__      zCacheOperation.__lt__r.   )rL   rM   rN   rM   rO   r   rP   rQ   )rV   rW   rX   rK   )rf   rK   )r7   r8   r9   rT   r(   staticmethodre   ri   r   r   r   r   rK   d   s    rK   c                   @  s&   e Zd ZU ded< ded< ded< dS )
HiCacheAckzdevice_module.Eventr%   r5   	List[int]rR   N)r7   r8   r9   __annotations__r   r   r   r   rl      s   
 rl   c                   @  sR   e Zd ZdZ	ddd	d
ZdddZdddZddddZddddZdd Z	dS ) TransferBufferzW
    Overlapping buffer preparation and transfer operations to improve throughput.
    r<      buffer_countr   max_buffer_sizerX   Nonec                 C  s   || _ t|d| _|| _d S )N)maxsize)
stop_eventr   buffersrr   )r'   ru   rq   rr   r   r   r   r(      s   
zTransferBuffer.__init__boolc                 C  
   | j  S r.   )rv   fullr4   r   r   r   ry      rG   zTransferBuffer.fullc                 C  rx   r.   )rv   emptyr4   r   r   r   rz      rG   zTransferBuffer.emptyTrB   c              
   C  s~   | j  s=z| jj|||d W d S  ty   |sY d S Y q  ty5 } zt| W Y d }~nd }~ww | j  rd S d S Nblocktimeout)ru   is_setrv   putr   	Exceptionloggererror)r'   itemr}   r~   er   r   r   r      s   
zTransferBuffer.putOptional[CacheOperation]c              
   C  sV   z	| j j||dW S  ty   Y d S  ty* } zt| W Y d }~d S d }~ww r{   )rv   getr   r   r   r   )r'   r}   r~   r   r   r   r   r      s   zTransferBuffer.getc                 C  s   | j j  d S r.   )rv   queueclearr4   r   r   r   r      s   zTransferBuffer.clearN)r<   rp   )rq   r   rr   r   rX   rs   rX   rw   )TrB   rX   rs   )rX   r   )
r7   r8   r9   __doc__r(   ry   rz   r   r   r   r   r   r   r   ro      s    

ro   c                   @  s,   e Zd ZdZ			ddddZdddZdS )StorageOperationr   NrL   rM   	token_idsrm   	last_hashOptional[str]
hash_valueOptional[List[str]]prefix_keysc                 C  sJ   || _ || _|| _d| _|d ur|ng | _|| _tj| _t jd7  _d S )Nr   rB   )	rL   r   r   completed_tokensr   r   r   rT   rU   )r'   rL   r   r   r   r   r   r   r   r(      s   zStorageOperation.__init__rf   'StorageOperation'c                 C  rg   r.   )rU   rh   r   r   r   ri      rj   zStorageOperation.__lt__)NNN)
rL   rM   r   rm   r   r   r   r   r   r   )rf   r   )r7   r8   r9   rT   r(   ri   r   r   r   r   r      s    r   c                      s@   e Zd Z		dd fddZdddZdd ZdddZ  ZS )PrefetchOperationN
request_idstrrL   rM   r   rm   r   r   r   r   c                   s8   || _ t | _d| _t | _t j	||||d d S )NFr   )
r   	threadingLock_lock_terminated_flagtime	monotonic
start_timesuperr(   )r'   r   rL   r   r   r   	__class__r   r   r(      s
   

zPrefetchOperation.__init__
num_tokensr   c                 C  sT   | j  | jr	 W d    dS |  j|7  _	 W d    dS 1 s#w   Y  d S )NFT)r   r   r   )r'   r   r   r   r   	increment   s   $zPrefetchOperation.incrementc                 C  s2   | j  d| _W d    d S 1 sw   Y  d S )NT)r   r   r4   r   r   r   mark_terminate   s   "z PrefetchOperation.mark_terminaterX   rw   c                 C  s   | j S r.   )r   r4   r   r   r   is_terminated   s   zPrefetchOperation.is_terminatedNN)
r   r   rL   rM   r   rm   r   r   r   r   )r   r   r   )r7   r8   r9   r(   r   r   r   __classcell__r   r   r   r   r      s    
r   c                   @  sZ  e Zd Z								dodpddZdd Zd d! Z			dqdrd"d#Zd$d% Z		dsdtd&d'Zd(d) Z			*dudvd2d3Z
dwd5d6Z		*dudxd8d9Zdyd<d=Zdzd>d?Zd{d@dAZd|d}dEdFZ		dsd~dNdOZdPdQ ZddRdSZ	ddTdUZddVdWZdXdY ZdZd[ Zdd\d]Zdd_d`Zdadb Z		dsddedfZdddgdhZdddidjZdkdl Zdmdn ZdS )HiCacheControllerwrite_through_selective N   r   rB   token_to_kv_pool_allocatorr   mem_pool_hostr   	page_sizer   tp_grouptorch.distributed.ProcessGroupload_cache_eventthreading.Eventwrite_policyr   
io_backendstorage_backendr   prefetch_threshold
model_namestorage_backend_extra_configOptional[dict]pp_rankpp_sizec              
   C  sL  || _ || _| | _|| _|| _|| _|| _d| _d | _	d | _
|| _|| _| j| _| j| _t | _| jj| _| jj| _t| j| _| j| j |dvrUtd| g | _g | _g | _g | _t | _t| j| _ t| jddd| _!t"# | _$t"# | _%|d urz| j&||	|
|d W d S  ty } ztd| |d }~ww d S )	NF)write_throughr   
write_backzInvalid write policy: 
   d   )rq   rr   )r   r   r   r   z"Failed to create storage backend: )'r   mem_pool_device_allocatorget_kvcachemem_pool_devicer   r   r   r   enable_storager   storage_backend_typer   r   _generic_page_getpage_get_func_generic_page_setpage_set_funcr   r   storage_stop_eventdevice	layer_numr;   layer_done_counterregister_layer_transfer_counter
ValueError
load_queuewrite_queueack_load_queueack_write_queueru   ro   write_bufferload_bufferr   Streamwrite_streamload_streamattach_storage_backend)r'   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r(      sZ   






zHiCacheController.__init__c                 C  s|   | j sJ | j rJ tj| jdd| _tj| jdd| _t	 | _
t	 | _t	 | _t	 | _t	 | _| j  | j  dS )zStart storage prefetch/backup threads and their queues.

        This is used by runtime attach, and also by reset when storage is enabled.
        TtargetdaemonN)r   r   r   r   Threadprefetch_thread_funcprefetch_threadbackup_thread_funcbackup_threadr   prefetch_queuebackup_queueprefetch_revoke_queueack_backup_queuehost_mem_release_queuestartr4   r   r   r   _start_storage_threadsI  s   

z(HiCacheController._start_storage_threadsc              	   C  s
  | j   z#t| dr| jd t| dr| jd t| dr'| jd W n	 ty1   Y nw g }t| dr?|| j	 t| drJ|| j
 t| drU|| j |D ]}z|jdd	 W qW tyj   Y qWw d
d |D }|rtddd |D  tddS )z}Stop storage prefetch/backup threads and drain internal queues.

        Caller should ensure no in-flight requests.
        r   Nr   prefetch_bufferr   r   prefetch_io_aux_threadr   )r~   c                 S  s"   g | ]}t |d dd  r|qS )is_alivec                   S  s   dS )NFr   r   r   r   r   <lambda>  s    zDHiCacheController._stop_storage_threads.<locals>.<listcomp>.<lambda>)getattrr   tr   r   r   r      s   " z;HiCacheController._stop_storage_threads.<locals>.<listcomp>z2Failed to stop HiCache storage threads cleanly: %sc                 S  s   g | ]
}t |d t|qS )name)r   reprr   r   r   r   r      s    z/Failed to stop HiCache storage threads cleanly.)r   sethasattrr   
put_nowaitr   r   r   appendr   r   r   joinr   r   RuntimeError)r'   threadsr   aliver   r   r   _stop_storage_threadsa  sB   







z'HiCacheController._stop_storage_threadsc           
      C  s|  | j rtdz|   W n ty } ztd|d}~ww || _ddlm} || _| ||| _| jj	o;| jj
dk| _ddlm} z||| j| j| _| j| j d| _ t|| j| _tdtd| jj| jj  | _d	| _d| _tjj| jd
| _| jdkrddl m!} tj"| j}	||	dd| _#| j$| _%| j&| _'| jdv s| jdkrt(| jj)*ddr| j+| _%| j,| _'| j-.  | /  W dS  ty=   z|   W n	 ty   Y nw zt0| drz	tj1| j# W n	 ty   Y nw d| _#W n
 ty   Y nw zt0| dr| jdurt0| jdr| j2  W n
 ty*   Y nw d| _d| _d| _ | j$| _%| j&| _' w )zAttach (enable) storage backend at runtime.

        Requirement: no in-flight requests. This call is expected to run on the scheduler
        thread (control path), not concurrently with prefetch/backup.
        z!Storage backend already attached.zTCannot attach storage backend: previous detach did not stop storage threads cleanly.Nr   )get_hash_str)StorageBackendFactoryTg?   )grouprB   )create_custom_parallel_groupgloo)group_ranksbackend)hf3fsmooncakeeicnixldynamicinterface_v1prefetch_tp_groupr   closeF)3r   r   r   r   r   $sglang.srt.mem_cache.hicache_storager   _generate_storage_configstorage_configis_mla_modeltp_rankbackup_skipsglang.srt.mem_cache.storager   create_backendr   r   register_mem_pool_hostmaxr   r   r   sizer   prefetch_capacity_limitstorage_batch_sizeprefetch_tokens_occupiedr`   distributedget_world_sizer   tp_world_size%sglang.srt.distributed.parallel_stater   get_process_group_ranksr
  r   r   r   r   rw   extra_configr   _page_get_zero_copy_page_set_zero_copyr   r   r   r   destroy_process_groupr  )
r'   r   r   r   r   r   r   r   r   r  r   r   r   r     s   







z(HiCacheController.attach_storage_backendc              
   C  s  z|    W n ty } ztd| td|d}~ww z"t| dr?| jdur?z	tj	| j W n	 ty;   Y nw d| _W n	 tyI   Y nw zt| dr`| j
dur`t| j
dr`| j
  W n tyo   td Y nw d| _
d| _d| _| j| _| j| _| j  dS )	zDetach (disable) storage backend at runtime.

        Requirement: no in-flight requests. This will stop storage threads and release
        the backend instance (best-effort close).
        zStop storage threads failed: %sz,Stop storage threads failed; detach aborted.Nr
  r   r  z(Failed to close storage backend cleanly.F)r   r   r   	exceptionr   r   r
  r`   r  r"  r   r  r   r   r   r   r   r   r   r   )r'   r   r   r   r   detach_storage_backend  sL   	





z(HiCacheController.detach_storage_backendc              
   C  sj   t  rt | _t | _t | _nt | _t | _d| _t	| j
t}t| j| j| j| j|| jjdk||dS )Nr   
page_first)r  tp_sizer   r   r  is_page_first_layoutr   r  )r   r   r  r   r&  r   dp_rankr   r   
isinstancer   r   r
   r   r   r   layout)r'   r   r   is_mla_backendr   r   r   r  9  s$   

z*HiCacheController._generate_storage_configc                 C  s   | j   | j  | j  | j  | j  | j  | j  | j	  | j
rM| j  | j  | jj  | jj  | jj  | jj  | j   | j  | j
rxtj| jdd| _tj| jdd| _| j  | j  d S d S )NTr   )ru   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r4   r   r   r   rJ   V  s6   












zHiCacheController.resetr3   rN   rM   rP   rQ   rO   rX   Optional[torch.Tensor]c                 C  s>   | j t|}|du rdS | jt|||| |   |S )zF
        Back up KV caches from device memory to host memory.
        N)r   allocr_   r   r   rK   start_writing)r'   rN   rP   rO   rL   r   r   r   writeu  s   	zHiCacheController.writers   c                 C  s   t | jdkr	d S t| j}| |\}}| j  t }t }|  t	| j
/ || j
 | j| j||| j |  |jrL|| j
 |jrU|| j
 W d    n1 s_w   Y  | jt|||j d S r*   )r_   r   rK   re   move_indicesr   r   r   r+   streamr   r1   r   backup_from_device_all_layerr   r   is_cudarecord_streamr   r   rl   rR   )r'   r[   rL   rN   r%   r5   r   r   r   r.    s*   
zHiCacheController.start_writingrL   c                 C  s6   | j t|}|du rdS | jt|||| |S )zC
        Load KV caches from host memory to device memory.
        N)r   r-  r_   r   r   rK   )r'   rL   rP   rO   rN   r   r   r   load  s   	zHiCacheController.loadr[   rK   c                 C  s   |j |j}}| jdkr|js|j| jdd}||fS | jdkrF| jjdkr8| }|	 \}}||
d|fS | jjdkrD|| fS d S | jdkrQ|| fS td	)
NkernelT)non_blockingdirectlayer_firstr   page_first_directkernel_ascendzUnsupported io backend)rL   rN   r   r3  tor   r   r*  cpusortindex_selectr   )r'   r[   rL   rN   idxr   r   r   r0    s    


zHiCacheController.move_indicesc              	   C  s  t | jdkr	dS | j }t| j}| |\}}| j  | jj| }|j	
  t| j: |j	| j t| jD ]}| j| j|||| j || q>|jr[|| j |jrd|| j W d    n1 snw   Y  | jt|j	|j|jd |S )Nr   r3   )r%   r5   rR   )r_   r   r   rD   rK   re   r0  r   r?   r%   r+   r   r1  r   r1   r#   r   r   load_to_device_per_layerr   r   r-   r3  r4  r   r   rl   r5   rR   )r'   producer_idr[   rL   rN   producer_eventir   r   r   start_loading  sB   


zHiCacheController.start_loadingc                 C  s   | j | t|S r.   )r   freer_   )r'   rN   r   r   r   evict_device  s   zHiCacheController.evict_deviceTbackup_onlyrw   c                 C  s    |st d| j| t|S )Nz.Other eviction policies are not supported yet.)r   r   rF  r_   )r'   rL   rH  r   r   r   
evict_host  s   zHiCacheController.evict_hostr   new_input_tokensrm   r   r   r   r   c                 C  s    t |||||}| j| |S )zI
        Prefetch KV caches from storage backend to host memory.
        )r   r   r   )r'   r   rL   rJ  r   r   	operationr   r   r   prefetch  s
   
zHiCacheController.prefetchc                 C  s   |   |j|jfS r.   )r   r   r   r'   rK  r   r   r   terminate_prefetch
  s   z$HiCacheController.terminate_prefetchc                 C  s8   |  dkrd S || jj}|D ]}| j| qd S r*   )numelsplitr   r   r   r   )r'   rL   pagespager   r   r   append_host_mem_release  s   z)HiCacheController.append_host_mem_releasec                 C  sh   | j |||}d}tt|D ]}|| s'td|j d||  d  n|| j7 }q|| d S )Nr   Prefetch operation  failed to retrieve page .)	r   batch_get_v1r#   r_   r   warningr   r   r   )r'   rK  hash_valuesrL   
extra_inforesultsincrD  r   r   r   r     s   z%HiCacheController._page_get_zero_copyc                   s    fdd|D } j ||}|d u rd S tt|D ]1}|| d u r6td|j d||  d  d S  j|| j	  ||  |
 j	sM d S qd S )Nc                   s   g | ]} j  qS r   )r   get_dummy_flat_data_pager   r4   r   r   r    '  s    
z7HiCacheController._generic_page_get.<locals>.<listcomp>rT  rU  rV  )r   	batch_getr#   r_   r   rX  r   r   set_from_flat_data_pager   r   )r'   rK  rY  rL   rZ  dummy_page_dst	page_datarD  r   r4   r   r   &  s(   
z#HiCacheController._generic_page_getc                 C  s   |j }tdt|j| jD ]L}|j||| j  }|j|| j |t| | j  }|j}t|d}| 	|||| |j|t|| j  krM|
   d S |rYt|dkrY||7 }qd S Nr   r   )r   r#   r_   r   r  rL   r   r   r   r   r   )r'   rK  r   rD  batch_hashesbatch_host_indicesprev_completed_tokensrZ  r   r   r   _page_transfer<  s$   
z HiCacheController._page_transferc                 C  sp   | j  s6z | jjddd}|du rW q | | | |j|jd  W n	 ty.   Y q w | j  rdS dS )zN
        Auxiliary function conducting IO operations for prefetching.
        TrB   r|   N)	r   r   r   r   rf  rS  rL   r   r   rM  r   r   r   prefetch_io_aux_funcS  s   

z&HiCacheController.prefetch_io_aux_funcc                 C  s   | j | jkrdS dS )zb
        Rate limit the prefetching operations to avoid overwhelming the storage backend.
        TF)r  r  r4   r   r   r   prefetch_rate_limitedd  s   z'HiCacheController.prefetch_rate_limitedtuple[list[str], int]c                 C  s  |j }|j}|jr|j nd }d}g }tdt|| j| j D ]h}t|| j| j  t|}||| }	g }
tdt|	| jD ]}| 	|	||| j  |}|

| q@t|d}| j|
|}||
d |  ||| j 7 }|t|
k r| ||fS |rt|dkr||
7 }q ||fS rb  )r   r   r   copyr#   r_   r   r  rb   r   r   r   r   batch_existsrc   )r'   rK  r   tokens_to_fetchr   storage_query_countr   r   endbatch_tokensrc  rD  rZ  hit_page_numr   r   r   _storage_hit_queryn  s:   
z$HiCacheController._storage_hit_queryc                 C  sv  t  | _tj| jdd| _| j  | j r| j	
 sz| j	jddd}|du r+W q| |\}}| jdkrPtj|tjd}tjj|tjjj| jd | }|| jk rp| j|j | |j td|j d	| d
 n2|d|| j  |_ | |j|d  |jd| |_tdt!|j  d|j d | j| W n	 t"y   Y qw | j r| j	
 rdS dS )zT
        Manage prefetching operations from storage backend to host memory.
        Tr   rB   r|   N)dtype)r[   r   zRevoking prefetch for request z due to insufficient hits (z).zPrefetching z pages for request rV  )#r   r   r   r   rg  r   r   r   r   r   rz   r   rq  r  r`   tensorr   r  
all_reduceReduceOpMINr
  r   r   r   r   r   rS  rL   r   debugr   r   r_   r   )r'   rK  r   storage_hit_countstorage_hit_count_tensorr   r   r   r     sV   


z&HiCacheController.prefetch_thread_funcr   r   c                 C  s"   t ||||d}| j| |jS )zF
        Write KV caches from host memory to storage backend.
        )r   r   )r   r   r   rU   )r'   rL   r   r   r   rK  r   r   r   write_storage  s
   
zHiCacheController.write_storagec                   s*    fddt t|D }j||S )Nc                   s"   g | ]}j  |j  qS r   )r   get_data_pager   )r   rD  rL   r'   r   r   r      s    z7HiCacheController._generic_page_set.<locals>.<listcomp>)r#   r_   r   	batch_set)r'   rY  rL   rZ  rS   r   r|  r   r     s   
z#HiCacheController._generic_page_setc                 C  s   t | j|||S r.   )allr   batch_set_v1)r'   rY  rL   rZ  r   r   r   r!    s   z%HiCacheController._page_set_zero_copyc                 C  s   |j }tdt|j| jD ]Q}|j||| j  }|j|| j |t| | j  }t|d}| |||}|sFt	
dt| d  d S |rRt|dkrR||7 }| j| jt| 7  _qd S )Nr   r   zWrite page to storage: z pages failed.)r   r#   r_   r   r  rL   r   r   r   r   rX  r   )r'   rK  r   rD  rc  rd  rZ  successr   r   r   _page_backup  s"   
zHiCacheController._page_backupc                 C  sl   | j  s4z| jjddd}|du rW q | js| | | j| W n	 ty,   Y q w | j  rdS dS )zO
        Manage backup operations from host memory to storage backend.
        TrB   r|   N)	r   r   r   r   r  r  r   r   r   rM  r   r   r   r     s   

z$HiCacheController.backup_thread_func)r   r   Nr   NNr   rB   )r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   )r   NN)r   r   r   r   r   r   r   r   r   )r   r   r   r   r2   )rN   rM   rP   rQ   rO   r   rX   r,  r   )rL   rM   rP   rQ   rO   r   rX   r,  )r[   rK   )rX   r   )rN   rM   rX   r   )T)rL   rM   rH  rw   rX   r   )r   r   rL   rM   rJ  rm   r   r   r   r   rX   r   )rL   rM   r.   r   )rX   ri  )
rL   rM   r   rm   r   r   r   r   rX   r   ) r7   r8   r9   r(   r   r   r   r$  r  rJ   r/  r.  r5  r0  rE  rG  rI  rL  rN  rS  r   r   rf  rg  rh  rq  r   rz  r   r!  r  r   r   r   r   r   r      sh    	P1u7"



'





 6r   ).
__future__r   loggingr   r   r   r   r   r   typingr   r   r   r	   r`   r  r
   r   sglang.srt.mem_cache.allocatorr   %sglang.srt.mem_cache.memory_pool_hostr   sglang.srt.distributedr   r   sglang.srt.layers.dp_attentionr   r   r   r    sglang.srt.mem_cache.memory_poolr   sglang.srt.utilsr   	getLoggerr7   r   r   r   r;   rK   rl   ro   r   r   r   r   r   r   r   <module>   s4    
)+ 