o
    ÔÙ¾i¼Ø  ã                   @  s6  d dl mZ d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	m
Z
 d dlmZmZmZmZ d dlZd dlmZmZ d dlmZmZmZmZmZmZ d dlmZmZmZ d dlm Z m!Z!m"Z" d d	l#m$Z$m%Z%m&Z&m'Z'm(Z( d d
l)m*Z* d dl+m,Z, erŒd dl-m.Z. d dl/m0Z0 e 1e2¡Z3G dd„ de$ƒZ4dS )é    )ÚannotationsN)ÚEmpty)ÚTYPE_CHECKINGÚDictÚListÚOptional)ÚHiCacheControllerÚPrefetchOperation)ÚEvictParamsÚEvictResultÚInsertParamsÚInsertResultÚMatchPrefixParamsÚMatchResult)ÚMHATokenToKVPoolÚMLATokenToKVPoolÚNSATokenToKVPool)ÚMHATokenToKVPoolHostÚMLATokenToKVPoolHostÚNSATokenToKVPoolHost)Ú
RadixCacheÚRadixKeyÚTreeNodeÚcompute_node_hash_valuesÚsplit_node_hash_value)ÚStorageMetricsCollector)Úbind_to_closest_numa_node_cuda)ÚCacheInitParams)Ú
ServerArgsc                      sÈ  e Zd Zd‡ fdd„Zdd„ Zd‘dd„Z				d’d“d!d"„Zd”d#d$„Zd%d&„ Zd'd(„ Z	d•d.d/„Z
d–d1d2„Z‡ fd3d4„Zd—d7d8„Zd˜d9d:„Zd™d—d<d=„Zd—d>d?„Zd™d—d@dA„Zd™dBdC„ZdDdE„ ZdFdG„ Zd—dHdI„Zd—dJdK„Zd—dLdM„ZdšdPdQ„Zd—dRdS„Zd—dTdU„Zd›dWdX„Z	dœdd[d\„Z	dœdžd_d`„ZdŸdadb„Zdcdd„ Zdedf„ Z d didj„Z!d dkdl„Z"d¡dndo„Z#d¢dpdq„Z$d£drds„Z%d¤dudv„Z&		d¥d¦d}d~„Z'd§dd‚„Z(d§dƒd„„Z)d¨d‡dˆ„Z*d©d‹dŒ„Z+dªdŽd„Z,‡  Z-S )«ÚHiRadixCacheÚparamsr   Úserver_argsr   c                   s  |j | _|jdkr|jdkrd|_t d¡ |jstƒ  |j| _|j	 
¡ | _t| jtƒr>t| j|j|j| j|j|jd| _n4t| jtƒrVt| j|j|j| j|j|jd| _nt| jtƒrnt| j|j|j| j|j|jd| _ntdƒ‚|j| _tjj| jd| _|j| _|j| _|jd u| _ | j o’|j | _!|j"| _"|  #|j$¡\}}}}}| j%| _&|j'| _(t) *¡ | _+t,|j	| j| j| j| j+|j-|j|j||j.|| j| jd| _/| j0|j||||| j | j!| j"d	 i | _1i | _2i | _3i | _4i | _5|j-d
krôdnd| _6d| _7t8 9| j:¡ t;ƒ | _<t=ƒ j>|d d S )NÚdirectÚ
page_firstÚpage_first_directz`Page first layout is not supported with direct IO backend, switching to page first direct layout)Úallocator_typez*HiRadixCache only supports MHA and MLA yet©Úgroup)	Úload_cache_eventÚwrite_policyÚ
io_backendÚstorage_backendÚprefetch_thresholdÚ
model_nameÚstorage_backend_extra_configÚpp_rankÚpp_size©r+   r,   Úprefetch_timeout_baseÚprefetch_timeout_per_ki_tokenÚ hicache_storage_pass_prefix_keysÚenable_storageÚenable_storage_metricsÚextra_metric_labelsÚwrite_throughé   é   é
   )r    )?Úenable_metricsÚ_enable_metrics_flagÚhicache_io_backendÚhicache_mem_layoutÚloggerÚwarningÚdisable_hicache_numa_detectr   Ú	page_sizeÚtoken_to_kv_pool_allocatorÚget_kvcacheÚkv_cacheÚ
isinstancer   r   Úhicache_ratioÚhicache_sizeÚhicache_storage_backendÚtoken_to_kv_pool_hostr   r   r   r   Ú
ValueErrorÚtp_cache_groupÚtp_groupÚtorchÚdistributedÚget_world_sizeÚtp_world_sizer/   r0   r5   r6   r7   Ú#_parse_storage_backend_extra_configÚ$hicache_storage_backend_extra_configÚ#_prefetch_timeout_check_linear_funcÚis_prefetch_timeoutÚhicache_storage_prefetch_policyÚprefetch_stop_policyÚ	threadingÚEventr(   r   Úhicache_write_policyÚserved_model_nameÚcache_controllerÚ_apply_storage_runtime_configÚongoing_write_throughÚongoing_load_backÚongoing_prefetchÚongoing_backupÚprefetch_loaded_tokens_by_reqidÚwrite_through_thresholdÚload_back_thresholdÚatexitÚregisterÚshutdownÚsetÚevictable_host_leavesÚsuperÚ__init__)Úselfr    r!   Úextra_configr,   r2   r3   r4   ©Ú	__class__© úV/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/mem_cache/hiradix_cache.pyrl   5   s¼   

ÿ
ú
ú
ú	ÿú
óøÿzHiRadixCache.__init__c                 C  s:   z| j r|  ¡  W dS W dS  ty   t d¡ Y dS w )zðBest-effort auto-detach of storage backend on process shutdown.

        This keeps startup and runtime behavior consistent: if a backend was attached
        (either via CLI args or via admin API), we attempt to detach it on exit.
        z5Failed to detach storage backend on process shutdown.N)r5   Údetach_storage_backendÚ	Exceptionr@   Ú	exception©rm   rq   rq   rr   rh   ª   s   ÿÿzHiRadixCache.shutdownr+   úOptional[str]r,   Úintr2   Úfloatr3   r4   Úboolr5   r6   r7   úOptional[Dict[str, str]]ÚreturnÚNonec                C  sÖ   | j d | }	|| _|| _|| _|	| _|| _|| _| jri|| jj| jj	| jj
| jjdœ}
|r3|
 |¡ t| dd ƒ}|d u rEt|
d| _d S t|j ¡ ƒt|
 ¡ ƒkrW|
|_d S t dt|j ¡ ƒt|
 ¡ ƒ¡ d S d S )Ni   )r+   Útp_rankÚdp_rankr/   r0   Ústorage_metrics_collector)ÚlabelszgStorage metrics labels changed (%s -> %s). Keep existing labels to avoid duplicate metric registration.)rC   r5   r,   r2   Úprefetch_timeout_per_pager4   r6   r]   r~   r   r/   r0   ÚupdateÚgetattrr   r€   ri   r   Úkeysr@   rA   Úsorted)rm   r+   r,   r2   r3   r4   r5   r6   r7   r‚   r   Úexisting_collectorrq   rq   rr   r^   ¶   s8   ÿû


üðz*HiRadixCache._apply_storage_runtime_configNÚstrÚ!storage_backend_extra_config_jsonr\   rW   r[   útuple[bool, str]c              
   C  s*  |durg d¢}||vrdd|›d|› dfS |dur.g d¢}||vr.dd|›d|› dfS | j rn| jj}||krc|durH|| _t d	|› ¡ |dura|| j_|d
krVdnd| _t d|› ¡ dS dd|› d|› dfS |dur}|| _t d	|› ¡ |dur–|| j_|d
kr‹dnd| _t d|› ¡ t d|› ¡ z|  |¡\}}	}
}}W n$ t	yÎ } zt 
d|› ¡ dd|› d|› fW  Y d}~S d}~ww z| jj||	||d W n( t	y } zt 
d|› d|› ¡ dd|› d|› fW  Y d}~S d}~ww | j||	|
||d| j| jd dS )zòAttach (enable) storage backend at runtime.

        This will start storage threads inside `HiCacheController` and enable
        prefetch/backup paths. Caller must ensure there are no running/queued
        requests to avoid races.
        N)Úbest_effortÚwait_completeÚtimeoutFz)Invalid hicache_storage_prefetch_policy: z. Expected one of Ú.)Ú
write_backr8   Úwrite_through_selectivezInvalid hicache_write_policy: z'Set hicache_storage_prefetch_policy to r8   r9   r:   zSet hicache_write_policy to )TzLHiCache storage backend already enabled with same backend; policies updated.z9HiCache storage backend is already enabled with backend 'z$'. Cannot attach different backend 'z'. Detach first.z#Attaching HiCache storage backend: z3Failed to parse storage_backend_extra_config_json: z3Failed to parse storage_backend_extra_config_json 'z': )r+   r,   r-   r.   z"Failed to attach storage backend 'Tr1   )Tz.Attached HiCache storage backend successfully.)r5   r]   Ústorage_backend_typerX   r@   Úinfor)   rd   rS   rt   ru   Úattach_storage_backendr^   r=   r7   )rm   r+   r‰   r\   rW   r[   ÚallowedÚcurrent_backendrn   r,   r2   r3   r4   Úerq   rq   rr   r“   ä   s°   ÿþÿþ	ÿÿÿþÿÿÿúþ€þ
üÿ €üø
z#HiRadixCache.attach_storage_backendc              
   C  st   z|   ¡  | j ¡  W n ty) } zt d¡ dd|› fW  Y d}~S d}~ww |   ¡  |  ¡  d| _d| _dS )z†Detach (disable) storage backend at runtime.

        Caller must ensure there are no running/queued requests to avoid races.
        z!Failed to detach storage backend.Fz*Failed to detach HiCache storage backend: N)Tz.Detached HiCache storage backend successfully.)	Ú#_drain_storage_control_queues_localr]   rs   rt   r@   ru   Ú"_force_release_pending_storage_opsr5   r6   ©rm   r–   rq   rq   rr   rs   [  s   
€ýz#HiRadixCache.detach_storage_backendc           
   	   C  s–  | j }z}t| j ¡ ƒD ]s\}}z|\}}}}W n ty'   | j |d¡ Y qw z|dur3|j |¡ W n tyC   t 	d|¡ Y nw z| 
¡  W n tyY   t 	d|¡ Y nw z| jt|ƒ8  _|jdk rld|_W n	 tyv   Y nw | j |d¡ qW n tyŽ   t 	d¡ Y nw z,t| j ¡ ƒD ]!\}}	z|	 
¡  W n ty°   t 	d|¡ Y nw | j |d¡ q—W dS  tyÊ   t 	d¡ Y dS w )a  Force release any leftover pending prefetch/backup bookkeeping.

        This is a safety net for detach/shutdown paths. It assumes storage threads
        have been stopped already (via controller.detach), so no concurrent access
        to these structures should happen.
        Nz+Failed to free host indices for prefetch %sz1Failed to release host protection for prefetch %sr   z*Force release pending prefetch ops failed.z2Failed to release host protection for backup op %sz(Force release pending backup ops failed.)r]   Úlistra   Úitemsrt   ÚpopÚmem_pool_hostÚfreer@   ru   Úrelease_hostÚprefetch_tokens_occupiedÚlenrb   )
rm   ÚccÚreq_idr’   Úlast_host_nodeÚ	token_idsÚhost_indicesÚ
_operationÚack_idÚnoderq   rq   rr   r˜   x  sh   ý€ÿÿÿÿ
€ÿâÿÿÿùÿz/HiRadixCache._force_release_pending_storage_opsc                 C  s   | j ddddd dS )zÜDrain storage control queues without TP synchronization.

        This is intended for shutdown/detach paths where we want to make best-effort
        cleanup even if queue sizes temporarily differ across ranks.
        NF©Ún_revokeÚn_backupÚ	n_releaseÚlog_metrics)Ú"_drain_storage_control_queues_implrv   rq   rq   rr   r—   ³  s   
üz0HiRadixCache._drain_storage_control_queues_localr«   úOptional[int]r¬   r­   r®   c                   s\   ˆj ‰ddd„‰ ‡ ‡‡‡fdd„}‡ ‡‡‡‡fdd„}‡ ‡‡fd	d
„}|ƒ  |ƒ  |ƒ  d S )NÚlimitr°   c                 s  s^    d}|d u s||k r-z|   ¡ }W n
 ty   Y d S w |d7 }|V  |d u s||k sd S d S ©Nr   r9   )Ú
get_nowaitr   )Úqr±   ÚdrainedÚitemrq   rq   rr   Ú_drain_queueÉ  s   €ÿúzEHiRadixCache._drain_storage_control_queues_impl.<locals>._drain_queuec                    sb   ˆ ˆj ˆƒD ](} ˆj | d ¡}|d ur.|\}}}}| ¡  ˆ jt|ƒ8  _ˆjdk r.dˆ_qd S ©Nr   )Úprefetch_revoke_queuera   rœ   rŸ   r    r¡   )r£   r’   r¤   r¥   Ú_)r·   r¢   r«   rm   rq   rr   Ú_drain_revokeÓ  s   
€ùzFHiRadixCache._drain_storage_control_queues_impl.<locals>._drain_revokec                    sR   ˆ ˆj ˆƒD ] } | j}ˆj |d ¡}|d ur| ¡  ˆr&ˆjr&ˆj | j¡ qd S ©N)	Úack_backup_queueÚidrb   rœ   rŸ   r6   r€   Úlog_backuped_tokensÚcompleted_tokens)Ú	operationr¨   Úentry)r·   r¢   r®   r¬   rm   rq   rr   Ú_drain_backupÝ  s   
ÿ€úzFHiRadixCache._drain_storage_control_queues_impl.<locals>._drain_backupc                    sF   g } ˆ ˆj ˆƒD ]}|  |¡ q| r!tj| dd}ˆj |¡ d S d S )Nr   )Údim)Úhost_mem_release_queueÚappendrO   Úcatr   rž   )Úhost_indices_listr¦   )r·   r¢   r­   rq   rr   Ú_drain_releaseè  s   þzGHiRadixCache._drain_storage_control_queues_impl.<locals>._drain_release)r±   r°   )r]   )rm   r«   r¬   r­   r®   r»   rÃ   rÉ   rq   )r·   r¢   r®   r¬   r­   r«   rm   rr   r¯   À  s   



z/HiRadixCache._drain_storage_control_queues_implr.   c              
   C  sÎ  i }|r„zi|  d¡rg|dd… }tj |¡d  ¡ }t||dkr"dndƒ9}|dkr0t |¡}n'|dkr>ddl}| |¡}n|d	v rLddl	}| 
|¡}ntd
|› d|› dƒ‚W d  ƒ n1 saw   Y  nt |¡}W n tyƒ } z
t d|› ¡ |‚d}~ww | dd¡}	| dd¡}
| dd¡}| dd¡}t|	tƒs«tdt|	ƒj› ƒ‚t|
ttfƒs¼tdt|
ƒj› ƒ‚t|ttfƒsÍtdt|ƒj› ƒ‚t|tƒsÜtdt|ƒj› ƒ‚||	t|
ƒt|ƒ|fS )ac  
        Parse storage backend extra config JSON and extract specific parameters.

        Args:
            storage_backend_extra_config: JSON string containing extra configuration

        Returns:
            tuple: (extra_config_dict, prefetch_threshold, prefetch_timeout_base, prefetch_timeout_per_ki_token, hicache_storage_pass_prefix_keys)
        ú@r9   Nz.tomlÚrbÚrz.jsonr   )z.yamlz.ymlzUnsupported config file z (config format: ú)z#Invalid backend extra config JSON: r,   é   r2   r3   g      Ð?r4   Fz$prefetch_threshold must be int, got z*prefetch_timeout_base must be number, got z2prefetch_timeout_per_ki_token must be number, got z3hicache_storage_pass_prefix_keys must be bool, got )Ú
startswithÚosÚpathÚsplitextÚlowerÚopenÚjsonÚloadÚtomllibÚyamlÚ	safe_loadrL   Úloadsrt   r@   Úerrorrœ   rG   rx   ÚtypeÚ__name__ry   rz   )rm   r.   rn   rÑ   ÚextÚfr×   rØ   r–   r,   r2   r3   r4   rq   rq   rr   rS   ô  sx   
ÿ€ô€
€€þÿÿ
ÿÿÿ
ÿÿûz0HiRadixCache._parse_storage_backend_extra_configc                   s<   dt _| j ¡  | j ¡  | j ¡  | j ¡  tƒ  ¡  d S r¸   )	r   Úcounterr]   ÚresetrK   Úclearrc   rj   rk   rv   ro   rq   rr   rá   A  s   



zHiRadixCache.resetr©   r   c                 C  s*   d}|| j kr|j}|d7 }|| j ks|S r²   )Ú	root_nodeÚparent)rm   r©   Úheightrq   rq   rr   Ú
get_heightJ  s   

þzHiRadixCache.get_heightc              
   C  s˜   | j rEz&t| jjdƒr| jj ¡  t d¡ W dS t dt| jjƒj	› d¡ W dS  t
yD } zt d|› ¡ W Y d }~dS d }~ww t d¡ dS )	Nrâ   z8Hierarchical cache storage backend cleared successfully!TzStorage backend z" does not support clear operation.Fz4Failed to clear hierarchical cache storage backend: z2Hierarchical cache storage backend is not enabled.)r5   Úhasattrr]   r+   râ   r@   r’   rA   rÜ   rÝ   rt   rÛ   r™   rq   rq   rr   Úclear_storage_backendQ  s&   ÿÿ€þ
z"HiRadixCache.clear_storage_backendFc                 C  s†   | j j|j|jd}|d u r |  t|jƒ¡ | j j|j|jd}|d urA||_t|jƒdks0J ‚|| j|j< |s=|  |¡ t|ƒS dS )N)Údevice_indicesÚnode_idr   )	r]   ÚwriteÚvaluer¾   Ú
evict_hostr¡   Ú
host_valuer_   Úinc_lock_ref)rm   r©   r   r¦   rq   rq   rr   Úwrite_backupg  s$   þþ
þzHiRadixCache.write_backupc                 C  sD   | j r	| |j¡nd }| j |j|j|j|¡}|| j|< | 	¡  d S r¼   )
r4   Úget_prefix_hash_valuesrä   r]   Úwrite_storagerî   ÚkeyÚ
hash_valuerb   Úprotect_host)rm   r©   Úprefix_keysÚoperation_idrq   rq   rr   Úwrite_backup_storage~  s   ÿýÿ
z!HiRadixCache.write_backup_storagec                 C  sJ   | j jdks|r
d S | jd7  _|js!|j| jkr#|  |¡ d S d S d S )Nr   r9   )r]   r)   Ú	hit_countÚbackupedrd   rð   )rm   r©   Úchunkedrq   rq   rr   Ú_inc_hit_count‹  s   ýzHiRadixCache._inc_hit_countc           	      C  sp  |rBt | jƒdkr@| jjD ]\}}}| ¡  |D ]}| j |¡}| jr(|  |¡ qq| jj ¡  t | jƒdks9J ‚t | jƒdks	d S t | jƒdkrKd S d}| jjD ]\}}}| 	¡ s\ n|d7 }qQt
j|t
jdd}| jdkr|t
jj|t
jjj| jd t| ¡ ƒ}|dkr¶| jj d¡\}}}| ¡  |D ]}| j |¡}|  |¡ | jr«|  |¡ q–|d8 }|dks†d S d S )Nr   r9   Úcpu©ÚdtypeÚdevice©Úopr'   )r¡   r_   r]   Úack_write_queueÚsynchronizerœ   r5   rø   râ   ÚqueryrO   Útensorrx   rR   rP   Ú
all_reduceÚReduceOpÚMINrN   r¶   Údec_lock_ref)	rm   r   rº   Úfinish_eventÚack_listr¨   Úbackuped_nodeÚfinish_countÚ
queue_sizerq   rq   rr   Úwriting_check–  sP   
€ýø	

ý

€øzHiRadixCache.writing_checkc                 C  s^   d}| j jD ]\}}}| ¡ s n|d7 }|D ]}| j |¡}|  |¡ qq| j jd |…= d S r²   )r]   Úack_load_queuer  r`   rœ   r
  )rm   r  rº   r  r  r¨   Úend_noderq   rq   rr   Úloading_checkÁ  s   þzHiRadixCache.loading_checkc                 C  s   | j S r¼   )Úevictable_size_rv   rq   rq   rr   Úevictable_sizeÐ  s   zHiRadixCache.evictable_sizec                 C  sŽ   | j rdS d}|| jkrE|jdkr,|  jt|jƒ8  _|  jt|jƒ7  _|t|jƒ8 }| jd7  _|  |¡ |  |¡ |j	}|| jks|S r²   ©
Údisablerã   Úlock_refr  r¡   ró   Úprotected_size_Ú_update_leaf_statusÚ_update_host_leaf_statusrä   ©rm   r©   Údeltarq   rq   rr   rï   Ó  s   




ø	zHiRadixCache.inc_lock_refc                 C  sª   | j rdS d}|| jkrS|jdkr,|  jt|jƒ7  _|  jt|jƒ8  _|t|jƒ7 }| jd8  _|  |¡ |  |¡ |j	d u rK|| ju sKJ dƒ‚|j	}|| jks|S )Nr   r9   z-This request holds the node from another treer  r  rq   rq   rr   r
  ã  s$   




ÿ
ôzHiRadixCache.dec_lock_refc                 C  sz   |j r|jdkr|| jv r| j |¡ d S |j ¡ D ]}|j r-|| jv r*| j |¡  d S q|| jvr;| j |¡ d S d S r¸   )Úevictedr  rj   ÚremoveÚchildrenÚvaluesÚadd)rm   r©   Úchildrq   rq   rr   r  ÷  s   

ý
ÿz%HiRadixCache._update_host_leaf_statusr
   r   c                   sV  t  ¡ }|j}tˆ jƒ}‡ fdd„|D ƒ}t |¡ d}g }||k r…t|ƒr…t |¡\}}	|	j	dkr3q|	j
sSˆ jjdkrK|ˆ j|	dd7 }| |	¡ n|ˆ  |	¡7 }n|ˆ  |	¡7 }|	jj ¡ D ]}
|
|v rgq`|
jsl nq`ˆ j |	j¡}t |||	jf¡ ||k r…t|ƒs&ˆ jjdkr ˆ jdd |D ]}|j
sšJ ‚ˆ  |¡ q“ˆ  ||¡ t|dS )Nc                   ó   g | ]
}ˆ j  |¡|f‘qS rq   ©Úeviction_strategyÚget_priority©Ú.0r©   rv   rq   rr   Ú
<listcomp>
  ó    ÿz&HiRadixCache.evict.<locals>.<listcomp>r   r   T)r   )Únum_tokens_evicted)ÚtimeÚperf_counterÚ
num_tokensrš   Úevictable_leavesÚheapqÚheapifyr¡   Úheappopr  rú   r]   r)   rð   rÆ   Ú_evict_regularÚ_evict_backupedrä   r   r!  r  r&  r'  Úheappushr  Úupdate_eviction_metricsr   )rm   r    Ú
start_timer/  ÚleavesÚeviction_heapÚnum_evictedÚwrite_back_nodesÚ	_priorityÚxr#  Únew_priorityr©   rq   rv   rr   Úevict  sF   

ÿ

ÿè

zHiRadixCache.evictc                 C  sR   | j  |j¡}|dksJ ‚|  j|8  _d |_|  |¡ |  |¡ |  |j¡ |S r¸   )r]   Úevict_devicerì   r  r  r  rä   ©rm   r©   r;  rq   rq   rr   r5  4  s   

zHiRadixCache._evict_backupedc                 C  s(   | j j |j¡ t|jƒ}|  |¡ |S r¼   )r]   Úmem_pool_device_allocatorrž   rì   r¡   Ú_delete_leafrB  rq   rq   rr   r4  @  s   

zHiRadixCache._evict_regularr/  c           
        s(  t ˆ jƒ}‡ fdd„|D ƒ}t |¡ d}||k rŽt|ƒrt |¡\}}|ˆ jkr+d S |js/q|jdkr5q|ˆ j	 
|j¡7 }ˆ  |j¡}|jj |d ¡}||ksWJ d|› ƒ‚|ˆ jv rbˆ j |¡ ˆ  |j¡ t|jjƒdkr„|jjr„ˆ j |j¡}	t ||	|jf¡ ||k r’t|ƒsd S d S d S d S )Nc                   r$  rq   r%  r(  rv   rq   rr   r*  I  r+  z+HiRadixCache.evict_host.<locals>.<listcomp>r   z parent does not have child key, )rš   rj   r1  r2  r¡   r3  rã   r  Úhost_ref_counterr]   rí   rî   Úget_child_key_fnró   rä   r   rœ   r  r  r&  r'  r6  )
rm   r/  r9  r:  r;  r=  r>  ró   Úvr?  rq   rv   rr   rí   G  s2   

ÿ



 ézHiRadixCache.evict_hostÚ	mem_quotaúOptional[torch.Tensor]c                 C  sz  t  ¡ }|}g }|jr|jsJ dƒ‚| d|¡ |j}|js|}|  |¡}t dd„ |D ƒ¡}t	|ƒ| j
k sD|d urCt	|ƒ|| krKnn|  |¡ d S | jj||jd}	|	d u rk|  tt	|ƒd¡ | jj||jd}	|  |¡ |	d u rvd S || j|j< d}
|D ]}|	|
|
t	|jƒ … |_|
t	|jƒ7 }
q€|  jt	|	ƒ7  _|  |¡ | jd ur»| j t  ¡ | ¡ | j t	|	ƒ¡ |	S )Nz7No backup available on evicted nodes, should not happenr   c                 S  s   g | ]}|j ‘qS rq   )rî   )r)  Únrq   rq   rr   r*  }  s    z*HiRadixCache.load_back.<locals>.<listcomp>F)r¦   rê   )r/  )r-  r.  r  rú   Úinserträ   rï   rO   rÇ   r¡   re   r
  r]   rÖ   r¾   r@  r
   r`   rî   rì   r  Úmetrics_collectorÚobserve_load_back_durationÚincrement_load_back_num_tokens)rm   r©   rH  r8  Úlast_hit_nodeÚnodes_to_loadÚancester_noder  r¦   ré   Úoffsetrq   rq   rr   Ú	load_backh  sT   ÿþû

ÿÿ



ÿzHiRadixCache.load_backÚ	last_nodeÚhost_hit_lengthc                 C  sl   |}|j r*|  ||¡}|d ur!t dt|ƒ› d|j› ¡ ||fS |j r*|j}|j s$tjdtj	| j
d|fS )Nzloading back z tokens for node ©r   rþ   )r  rS  r@   Údebugr¡   r¾   rä   rO   ÚemptyÚint64r   )rm   rT  rU  rH  rº   Úloading_valuesrq   rq   rr   Úinit_load_back¢  s   ÿÿþzHiRadixCache.init_load_backc                 C  s
   | j  ¡ S )z—
        Notify the cache controller to start the KV cache loading.
        Return the consumer index for the schedule batch manager to track.
        )r]   Ústart_loadingrv   rq   rq   rr   Úready_to_load_host_cache¹  s   
z%HiRadixCache.ready_to_load_host_cachec                 C  s@   |   ¡  |  ¡  | jr|  ¡  | jr| j | jj 	¡ ¡ d S d S r¼   )
r  r  r5   Údrain_storage_control_queuesr6   r€   Úlog_storage_metricsr]   r+   Ú	get_statsrv   rq   rq   rr   Úcheck_hicache_eventsÀ  s   
ÿÿz!HiRadixCache.check_hicache_eventsc                 C  s|   | j }tj|j ¡ |j ¡ |j ¡ gtjd}| jdkr)tj	j
|tj	jj| jd tt| ¡ ƒ\}}}| j|||dd dS )zŽ
        Combine prefetch revoke, backup ack, and host mem release checks
        to minimize TP synchronization and Python overhead.
        ©rÿ   r9   r  Trª   N)r]   rO   r  r¹   Úqsizer½   rÅ   rx   rR   rP   r  r  r	  rN   ÚmapÚtolistr¯   )rm   r¢   Úqsizesr«   r¬   r­   rq   rq   rr   r^  Ê  s&   ýú
ÿ
üz)HiRadixCache.drain_storage_control_queuesrÁ   r	   c                 C  s$   t  ¡ |j | jt|jƒ| j  kS r¼   )r-  Ú	monotonicr8  r2   r¡   rô   r‚   )rm   rÁ   rq   rq   rr   rU   ç  s   ÿÿÿz0HiRadixCache._prefetch_timeout_check_linear_funcc                 C  sâ   d}| j dkr	|S t|jƒdkrd}n|jt|jƒ| j k}| j dkr&|}n| j dkr3|p1|  |¡}ndS | ¡ }| jdkrktj	dt
|ƒ t
|ƒgtj
d}tjj|tjjj| jd	 |d  ¡ dk}|d  ¡ dk}|pn|}|S )
NTr‹   r   FrŒ   r   r9   rb  r  )rX   r¡   rô   rÀ   rC   rV   Úis_terminatedrR   rO   r  rx   rP   r  r  ÚMAXrN   r¶   )rm   rÁ   Úcan_terminateÚ	completedÚoperation_terminatedÚstatesrq   rq   rr   Úcan_terminate_prefetchï  s6   
ÿ


þýz#HiRadixCache.can_terminate_prefetchr£   c              	   C  s\  || j vrdS | j | \}}}}|jd u rdS |  |¡sdS | j |¡\}}t d|› d|› d¡ |}| jdkrRtj	|tj
d}	tjj|	tjjj| jd |	 ¡ }|d |… }
|d |… }|  |t|
|jjd	||d || j … ¡}| jj |d |… ¡ | j |||… ¡ | ¡  | j |= | j jt|ƒ8  _|| }|| j|< | jr¬| j |¡ dS )
NTFz	Prefetch z completed with z tokensr9   rb  r  )r¥   Ú	extra_key) ra   r¦   rn  r]   Úterminate_prefetchr@   rW  rR   rO   r  rx   rP   r  r  r	  rN   r¶   Ú_insert_helper_hostr   ró   ro  rC   r   rž   Úappend_host_mem_releaserŸ   r    r¡   rc   r6   r€   Úlog_prefetched_tokens)rm   r£   r¤   r¥   r¦   rÁ   rÀ   rô   Úmin_completed_tokensÚcompleted_tokens_tensorÚfetched_token_idsÚwritten_indicesÚmatched_lengthÚloaded_from_storagerq   rq   rr   Úcheck_prefetch_progress  s\   
ÿ

ÿ
ÿýÿú	
ÿ
z$HiRadixCache.check_prefetch_progressc                 C  s:   || j vrd S | j | \}}}}|jd u rd S | ¡  d S r¼   )ra   r¦   Úmark_terminate)rm   r£   rº   rÁ   rq   rq   rr   rp  U  s   

zHiRadixCache.terminate_prefetchc                 C  s   | j  |d¡S )zÞ
        Pop and return the number of tokens loaded from storage for a request.
        Returns 0 if no prefetch was done or was revoked.
        This should be called after check_prefetch_progress() returns True.
        r   )rc   rœ   )rm   r£   rq   rq   rr   Úpop_prefetch_loaded_tokens^  s   z'HiRadixCache.pop_prefetch_loaded_tokensr   c           
      C  sè   |j }tjdtj| jd}|  |¡\}}| jst|ƒdkr't|| j	| j	ddS | j
dkr<t|ƒ| j
 | j
 }|d |… }|  | j	|¡\}}|rMt |¡}n|}d}|}	|jrc|t|jƒ7 }|j}|jsV|	jsl|	j}	|	jrft|||	|dS )NrV  rþ   r   )ré   Úlast_device_noder¤   rU  r9   )ró   rO   rX  rY  r   Úmaybe_bigram_convertr  r¡   r   rã   rC   Ú_match_prefix_helperrÇ   r  rî   rä   rú   )
rm   r    ró   Úempty_valuerº   Úpage_aligned_lenrì   rT  rU  r¤   rq   rq   rr   Úmatch_prefixf  s@   ü
þÿüzHiRadixCache.match_prefixr¤   Únew_input_tokensú	List[int]Ú	last_hashrö   úOptional[List[str]]c           	      C  sÈ   t |ƒt |ƒ| j  }|d |… }| jr|| jk s| j ¡ r d S | ¡  | jj |¡}|d u r;|  	|¡ | jj |¡}|d u rE| 
¡  d S | j |||||¡}||||f| j|< | j jt |ƒ7  _d S r¼   )r¡   rC   r5   r,   r]   Úprefetch_rate_limitedrõ   r   Úallocrí   rŸ   Úprefetchra   r    )	rm   r£   r¤   rƒ  r…  rö   Úprefetch_lengthr¦   rÁ   rq   rq   rr   Úprefetch_from_storage‹  s6   	ÿÿ
ÿ

ÿ
üz"HiRadixCache.prefetch_from_storageró   r   c           	      C  sF  t  ¡ |_t|ƒdkrdS |  |¡}d}t|ƒdkrr||j ¡ v rr|j| }t  ¡ |_|  |j|¡}||d … }||d … }||| j	 d … }||7 }|t|jƒk r\|  
|j||¡}|}t|ƒre|  |¡}t|ƒdkrr||j ¡ v s!t|ƒr¡t|jd}||_||_d |_| ¡ |_||_||j|< |  |¡ |  |¡ |  |¡ |S )Nr   ©Úpriority)r-  rg  Úlast_access_timer¡   rF  r   r…   Úkey_match_fnró   rC   Ú_split_noder   r  rä   rì   Úclonerî   rô   r  r  )	rm   r©   ró   rî   rô   Ú	child_keyrx  Ú
prefix_lenÚnew_noderq   rq   rr   rq  ³  s>   




ò




z HiRadixCache._insert_helper_hostc                 C  sè   t  ¡ |_|  |¡}g }t|ƒdkrp||j ¡ v rp|j| }t  ¡ |_|  |j|¡}|t|jƒk rI|  	|j||¡}|j
sB| |j¡ |}	 ||fS |j
sR| |j¡ |}||d … }t|ƒrc|  |¡}t|ƒdkrp||j ¡ v s||fS r¸   )r-  rg  rŽ  rF  r¡   r   r…   r  ró   r  r  rÆ   rì   )rm   r©   ró   r’  rì   r#  r“  r”  rq   rq   rr   r  Û  s,   




ø
ïz!HiRadixCache._match_prefix_helperr#  Ú	split_lenc                 C  sô   t |jd}|  ||d … ¡|i|_|j|_|j|_|jd |… |_|j|_|jr-d |_	n|j	d |…  
¡ |_	|j	|d …  
¡ |_	|jrX|jd |…  
¡ |_|j|d …  
¡ |_t|j|| jƒ\|_|_||_|j|d … |_||jj|  |¡< |S )NrŒ  )r   r  rF  r   rä   r  ró   rù   r  rì   r‘  rú   rî   r   rô   rC   )rm   ró   r#  r•  r”  rq   rq   rr   r  õ  s(   
ÿzHiRadixCache._split_noder   r   c                 C  s  |j }|j}|j}|j}|d u rd}|  ||¡\}}t|ƒdkr%tddS | jr4|d ur4|d t|ƒ… }| j}|  	|¡}d}t|ƒdkrþ||j
 ¡ v rþ|j
| }t ¡ |_t|j|ƒ|_|  |j |¡}	|	t|j ƒkrš|jr|d |	… |_|  jt|jƒ7  _|  |¡ |  |¡ |  |j¡ nM|  ||¡ ||	7 }nB|  |j ||	¡}
t|
j|ƒ|
_|
jrÐ|d |	…  ¡ |
_|  jt|
jƒ7  _|  |
¡ |  |
¡ |  |
j¡ n
|  |
|¡ ||	7 }|
}||	d … }||	d … }t|ƒrñ|  	|¡}t|ƒdkrþ||j
 ¡ v sKt|ƒrCt|d}
||
_||
_ | ¡ |
_|
|j
|< |  jt|ƒ7  _|  |¡ |  |
¡ | jr6t|
| jƒ|
_| jjdkrC|  |
|¡ t|dS )Nr   )r“  rŒ  r   ) ró   rì   rû   r  r~  r¡   r   Úis_eaglerã   rF  r   r…   r-  rg  rŽ  Úmaxr  r  r  r  r  rä   rü   r  r‘  r   r5   r   rC   rô   r]   r)   )rm   r    ró   rì   rû   r  r©   r’  Útotal_prefix_lengthr“  r”  rq   rq   rr   rK    st   









Ø
*





zHiRadixCache.insertÚridc                 C  s¢   | j  |d ¡ || jvrd S | j| \}}}}|jd u rd S | j |¡\}}| jdkr3tjj	| j
d | ¡  | j|= | j |d |… ¡ | j jt|ƒ8  _d S )Nr9   r&   )rc   rœ   ra   r¦   r]   rp  rR   rO   rP   ÚbarrierrN   rŸ   rr  r    r¡   )rm   r™  r¤   r¥   r¦   rÁ   rÀ   rº   rq   rq   rr   Úrelease_aborted_requesta  s   


z$HiRadixCache.release_aborted_request)r    r   r!   r   )r+   rw   r,   rx   r2   ry   r3   ry   r4   rz   r5   rz   r6   rz   r7   r{   r|   r}   )NNNN)r+   rˆ   r‰   rw   r\   rw   rW   rw   r[   rw   r|   rŠ   )r|   rŠ   )r«   r°   r¬   r°   r­   r°   r®   rz   )r.   rw   )r©   r   )r|   rz   )F)r    r
   r|   r   )r/  rx   r¼   )r©   r   rH  r°   r|   rI  )rT  r   rU  rx   rH  r°   )r|   rx   )rÁ   r	   )r£   rˆ   r|   rz   )r£   rˆ   )r£   rˆ   r|   rx   )r    r   )NN)
r£   rˆ   r¤   r   rƒ  r„  r…  rw   rö   r†  )r©   r   ró   r   )ró   r   r#  r   r•  rx   )r    r   r|   r   )r™  rˆ   ).rÝ   Ú
__module__Ú__qualname__rl   rh   r^   r“   rs   r˜   r—   r¯   rS   rá   ræ   rè   rð   rø   rü   r  r  r  rï   r
  r  r@  r5  r4  rí   rS  r[  r]  ra  r^  rU   rn  rz  rp  r|  r‚  r‹  rq  r  r  rK  r›  Ú__classcell__rq   rq   ro   rr   r   3   sf    u
1ú
w;

4M
	


+




.

"ÿ>ü




'
?
	
*ú
(
(

Qr   )5Ú
__future__r   rf   r1  rÕ   ÚloggingrÐ   rY   r-  Úqueuer   Útypingr   r   r   r   rO   Ú$sglang.srt.managers.cache_controllerr   r	   Ú&sglang.srt.mem_cache.base_prefix_cacher
   r   r   r   r   r   Ú sglang.srt.mem_cache.memory_poolr   r   r   Ú%sglang.srt.mem_cache.memory_pool_hostr   r   r   Ú sglang.srt.mem_cache.radix_cacher   r   r   r   r   Úsglang.srt.metrics.collectorr   Úsglang.srt.utilsr   Ú&sglang.srt.mem_cache.cache_init_paramsr   Úsglang.srt.server_argsr   Ú	getLoggerrÝ   r@   r   rq   rq   rq   rr   Ú<module>   s.     
