o
    پi|                     @  sR  d dl mZ d dlmZ d dlmZ 	 	 d dlZd dlZd dlZd dl	Z	d dl
mZ d dlmZmZ d dlmZmZmZmZmZmZmZ d dlZeeZd dlmZmZmZmZ d d	l m!Z!m"Z"m#Z#m$Z$m%Z%m&Z&m'Z' d d
l(m)Z)m*Z*m+Z+m,Z,m-Z-m.Z.m/Z/ d dl0m1Z1m2Z2 erd dl3m4Z4 G dd dZ5G dd dZ6d5ddZ7d5ddZ8d6ddZ9d7d8ddZ:d9d#d$Z;d:d)d*Z<G d+d, d,e!Z=ed-kr'e=> Z?e?@e$e5g d.dd/d0 e?@e$e5g d.dd/d0 e?@e$e5g d1dd/d0 e?@e$e5g d2dd/d0 e?@e$e5g d3dd/d0 e?A  eBe?Ce&e5g d4dd/d0 dS dS );    )annotations)CacheInitParams)convert_to_bigram_keyN)defaultdict)	lru_cachepartial)TYPE_CHECKINGAnyIteratorListOptionalTupleUnion)
MEDIUM_GPUAllBlocksClearedBlockRemovedBlockStored)BasePrefixCacheEvictParamsEvictResultInsertParamsInsertResultMatchPrefixParamsMatchResult)EvictionStrategyFIFOStrategyFILOStrategyLFUStrategyLRUStrategyMRUStrategyPriorityStrategy)get_hash_strhash_str_to_int64)Reqc                   @  sD   e Zd Z		ddd	d
ZdddZdddZdddZdddZdS ) RadixKeyNF	token_ids	List[int]	extra_keyOptional[str]	is_bigramboolc                 C  s   || _ || _|| _d S N)r%   r'   r)   )selfr%   r'   r)    r-   T/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/mem_cache/radix_cache.py__init__D   s   
zRadixKey.__init__returnintc                 C  
   t | jS r+   )lenr%   r,   r-   r-   r.   __len__Q      
zRadixKey.__len__Iterator[int]c                 C  r2   r+   )iterr%   r4   r-   r-   r.   __iter__T   r6   zRadixKey.__iter__idxUnion[int, slice]
'RadixKey'c                 C  s0   t |trt| j| | jS t| j| g| jS r+   )
isinstanceslicer$   r%   r'   )r,   r:   r-   r-   r.   __getitem__W   s   
zRadixKey.__getitem__strc                 C  s>   | j d d }d| jd| t| j dkrd dS d dS )N
   zRadixKey(extra_key=z, token_ids=z... ))r%   r'   r3   )r,   previewr-   r-   r.   __repr__\   s   0zRadixKey.__repr__)NF)r%   r&   r'   r(   r)   r*   )r0   r1   )r0   r7   )r:   r;   r0   r<   )r0   r@   )__name__
__module____qualname__r/   r5   r9   r?   rE   r-   r-   r-   r.   r$   C   s    


r$   c                   @  sl   e Zd ZdZdd ddZed	d
 Zedd Zdd Zdd Z	d!ddZ
eddd"ddZd#ddZdS )$TreeNoder   NidOptional[int]priorityr1   c                 C  sz   t t| _d | _d | _d | _d| _t | _	t | _
d| _d| _d | _d | _|| _|d u r1tjn|| _t jd7  _d S Nr      )r   rI   childrenparentkeyvaluelock_reftime	monotoniclast_access_timecreation_time	hit_counthost_ref_counter
host_value
hash_valuerL   counterrJ   )r,   rJ   rL   r-   r-   r.   r/   e   s   


zTreeNode.__init__c                 C  s
   | j d u S r+   )rR   r4   r-   r-   r.   evicted|      
zTreeNode.evictedc                 C  s
   | j d uS r+   )rZ   r4   r-   r-   r.   backuped   r^   zTreeNode.backupedc                 C  s   |  j d7  _ dS )z%Protect the host value from eviction.rN   N)rY   r4   r-   r-   r.   protect_host   s   zTreeNode.protect_hostc                 C  s$   | j dkr|  j d8  _ dS td)z2Release the host value, allowing it to be evicted.r   rN   z'Host reference counter is already zero.N)rY   RuntimeErrorr4   r-   r-   r.   release_host   s   
zTreeNode.release_hostr0   r(   c                 C  s&   | j du st| j dkrdS | j d S )z5Returns the hash value of the last page in this node.Nr   )r[   r3   r4   r-   r-   r.   get_last_hash_value   s   
zTreeNode.get_last_hash_valuerN   )maxsizenode	List[str]c                 C  s(   |d u s	|j d u rg S ||j|j  S r+   )r[   get_prefix_hash_valuesrP   )r,   rf   r-   r-   r.   rh      s   zTreeNode.get_prefix_hash_valuesother
'TreeNode'c                 C  s   | j |j k S r+   )rV   )r,   ri   r-   r-   r.   __lt__   s   zTreeNode.__lt__Nr   )rJ   rK   rL   r1   )r0   r(   )rf   rI   r0   rg   )ri   rj   )rF   rG   rH   r\   r/   propertyr]   r_   r`   rb   rd   r   rh   rk   r-   r-   r-   r.   rI   a   s    


rI   key0key1c                 C  s(   | j |j krtd| j  d|j  d S )NzG_key_match should be run on the same extra key, but got key0.extra_key=z != key1.extra_key=)r'   
ValueError)rn   ro   r-   r-   r.   _check_extra_key   s
   rq   c                 C  s@   t | | d}t| j|jD ]\}}||kr |S |d7 }q|S rM   )rq   zipr%   )rn   ro   ik0k1r-   r-   r.   _key_match_page_size1   s   

rv   	page_sizer1   c                 C  sf   t | | tt| t|}d}||k r1| j|||  |j|||  kr)	 |S ||7 }||k s|S rl   )rq   minr3   r%   )rn   ro   rw   min_lenrs   r-   r-   r.   _key_match_paged   s   
$rz   rN   rQ   c                 C  s>   |dkr
| j d }n	t| j d | }| jd u r|S | j|fS )NrN   r   )r%   tupler'   )rQ   rw   	plain_keyr-   r-   r.   get_child_key   s   

r}   rf   rj   r0   rg   c                 C  s   g }d}| j dur%| j jdur%t| j jdkr%t| j jdkr%| j jd }tdt| j|D ]}| jj|||  }|s=q.t||d}|| |}q.|S )zCompute SHA256-based hash values for position-aware identification.

    Args:
        node: The TreeNode to compute hash values for
        page_size: The page size for chunking tokens

    Returns:
        List of SHA256 hex strings, one per page
    Nr   rc   )
prior_hash)rP   r[   r3   rQ   ranger%   r!   append)rf   rw   hash_valuesparent_hashstartpage_tokenshash_valr-   r-   r.   compute_node_hash_values   s   
 
r   child_hash_valueOptional[List[str]]	split_len/tuple[Optional[List[str]], Optional[List[str]]]c                 C  sB   | du rdS |dkr|}n|| }| d| }| |d }||fS )ap  Split hash_value between parent and child nodes during node splitting.

    Args:
        child_hash_value: The hash_value list from the child node being split
        split_len: The length at which to split (in tokens)
        page_size: The page size for calculating number of pages

    Returns:
        Tuple of (new_node_hash_value, updated_child_hash_value)
    N)NNrN   r-   )r   r   rw   split_pagesnew_node_hash
child_hashr-   r-   r.   split_node_hash_value   s   r   c                   @  s(  e Zd Zd]ddZe				d^d_ddZdd Z	d`daddZdbddZdcd!d"Z	ddd$d%Z
dedfd*d+Zdgdhd,d-Zd.d/ Zd0d1 Zdid4d5Zdjd8d9Zdjd:d;Zd<d= Zd>d? Zd@dA ZdkdBdCZdldFdGZdmdndJdKZdodMdNZdOdP ZdjdQdRZdSdT ZdjdUdVZdjdWdXZdYdZ Zd[d\ ZdS )p
RadixCacheparamsr   c                 C  sL  |j | _ |j| _|j| _|j| _|j| _|j| _|j| _|j | _g | _	|j
r,|   | jr5| jj| _ntd| _| jdkrGt| _t| _ntt| jd| _tt| jd| _| jdkrat | _n;| jdkrkt | _n1| jdkrut | _n'| jdkrt | _n| jdkrt | _n| jd	krt | _n	td
| j dt | _|   d S )NcpurN   )rw   lrulfufifomrufilorL   zUnknown eviction policy: zF. Supported policies: 'lru', 'lfu', 'fifo', 'mru', 'filo', 'priority'.)disablereq_to_token_pooltoken_to_kv_pool_allocatorrw   enable_kv_cache_eventsis_eagledisable_finished_inserteviction_policylowerkv_event_queueenable_metricsinit_metrics_collectordevicetorchrv   key_match_fnr}   get_child_key_fnr   rz   r   eviction_strategyr   r   r   r   r    rp   setevictable_leavesreset)r,   r   r-   r-   r.   r/     sH   












zRadixCache.__init__FNrN   r   r*   mock_allocatorOptional[Any]rw   r1   r   r0   c                 C  s   t |d|||d}t|S )z?Init a radix cache without memory pools for simulation purpose.N)r   r   r   rw   r   )r   r   )r,   r   r   rw   r   r   r-   r-   r.   create_simulated5  s   	zRadixCache.create_simulatedc                 C  sb   t tj d| _tg d d| j_g | j_g | j_d| j_g | j_	d| _
d| _| j  |   d S )NrL   r%   r'   rN   r   )rI   sysre   	root_noder$   rQ   rR   rZ   rS   r[   evictable_size_protected_size_r   clear_record_all_cleared_eventr4   r-   r-   r.   r   I  s   
zRadixCache.resetrQ   r$   rR   Optional[torch.Tensor]'Tuple[RadixKey, Optional[torch.Tensor]]c                 C  s8   | j r|jst|j|_|d ur|d t| }||fS r+   )r   r)   r   r%   r3   )r,   rQ   rR   r-   r-   r.   maybe_bigram_convertV  s
   zRadixCache.maybe_bigram_convertr   r   c                   s   |j } |\}} fdd} jst|dkr| S  jdkr1t| j  j }|d| }t|dkr:| S   j|\}}|rKt|}n
tj	dtj
 jd}t|||dS )	a  Find the longest cached prefix of ``key`` in the radix tree.

        The logical namespace for prefix matching is determined by both the
        token id sequence and the optional ``extra_key`` carried by ``RadixKey``.
        Entries that share identical leading token ids but have *different*
        ``extra_key`` values are intentionally kept disjoint and never share
        prefix nodes. This is useful to:

        * Isolate KV cache lines for different LoRA / adapter IDs.
        * Separate requests that intentionally should not share state (e.g.,
          different sampling salt, cache version, or retrieval augmentation
          context) by supplying a distinct ``extra_key``.

        Args:
            params (MatchPrefixParams): Parameters containing the lookup key
                with a list of token ids and an optional ``extra_key`` namespace tag.
                If ``page_size > 1`` the length is internally truncated to a multiple
                of ``page_size`` before matching. Passing an empty key returns an
                empty result with the root as the last node.

        Returns:
            MatchResult: ``device_indices`` is a 1-D ``torch.int64`` tensor of
            the concatenated KV cache indices corresponding to the longest
            cached prefix (may be length 0). ``last_device_node`` and
            ``last_host_node`` (currently the same) are the tree node objects
            representing the terminal node of the matched prefix. This method
            may mutate internal structure by splitting an existing node if the
            match ends inside a stored segment.

        Internal updates:
            * Refreshes access metadata (timestamps) used by the
                configured eviction strategy.
            * If the lookup ends inside a stored segment the node is split once
                to expose a precise boundary; this structural refinement improves
                subsequent match efficiency and does not duplicate data.
        c                     s"   t tjdtj jd j jdS )Nr   dtyper   device_indiceslast_device_nodelast_host_node)r   r   emptyint64r   r   r-   r4   r-   r.   empty_match_result  s   z3RadixCache.match_prefix.<locals>.empty_match_resultr   rN   Nr   r   r   )rQ   r   r   r3   rw   _match_prefix_helperr   r   catr   r   r   r   )r,   r   rQ   _r   page_aligned_lenrR   	last_noder-   r4   r.   match_prefix`  s&   %
zRadixCache.match_prefixr   r   c                 C  sh   | j rtddS |j}|j}|j}|d u rtj|jtjd}| 	||\}}| 
| j|||}t|dS )Nr   )
prefix_len)r   )r   r   rQ   rR   rL   r   tensorr%   r   r   _insert_helperr   )r,   r   rQ   rR   rL   r   r-   r-   r.   insert  s   

zRadixCache.insertlistc                 C  s.   | j dkr|S t|| j  | j  }|d | S )NrN   )rw   r3   )r,   rQ   r   r-   r-   r.   _page_align_keys  s   
zRadixCache._page_align_keysTreqr#   	is_insertc                 C  s>  | j rd}| }| jr| jj|jd|f }| j| dS |j|j	 d| }| jj|jdt
|f }| jr=t|n|}| |}|dt
| jtjdd}t||j| jd}|r~t|ddpdd}	| t|||	d}
|
j}| j||j|  n| j||jt
|  | j|t
|d  | |j dS )	zCache request when it finishes.FNTr   copyr)   rL   r   )rQ   rR   rL   )r   pop_committed_kv_cacher   r   req_to_tokenreq_pool_idxr   freeorigin_input_ids
output_idsr3   r   r   r   tor   r   r$   r'   getattrr   r   r   cache_protected_lendec_lock_refr   )r,   r   r   kv_committed_len
kv_indicesr%   keysvalues	radix_keyrL   resultnew_prefix_lenr-   r-   r.   cache_finished_req  s>   
zRadixCache.cache_finished_reqc              
   C  s  | j rdS |j}| jj|jdt|f }| jrt|n|}| |}|dt| j	t
jdd}t||j| jd}| t|||t|ddpGdd}|j}	| j||j|	  | t|d}
|
j|
j}}t|t|ks~J d	t|d
t|| j|jt|jt|f||jd  t||_| |j | | t|t|k rt
||t|d g|_n||_||_dS )z$Cache request when it is unfinished.NTr   r   rL   r   )rQ   rR   chunkedrL   rQ   zlen(new_indices)=z, len(keys)=) r   fill_idsr   r   r   r3   r   r   r   r   r   r   r$   r'   r   r   r   r   r   r   r   r   r   r   r   writer>   r   r   inc_lock_refr   prefix_indices)r,   r   r   r%   r   r   r   r   r   r   match_resultnew_indicesnew_last_noder-   r-   r.   cache_unfinished_req  sP   
,
	

zRadixCache.cache_unfinished_reqc                 C  s$   |  | jd td|    d S )Nr   z	#tokens: )_print_helperr   print
total_sizer4   r-   r-   r.   pretty_print.  s   zRadixCache.pretty_printc                 C  s   |   S r+   )_total_size_helperr4   r-   r-   r.   r   2  s   zRadixCache.total_sizer   r   c           
        s    j rt S t }|j}t j} fdd|D }t| d}||k rot	|rot
|\}} j|j |t	|j7 } | t	|jjdkrb|jjdkrb j|j}	t||	|jf  | ||k rot	|s* || t|dS )Nc                   s   g | ]
} j ||fqS r-   )r   get_priority).0rf   r4   r-   r.   
<listcomp><  s    z$RadixCache.evict.<locals>.<listcomp>r   )num_tokens_evicted)r   r   rT   perf_counter
num_tokensr   r   heapqheapifyr3   heappopr   r   rR   _delete_leafrP   rO   rS   r   r   heappush_record_remove_eventupdate_eviction_metrics)
r,   r   
start_timer   leaveseviction_heapnum_evicted	_priorityxnew_priorityr-   r4   r.   evict5  s,   





zRadixCache.evictrf   rI   c                 C  s   | j rdS d}|| jkr@|jdkr,|  jt|j8  _|  jt|j7  _|t|j8 }| jd7  _| | |j}|| jks|S rM   	r   r   rS   r   r3   rQ   r   _update_leaf_statusrP   r,   rf   deltar-   r-   r.   r   R  s   



zRadixCache.inc_lock_refc                 C  s   | j rdS d}|| jkrN|jdkr,|  jt|j7  _|  jt|j8  _|t|j7 }| jd8  _| | |jd u rF|| ju sFJ d|j}|| jks|S )Nr   rN   z-This request holds the node from another treer	  r  r-   r-   r.   r   a  s"   




zRadixCache.dec_lock_refc                 C     | j S r+   )r   r4   r-   r-   r.   evictable_sizet  s   zRadixCache.evictable_sizec                 C  r  r+   )r   r4   r-   r-   r.   protected_sizew  s   zRadixCache.protected_sizec                   s(   g d fdd  | j  tS )Nrf   rI   c                   s,   | j  D ]\}}|j  | qd S r+   )rO   itemsr   rR   )rf   r   child_dfs_helperr   r-   r.   r  ~  s   
z2RadixCache.all_values_flatten.<locals>._dfs_helperrf   rI   )r   r   r   r4   r-   r  r.   all_values_flatten{  s   

zRadixCache.all_values_flattenc           	      C  s   t  }||_| |}g }t|dkrj||j v rj|j| }||_| |j|}|t|jk rF| 	|j||}|
|j |}	 ||fS |
|j |}||d  }t|r]| |}t|dkrj||j v s||fS rl   )rT   rU   rV   r   r3   rO   r   r   rQ   _split_noder   rR   )	r,   rf   rQ   access_time	child_keyrR   r  r   new_noder-   r-   r.   r     s*   

	
zRadixCache._match_prefix_helperr  r   c                 C  s   t |jd}| ||d  |i|_|j|_|j|_|jd | |_|jd |  |_||_|j|d  |_|j|d   |_||jj| |< t	|j
|| j\|_
|_
|S )Nr   )rI   rL   r   rO   rP   rS   rQ   rR   cloner   r[   rw   )r,   rQ   r  r   r  r-   r-   r.   r    s   
zRadixCache._split_noder   rL   c           
      C  sp  |d u rd}t  }||_t|j||_t|dkrdS | |}d}t|dkr||j v r|j| }||_| 	|j
|}||7 }||d  }||d  }|t|j
k rh| |j
||}	t|	j||	_|	}nt|j||_t|rx| |}t|dkr||j v s0t|rt|d}	||	_||	_
| |	_|	|j|< |  jt|7  _| | | |	 | |	 |S )Nr   r   )rT   rU   rV   maxrL   r3   r   rO   r   r   rQ   r  rI   rP   r  rR   r   r
  _record_store_event)
r,   rf   rQ   rR   rL   r  r  total_prefix_lengthr   r  r-   r-   r.   r     sF   








zRadixCache._insert_helperindentc                 C  s   ||fg}|rP|  \}}td| t|j|jjdd d|j  |j D ]#\}}|||d f || 	|jksKJ d|d| 	|jq(|sdS dS )z1Prints the radix tree in a human-readable format. NrA   zr=   zkey=z#, self.get_child_key_fn(child.key)=)
popr   r3   rQ   r%   rS   rO   r  r   r   )r,   rf   r  stackcurrent_nodecurrent_indentrQ   r  r-   r-   r.   r     s"   

zRadixCache._print_helperc                 C  sl   |  |j}|jj|d }||ksJ d| |  jt|j8  _|| jv r.| j| | 	|j d S )Nz parent does not have child key, )
r   rQ   rP   rO   r!  r   r3   r   remover
  )r,   rf   rQ   vr-   r-   r.   r     s   
zRadixCache._delete_leafc                 C  sz   |j s|jdkr|| jv r| j| d S |j D ]}|j s-|| jv r*| j|  d S q|| jvr;| j| d S d S rl   )r]   rS   r   r%  rO   r   add)r,   rf   r  r-   r-   r.   r
     s   


zRadixCache._update_leaf_statusc                 C  sP   d}| j g}|r&| }|t|j7 }|j D ]}|jrq|| q|s|S rl   )r   r!  r3   rR   rO   r   r]   r   )r,   r   r"  r#  r  r-   r-   r.   r     s   zRadixCache._total_size_helperc                 C  s   | j rm|jd u rt|| j|_d }|jd ur2|j| jkr2|jjd ur2t|jjdkr2t|jjd }d}tdt|j	| jD ]0}|j	j
||| j  }|sNq>t|j| }| jt|g||t|d td |}|d7 }q>d S d S )Nr   rc   )block_hashesparent_block_hashr%   
block_sizelora_idmediumrN   )r   r[   r   rw   rP   r   r3   r"   r   rQ   r%   r   r   r   r   )r,   rf   r)  
page_indexr   r   
block_hashr-   r-   r.   r    s8   

zRadixCache._record_store_eventc                 C  s   | j rB|jd u rt|| j|_d}tdt|j| jD ](}|jj||| j  }|s+qt|j| }| j	
t|gtd |d7 }qd S d S )Nr   )r(  r,  rN   )r   r[   r   rw   r   r3   rQ   r%   r"   r   r   r   r   )r,   rf   r-  r   r   r.  r-   r-   r.   r   A  s   

zRadixCache._record_remove_eventc                 C  s   | j r| jt  d S d S r+   )r   r   r   r   r4   r-   r-   r.   r   V  s   z$RadixCache._record_all_cleared_eventc                 C  s   | j sg S | j}g | _|S )zsAtomically takes all events and clears the queue.

        Returns:
            A list of KV cache events.
        )r   r   )r,   eventsr-   r-   r.   take_eventsZ  s
   zRadixCache.take_events)r   r   )FNrN   F)
r   r*   r   r   rw   r1   r   r*   r0   r   r+   )rQ   r$   rR   r   r0   r   )r   r   r0   r   )r   r   r0   r   )rQ   r   r0   r   )T)r   r#   r   r*   )F)r   r#   )r   r   r0   r   r  )rf   rI   rQ   r$   )rQ   r$   r  rI   r   r1   r   )rf   rI   rQ   r$   rL   r1   )rf   rI   r  r1   ) rF   rG   rH   r/   classmethodr   r   r   r   r   r   r   r   r   r   r  r   r   r  r  r  r   r  r   r   r   r
  r   r  r   r   r0  r-   r-   r-   r.   r     sF    
/


H
/A





-



&r   __main__)rN   r      r   r   )rN   r         )rN   r   r4  r5        )   	   rA         )rN   r   r3        )rn   r$   ro   r$   )rn   r$   ro   r$   rw   r1   )rN   )rQ   r$   rw   r1   )rf   rj   rw   r1   r0   rg   )r   r   r   r1   rw   r1   r0   r   )D
__future__r   &sglang.srt.mem_cache.cache_init_paramsr   sglang.srt.mem_cache.utilsr   r   loggingr   rT   collectionsr   	functoolsr   r   typingr   r	   r
   r   r   r   r   r   	getLoggerrF   logger#sglang.srt.disaggregation.kv_eventsr   r   r   r   &sglang.srt.mem_cache.base_prefix_cacher   r   r   r   r   r   r   !sglang.srt.mem_cache.evict_policyr   r   r   r   r   r   r    $sglang.srt.mem_cache.hicache_storager!   r"   "sglang.srt.managers.schedule_batchr#   r$   rI   rq   rv   rz   r}   r   r   r   r   treer   r   r   r   r-   r-   r-   r.   <module>   sf    $
$$		
?




!    
f