o
    پiz                     @  s@  d dl mZ 	 	 d dlZd dlmZ d dlmZ d dlmZm	Z	m
Z
mZ d dlZd dlmZ d dlmZ d dlmZ d d	lmZmZ d d
lmZmZmZmZmZmZmZ d dlm Z  d dl!m"Z"m#Z#m$Z$m%Z% d dl&m'Z' erzd dl(m)Z) d dl*m+Z+ d dl,Z,e,-e.Z/G dd dZ0dddZ1G dd dZ2G dd deZ3dS )    )annotationsN)defaultdict)partial)TYPE_CHECKINGListOptionalTuple)float64)get_tensor_model_parallel_rank)
CHUNK_SIZE)PagedTokenToKVPoolAllocatorTokenToKVPoolAllocator)BasePrefixCacheEvictParamsEvictResultInsertParamsInsertResultMatchPrefixParamsMatchResult)HybridReqToTokenPool)RadixKey_key_match_page_size1_key_match_pagedget_child_key)get_global_server_args)Req)CacheInitParamsc                   @  sF   e Zd ZdZedZddddZedd	 Zed
d Z	dddZ
dS )TreeNoder         ?NidOptional[int]c                 C  s   t t| _d | _d | _d | _d | _d| _d| _t	 | _
d| _d | _d | _d | _d | _d | _|d u r4tjn|| _t jd7  _d S )Nr      )r   r   childrenparentkeyvaluemamba_valuefull_lock_refmamba_lock_refget_last_access_timelast_access_time	hit_count
host_valueprevnext
mamba_prev
mamba_nextcounterr   )selfr    r3   Z/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/mem_cache/mamba_radix_cache.py__init__D   s    
zTreeNode.__init__c                 C  s
   | j d u S N)r%   r2   r3   r3   r4   evictedc      
zTreeNode.evictedc                 C  s
   | j d uS r6   )r,   r7   r3   r3   r4   backupedg   r9   zTreeNode.backupedother
'TreeNode'c                 C  s   | j |j k S r6   )r*   )r2   r;   r3   r3   r4   __lt__k   s   zTreeNode.__lt__r6   )r   r    )r;   r<   )__name__
__module____qualname__r1   r	   last_access_time_counter_floatr5   propertyr8   r:   r=   r3   r3   r3   r4   r   ?   s    

r   returnr	   c                  C  s   t j} t  jd7  _| S )Nr   )r   rA   )retr3   r3   r4   r)   o   s   r)   c                   @  s   e Zd Zd0d1ddZdd Zdd	 Zd
d Zd2ddZdd Zdd Z	dd Z
d3ddZd2ddZd2ddZ	d4d5d d!Zd4d6d"d#Zd7d$d%Zd8d9d)d*Zd+d, Zd:d.d/Zd&S );LRUListFmambaboolc                 C  sp   || _ | j rd| _d| _d| _n	d| _d| _d| _t | _t | _t| j| j| j t| j| j| j i | _d S )Nr/   r0   r(   r-   r.   r'   )	rF   prvnxtlock_refr   headtailsetattrcache)r2   rF   r3   r3   r4   r5   v   s   
zLRUList.__init__c                 C  s   |  | j| dS )z8Helper to add node right after head (most recently used)N)_add_node_afterrK   r2   noder3   r3   r4   	_add_node   s   zLRUList._add_nodec                 C  sL   t || j| t || jt|| j t t|| j| j| t || j| dS )z'Helper to add node right after old_nodeN)rM   rH   rI   getattr)r2   old_nodenew_noder3   r3   r4   rO      s   zLRUList._add_node_afterc                 C  s@   t t|| j| jt|| j t t|| j| jt|| j dS )z&Helper to remove node from linked listN)rM   rS   rH   rI   rP   r3   r3   r4   _remove_node   s   zLRUList._remove_noderC   Optional[TreeNode]c                 C  s    t | jdkr	dS t| j| jS )z2
        Get the least recently used node
        r   N)lenrN   rS   rL   rH   r7   r3   r3   r4   _get_lru   s   zLRUList._get_lruc                 C  sV   |j | jv sJ d|j d| jr|jdusJ d|j | | | | dS )zG
        Move a (existing) node to most recently used position
        Resetting node node.id= not in lru listNz:Resetting mamba tombstone node in mamba lru list: node.id=)r   rN   rF   r&   rV   rR   rP   r3   r3   r4   reset_node_mru      

zLRUList.reset_node_mruc                 C  sl   | j }||kr4| jr|jdur+|j| jv sJ d|jd| | | || |}|j}||ksdS dS )z
        Move an (existing) node and its parents to most recently used position. Child node is
        more recently used than parent node.
        NrZ   z4 not in lru list when resetting node and parents mru)rK   rF   r&   r   rN   rV   rO   r#   )r2   rQ   	root_node	prev_noder3   r3   r4   reset_node_and_parents_mru   s   
z"LRUList.reset_node_and_parents_mruc                 C  sf   | j r|jdusJ d|j|j| jvs&J d|jd| j|j j|| j|j< | | dS )z;
        Insert a (new) node as most recently used
        Nz:Inserting mamba tombstone node in mamba lru list: node.id=zInserting node node.id=z< already in lru list, existing node: self.cache[node.id].id=)rF   r&   r   rN   rR   rP   r3   r3   r4   
insert_mru   s   
zLRUList.insert_mrurQ   r   c                 C  sV   |j | jv sJ d|j d| jr|jdusJ d|j | j|j = | | dS )z+
        Remove node from lru list
        zRemoving node node.id=r[   Nz;Removing mamba tombstone node from mamba lru list: node.id=)r   rN   rF   r&   rV   rP   r3   r3   r4   remove_node   r]   zLRUList.remove_nodec                 C     | j | jddS )zE
        Get the least recently used node that is not locked
        Fcheck_id)get_prev_no_lockrL   r7   r3   r3   r4   get_lru_no_lock      zLRUList.get_lru_no_lockc                 C  rc   )zJ
        Get the least recently used leaf node that is not locked
        Frd   )get_prev_leaf_no_lockrL   r7   r3   r3   r4   get_leaf_lru_no_lock   rh   zLRUList.get_leaf_lru_no_lockTre   c                 C  sl   |r|j | jv sJ d|j dt|| j}t|| jdkr-t|| j}t|| jdks|| jkr4dS |S )zT
        Get the previous (i.e. more recently used) node that is not locked
        Getting prev of node node.id=r[   r   N)r   rN   rS   rH   rJ   rK   r2   rQ   re   xr3   r3   r4   rf      s   
zLRUList.get_prev_no_lockc                 C  s   |r|j | jv sJ d|j dt|| j}t|| jdks&t|jdkr;t|| j}t|| jdks&t|jdks&|| jkrBdS |S )zY
        Get the previous (i.e. more recently used) leaf node that is not locked
        rk   r[   r   N)r   rN   rS   rH   rJ   rX   r"   rK   rl   r3   r3   r4   ri      s   
zLRUList.get_prev_leaf_no_lockc                 C  s   |sdS |j | jv S )z6
        Check if the node is in the lru list
        F)r   rN   rP   r3   r3   r4   in_list  s   zLRUList.in_listN
tree_cacheOptional['MambaRadixCache']c                 C  s   d| j d}|  }|dur3|j| jv r3|d|j d|jdd7 }t|| j}|dur3|j| jv st| |s;dS d| j d}| j rJ| }n|	 }t
| t|rnt
|}|d|j d|jdd7 }t|sWt| dS )	z+
        Pretty print the lru list
        self.mamba=z LRU list: N[z] fz -> z% Nodes (sorted by last_access_time): )rF   rY   r   rN   r*   rS   rH   print_collect_nontombstone_nodes_collect_all_nodesheapqheapifyrX   heappop)r2   ro   msgx_lrunodesrm   r3   r3   r4   pretty_print  s&   


zLRUList.pretty_printc                 C  sL   |   }d}| |r$|| jst|jnt|j7 }| |}| |s|S )z[
        Check the evictable size (i.e. the size of the nodes that are not locked)
        r   )rg   rn   rF   rX   r%   r&   rf   )r2   rQ   evictable_sizer3   r3   r4   sanity_check_evictable_size)  s   


z#LRUList.sanity_check_evictable_size'MambaRadixCache'c                 C  s  z| j r	| }n| }t|}t| j}t| t||| j r$dnd ks4J dt| d| |  }t|rt|}||j	krGq8|durQ|j
| jv s\J d|d|j
||ksxJ d| j d	|j
d
|j
d|jd|j
|jdksJ d|jd|j
|jdksJ d|jd|j
t|| j}t|s<| j r| }|  }n| }|  }||ksJ d| j d| d| d| d| 
W dS  ty	 }	 z*t dkrd|	 }
t|
 |  |j| |j| t|
W Y d}	~	dS d}	~	ww )z
        Check if the lru list is valid by rebuilding the lru list from the tree, heapifying it, and
        checking if the lru list is valid.
        r   r!   zlen(nodes): z, total_lru: Nz9Incorrect LRU list, x_lru is None or not in cache: x_lru=z, x.id=zIncorrect LRU list, self.mamba=z
, x: x.id=z != x_lru: x_lru.id=z, x.last_access_time=z, x_lru.last_access_time=z:x_lru should not be locked when idle, x_lru.full_lock_ref=z, x_lru.id=z;x_lru should not be locked when idle, x_lru.mamba_lock_ref=rq   z, total nodes: z, total lru: z, evictable size: z != lru list evictable size: z9Mamba Radix tree sanity check failed, ping @yizhang2077: )rF   ru   rv   rX   rN   rw   rx   rY   ry   r^   r   r*   r'   r(   rS   rH   mamba_evictable_sizer   full_evictable_size	Exceptionr
   loggererrorr}   full_lru_listmamba_lru_list)r2   ro   r|   total_nodes	total_lrur{   rm   r~   lru_list_evictable_sizeerz   r3   r3   r4   sanity_check7  sd   





*

"



zLRUList.sanity_checkF)rF   rG   )rC   rW   rQ   r   T)rQ   r   re   rG   rC   rW   )rQ   r   re   rG   )rQ   rW   r6   )ro   rp   )ro   r   )r>   r?   r@   r5   rR   rO   rV   rY   r\   r`   ra   rb   rg   rj   rf   ri   rn   r}   r   r   r3   r3   r3   r4   rE   u   s&    
	



rE   c                   @  s  e Zd ZdxddZdyddZdzd
dZd{ddZd|ddZd}d~ddZddddZ	dzddZ
dd d!Zdd&d'Zdd*d+Zdd.d/Zdd1d2Zdd5d6Zdd7d8Zd9d: Zdd;d<Zdd=d>Zdd?d@ZddAdBZddCdDZddEdFZddHdIZddJdKZddMdNZddRdSZddUdVZdd[d\Zdd_d`ZddbdcZ ddedfZ!ddgdhZ"ddidjZ#ddkdlZ$ddndoZ%ddpdqZ&ddsdtZ'ddudvZ(dwS )MambaRadixCacheparamsr   c                 C  s   t |jtst |jtsJ |j| _|j| _|j| _|j| _|j| _| js2| jdks2J d| j | jr;| jj| _nt	d| _|j
rH|   | jdkrTt| _t| _ntt| jd| _tt| jd| _|   d S )Nr!   z0Page size must be 1 for MambaRadixCache v1, got cpu)	page_size)
isinstancetoken_to_kv_pool_allocatorr   r   req_to_token_poolr   disableenable_mamba_extra_bufferdevicetorchenable_metricsinit_metrics_collectorr   key_match_fnr   get_child_key_fnr   r   reset)r2   r   r3   r3   r4   r5   t  s2   


zMambaRadixCache.__init__rC   rG   c                 C  s   dS )NTr3   r7   r3   r3   r4   supports_mamba  s   zMambaRadixCache.supports_mambaNonec                 C  sb   t  | _tg d | j_g | j_d| j_d| j_d| _d| _d| _	d| _
tdd| _tdd| _d S )Nr!   r   F)rF   T)r   r^   r   r$   r%   r'   r(   full_evictable_size_mamba_evictable_size_full_protected_size_mamba_protected_size_rE   r   r   r7   r3   r3   r4   r     s   zMambaRadixCache.resetr   r   c                 C  sT   |  |}|du rttjdtj| jd| j| jdS | |\}}}| ||||S )a  Find the matching prefix from the radix tree.
        Args:
            params: MatchPrefixParams containing key and optional Mamba-specific parameters.
        Returns:
            A tuple of a tensor of matching prefix token IDs and
            the last node that contains the prefix values. Note that
            this API can modify the internal state of the Radix tree.
            The last node create a new child if the prefix is shorter
            than the last node's value.
        Nr   dtyper   )device_indiceslast_device_nodelast_host_node)	_match_pre_processorr   r   emptyint64r   r^   _match_prefix_helper_match_post_processor)r2   r   r$   r%   	last_nodebest_value_lenr3   r3   r4   match_prefix  s   

zMambaRadixCache.match_prefixr   r   c                 C  sj   | j r	tdddS |j}|j}|j}|d u r$tjdd |jD tjd}| 	| j
|||\}}t||dS )Nr   F)
prefix_lenmamba_existc                 S  s   g | ]}|qS r3   r3   ).0rm   r3   r3   r4   
<listcomp>  s    z*MambaRadixCache.insert.<locals>.<listcomp>)r   )r   r   r$   r%   r&   r   tensor	token_idsr   _insert_helperr^   )r2   r   r$   r%   r&   r   r   r3   r3   r4   insert  s   
zMambaRadixCache.insertTreqr   	is_insertc                 C  s@  |  }| jr | jj|jd|f }| j| | j| dS |j|j	 d| }| jj|jd|f }|r| j
r=|jnt|}|du rGd}|t|krit||j}| j||d  |d| }|d| }| jdkrt|| j | j }|d| jtjdd}	nt|}|jtjdd}	||ksJ d|d|d|d	t|jd
t|j	d| j
r| j|j}
|j|
 d }n
|jd }d}
| tt|d| |j|	|d}|j|j}}| j||j|  n| j||jd  d}|rd}
| j
rdn|}|r| jj||
d | |j  dS )zCache request when it finishes.Nr   r!   Tr   copyzIt is required cache_len=z, page_aligned_len=z, kv_committed_len=z, len(req.origin_input_ids)=z, len(req.output_ids)=z" ping @yizhang2077 if you see thisr$   r%   r&   )$mamba_ping_pong_track_buffer_to_keep)!pop_committed_kv_cacher   r   req_to_tokenreq_pool_idxr   freefree_mamba_cacheorigin_input_ids
output_idsr   mamba_last_track_seqlenrX   maxcache_protected_lenr   tor   r   get_mamba_ping_pong_other_idxmamba_next_track_idxmamba_ping_pong_track_buffer	unsqueezeclonemamba_pool_idxr   r   r   	extra_keyr   r   dec_lock_refr   )r2   r   r   kv_committed_len
kv_indicesr   	cache_lencache_end_idxpage_aligned_lenpage_aligned_kv_indicesr   r&   resultnew_prefix_lenr   r   r3   r3   r4   cache_finished_req  s   

.z"MambaRadixCache.cache_finished_reqFc                   s  d fdd}|j } jr|jnt|} js|du r||S  jj|jdt|f }|d| } jdkrNt| j  j }|d| j	t
jd	d
}	nt|}|j	t
jd	d
}	|t|ksvJ d|dt|d|d jdt
|d| }
 jr j|j}|j| d }n
 j|jd} jj|}|du r tddd  jj|}|dusJ d tt|
|j|	|d}|j|j}} j||j|  |r jj|   t!t|
|jd}|j"|j#}}|st
$|j%|sJ |jt| j d ks)J d|jdt|dt|
d||t|ks<J d|dt| j&|jt'|jt|f||jd   (|j)  *| t
+||t|d g|_,t||_d|_||_)dS )z$Cache request when it is unfinished.r   r   rC   r   c                   s2    j j| jd t| jf }|jtjdd| _d S )NTr   )	r   r   r   rX   fill_idsr   r   r   prefix_indices)r   r   r7   r3   r4   _skip_cache_unfinished_req/  s
   zHMambaRadixCache.cache_unfinished_req.<locals>._skip_cache_unfinished_reqNr!   Tr   z6page_aligned_len != len(kv_indices), page_aligned_len=z, len(kv_indices)=z, cache_len=z, self.page_size=z, FLA_CHUNK_SIZE=r   r   
num_tokens	mamba_numCan not alloc mamba cacher   )r$   zreq.cache_protected_len=z, len(new_indices)=z, len(page_aligned_token_ids)=z, mamba_exist=znew_prefix_len=r   r   rC   r   )-r   r   r   rX   r   r   r   r   r   r   r   r   FLA_CHUNK_SIZEr   r   r   r   r   get_mamba_indices
mamba_pool	fork_fromevictr   r   r   r   r   r   r   r   r   r   r   r   r   r   equalr&   writeslicer   r   inc_lock_refcatr   )r2   r   chunkedr   r   r   kv_indices_origr   r   r   page_aligned_token_idsr   r&   mamba_value_forkedr   r   r   match_resultnew_indicesnew_last_noder3   r7   r4   cache_unfinished_req,  s   	
&
$



z$MambaRadixCache.cache_unfinished_reqc                 C  s2   |  | jd |  \}}td| d|  d S )Nr   z#full_tokens: z, #mamba_num: )_print_helperr^   _total_size_helperrt   )r2   
total_sizetotal_mamba_sizer3   r3   r4   r}     s   zMambaRadixCache.pretty_printTuple[int, int]c                 C  s   |   S r6   )r   r7   r3   r3   r4   r     s   zMambaRadixCache.total_sizerm   r   is_evict_mamba#Tuple[int, int, TreeNode, TreeNode]c                 C  s   |j dkr
|jdksJ d|jd|j d|j|jd us'J d|j| j|j t|j}| jj	|j t|j}|rI| j
|}n| j|}| j| | j
| | | | |\}}||7 }||||fS )Nr   z"evict leaf node invalid with x.id=z x.full_lock_ref=z x.mamba_lock_ref=z(leaf node mamba value is not None, x.id=)r'   r(   r   r&   r   r   r%   rX   r   r   r   rf   r   ri   rb   _delete_leaf"_iteratively_delete_tombstone_leaf)r2   rm   r   full_num_evictedmamba_num_evictedx_nextleaf_full_num_evictedr3   r3   r4   _evict_leaf_node  s"   


z MambaRadixCache._evict_leaf_noder   r   c                 C  sL   | j rt S d}d}|jdkr| |j}|jdkr | |j}t||dS )Nr   )num_tokens_evictedr  )r   r   r   
evict_fullr   evict_mamba)r2   r   r  r  r3   r3   r4   r     s   

zMambaRadixCache.evictr   intc                 C  s4  | j s|dkr	dS | j }d}||k r| j|r|jdus'J d|jt|jdks<J d|jdt|j|| jksIJ d|j|jdksVJ d|jt|j	dkr~| j
j|j |t|j7 }| j|}| j| | | n| |d	\}}}}||7 }|}||k r| j|s|S )
z?Evict mamba states. Returns the number of mamba states evicted.r   Nznode has no mamba value, x.id=r!   z%node has abnormal mamba length, x.id=z, len(x.mamba_value)=z!root node is not evictable, x.id=z)node is in use by mamba kv indices, x.id=T)r   r   rg   rn   r&   r   rX   r^   r(   r"   r   r   r   rf   rb   _tombstone_internal_noder  )r2   r   rm   r  r  _mamba_evicted_deltar3   r3   r4   r	    s,   
zMambaRadixCache.evict_mambafull_num_tokensc                 C  s   | j s|dkr	dS d}| j }||k rN| j|rN|| jks'J d|j| |d\}}}}||7 }t|jj	dkrB| j }|}||k rN| j|s|S )z:Evict full KV cache. Returns the number of tokens evicted.r   z2root node should not exist in full lru list, x.id=F)
r   r   rj   rn   r^   r   r  rX   r#   r"   )r2   r  r  rm   full_num_evicted_deltar  r  r3   r3   r4   r    s   


zMambaRadixCache.evict_fullrQ   r    c                 C  s   | j rdS |jdur*|jdkr#|  jt|j8  _|  jt|j7  _| jd7  _|| jkrh|jdks@J d|jd|j|jdkrY|  j	t|j
8  _	|  jt|j
7  _| jd7  _|j}|| jks/dS )z
        Increment the lock reference count for the node.
        It locks the full_lock_ref for nodes between the [last node, root), exclusive.
        It locks the mamba_lock_ref for current node if its mamba_value exists.
        Nr   r!   z-inc_lock_ref on node with node.full_lock_ref=
, node.id=)r   r&   r(   r   rX   r   r^   r'   r   r   r%   r   r#   rP   r3   r3   r4   r     s$   





zMambaRadixCache.inc_lock_refc                 C  s   | j rdS |jdur;|jdksJ d|jd|j|jdkr4|  jt|j7  _|  jt|j8  _| jd8  _|| jkr{|jdksQJ d|jd|j|jdkrj|  j	t|j
7  _	|  jt|j
8  _| jd8  _|j}|| jks@dS dS )z
        Decrement the lock reference count for the node.
        It unlocks the full_lock_ref for nodes between the [last node, root), exclusive.
        It unlocks the mamba_lock_ref for current node if its mamba_value exists.
        Nr   z.dec_lock_ref on node with node.mamba_lock_ref=r  r!   z-dec_lock_ref on node with node.full_lock_ref=)r   r&   r(   r   r   rX   r   r^   r'   r   r%   r   r#   rP   r3   r3   r4   r   0  s(   



zMambaRadixCache.dec_lock_refc                 C  s&   | j rd S | j|  | j|  d S r6   )r   r   r   r   r7   r3   r3   r4   r   L  s   zMambaRadixCache.sanity_checkc                 C     t r6   NotImplementedErrorr7   r3   r3   r4   r~   R     zMambaRadixCache.evictable_sizec                 C     | j S r6   )r   r7   r3   r3   r4   r   V     z#MambaRadixCache.full_evictable_sizec                 C  r  r6   )r   r7   r3   r3   r4   r   Y  r  z$MambaRadixCache.mamba_evictable_sizec                 C  r  r6   r  r7   r3   r3   r4   protected_size\  r  zMambaRadixCache.protected_sizec                 C  r  r6   )r   r7   r3   r3   r4   full_protected_size`     z#MambaRadixCache.full_protected_sizec                 C  r  r6   )r   r7   r3   r3   r4   mamba_protected_sized  r  z$MambaRadixCache.mamba_protected_sizetorch.Tensorc                   >   g d fdd  | j  tdkrtS tg S )NrQ   r   c                   s,   | j  D ]\}}|j  | qd S r6   )r"   itemsappendr%   rQ   r  child_dfs_helpervaluesr3   r4   r"  k  s   
z7MambaRadixCache.all_values_flatten.<locals>._dfs_helperr   r   r^   rX   r   r   r   r7   r3   r!  r4   all_values_flattenh  s   
 z"MambaRadixCache.all_values_flattenc                   r  )NrQ   r   c                   s6   | j d ur| j  | j D ]\}} | qd S r6   )r&   r  r"   r  r  r!  r3   r4   r"  v  s
   

z=MambaRadixCache.all_mamba_values_flatten.<locals>._dfs_helperr   r   r$  r7   r3   r!  r4   all_mamba_values_flattens  s   
 z(MambaRadixCache.all_mamba_values_flattenstrc              	   C  s:   | j  }|  }d||  d|d|d| j  d	S )NzAvailable full tokens: z (full_available_size=z + full_evictable_size=z )
Full LRU list evictable size: 
)r   available_sizer   r   r   )r2   full_available_sizer   r3   r3   r4   available_and_evictable_str  s   
z+MambaRadixCache.available_and_evictable_strr$   r   (Tuple[List[torch.Tensor], TreeNode, int]c           
      C  s   | j }| |}g }d}|}t|dkrn||j v rn|j| }|jdur+t|}|}| |j|}|t|jk rJ| |j||}	|	|	j
 |	}n$|	|j
 |}||d }t|ra| |}t|dkrn||j v s|jduryt|}|}|||fS )a^  
        Mamba prefix matching helper. It factors in the sliding window size such that
        the matched node is guaranteed to either 1. connected to root without mamba tombstone,
        or 2. the number of matching tokens from the matched node to the last mamba tombstone
        node is greater than or equal to the sliding window size.
        r   N)r^   r   rX   r"   keysr&   r   r$   _split_noder  r%   )
r2   r$   rQ   	child_keyr%   r   best_last_noder   r   rU   r3   r3   r4   r     s4   	





z$MambaRadixCache._match_prefix_helperOptional[RadixKey]c                 C  s    |j }| jst|dkrdS |S )z#Preprocess the key before matching.r   N)r$   r   rX   )r2   r   r$   r3   r3   r4   r     s   z$MambaRadixCache._match_pre_processorr%   List[torch.Tensor]r   r   c                 C  s  |j }|j}|}| j|| j | j|| j t }|r)||_|d8 }|j}|st	||krIt
 j}	tdd |D |	 |	 }
|
dkrF|
nd}nd}|r|jdur|jdu r| jjd}|du r| | | tddd | jjd}| | |dusJ d|j}| jj|| |d |_n|j}|jd}| jj|| |d| }|rt|}n
tjd	tj| jd
}t||||dS )z Post-process the matched result.gh㈵>c                 s  s    | ]}t |V  qd S r6   )rX   )r   vr3   r3   r4   	<genexpr>  s    z8MambaRadixCache._match_post_processor.<locals>.<genexpr>r   Nr!   r   r   r   r   )r   r   r   mamba_branching_seqlen)	cow_mambar   r   r`   r^   r   r)   r*   r#   rX   r   mamba_cache_chunk_sizesumr&   r   r   r   allocr   r   r   r   	copy_fromr   r   r   r   r   r   r   )r2   r   r%   r   r   r6  r   node_updatecur_timer7   mamba_cache_chunk_aligned_seqlenr5  	dst_index	src_indexr3   r3   r4   r     s`   	


z%MambaRadixCache._match_post_processorr   	split_lenc                 C  s   t  }| ||d  |i|_|j|_d |_|j|_d|_|jd | |_|jd | 	 |_t
 |_| j| |jd urD| j| ||_|j|d  |_|j|d  	 |_||jj| |< | j| | j| |jd ury| j| |S Nr   )r   r   r"   r#   r&   r'   r(   r$   r%   r   r)   r*   r   rb   r   ra   )r2   r$   r   r@  rU   r3   r3   r4   r.    s*   

zMambaRadixCache._split_nodeTuple[int, bool]c           
      C  s  |d usJ dt  |_|| jkr"| j| |jd ur"| j| t|dkr*dS | |}d}t|dkr||j	
 v r|j	| }t  |_| j| |jd urX| j| | |j|}||7 }||d  }||d  }|t|jk r| |j||}|}t|r| |}t|dkr||j	
 v s>d}	t|rt }||_||_| |_||_| j| | j| ||j	|< |  jt|7  _|  jt|7  _||	fS |jd u r||_| j| | j| |  jt|7  _t  |_||	fS d}	| j| | j| t  |_||	fS )Nz$Mamba value should not be None here.r   )r   TFT)r)   r*   r^   r   r\   r&   r   rX   r   r"   r-  r   r$   r.  r   r#   r   r%   ra   r   r   )
r2   rQ   r$   r%   r&   r/  total_prefix_lengthr   rU   mamba_value_existr3   r3   r4   r   $  sf   	








zMambaRadixCache._insert_helperTuple[TreeNode, int]c                 C  s   d}|j jd u rmt|j jdkrm|j | jkr	 ||fS |j jdkr&	 ||fS |j jdks?J d|j jd|j jd|j j| j	|j j
 |t|j j
7 }| j|j  | |j  |j }|j jd u rmt|j jdks||fS )Nr   zGtombstone mamba_lock_ref should always be 0, node.parent.full_lock_ref=z, node.parent.mamba_lock_ref=z, node.parent.id=)r#   r&   rX   r"   r^   r'   r(   r   r   r   r%   r   rb   _delete_tombstone_leaf)r2   rQ   r  r3   r3   r4   r  e  s$    z2MambaRadixCache._iteratively_delete_tombstone_leafc                 C  s   |j d usJ d|jt|jdksJ d|j| |j}|jj|d }||ks5J d| |  jt|j8  _|  j	t|j 8  _	d S )Nz6Invariant violated: leaf node is a tombstone, node.id=r    leaf node has children, node.id= parent does not have child key, )
r&   r   rX   r"   r   r$   r#   popr   r   r2   rQ   r$   r3  r3   r3   r4   r   |  s   
zMambaRadixCache._delete_leafc                 C  s<   t |jdksJ d|j|  jt |j8  _d |_d S )Nr   z&Cannot tombstone a leaf node, node.id=)rX   r"   r   r   r&   rP   r3   r3   r4   r    s   
z(MambaRadixCache._tombstone_internal_nodec                 C  s   |j d u sJ d|jt|jdksJ d|j| |j}|jj|d }||ks5J d| |  jt|j8  _d S )Nz7Deleting a unexpected non-tombstone leaf node, node.id=r   rG  rH  )	r&   r   rX   r"   r   r$   r#   rI  r   rJ  r3   r3   r4   rF    s   
z&MambaRadixCache._delete_tombstone_leafList[TreeNode]c                 C  sD   g }| j g}|r | }|jd ur|| ||j  |s|S r6   )r^   rI  r&   r  extendr"   r#  r2   ret_liststackcur_noder3   r3   r4   ru     s   

z+MambaRadixCache._collect_nontombstone_nodesc                 C  s:   g }| j g}|r| }|| ||j  |s|S r6   )r^   rI  r  rL  r"   r#  rM  r3   r3   r4   rv     s   
z"MambaRadixCache._collect_all_nodesindentc                 C  s   ||fg}|ri|  \}}td| d|j dt|jd|j d|j d| j| d| j	| d|j
  |j D ]#\}}|||d	 f || |jksdJ d
|d| |jqA|sdS dS )z1Prints the radix tree in a human-readable format. rr   ]zfr=zmr=zfll=zmll=zmv=   zkey=z#, self.get_child_key_fn(child.key)=N)rI  rt   r   rX   r$   r'   r(   r   rn   r   r&   r"   r  r  r   )r2   rQ   rQ  rO  current_nodecurrent_indentr$   r   r3   r3   r4   r     s*   




zMambaRadixCache._print_helperc                 C  sp   d}d}| j g}|r4| }|t|j7 }|jd ur!|t|j7 }|j D ]}|jr,q&|| q&|s
||fS rA  )	r^   rI  rX   r%   r&   r"   r#  r8   r  )r2   r   r   rO  rU  r   r3   r3   r4   r     s   
	z"MambaRadixCache._total_size_helperN)r   r   )rC   rG   )rC   r   )r   r   rC   r   )r   r   rC   r   r   )r   r   r   rG   rC   r   r   r   )rC   r   )rm   r   r   rG   rC   r   )r   r   rC   r   )r   r
  rC   r
  )r  r
  rC   r
  )rQ   r   rC   r    r   )rC   r
  )rC   r  )rC   r'  )r$   r   rC   r,  )r   r   rC   r1  )
r   r   r%   r2  r   r   r   r
  rC   r   )r$   r   r   r   r@  r
  rC   r   )rQ   r   r$   r   rC   rB  )rQ   r   rC   rE  )rQ   r   rC   r   )rC   rK  )rQ   r   rQ  r
  rC   r   ))r>   r?   r@   r5   r   r   r   r   r   r   r}   r   r  r   r	  r  r   r   r   r~   r   r   r  r  r  r%  r&  r+  r   r   r   r.  r   r  r   r  rF  ru   rv   r   r   r3   r3   r3   r4   r   s  sN    

"


\
v




#













*
	
J

A





	r   )rC   r	   )4
__future__r   rw   collectionsr   	functoolsr   typingr   r   r   r   r   numpyr	   sglang.srt.distributedr
   -sglang.srt.layers.attention.fla.chunk_delta_hr   r   sglang.srt.mem_cache.allocatorr   r   &sglang.srt.mem_cache.base_prefix_cacher   r   r   r   r   r   r    sglang.srt.mem_cache.memory_poolr    sglang.srt.mem_cache.radix_cacher   r   r   r   sglang.srt.server_argsr   "sglang.srt.managers.schedule_batchr   &sglang.srt.mem_cache.cache_init_paramsr   logging	getLoggerr>   r   r   r)   rE   r   r3   r3   r3   r4   <module>   s4    $	

0 