o
    پi.'                     @  s   d dl mZ d dlZd dlZd dlmZmZmZ d dlZd dl	m
Z
mZmZmZmZ d dlmZmZmZ d dlmZ erNd dlmZ d dlmZ d d	lmZ eeZG d
d de
ZdS )    )annotationsN)TYPE_CHECKINGListSet)BasePrefixCacheEvictParamsEvictResultMatchPrefixParamsMatchResult)IOHandleRadixTreeCppTreeNodeCpp)RadixKey)Req)CacheInitParams)
ServerArgsc                   @  s   e Zd Z	d7d8dd	Zd9ddZdd Zd:ddZd;ddZd<ddZd<d d!Z	d=d$d%Z
d&d' Zd(d) Zd*d+ Zd>d?d0d1Zd7d@d2d3Zd4d5 Zd6S )ARadixCacheCppFparamsr   server_argsr   enable_write_cancelboolc                 C  s   |j | _ || _|jdu sJ dt | _t | _|jdkrdnd| _|j| _| jj	| _	|j
| _
|j| _| j | _|j| _|jrD|   |jsXt| j | jd | jd| _d | _d S td)NFz1HiRadixCache does not support kv cache events yetwrite_through      )disabled	page_size	host_sizewrite_through_thresholdHost cache is not supported yet)disabler   enable_kv_cache_eventssetongoing_write_throughongoing_load_backhicache_write_policyr   token_to_kv_pool_allocatordevicereq_to_token_poolr   get_kvcachekv_cachetp_cache_grouptp_groupenable_metricsinit_metrics_collectorenable_hierarchical_cacher   treecache_controllerNotImplementedError)selfr   r   r    r3   X/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/mem_cache/radix_cache_cpp.py__init__!   s6   
zRadixCacheCpp.__init__lList[torch.Tensor]returntorch.Tensorc                 C  s>   t |dkrtjdtj| jdS t |dkr|d S t|S )z
        Merge a list of tensors into a single tensor.
        Args:
            l (List[torch.Tensor]): List of tensors to merge.
        Returns:
            torch.Tensor: Merged tensor.
        r   )dtyper&   r   )lentorchemptyint64r&   cat)r2   r6   r3   r3   r4   _merge_tensorM   s
   
zRadixCacheCpp._merge_tensorc                 C  s    | j d ur	td| j  d S )Nr   )r0   r1   r/   resetr2   r3   r3   r4   rA   \   s   
zRadixCacheCpp.resetr	   r
   c                 C  s2   |j }| j|j\}}}}t| ||||dS )N)device_indiceslast_device_nodelast_host_nodehost_hit_length)keyr/   match_prefix	token_idsr
   r@   )r2   r   rG   device_indices_vechost_indices_lengthnode_gpunode_cpur3   r3   r4   rH   b   s   
zRadixCacheCpp.match_prefixrG   r   valueintc                 C  s>   | j |j|\}}| jdu rt|dksJ d|S td)aT  
        Insert a key-value pair into the radix tree.
        Args:
            key (RadixKey): The key to insert, represented as a RadixKey.
            value (torch.Tensor): The value to associate with the key.
        Returns:
            int: Number of device indices that were already present in the tree before the insertion.
        Nr   zImplementation errorr   )r/   writing_throughrI   r0   r;   r1   )r2   rG   rN   ongoing_writelengthr3   r3   r4   _insertn   s
   	
zRadixCacheCpp._insertnoder   c                 C     | j |d dS )z
        Decrement the reference count of a node to root of the radix tree.
        Args:
            node (TreeNodeCpp): The handle of the node to decrement the reference count for.
        FNr/   lock_refr2   rT   r3   r3   r4   dec_lock_ref~      zRadixCacheCpp.dec_lock_refc                 C  rU   )z
        Increment the reference count of from a node to root of the radix tree.
        Args:
            node (TreeNodeCpp): The handle of the node to increment the reference count for.
        TNrV   rX   r3   r3   r4   inc_lock_ref   rZ   zRadixCacheCpp.inc_lock_refr   r   c                 C  sV   t  }|j}| j|}d}|D ]}|t|7 }| j| q| || t	|dS )Nr   )num_tokens_evicted)
timeperf_counter
num_tokensr/   evictr;   r%   freeupdate_eviction_metricsr   )r2   r   
start_timer_   evicted_device_indicesnum_evictedindicer3   r3   r4   r`      s   
zRadixCacheCpp.evictc                 C  
   | j  S N)r/   evictable_sizerB   r3   r3   r4   ri         
zRadixCacheCpp.evictable_sizec                 C  rg   rh   )r/   protected_sizerB   r3   r3   r4   rk      rj   zRadixCacheCpp.protected_sizec                 C  rg   rh   )r/   
total_sizerB   r3   r3   r4   rl      rj   zRadixCacheCpp.total_sizeTreqr   	is_insertc           	      C  s   |j dusJ | }|j|j d| }| jj|j d|f jtjdd}t	|j
| j | j }|| j | j }|r\| t||j|}||ksMJ d||k r[| j|||  n
| j|||  ||k rt| j||d  | |j dS )zCache request when it finishes.NTr:   copyWrong prefix indices)req_pool_idxpop_committed_kv_cacheorigin_input_ids
output_idsr'   req_to_tokentor<   r>   r;   prefix_indicesr   rS   r   	extra_keyr%   ra   rY   	last_node)	r2   rm   rn   kv_committed_lenrI   
kv_indicesold_prefix_lenpage_aligned_overall_lennew_prefix_lenr3   r3   r4   cache_finished_req   s4   

z RadixCacheCpp.cache_finished_reqc                 C  s|  |j dusJ |j}t|}| jj|j d|f jtjdd}t|j| j	 | j	 }| 
t||j|}||ks<J d| jt||jj\}}	}
}	| |}|t|ksXJ ||k rw| j|||  ||| }|| jj|j ||f< |j|
kr| |j | |
 t||  krt|| j	 k sJ  J | j	dkrt||k rt||t|d g|_n||_|
|_dS )z$Cache request when it is unfinished.NTro   rq   r   )rr   fill_idsr;   r'   rv   rw   r<   r>   rx   r   rS   r   ry   r/   rH   rI   r@   r%   ra   rz   rY   r[   r?   )r2   rm   chunkedrI   prefill_lenr|   r}   r   new_indices_vec_new_last_nodenew_indicesreused_indicesr3   r3   r4   cache_unfinished_req   sF   



*
z"RadixCacheCpp.cache_unfinished_reqc                 C  rg   rh   )r/   debug_printrB   r3   r3   r4   pretty_print   rj   zRadixCacheCpp.pretty_printN)F)r   r   r   r   r   r   )r6   r7   r8   r9   )r   r	   r8   r
   )rG   r   rN   r9   r8   rO   )rT   r   )r   r   r8   r   )T)rm   r   rn   r   )rm   r   )__name__
__module____qualname__r5   r@   rA   rH   rS   rY   r[   r`   ri   rk   rl   r   r   r   r3   r3   r3   r4   r       s     
,




&2r   )
__future__r   loggingr]   typingr   r   r   r<   &sglang.srt.mem_cache.base_prefix_cacher   r   r   r	   r
   .sglang.srt.mem_cache.cpp_radix_tree.radix_treer   r   r    sglang.srt.mem_cache.radix_cacher   "sglang.srt.managers.schedule_batchr   &sglang.srt.mem_cache.cache_init_paramsr   sglang.srt.server_argsr   	getLoggerr   loggerr   r3   r3   r3   r4   <module>   s    
