o
    پi                     @  s6  d dl mZ d dlZd dlZd dlmZmZ d dlmZm	Z	m
Z
mZmZmZmZ d dlZd dlmZ d dlmZ d dlmZ erLd dlmZ d d	lmZ eG d
d deZejG dd dZejG dd dZejG dd dZejG dd dZejG dd dZ G dd de
Z!G dd deeZ"dS )    )annotationsN)ABCabstractmethod)TYPE_CHECKINGAny
NamedTupleOptionalProtocolTupleruntime_checkable)BaseTokenToKVPoolAllocator)ReqToTokenPool)RadixCacheMetricsCollector)Req)RadixKeyc                   @  s.   e Zd ZU ded< ded< ded< ded< d	S )
PrefixCacheTraitr   req_to_token_poolr   token_to_kv_pool_allocatorint	page_sizebooldisableN)__name__
__module____qualname____annotations__ r   r   Z/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/mem_cache/base_prefix_cache.pyr      s
   
 r   c                   @  s2   e Zd ZU dZded< dZded< dZded	< dS )
MatchPrefixParamsz@Unified parameters for match_prefix across different cache typesr   keyFr   	cow_mambaNzOptional[Req]req)r   r   r   __doc__r   r    r!   r   r   r   r   r   #   s
   
 r   c                   @  sb   e Zd ZU dZded< dZded< dZded< dZd	ed
< dZd	ed< dZ	ded< dZ
d	ed< dS )InsertParamsz:Unified parameters for insert across different cache typesr   r   NzOptional[torch.Tensor]valuemamba_valuer   r   prev_prefix_lenswa_evicted_seqlenFr   chunkedpriority)r   r   r   r"   r   r$   r%   r&   r'   r(   r)   r   r   r   r   r#   .   s   
 r#   c                   @  s&   e Zd ZU dZded< dZded< dS )InsertResultzResult of an insert operationr   
prefix_lenFr   mamba_existN)r   r   r   r"   r   r,   r   r   r   r   r*   A   s   
 r*   c                   @  s2   e Zd ZU dZded< dZded< dZded< dS )EvictParamsz9Unified parameters for evict across different cache typesr   
num_tokensr   swa_num_tokens	mamba_numN)r   r   r   r"   r   r/   r0   r   r   r   r   r-   I   s
   
 r-   c                   @  s6   e Zd ZU dZdZded< dZded< dZded< dS )EvictResultzResult of an evict operationr   r   num_tokens_evictedswa_num_tokens_evictedmamba_num_evictedN)r   r   r   r"   r2   r   r3   r4   r   r   r   r   r1   R   s
   
 r1   c                   @  sB   e Zd ZU dZded< ded< ded< dZded	< d
Zded< d
S )MatchResulta7  Result of a prefix match operation.

    Attributes:
        device_indices  :   Indices of the KV cache on the device matched by common prefix.
        last_device_node:   The last TreeNode on the device that was matched.
        last_host_node  :   The last TreeNode on the host that was matched.
                            Note that if HiCache is not enabled,
                            this **must** be the same as `last_device_node`.
        host_hit_length :   Length of the KV cache hit on the host, if applicable.
                            0 if HiCache is not enabled.
        mamba_branching_seqlen: The mamba radix cache branching point, which is the longest
                                page-aligned position that could've been cache hit if there
                                exists a mamba state.
    ztorch.Tensordevice_indicesr   last_device_nodelast_host_noder   r   host_hit_lengthNzOptional[int]mamba_branching_seqlen)r   r   r   r"   r   r9   r:   r   r   r   r   r5   [   s   
 r5   c                   @  s,  e Zd ZU dZdZded< dd ZdPddZedd Z	edQddZ
edRdSddZedTddZedUd d!ZedVd$d%ZedWdXd(d)Zd*d+ Zd,d- Zd.d/ Zd0d1 Zd2d3 Zd4d5 Zd6d7 Zd8d9 ZdYd=d>ZdZd?d@ZdZdAdBZdCdD Zd[dEdFZd[dGdHZd[dIdJZd[dKdLZd\dNdOZ dS )]BasePrefixCachez*Cache can be indexed by either rid or key.Nz$Optional[RadixCacheMetricsCollector]metrics_collectorc                 C  s@   ddl m} | }d| jji}|jr||j t|d| _d S )Nr   )get_global_server_args
cache_type)labels)sglang.srt.server_argsr=   	__class__r   extra_metric_labelsupdater   r<   )selfr=   server_argsr?   r   r   r   init_metrics_collectory   s   z&BasePrefixCache.init_metrics_collectornum_evictedr   
start_timefloatc                 C  s>   | j d ur|dkr| j t |  | j | d S d S d S Nr   )r<   observe_eviction_durationtimeperf_counterincrement_eviction_num_tokens)rD   rG   rH   r   r   r   update_eviction_metrics   s   
z'BasePrefixCache.update_eviction_metricsc                 C     d S Nr   rD   r   r   r   reset      zBasePrefixCache.resetparamsr   returnr5   c                 C  rP   rQ   r   rD   rU   r   r   r   match_prefix   rT   zBasePrefixCache.match_prefixTr!   r   	is_insertr   c                 K  rP   rQ   r   )rD   r!   rY   kwargsr   r   r   cache_finished_req   rT   z"BasePrefixCache.cache_finished_reqc                 K  rP   rQ   r   )rD   r!   rZ   r   r   r   cache_unfinished_req   rT   z$BasePrefixCache.cache_unfinished_reqr-   r1   c                 C  rP   rQ   r   rW   r   r   r   evict   rT   zBasePrefixCache.evictnoder   c                 C  rP   rQ   r   )rD   r^   r   r   r   inc_lock_ref   rT   zBasePrefixCache.inc_lock_refswa_uuid_for_lockOptional[str]c                 C  rP   rQ   r   )rD   r^   r`   r   r   r   dec_lock_ref   rT   zBasePrefixCache.dec_lock_refc                 C     dS rJ   r   rR   r   r   r   evictable_size      zBasePrefixCache.evictable_sizec                 C  rc   rJ   r   rR   r   r   r   full_evictable_size   re   z#BasePrefixCache.full_evictable_sizec                 C  rc   rJ   r   rR   r   r   r   swa_evictable_size   re   z"BasePrefixCache.swa_evictable_sizec                 C  rc   rJ   r   rR   r   r   r   protected_size   re   zBasePrefixCache.protected_sizec                 C  rc   rJ   r   rR   r   r   r   full_protected_size   re   z#BasePrefixCache.full_protected_sizec                 C  rc   rJ   r   rR   r   r   r   swa_protected_size   re   z"BasePrefixCache.swa_protected_sizec                 C     t  rQ   NotImplementedErrorrR   r   r   r   
total_size      zBasePrefixCache.total_sizec                 C  rk   rQ   rl   rR   r   r   r   pretty_print   ro   zBasePrefixCache.pretty_printr8   r9   Tuple[torch.Tensor, Any]c                 C  rk   )zA
        Preparing KV cache loading from host to device.
        rl   )rD   r8   r9   r   r   r   init_load_back   s   zBasePrefixCache.init_load_backc                 C  rk   )zK
        Notify the cache controller to start the KV cache loading
        rl   rR   r   r   r   ready_to_load_host_cache      z(BasePrefixCache.ready_to_load_host_cachec                 C  rk   )zs
        Check HiCache related activities to update radix tree and synchronize across TP workers if needed
        rl   rR   r   r   r   check_hicache_events   rt   z$BasePrefixCache.check_hicache_eventsc                 C  s   g S rQ   r   rR   r   r   r   take_events   re   zBasePrefixCache.take_eventsc                 C  rc   NFr   rR   r   r   r   supports_swa   re   zBasePrefixCache.supports_swac                 C  rc   rw   r   rR   r   r   r   supports_mamba   re   zBasePrefixCache.supports_mambac                 C  rc   rw   r   rR   r   r   r   is_chunk_cache   re   zBasePrefixCache.is_chunk_cachec                 C  s
   |    S rQ   )rz   rR   r   r   r   is_tree_cache   s   
zBasePrefixCache.is_tree_cachestrc                 C  s.   | j  }|  }d||  d|d|dS )NzAvailable tokens: z (available_size=z + evictable_size=z)
)r   available_sizerd   )rD   r}   rd   r   r   r   available_and_evictable_str   s   
z+BasePrefixCache.available_and_evictable_str)rG   r   rH   rI   )rU   r   rV   r5   )T)r!   r   rY   r   )r!   r   )rU   r-   rV   r1   )r^   r   rQ   )r^   r   r`   ra   )r8   r   r9   r   rV   rq   )rV   r   )rV   r   )rV   r|   )!r   r   r   r"   r<   r   rF   rO   r   rS   rX   r[   r\   r]   r_   rb   rd   rf   rg   rh   ri   rj   rn   rp   rr   rs   ru   rv   rx   ry   rz   r{   r~   r   r   r   r   r;   r   sJ   
 

	








r;   )#
__future__r   dataclassesrL   abcr   r   typingr   r   r   r   r	   r
   r   torchsglang.srt.mem_cache.allocatorr    sglang.srt.mem_cache.memory_poolr   sglang.srt.metrics.collectorr   "sglang.srt.managers.schedule_batchr    sglang.srt.mem_cache.radix_cacher   r   	dataclassr   r#   r*   r-   r1   r5   r;   r   r   r   r   <module>   s4    $

