o
    پig:                     @  s   d dl mZ d dlZd dlZd dlZd dlmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZmZ d dlmZ d d	lmZ erFd d
lmZ eeZG dd dZ	ddddZdS )    )annotationsN)TYPE_CHECKING)DisaggregationMode)envs)ScheduleBatch)
ceil_alignraise_error_or_warn)disable_request_loggingWatchdogRaw)	Schedulerc                   @  s   e Zd Zd ddZd ddZd ddZd d	d
Zd ddZd ddZd!ddZ	d ddZ
d ddZd ddZd ddZd ddZdS )"SchedulerRuntimeCheckerMixinselfr   c                 C  s8   | j  }| j }| j||  }|| j }||||fS N)token_to_kv_pool_allocatoravailable_size
tree_cacheevictable_sizemax_total_num_tokens)r   r   r   num_usedtoken_usage r   g/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/managers/scheduler_runtime_checker_mixin.py_get_token_info   s
   


z,SchedulerRuntimeCheckerMixin._get_token_infoc           
      C  s   | j  o	| j  }| j }|r| j  nd}| jj }|r%| j  nd}| jj	||  }| jjj	||  }|| jj	 }|| jjj	 }	||||	||||fS Nr   )
r   supports_mambais_tree_cacher   r   full_evictable_sizereq_to_token_pool
mamba_poolmamba_evictable_sizesize)
r   is_mamba_radix_cachefull_available_sizer   mamba_available_sizer    full_num_usedmamba_num_usedfull_token_usagemamba_usager   r   r   _get_mamba_token_info   s2   
z2SchedulerRuntimeCheckerMixin._get_mamba_token_infoc           	      C  sl   | j  }| j }| j  }| j }| j||  }| j||  }|| j }|| j }||||||||fS r   )r   r#   r   r   swa_available_sizeswa_evictable_sizefull_tokens_per_layerswa_tokens_per_layer)	r   r#   r   r*   r+   r%   swa_num_usedr'   swa_token_usager   r   r   _get_swa_token_info<   s*   





z0SchedulerRuntimeCheckerMixin._get_swa_token_infoc           
      C  sv   |   \}}}}}}}}|dkp|dk}d| jd|d|d| j d| jd|d|d	| j d
}	||	fS )Nr   zself.full_tokens_per_layer=z, full_available_size=, full_evictable_size=(, self.tree_cache.full_protected_size()=z
self.swa_tokens_per_layer=z, swa_available_size=z, swa_evictable_size=z', self.tree_cache.swa_protected_size()=
)r0   r,   r   full_protected_sizer-   swa_protected_size)
r   r%   r.   _r#   r   r*   r+   memory_leak	token_msgr   r   r   _check_hybrid_memoryT   s,   
"z1SchedulerRuntimeCheckerMixin._check_hybrid_memoryc                 C  s  |   \}}}}}}}}|| j kp|| j k}|rt| jj | jj  }	t| j	  }
tt
d| jjd }||	 |
 }t| jjj }t| j  }tt
| jjj}|| | }d|d|d| jjd| j d|d|d| jjjd	| j d
t|dkr|nd  dt|dkr|nd  d}||fS d|d|d| jjd| j d|d|d| jjjd	| j d}||fS )N   zfull_available_size=r1   z', self.token_to_kv_pool_allocator.size=r2   z
mamba_available_size=z, mamba_evictable_size=z), self.req_to_token_pool.mamba_pool.size=z), self.tree_cache.mamba_protected_size()=z, leaked_full_pages=r   z, leaked_mamba_pages=r3   )r)   r   r4   mamba_protected_sizesetr   
free_pagestolistrelease_pagesall_values_flattenranger!   r   r   
free_slotsall_mamba_values_flattenlen)r   r%   r&   r6   r#   r   r$   r    r7   free_full_pagescached_full_pagesexpected_full_pagesleaked_full_pagesfree_mamba_pagescached_mamba_pagesexpected_mamba_pagesleaked_mamba_pagesr8   r   r   r   _check_mamba_memoryf   sx   




$	$z0SchedulerRuntimeCheckerMixin._check_mamba_memoryc              	   C  sT   |   \}}}}| j }|| | j| k}d| jd|d|d|d	}||fS )Nzself.max_total_num_tokens=z, available_size=, evictable_size=, protected_size=r3   )r   r   protected_sizer   )r   r6   r   r   rP   r7   r8   r   r   r   _check_radix_cache_memory   s   
 z6SchedulerRuntimeCheckerMixin._check_radix_cache_memorybatchr   returnintc                 C  sp   d}|j D ]0}|j|jksJ d}|js1|j}| jdkr,t|| j}|j| j dks,J ||j }||7 }q|S )Nr   r:   )reqskv_committed_freedkv_overallocated_freedkv_allocated_len	page_sizer   cache_protected_len)r   rR   retrequncached_lenallocated_lenr   r   r   _get_batch_uncached_size   s   



z5SchedulerRuntimeCheckerMixin._get_batch_uncached_sizec           
      C  s   | j }|d u r	d S | jjpd}|dkrtd d S |  \}}}}| j }| |}|j	
 rC| jd urC| j sC|| | j7 }tj dkr]d|d|d|d|}t| || | | }	|	| jksuJ d|	d| jd S )	Nr:   zGRuntime memory check (busy) is not supported when speculation topk > 1.z"[Mem Check (BUSY)] available_size=rN   rO   z, uncached_size=z Mem Leak Detected! total_tokens=z vs self.max_total_num_tokens=)
last_batchserver_argsspeculative_eagle_topkwarningswarnr   r   rP   r_   forward_mode	is_extendrunning_batchis_emptyr   *SGLANG_ENABLE_STRICT_MEM_CHECK_DURING_BUSYgetloggerinfor   )
r   current_batch	spec_topkr6   r   r   rP   uncached_sizelog_msgtotal_tokensr   r   r   self_check_during_busy   s2   



z3SchedulerRuntimeCheckerMixin.self_check_during_busyc                 C  sp   | j tjkr| jj| jj }n| jj}t| jj|kr6dt| jj d| jj d}t| t	j
 d| d S d S )Nz6req_to_token_pool memory leak detected!available_size=z, total_size=r3   count_req_pool_leak_warnings)disaggregation_moder   DECODEr   r!   pre_alloc_sizerD   rB   r   r   *SGLANG_ENABLE_STRICT_MEM_CHECK_DURING_IDLErj   )r   req_total_sizemsgr   r   r   _check_req_pool   s$   
z,SchedulerRuntimeCheckerMixin._check_req_poolc                 C  s  | j r
|  \}}n| jr| j r|  \}}n|  \}}|r0d| }t| tj	
 d| |   | jr| jrt | jjd kr| j r^|  \}}}}}}}}t||}	t||}
n| jrn|  \}	}}
}}}}}n|  \}	}
}}t| jj}|| j_|	| j_t|
d| j_d| j_t| j| j_ t| j!| j_"| j#t$j%krt| j&j'| j_(t| j)| j_*| j#t$j+krt| j,j'| j_-t| j.j'| j_/| j0| j | 1  d S )Nz1token_to_kv_pool_allocator memory leak detected! count_memory_leak_warnings      r   )2is_hybrid_swar9   is_hybrid_ssmr   r   rM   rQ   r   r   rw   rj   rz   enable_metrics!current_scheduler_metrics_enabledtimeperf_countermetrics_collectorlast_log_timer0   maxr)   r   rD   rg   rU   statsnum_running_reqsnum_used_tokensroundr   gen_throughputwaiting_queuenum_queue_reqsgrammar_managernum_grammar_queue_reqsrt   r   PREFILLdisagg_prefill_bootstrap_queuequeuenum_prefill_prealloc_queue_reqsdisagg_prefill_inflight_queuenum_prefill_inflight_queue_reqsru   disagg_decode_prealloc_queuenum_decode_prealloc_queue_reqsdisagg_decode_transfer_queuenum_decode_transfer_queue_reqs	log_stats_publish_kv_events)r   r7   r8   ry   r%   r.   r'   r/   r6   r   r   r   r   r   r   check_memory   s   



z)SchedulerRuntimeCheckerMixin.check_memoryc                 C  s@   | j  r| jr| j  s| jr| j  r| j   d S d S d S r   )r   r   r~   supports_swar   r   sanity_check)r   r   r   r   check_tree_cache5  s   z-SchedulerRuntimeCheckerMixin.check_tree_cachec                 C  s   | j tjkrt| jdkrd S n'| j tjkr7t| jt| jj t| j	j }| j
jr3|t| jj7 }|r7d S |   |   | j| _|   d S r   )rt   r   r   rD   r   ru   r   r   r   r   ra   ,disaggregation_decode_enable_offload_kvcachedecode_offload_managerongoing_offloadr   r   init_new_token_rationew_token_ratiomaybe_sleep_on_idle)r   
queue_sizer   r   r   self_check_during_idle=  s&   

z3SchedulerRuntimeCheckerMixin.self_check_during_idleN)r   r   )r   r   rR   r   rS   rT   )__name__
__module____qualname__r   r)   r0   r9   rM   rQ   r_   rr   rz   r   r   r   r   r   r   r   r      s    





0


"

Kr   F	schedulerr   watchdog_timeoutfloatsoftboolrS   r   c                   s2   d
 fdd}t d fdd fdd|||d	S )NrS   strc                    sn    j st rdS  jr  \} }n jr! j r!  \} }n  \} }d j	
 d j	jd| S )N z!scheduler.cur_batch.batch_size()=z
scheduler.cur_batch.reqs=r3   )is_initializingr	   r~   r9   r   r   r   rM   rQ   	cur_batch
batch_sizerU   )r6   info_msgr   r   r   	dump_infoU  s   z,create_scheduler_watchdog.<locals>.dump_infor   c                     s   t  ddS )N
forward_ctr   )getattrr   r   r   r   <lambda>f  s    z+create_scheduler_watchdog.<locals>.<lambda>c                     s    j p
t dd d uS )Nr   )r   r   r   r   r   r   r   g  s    )
debug_nameget_counter	is_activer   r   r   )rS   r   r
   )r   r   r   r   r   r   r   create_scheduler_watchdogR  s   

r   )F)r   r   r   r   r   r   rS   r   )
__future__r   loggingr   rc   typingr   sglang.srt.disaggregation.utilsr   sglang.srt.environr   "sglang.srt.managers.schedule_batchr   sglang.srt.utils.commonr   r   sglang.srt.utils.request_loggerr	   sglang.srt.utils.watchdogr   sglang.srt.managers.schedulerr   	getLoggerr   rk   r   r   r   r   r   r   <module>   s&    
  @