o
    پiO                  	   @   s  d Z ddlZddlmZmZ ddlmZ ddlmZmZm	Z	 ddl
mZ ddlmZmZmZmZmZmZ ddlmZ e Zd	efd
efdefdefdefdZdd Zdd ZdedefddZdefddZeddddeefdee dee dee fddZ dS )z
/v1/loads API endpoint for comprehensive load metrics.

This module provides the /v1/loads endpoint which returns detailed scheduler
metrics for load balancing, monitoring, and capacity planning.
    N)datetimetimezone)Optional)	APIRouterDependsHTTPException)Response)DisaggregationMetricsGetLoadsReqOutputLoRAMetricsMemoryMetricsQueueMetricsSpeculativeMetrics)__version__memoryspecloradisaggqueues)r   speculativer   disaggregationr   c                  C   s   ddl m}  |  jS )z6Dependency to get tokenizer_manager from global state.r   get_global_state)"sglang.srt.entrypoints.http_serverr   tokenizer_managerr    r   S/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/entrypoints/v1_loads.py_get_tokenizer_manager1   s   r   c                 C   s   dd | D S )zIFactory for dataclasses.asdict() that excludes None values and timestamp.c                 S   s&   i | ]\}}|d ur|dkr||qS )N	timestampr   ).0kvr   r   r   
<dictcomp>:   s   & z'_loads_dict_factory.<locals>.<dictcomp>r   )itemsr   r   r   _loads_dict_factory8   s   r$   
load_dictsreturnc              	   C   s   | sdddddddS t | }tdd | D tdd | D tdd | D ttdd | D | d	ttd
d | D | dttdd | D | d	dS )z*Compute aggregate metrics from load dicts.r   g        )total_running_reqstotal_waiting_reqs
total_reqsavg_token_usageavg_throughputavg_utilizationc                 s       | ]}|d  V  qdS )num_running_reqsNr   r   dr   r   r   	<genexpr>K       z%_compute_aggregate.<locals>.<genexpr>c                 s   r-   )num_waiting_reqsNr   r/   r   r   r   r1   L   r2   c                 s   s     | ]}|d  |d  V  qdS )r.   r3   Nr   r/   r   r   r   r1   M   s    
c                 s   r-   )token_usageNr   r/   r   r   r   r1   P   r2      c                 s   r-   )gen_throughputNr   r/   r   r   r   r1   Q   r2      c                 s   r-   )utilizationNr   r/   r   r   r   r1   R   r2   )lensumround)r%   nr   r   r   _compute_aggregate=   s$   	r=   c              
      s  g }t tD ]I}d|jvrq|jd \}}d|j }|d| d|  |d| d|  | D ]}t||jd}|durO|| d|j d|  q4qt	 D ]l\ \}}	t
 fd	d
| D sgqUt |	D ]T}d|jvrtql|jd \}}d| d|j }|d| d|  |d| d|  | D ]#}t| d}
|
rt|
|jd}|dur|| d|j d|  qqlqUtd|d ddS )zFormat load metrics in Prometheus text exposition format.

    Metrics are derived from dataclass field metadata, providing a single source of truth.
    metricsglang_z# HELP  z# TYPE Nz
{dp_rank="z"} c                 3   s    | ]	}t | d V  qd S )N)getattr)r   load	attr_namer   r   r1   j   s    z+_format_loads_prometheus.<locals>.<genexpr>_
z(text/plain; version=0.0.4; charset=utf-8)content
media_type)dataclassesfieldsr
   metadatanameappendrA   dp_rank_OPTIONAL_METRIC_SECTIONSr#   anyr   join)load_resultslinesfmetric_typedescriptionmetric_namerB   valueprefixdataclass_typesectionr   rC   r   _format_loads_prometheusV   sN   

r\   z	/v1/loadsrN   includeformatc           
   
      s   |rdd | dD nd}z|j|| dI dH }W n ty/ } ztdt|dd}~ww |dkr8t|S g }|D ]}tj|td	}	|	d
 |	d  |	d< |	|	 q<t
tj tt||t|dS )a  
    Get comprehensive load metrics for all DP ranks.

    Query Parameters:
        dp_rank: Filter to specific DP rank (optional)
        include: Comma-separated sections to include (optional)
                 Options: core, memory, spec, lora, disagg, queues, all
                 Default: all
        format: Response format - 'json' (default) or 'prometheus'

    Returns:
        JSON response with timestamp, version, dp_rank_count, per-DP-rank loads, and aggregates
    c                 S   s   g | ]}|  qS r   )strip)r   sr   r   r   
<listcomp>   s    zget_loads.<locals>.<listcomp>,N)r]   rN   i  )status_codedetail
prometheus)dict_factoryr.   r3   num_total_reqs)r   versiondp_rank_countloads	aggregate)split	get_loads
ValueErrorr   strr\   rI   asdictr$   rM   r   nowr   utc	isoformatr   r9   r=   )
rN   r]   r^   r   include_listrR   erj   rB   r0   r   r   r   rm      s0   rm   )!__doc__rI   r   r   typingr   fastapir   r   r   fastapi.responsesr   sglang.srt.managers.io_structr	   r
   r   r   r   r   sglang.versionr   routerrO   r   r$   listdictr=   r\   getintro   rm   r   r   r   r   <module>   s>    	,