o
    پi                     @   s   d dl Z d dlmZmZmZ d dlZd dlmZ d dl	Z	e 
eZdd ZG dd dZG dd dZeed	Zee Zd
d Zdeeeef  fddZdS )    N)AnyDictListc                  C   sr   t  dkrtd dd tD } dd tt  D }t | t  dkr(|nd  t  dkr7t| d S d S )Nr   z*[slow_rank_detector] Start benchmarking...c                 S   s   i | ]}|t |qS  )_compute_local_metric).0
bench_namer   r   W/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/utils/slow_rank_detector.py
<dictcomp>   s    
zexecute.<locals>.<dictcomp>c                 S   s   g | ]}d qS Nr   )r   _r   r   r	   
<listcomp>   s    zexecute.<locals>.<listcomp>)	distget_rankloggerinfo_BENCH_NAMESrangeget_world_sizegather_object_analyze_metrics)local_metricsall_metricsr   r   r	   execute   s   
r   c                   @      e Zd Zdd Zdd ZdS )_GemmExecutorc                 C   s,   t jdt jdd| _t jdt jdd| _d S )N)    r   cudadtypedevice)torchrandnbfloat16lhsrhsselfr   r   r	   __init__   s   z_GemmExecutor.__init__c                 C   s   | j | j  d S r   )r$   r%   r&   r   r   r	   __call__   s   z_GemmExecutor.__call__N__name__
__module____qualname__r(   r)   r   r   r   r	   r      s    r   c                   @   r   )_ElementwiseExecutorc                 C   s   t jdddt jdd| _d S )Nr   i'  )i   r   r   )r!   randintint32valuer&   r   r   r	   r(   $   s   z_ElementwiseExecutor.__init__c                 C   s   |  j d7  _ d S )N   )r1   r&   r   r   r	   r)   )   s   z_ElementwiseExecutor.__call__Nr*   r   r   r   r	   r.   #   s    r.   )gemmelementwisec                 C   s    t |   }tjj|ddd}|S )Nmean   )return_moderep)_EXECUTOR_CLS_OF_BENCHtritontestingdo_bench_cudagraph)r   executormsr   r   r	   r   5   s   
r   r   c                    sz   t D ]8 t fdd| D }d| }||  }|  }td d|d|d| |dk r:td	 qd S )
Nc                    s   g | ]}|  qS r   r   )r   mr   r   r	   r   =   s    z$_analyze_metrics.<locals>.<listcomp>r2   z [slow_rank_detector] bench_name=z slowest_rel_speed=z rel_speed_of_rank=z time_of_rank=g?zA[slow_rank_detector] Some ranks are too slow compared with others)	r   r!   tensormaxminitemr   r   warning)r   time_of_rankspeed_of_rankrel_speed_of_rankslowest_rel_speedr   r@   r	   r   ;   s   r   )loggingtypingr   r   r   r!   torch.distributeddistributedr   r:   	getLoggerr+   r   r   r   r.   r9   listkeysr   r   strr   r   r   r   r	   <module>   s    
	