o
     i                  
   @   s   d dl Z d dlZd dlZd dlZd dlZd dlmZ d dlm	Z	m
Z
 d dlZdd ZdddZ		dd
edede
e	eef  fddZdd ZdefddZedkrje jddZejdedd e Zeej dS dS )    N)quote)DictListc                    s   | st d d S t| d  }dd |D  | D ]|D ]}t | tt|  |< qqd fdd|D }t | t dt|  | D ]d fd	d|D }t | qJd S )
Nz
Empty listr   c                 S   s   i | ]
}|t t|d qS )
   )maxlen.0header r   R/home/ubuntu/.local/lib/python3.10/site-packages/xformers/profiler/find_slowest.py
<dictcomp>   s    z+print_json_as_dataframe.<locals>.<dictcomp>z  c                 3   s"    | ]}|d  |  V  qdS <Nr   r   )
col_widthsr   r   	<genexpr>    s     z*print_json_as_dataframe.<locals>.<genexpr>-c                 3   s*    | ]}t | d  |  V  qdS r   )strr   r   rowr   r   r   &   s    
)printlistkeysr   r   r   join)	json_listheadersr
   
header_rowdata_rowr   r   r   print_json_as_dataframe   s$    
r      c                 C   sT   t | jg ddddg dg jdgdd }dd	 |d | D S )
N)namelog_nameduritemsr    r!   r"   F)	ascendingc                 S   (   g | ]\}}||j d  dddqS )  .2f ms)r    std_devr"   r	   idxr   r   r   r   
<listcomp>7       zAcompute_std_dev_of_event_durations_over_ranks.<locals>.<listcomp>)r   filtergroupbysumstdsort_valuesiterrows)eventstopgrouped_sorted_eventsr   r   r   -compute_std_dev_of_event_durations_over_ranks,   s   


r9      top_klast_kreturnc                 C   sP   t | jddgddg dg }dd |d | || d   D S )Nr!   r"   r#   c                 S   r&   )r'   r(   r)   )r!   nccl_msr+   r,   r   r   r   r.   H   r/   z$sort_nccl_events.<locals>.<listcomp>)r   r0   r1   r2   r4   r5   )nccl_eventsr;   r<   r8   r   r   r   sort_nccl_events=   s   r@   c                 C   s   d}|  drdt|  dt| }ndt| dt|  }tj|dtjtjd}z#ztj|jd	d
gd}W n t	yD   |
   w W | dksNJ n	| dksWJ w tj| |d< ||jjd }||jjd  }||fS )Nz@.traceEvents[] | select(.cat == "kernel") | [.name, .dur] | @csvz.gzz
gunzip -c z | jq --raw-output zjq --raw-output  T)shellstdoutstderrr    r"   )namesr   r!   nccl)endswithr   
subprocessPopenPIPEDEVNULLpdread_csvrC   	Exception	terminatewaitospathbasenamer    r   
startswith)profile_trace_pathjq_pipecmdsubpkernel_eventscommunication_kernelscomputation_kernelsr   r   r   parse_one_fileP   s(   
$r\   cuda_profile_dirc           
      C   sJ  |  d}t  |}t|dkr|  d}t  |}t|dkr'td|  g }g }tjjdd2}t|t|D ]!\}\}}t	d|d  d	t| d
dd |
| |
| q;W d    n1 sgw   Y  t|}t|}t	  t	d tt| t	d t|}	t	d t|	 t	d t|}	t	d t|	 d S )Nz/*trace.json.gzr   z/*.jsonz=Couldnt find any profiling trace in the specified directory:     )max_workerszProcessed file    /T)endflushz/The longest and shortest communication_kernels:z

z>The standard deviation of nccl kernels durations across ranks:zEThe standard deviation of computation kernels durations across ranks:)globr   rN   
concurrentfuturesThreadPoolExecutor	enumeratemapr\   r   appendrL   concatr   r@   r9   )
r]   cuda_profile_path_nameprofile_filesrZ   r[   executorindexcomm_kscomp_ksstd_dfr   r   r   print_profiling_infol   sF   






	
rt   __main__zProcess CUDA profile directory.)descriptionzThe CUDA profile directory)typehelp)r   )r:   r:   )argparseconcurrent.futuresrf   re   rQ   rH   shlexr   typingr   r   pandasrL   r   r9   intr   r@   r\   rt   __name__ArgumentParserparseradd_argument
parse_argsargsr]   r   r   r   r   <module>   s4   

+