o
    ٷi&1                     @   s   d Z ddlZddlZedZdddZdd Zddd	ZdddZdd Z	dd Z
edkrQe Zede ddlmZ eej ejZe
eeZeD ]Zee qJdS dS )zThis profiler result processor print out the kernel time spent on each Node of the model.
Example of importing profile result file from onnxruntime_perf_test:
    python profile_result_processor.py --input profile_2021-10-25_12-02-41.json
    N)ScanLoopIfc                 C   s   t  }|jdddtdd |jddtddd	 |jd
dtddd	 |jddddd |jdd |jddddd |jdd || S )Nz-iz--inputFz2Set the input file for reading the profile results)requiredtypehelpz--thresholdg{Gz?zfThreshold of run time ratio among all nodes. Nodes with larger ratio will show in top expensive nodes.)r   r   defaultr   z
--providercudazExecution provider to usez--kernel_time_only
store_truez.Only include the kernel time and no fence time)r   actionr   )kernel_time_onlyz-vz	--verbose)r   r   )verbose)argparseArgumentParseradd_argumentstrfloatset_defaults
parse_args)argvparser r   e/home/ubuntu/.local/lib/python3.10/site-packages/onnxruntime/transformers/profile_result_processor.pyparse_arguments   s@   
r   c                 C   sT   t d|  d t| }t|}W d    n1 sw   Y  t|ts(J |S )Nzloading profile output z ...)printopenjsonload
isinstancelist)profile_fileopened_file	sess_timer   r   r   load_profile_json;   s   
r#   c                 C   sL  i }i }i }d}d}| D ]i}|d dkr|d dkrd}|sq|d dkrud	|v rud
|v rud|d
 v ru|d }|d
 d }	|	t v rBq|	sJd| d}	||v ra||  |d	 7  < ||  d7  < n|d	 ||< d||< |	||< ||d	 7 }q|s{dgS g }
|
d|d dd |
d |
d t| dd ddD ]0\}}|| }||k rq|| }|t| }|
|dd|d dd|dd|dd| 	 qi }| D ]\}}	|| }|	|v r||	  |7  < q|||	< q|
d |
d |
d  t| d!d ddD ]\}	}|| }|
|dd|d dd|	  q	|
S )"a<  Parse profile data and output nodes in two sections - nodes in the original order, and top expensive nodes.

    Args:
        sess_time (List[Dict]): profile data
        threshold (int, optional): Minimum ratio of duration among all. Defaults to 0.

    Returns:
        List[str]: lines of string for output.
    r   FcatSessionnamesession_initializationTKerneldurargsop_name()   zNo kernel record found!z%
Top expensive kernels with Time% >= d   .2f:@----------------------------------------------------------------u&   Total(μs)	Time%	Calls	Avg(μs)	Kernelc                 S      | d S Nr.   r   xr   r   r   <lambda>x       z&parse_kernel_results.<locals>.<lambda>keyreverse10d	      Y@5.2f5d8.1fz
Group kernel time by operator:u   Total(μs)	Time%	Operatorc                 S   r3   r4   r   r5   r   r   r   r7      r8   )_NODES_TYPE_CONTAINING_SUBGRAPHappendsorteditemsr   )r"   	thresholdkernel_name_to_op_namekernel_timekernel_freqtotalsession_inititemkernel_namer+   linesdurationratiocallsavg_timeop_timer   r   r   parse_kernel_resultsE   sf   
(

4



&rT   Fc                 C   s  g }i }i }i }d}| D ]}|d dkrd|v rd|v rd|d v r|d  dd	 d
d	 dd	}	d|d v rj|d d dkrCd}
n|d d dkrNd}
n
|d d dkrXd}
|	|vra|
||	< n||	 |
ksiJ n|rmq|d d }|tv rxq|	|v r||	  |d 7  < ||	  d7  < n|d ||	< d||	< ||	 ||d 7 }qg d}d}|D ]?}	||	 }||	 }|t| }|| d }||	d	}||7 }||dd|dd|dd|dd|dd|dd|	  q|d|d dd  |d! |d" t| d#d$ d%d&D ]A\}	}|| }||k rq||	 }|t| }|| d }||	d	}||dd|dd|dd|dd|dd|	  q|S )'a  Parse profile data and output nodes in two sections - nodes in the original order, and top expensive nodes.

    Args:
        sess_time (List[Dict]): profile data
        kernel_time_only (bool, optional): Only include items for kernel time. Defaults to False.
        threshold (int, optional): Minimum ratio of duration among all. Defaults to 0.

    Returns:
        List[str]: lines of string for output.
    r   r$   Noder)   r*   r+   r&   _kernel_time _fence_before_fence_afterproviderCPUExecutionProviderCPUCUDAExecutionProviderCUDADmlExecutionProviderDMLr.   )z
Nodes in the original order:r2   u3   Total(μs)	Time%	Acc %	Avg(μs)	Calls	Provider	Nodeg        r>   r<   r=   r?   rA   r@   8sz#
Top expensive nodes with Time% >= r/   r0   r1   r2   u-   Total(μs)	Time%	Avg(μs)	Calls	Provider	Nodec                 S   r3   r4   r   r5   r   r   r   r7      r8   z$parse_node_results.<locals>.<lambda>Tr9   )replacerB   rC   r   getrD   rE   )r"   r   rF   node_name_list	node_time	node_freqnode_providerrJ   rL   	node_namedevicer+   rN   before_percentagerO   rQ   rR   
percentagerZ   rP   r   r   r   parse_node_results   sr   (

6


:rl   c                 C   s:  i }i }d}i }i }i }i }d}i }	| D ]}
|
d dkrd|
v rd|
v rd|
d v r|
d d }|t v r5qd|
d vr]d|
d	 v r\||v rP||  |
d 7  < n|
d ||< ||
d 7 }q|
d dd
}||	v rr|	|  d7  < nd|	|< | d| }||v r||  |
d 7  < ||  d7  < n
|
d ||< d||< ||v r||  |
d 7  < n|
d ||< ||v r||  |
d 7  < ||  d7  < n
|
d ||< d||< ||
d 7 }qd
dg}|d |d t| dd ddD ]G\}}||d}|| }|| }|||  }|| }|| }||dd|d dd|dd|d dd|dd|dd|dd|  q|d
dg7 }|d |d t| dd ddD ]C\}}|d}|d }|d }|dd
}|| }|| }|||  }||dd|d dd|dd|dd|d d|  qW|S )!zGroup results by operator name.

    Args:
        sess_time (List[Dict]): profile data

    Returns:
        List[str]: lines of string for output.
    r   r$   rU   r)   r*   r+   rZ   fencer&   rW   r.   r1   zGrouped by operatorr2   uM   Total(μs)	Time%	Kernel(μs)	Kernel%	Calls	AvgKernel(μs)	Fence(μs)	Operatorc                 S   r3   r4   r   r5   r   r   r   r7   0  r8   z$group_node_results.<locals>.<lambda>Tr9   r<   r=   r>   r?   11dr@   z14.1fzGrouped by provider + operatoru<   Kernel(μs)	Provider%	Calls	AvgKernel(μs)	Provider	Operatorc                 S   r3   r4   r   r5   r   r   r   r7   >  r8   ExecutionProviderz9.2fra   )rB   rc   rC   rD   rE   splitrb   )r"   op_kernel_timeop_kernel_recordstotal_kernel_timeprovider_op_kernel_timeprovider_op_kernel_recordsprovider_kernel_timeop_fence_timetotal_fence_timeprovider_counterrL   r+   rZ   r:   rN   rH   
fence_timekernel_time_ratio
total_time
time_ratiokernel_callsavg_kernel_timepartsshort_eprQ   provider_time_ratior   r   r   group_node_results   s   	(

F


2r   c                 C   s8   t | }t||j}|t||j|j7 }|t|7 }|S N)r#   rT   rF   rl   r   r   )r    r*   profile_recordsrN   r   r   r   process_resultsM  s
   r   __main__	Arguments)setup_loggerr   )r   )Fr   )__doc__r   r   	frozensetrB   r   r#   rT   rl   r   r   __name__	argumentsr   benchmark_helperr   r   inputr    resultsliner   r   r   r   <module>   s*   
*


OWb



