o
    ic4                     @   s^  U d dl Z d dlmZ d dlmZ d dlmZmZmZ d dl	m
Z
mZ d dlmZmZmZ d dlmZmZmZ d dlmZ d d	lmZmZ d d
lmZmZmZmZmZmZ d dl m!Z! zd dl"Z#W n e$yn   e!dZ#Y nw eG dd dZ%eG dd dZ&eG dd dZ'e'e&B Z(ee)d< eG dd dZ*eG dd deZ+G dd deZ,dS )    N)defaultdict)Callable)asdict	dataclassfield)Any	TypeAlias)
DeviceType_KinetoEvent_ProfilerResult)
_EventType_ExperimentalConfig_ProfilerEvent)FunctionEvent)ProfilerActivityprofile)TablePrinterevent_has_moduleevent_is_torch_opevent_module_reprevent_torch_op_stack_traceindent_string)PlaceholderModulepandasc                   @   sh   e Zd ZU eed< dZded< eedZed  ed< dZ	e
ed< ed	d
 Zedd Zedd ZdS )_ModuleTreeNodeeventNz_ModuleTreeNode | Noneparent)default_factorychildren tracec                 C   s   | j jd u pt| j jdkS Nr   )r   r   lenself r%   U/home/ubuntu/vllm_env/lib/python3.10/site-packages/vllm/profiler/layerwise_profile.pyis_leaf&   s   z_ModuleTreeNode.is_leafc                 C   s
   t | jS N)r   r   r#   r%   r%   r&   is_torch_op*   s   
z_ModuleTreeNode.is_torch_opc                 C   s"   | j jtjko| j jd jtjkS )N   )r   tagr   Kinetotypeddevice_typer	   CUDAr#   r%   r%   r&   is_cuda.   s   z_ModuleTreeNode.is_cuda)__name__
__module____qualname__r   __annotations__r   r   listr   r    strpropertyr'   r)   r0   r%   r%   r%   r&   r      s   
 

r   c                   @   s.   e Zd ZU eed< eed< eed< eed< dS )SummaryStatsEntrynamecuda_time_uspct_cuda_timeinvocationsN)r1   r2   r3   r6   r4   floatintr%   r%   r%   r&   r8   6   s
   
 r8   c                   @   s6   e Zd ZU eed< eed< eed< eed< eed< dS )ModelStatsEntryr9   cpu_time_usr:   r;   r    N)r1   r2   r3   r6   r4   r=   r%   r%   r%   r&   r?   >   s   
 r?   
StatsEntryc                   @   s.   e Zd ZU eed< ee ed< edB ed< dS )_StatsTreeNodeentryr   Nr   )r1   r2   r3   rA   r4   r5   r%   r%   r%   r&   rB   J   s   
 rB   c                   @   s  e Zd ZU eed< eddZeee	e
 f ed< eddZeee	e f ed< eddZe	e ed< eddZe	e ed< eddZe	e ed< d	Zed	B ed
< dd Zd1deeef fddZd1deeef fddZdefddZdefddZdeeef fddZe	d2de	eeef  deegef eB fddZdd  Z d!d" Z!d#efd$d%Z"d#efd&d'Z#d(d) Z$d*d+ Z%d,e	e de	eeef  fd-d.Z&d,e	e de	e fd/d0Z'd	S )3LayerwiseProfileResults_kineto_resultsF)init_kineto_event_correlation_map_event_correlation_map_module_tree_model_stats_tree_summary_stats_treeNnum_running_seqsc                 C   s   |    |   |   d S r(   )_build_correlation_map_build_module_tree_build_stats_treesr#   r%   r%   r&   __post_init__]   s   z%LayerwiseProfileResults.__post_init__column_widthsc                 C   s^   t dddddd}|r|jd	i | dd | | jD }tt|| j|dd d d S )
N<      r9   r@   r:   r;   r    c                 S   s,   g | ]\}}|j d ks|jd kr||fqS r   )r:   r@   .0depthrowr%   r%   r&   
<listcomp>h   s
    z=LayerwiseProfileResults.print_model_table.<locals>.<listcomp>c                 S      dd|   d S N|- r%   indentr%   r%   r&   <lambda>p       z;LayerwiseProfileResults.print_model_table.<locals>.<lambda>indent_styler%   )dictupdate_flatten_stats_treerJ   r   r?   print_table _indent_row_names_based_on_depth)r$   rQ   _column_widthsfiltered_model_tabler%   r%   r&   print_model_tableb   s   


z)LayerwiseProfileResults.print_model_tablec                 C   s\   t ddddd}|r|jd
i | dd | | jD }tt|| j|dd d	 d S )NP   rS      r9   r:   r;   r<   c                 S   s"   g | ]\}}|j d kr||fqS rU   r:   rV   r%   r%   r&   rZ   z   s
    
z?LayerwiseProfileResults.print_summary_table.<locals>.<listcomp>c                 S   r[   r\   r%   r`   r%   r%   r&   rb      rc   z=LayerwiseProfileResults.print_summary_table.<locals>.<lambda>rd   r%   )rf   rg   rh   rK   r   r8   ri   rj   )r$   rQ   rk   filtered_summary_tabler%   r%   r&   print_summary_tablet   s   

z+LayerwiseProfileResults.print_summary_tablefilenamec                 C   *   t dd | | jD }|| d S )Nc                 S      g | ]\}}t |qS r%   r   rW   _rY   r%   r%   r&   rZ      s    zHLayerwiseProfileResults.export_model_stats_table_csv.<locals>.<listcomp>)pd	DataFramerh   rJ   to_csvr$   rt   dfr%   r%   r&   export_model_stats_table_csv   s   z4LayerwiseProfileResults.export_model_stats_table_csvc                 C   ru   )Nc                 S   rv   r%   rw   rx   r%   r%   r&   rZ      s    zJLayerwiseProfileResults.export_summary_stats_table_csv.<locals>.<listcomp>)rz   r{   rh   rK   r|   r}   r%   r%   r&   export_summary_stats_table_csv   s   
z6LayerwiseProfileResults.export_summary_stats_table_csvreturnc                 C   s"   d| j i| | j| | jdS )NrL   )metadatasummary_statsmodel_stats)rL   _convert_stats_tree_to_dictrK   rJ   r#   r%   r%   r&   convert_stats_to_dict   s   

z-LayerwiseProfileResults.convert_stats_to_dictr_   depths_rowsre   c                 C   sF   g }| D ]\}}|j dkrqt|}t|j|||_|| q|S r!   )r:   copydeepcopyr   r9   append)r   re   indented_rowsrX   rY   indented_rowr%   r%   r&   rj      s   

z8LayerwiseProfileResults._indent_row_names_based_on_depthc                 C   s2   t t| _| j D ]}| j|  | q
d S r(   )r   r5   rG   rE   eventscorrelation_idr   )r$   r   r%   r%   r&   rM      s   
z.LayerwiseProfileResults._build_correlation_mapc                    sF   g _ j }	 ddtdtd B f fdd |D ]} | qd S )Nr   	curr_nodec                    s   | j dkrd S t| r"t| |d}|r|j| nj| |}| jd u p-t| jdk}|rG|rGt| |t| dd dd}|j| |}| jD ]} || qJd S )Nr*   )r   r   r   c                 S   s   t | S r(   )r   )xr%   r%   r&   rb      s    zSLayerwiseProfileResults._build_module_tree.<locals>._df_traversal.<locals>.<lambda>)until)r   r   r    )	start_tidr   r   r   r   rI   r"   r   )r   r   noder'   child_df_traversalr$   r%   r&   r      s,   

zALayerwiseProfileResults._build_module_tree.<locals>._df_traversalr(   )rI   rE   experimental_event_treer   r   )r$   
event_treerootr%   r   r&   rN      s   

z*LayerwiseProfileResults._build_module_treer   c                    s@    j jtjkr	d S | j j jg } fdd|D }t|d S )Nc                 3   s2    | ]}|  tjkr|  jjkr|V  qd S r(   )r.   r	   r/   r9   r   )rW   r   r   r%   r&   	<genexpr>   s    z@LayerwiseProfileResults._get_kineto_gpu_event.<locals>.<genexpr>)r   r+   r   r,   rG   getr   next)r$   r   correlated_kineto_eventsiteratorr%   r   r&   _get_kineto_gpu_event   s   

z-LayerwiseProfileResults._get_kineto_gpu_eventc                    s   dt f fdd  |S )z Return cuda time in microsecondsr   c                    s@   | j r|  }r| d S d}| jD ]}| |7 }q|S )N     @@r   )r'   r   duration_nsr   )r   gpu_kineto_eventcumulative_cuda_timer   _cumulative_cuda_time_recursiver$   r%   r&   r      s   
zVLayerwiseProfileResults._cumulative_cuda_time.<locals>._cumulative_cuda_time_recursive)r   )r$   r   r%   r   r&   _cumulative_cuda_time   s   	z-LayerwiseProfileResults._cumulative_cuda_timec                    s   t  fdd jD S )Nc                    s   g | ]}  |qS r%   )r   )rW   r   r#   r%   r&   rZ      s    z<LayerwiseProfileResults._total_cuda_time.<locals>.<listcomp>)sumrI   r#   r%   r#   r&   _total_cuda_time   s   z(LayerwiseProfileResults._total_cuda_timec                    s   i   fdd	 	ddtdtd B dtt ffddg _jD ]
}j| q)	 ddtdtd B f fd	d
 g _jD ]
}j | qKd S )Nc                    s   |   d S )Nd   r%   rq   )total_cuda_timer%   r&   r;      s   zALayerwiseProfileResults._build_stats_trees.<locals>.pct_cuda_timer%   r   r   summary_tracec           	         s   t | jrt| j}| }n|  }r"| }| d }nd S ||f }|v rG| j}| j|7  _| j	d7  _	|j|_
ntt|||ddg |d}|r^|j| ||< | jD ]
} || | qe| S )Nr   r*   rp   )rC   r   r   )r   r   r   r   r   r9   r   rC   r:   r<   r;   rB   r8   r   r   )	r   r   r   r9   r:   r   rC   new_noder   )build_summary_stats_tree_dfr;   r$   summary_dictr%   r&   r      s>   






zOLayerwiseProfileResults._build_stats_trees.<locals>.build_summary_stats_tree_dfc           	         s   t | jrt| j}| }| jjd }d}n|  }r/| }| d }d}| j}nd S t	t
|||||d|g d}|rI|j| | jD ]} || qL|S )Ni  r   r   r   rT   )rC   r   r   )r   r   r   r   duration_time_nsr   r9   r   r    rB   r?   r   r   )	r   r   r9   r:   r@   r    r   r   r   )build_model_stats_tree_dfr;   r$   r%   r&   r   '  s:   


zMLayerwiseProfileResults._build_stats_trees.<locals>.build_model_stats_tree_df)Nr%   r(   )	r   r   rB   tupler6   rK   rI   r   rJ   )r$   r   r%   )r   r   r;   r$   r   r   r&   rO      s2   *
%
z*LayerwiseProfileResults._build_stats_treestreec                    s0   g ddt f fdd |D ]} | qS )Nr   r   c                    s0    || jf | jD ]
} ||d d qd S )Nr*   )rX   )r   rC   r   )r   rX   r   df_traversalentriesr%   r&   r   U  s   
zALayerwiseProfileResults._flatten_stats_tree.<locals>.df_traversalrU   )rB   )r$   r   r   r%   r   r&   rh   P  s
   
z+LayerwiseProfileResults._flatten_stats_treec                    s6   g }dt dtt f fdd |D ]} || q|S )Nr   curr_json_listc                    s8   | t| jg d | jD ]} ||d d  qd S )N)rC   r   r   )r   r   rC   r   )r   r   r   r   r%   r&   r   b  s   
zILayerwiseProfileResults._convert_stats_tree_to_dict.<locals>.df_traversal)rB   r5   rf   )r$   r   
root_dictsr   r%   r   r&   r   _  s
   z3LayerwiseProfileResults._convert_stats_tree_to_dictr(   )r_   )(r1   r2   r3   r   r4   r   rG   rf   r>   r5   r
   rH   r   rI   r   rJ   rB   rK   rL   rP   r6   rm   rs   r   r   r   r   staticmethodr   rA   r   rj   rM   rN   r   r   r   rO   rh   r   r%   r%   r%   r&   rD   Q   sB   
 	%^
rD   c                       s@   e Zd Zd	dedB f fddZ fddZ fddZ  ZS )
layerwise_profileNrL   c                    s.   t  jtjtjgdddtddd || _dS )a  
        layerwise profile constructor.

        Args:
            num_running_seqs (Optional[int], optional): When given,
                num_running_seqs will be passed to LayerProfileResults
                for metadata update. Defaults to None.
        T)verbose)
activitiesrecord_shapes
with_stackwith_modulesexperimental_configN)super__init__r   CPUr/   r   rL   )r$   rL   	__class__r%   r&   r   n  s   	

zlayerwise_profile.__init__c                    s
   t   S r(   )r   	__enter__r#   r   r%   r&   r     s   
zlayerwise_profile.__enter__c                    s(   t  ||| t| jj| jd| _d S )N)rL   )r   __exit__rD   profilerkineto_resultsrL   results)r$   exc_typeexc_valexc_tbr   r%   r&   r     s   
zlayerwise_profile.__exit__r(   )r1   r2   r3   r>   r   r   r   __classcell__r%   r%   r   r&   r   m  s    r   )-r   collectionsr   collections.abcr   dataclassesr   r   r   typingr   r   torch._C._autogradr	   r
   r   torch._C._profilerr   r   r   torch.autograd.profilerr   torch.profilerr   r   vllm.profiler.utilsr   r   r   r   r   r   vllm.utils.import_utilsr   r   rz   ImportErrorr   r8   r?   rA   r4   rB   rD   r   r%   r%   r%   r&   <module>   s<   
   