o
    -ip4                     @   sb  U d dl Z d dlmZ d dlmZ d dlmZmZmZ d dl	m
Z
mZmZ d dlmZmZmZ d dlmZmZmZ d dlmZ d d	lmZmZ d d
lmZmZmZmZmZm Z  d dl!m"Z" zd dl#Z$W n e%yp   e"dZ$Y nw eG dd dZ&eG dd dZ'eG dd dZ(e(e'B Z)ee*d< eG dd dZ+eG dd deZ,G dd deZ-dS )    N)defaultdict)Callable)asdict	dataclassfield)AnyOptional	TypeAlias)
DeviceType_KinetoEvent_ProfilerResult)
_EventType_ExperimentalConfig_ProfilerEvent)FunctionEvent)ProfilerActivityprofile)TablePrinterevent_has_moduleevent_is_torch_opevent_module_reprevent_torch_op_stack_traceindent_string)PlaceholderModulepandasc                   @   sl   e Zd ZU eed< dZed  ed< eedZ	ed  ed< dZ
eed< edd	 Zed
d Zedd ZdS )_ModuleTreeNodeeventNparent)default_factorychildren tracec                 C   s   | j jd u pt| j jdkS Nr   )r   r   lenself r&   \/home/ubuntu/veenaModal/venv/lib/python3.10/site-packages/vllm/profiler/layerwise_profile.pyis_leaf&   s   z_ModuleTreeNode.is_leafc                 C   s
   t | jS N)r   r   r$   r&   r&   r'   is_torch_op*   s   
z_ModuleTreeNode.is_torch_opc                 C   s"   | j jtjko| j jd jtjkS )N   )r   tagr   Kinetotypeddevice_typer
   CUDAr$   r&   r&   r'   is_cuda.   s   z_ModuleTreeNode.is_cuda)__name__
__module____qualname__r   __annotations__r   r   r   listr   r!   strpropertyr(   r*   r1   r&   r&   r&   r'   r      s   
 

r   c                   @   s.   e Zd ZU eed< eed< eed< eed< dS )SummaryStatsEntrynamecuda_time_uspct_cuda_timeinvocationsN)r2   r3   r4   r7   r5   floatintr&   r&   r&   r'   r9   6   s
   
 r9   c                   @   s6   e Zd ZU eed< eed< eed< eed< eed< dS )ModelStatsEntryr:   cpu_time_usr;   r<   r!   N)r2   r3   r4   r7   r5   r>   r&   r&   r&   r'   r@   >   s   
 r@   
StatsEntryc                   @   s.   e Zd ZU eed< ee ed< edB ed< dS )_StatsTreeNodeentryr   Nr   )r2   r3   r4   rB   r5   r6   r&   r&   r&   r'   rC   J   s   
 rC   c                   @   s  e Zd ZU eed< eddZeee	e
 f ed< eddZeee	e f ed< eddZe	e ed< eddZe	e ed< eddZe	e ed< d	Zed	B ed
< dd Zd1deeef fddZd1deeef fddZdefddZdefddZdeeef fddZe	d2de	eeef  deegef eB fddZdd  Z d!d" Z!d#efd$d%Z"d#efd&d'Z#d(d) Z$d*d+ Z%d,e	e de	eeef  fd-d.Z&d,e	e de	e fd/d0Z'd	S )3LayerwiseProfileResults_kineto_resultsF)init_kineto_event_correlation_map_event_correlation_map_module_tree_model_stats_tree_summary_stats_treeNnum_running_seqsc                 C   s   |    |   |   d S r)   )_build_correlation_map_build_module_tree_build_stats_treesr$   r&   r&   r'   __post_init__]   s   z%LayerwiseProfileResults.__post_init__column_widthsc                 C   s^   t dddddd}|r|jd	i | dd | | jD }tt|| j|dd d d S )
N<      r:   rA   r;   r<   r!   c                 S   s,   g | ]\}}|j d ks|jd kr||fqS r   )r;   rA   .0depthrowr&   r&   r'   
<listcomp>h   s
    z=LayerwiseProfileResults.print_model_table.<locals>.<listcomp>c                 S      dd|   d S N|- r&   indentr&   r&   r'   <lambda>p       z;LayerwiseProfileResults.print_model_table.<locals>.<lambda>indent_styler&   )dictupdate_flatten_stats_treerK   r   r@   print_table _indent_row_names_based_on_depth)r%   rR   _column_widthsfiltered_model_tabler&   r&   r'   print_model_tableb   s   


z)LayerwiseProfileResults.print_model_tablec                 C   s\   t ddddd}|r|jd
i | dd | | jD }tt|| j|dd d	 d S )NP   rT      r:   r;   r<   r=   c                 S   s"   g | ]\}}|j d kr||fqS rV   r;   rW   r&   r&   r'   r[   z   s
    
z?LayerwiseProfileResults.print_summary_table.<locals>.<listcomp>c                 S   r\   r]   r&   ra   r&   r&   r'   rc      rd   z=LayerwiseProfileResults.print_summary_table.<locals>.<lambda>re   r&   )rg   rh   ri   rL   r   r9   rj   rk   )r%   rR   rl   filtered_summary_tabler&   r&   r'   print_summary_tablet   s   

z+LayerwiseProfileResults.print_summary_tablefilenamec                 C   *   t dd | | jD }|| d S )Nc                 S      g | ]\}}t |qS r&   r   rX   _rZ   r&   r&   r'   r[      s    zHLayerwiseProfileResults.export_model_stats_table_csv.<locals>.<listcomp>)pd	DataFrameri   rK   to_csvr%   ru   dfr&   r&   r'   export_model_stats_table_csv   s   z4LayerwiseProfileResults.export_model_stats_table_csvc                 C   rv   )Nc                 S   rw   r&   rx   ry   r&   r&   r'   r[      s    zJLayerwiseProfileResults.export_summary_stats_table_csv.<locals>.<listcomp>)r{   r|   ri   rL   r}   r~   r&   r&   r'   export_summary_stats_table_csv   s   
z6LayerwiseProfileResults.export_summary_stats_table_csvreturnc                 C   s"   d| j i| | j| | jdS )NrM   )metadatasummary_statsmodel_stats)rM   _convert_stats_tree_to_dictrL   rK   r$   r&   r&   r'   convert_stats_to_dict   s   

z-LayerwiseProfileResults.convert_stats_to_dictr`   depths_rowsrf   c                 C   sF   g }| D ]\}}|j dkrqt|}t|j|||_|| q|S r"   )r;   copydeepcopyr   r:   append)r   rf   indented_rowsrY   rZ   indented_rowr&   r&   r'   rk      s   

z8LayerwiseProfileResults._indent_row_names_based_on_depthc                 C   s2   t t| _| j D ]}| j|  | q
d S r)   )r   r6   rH   rF   eventscorrelation_idr   )r%   r   r&   r&   r'   rN      s   
z.LayerwiseProfileResults._build_correlation_mapc                    sF   g _ j }	 ddtdtd B f fdd |D ]} | qd S )Nr   	curr_nodec                    s   | j dkrd S t| r"t| |d}|r|j| nj| |}| jd u p-t| jdk}|rG|rGt| |t| dd dd}|j| |}| jD ]} || qJd S )Nr+   )r   r   r   c                 S   s   t | S r)   )r   )xr&   r&   r'   rc      s    zSLayerwiseProfileResults._build_module_tree.<locals>._df_traversal.<locals>.<lambda>)until)r   r   r!   )	start_tidr   r   r   r   rJ   r#   r   )r   r   noder(   child_df_traversalr%   r&   r'   r      s,   

zALayerwiseProfileResults._build_module_tree.<locals>._df_traversalr)   )rJ   rF   experimental_event_treer   r   )r%   
event_treerootr&   r   r'   rO      s   

z*LayerwiseProfileResults._build_module_treer   c                    s@    j jtjkr	d S | j j jg } fdd|D }t|d S )Nc                 3   s2    | ]}|  tjkr|  jjkr|V  qd S r)   )r/   r
   r0   r:   r   )rX   r   r   r&   r'   	<genexpr>   s    z@LayerwiseProfileResults._get_kineto_gpu_event.<locals>.<genexpr>)r   r,   r   r-   rH   getr   next)r%   r   correlated_kineto_eventsiteratorr&   r   r'   _get_kineto_gpu_event   s   

z-LayerwiseProfileResults._get_kineto_gpu_eventc                    s   dt f fdd  |S )z Return cuda time in microsecondsr   c                    s@   | j r|  }r| d S d}| jD ]}| |7 }q|S )N     @@r   )r(   r   duration_nsr   )r   gpu_kineto_eventcumulative_cuda_timer   _cumulative_cuda_time_recursiver%   r&   r'   r      s   
zVLayerwiseProfileResults._cumulative_cuda_time.<locals>._cumulative_cuda_time_recursive)r   )r%   r   r&   r   r'   _cumulative_cuda_time   s   	z-LayerwiseProfileResults._cumulative_cuda_timec                    s   t  fdd jD S )Nc                    s   g | ]}  |qS r&   )r   )rX   r   r$   r&   r'   r[      s    z<LayerwiseProfileResults._total_cuda_time.<locals>.<listcomp>)sumrJ   r$   r&   r$   r'   _total_cuda_time   s   z(LayerwiseProfileResults._total_cuda_timec                    s   i   fdd	 	ddtdtd B dtt ffddg _jD ]
}j| q)	 ddtdtd B f fd	d
 g _jD ]
}j | qKd S )Nc                    s   |   d S )Nd   r&   rr   )total_cuda_timer&   r'   r<      s   zALayerwiseProfileResults._build_stats_trees.<locals>.pct_cuda_timer&   r   r   summary_tracec           	         s   t | jrt| j}| }n|  }r"| }| d }nd S ||f }|v rG| j}| j|7  _| j	d7  _	|j|_
ntt|||ddg |d}|r^|j| ||< | jD ]
} || | qe| S )Nr   r+   rq   )rD   r   r   )r   r   r   r   r   r:   r   rD   r;   r=   r<   rC   r9   r   r   )	r   r   r   r:   r;   r   rD   new_noder   )build_summary_stats_tree_dfr<   r%   summary_dictr&   r'   r      s>   






zOLayerwiseProfileResults._build_stats_trees.<locals>.build_summary_stats_tree_dfc           	         s   t | jrt| j}| }| jjd }d}n|  }r/| }| d }d}| j}nd S t	t
|||||d|g d}|rI|j| | jD ]} || qL|S )Ni  r    r   r   rU   )rD   r   r   )r   r   r   r   duration_time_nsr   r:   r   r!   rC   r@   r   r   )	r   r   r:   r;   rA   r!   r   r   r   )build_model_stats_tree_dfr<   r%   r&   r'   r   '  s:   


zMLayerwiseProfileResults._build_stats_trees.<locals>.build_model_stats_tree_df)Nr&   r)   )	r   r   rC   tupler7   rL   rJ   r   rK   )r%   r   r&   )r   r   r<   r%   r   r   r'   rP      s2   *
%
z*LayerwiseProfileResults._build_stats_treestreec                    s0   g ddt f fdd |D ]} | qS )Nr   r   c                    s0    || jf | jD ]
} ||d d qd S )Nr+   )rY   )r   rD   r   )r   rY   r   df_traversalentriesr&   r'   r   U  s   
zALayerwiseProfileResults._flatten_stats_tree.<locals>.df_traversalrV   )rC   )r%   r   r   r&   r   r'   ri   P  s
   
z+LayerwiseProfileResults._flatten_stats_treec                    s6   g }dt dtt f fdd |D ]} || q|S )Nr   curr_json_listc                    s8   | t| jg d | jD ]} ||d d  qd S )N)rD   r   r   )r   r   rD   r   )r   r   r   r   r&   r'   r   b  s   
zILayerwiseProfileResults._convert_stats_tree_to_dict.<locals>.df_traversal)rC   r6   rg   )r%   r   
root_dictsr   r&   r   r'   r   _  s
   z3LayerwiseProfileResults._convert_stats_tree_to_dictr)   )r`   )(r2   r3   r4   r   r5   r   rH   rg   r?   r6   r   rI   r   rJ   r   rK   rC   rL   rM   rQ   r7   rn   rt   r   r   r   r   staticmethodr   rB   r   rk   rN   rO   r   r   r   rP   ri   r   r&   r&   r&   r'   rE   Q   sB   
 	%^
rE   c                       s@   e Zd Zd	dedB f fddZ fddZ fddZ  ZS )
layerwise_profileNrM   c                    s.   t  jtjtjgdddtddd || _dS )a  
        layerwise profile constructor.

        Args:
            num_running_seqs (Optional[int], optional): When given,
                num_running_seqs will be passed to LayerProfileResults
                for metadata update. Defaults to None.
        T)verbose)
activitiesrecord_shapes
with_stackwith_modulesexperimental_configN)super__init__r   CPUr0   r   rM   )r%   rM   	__class__r&   r'   r   n  s   	

zlayerwise_profile.__init__c                    s
   t   S r)   )r   	__enter__r$   r   r&   r'   r     s   
zlayerwise_profile.__enter__c                    s(   t  ||| t| jj| jd| _d S )N)rM   )r   __exit__rE   profilerkineto_resultsrM   results)r%   exc_typeexc_valexc_tbr   r&   r'   r     s   
zlayerwise_profile.__exit__r)   )r2   r3   r4   r?   r   r   r   __classcell__r&   r&   r   r'   r   m  s    r   ).r   collectionsr   collections.abcr   dataclassesr   r   r   typingr   r   r	   torch._C._autogradr
   r   r   torch._C._profilerr   r   r   torch.autograd.profilerr   torch.profilerr   r   vllm.profiler.utilsr   r   r   r   r   r   vllm.utils.import_utilsr   r   r{   ImportErrorr   r9   r@   rB   r5   rC   rE   r   r&   r&   r&   r'   <module>   s<   
   