o
    oi_                     @   s  d Z ddlZddlZddlZddlmZmZ ddlmZ ddl	m
Z
mZmZmZmZmZmZmZmZ ddlZddlmZmZ ddlmZmZ ddlmZmZmZ dd	lmZ dd
lm Z  ddl!m"Z" ddl#m$Z$ ddl%m&Z& ddl'm(Z( ddl)m*Z*m+Z+ e
rddl,m-Z- e.e/Z0e* Z1eej2j3ej4j2j3ej4j2j5f Z6ej27 Z8G dd dZ9G dd dZ:G dd de&Z;de<de=fddZ>dS )z<Profiler to check if there are any bottlenecks in your code.    N)	lru_cachepartial)Path)	TYPE_CHECKINGAnyCallableContextManagerDictListOptionalTypeUnion)Tensornn)	EventListrecord_function)ProfilerActionProfilerActivitytensorboard_trace_handler)RemovableHandle)override)is_cuda_available)_TORCH_GREATER_EQUAL_2_4)Profiler)MisconfigurationException)WarningCacherank_zero_warn)LightningModulec                
   @   s   e Zd ZdZdejddfddZdejded	edefd
dZ	dejdeded	edef
ddZ
dddZdedededdfddZdS )RegisterRecordFunctionaa  While profiling autograd operations, this class will add labels for module names around the forward function.

    The Lightning PyTorch Profiler will activate this feature automatically. It can be deactivated as follows:

    Example::
        from lightning.pytorch.profilers import PyTorchProfiler
        profiler = PyTorchProfiler(record_module_names=False)
        Trainer(profiler=profiler)

    It can be used outside of Lightning as follows:

    Example::
        from lightning.pytorch import Trainer, seed_everything
        with RegisterRecordFunction(model):
            out = model(batch)

    modelreturnNc                 C   s   || _ i | _i | _d S N)_model_records_handles)selfr    r&   W/home/ubuntu/.local/lib/python3.10/site-packages/lightning/pytorch/profilers/pytorch.py__init__B   s   
zRegisterRecordFunction.__init___inputrecord_namec                 C   s"   t d| }|  || j|< |S )Nz[pl][module])r   	__enter__r#   )r%   r)   r*   r+   recordr&   r&   r'   _start_recording_forwardG   s   
z/RegisterRecordFunction._start_recording_forward__outputc                 C   s   | j | d d d  |S r!   )r#   __exit__)r%   r)   r/   r0   r+   r&   r&   r'   _stop_recording_forwardN   s   z.RegisterRecordFunction._stop_recording_forwardc                 C   sz   | j  D ]5\}}|r:t|j dt|j }| d| }|t| j|d}|t| j	|d}||g| j
|< qd S )N.z: )r+   )r"   named_modulestype
__module____name__register_forward_pre_hookr   r.   register_forward_hookr2   r$   )r%   module_namemodule	full_namer+   pre_forward_handlepost_forward_handler&   r&   r'   r,   R   s   z RegisterRecordFunction.__enter__r5   value	tracebackc                 C   s,   | j  D ]}|D ]}|  q	qi | _ d S r!   )r$   valuesremove)r%   r5   r?   r@   handleshr&   r&   r'   r1   `   s
   

zRegisterRecordFunction.__exit__r    N)r7   r6   __qualname____doc__r   Moduler(   r   strr.   r2   r,   r   r1   r&   r&   r&   r'   r   /   s     
r   c                   @   s   e Zd ZdZdeddfddZd!ddZd	eddfd
dZdeddfddZ	e
defddZe
defddZe
defddZe
defddZe
defddZd!ddZe
defddZdeddfdd ZdS )"ScheduleWrapperzThis class is used to override the schedule logic from the profiler and perform recording for both
    `training_step`, `validation_step`.scheduler    Nc                 C   s   t std|| _|   d S )NzEYou are trying to use `ScheduleWrapper` which require kineto install.)_KINETO_AVAILABLEModuleNotFoundError	_schedulereset)r%   rK   r&   r&   r'   r(   k   s   zScheduleWrapper.__init__c                 C   sF   d| _ d| _d| _d| _d| _d| _d| _d| _d | _d | _	d | _
d S )Nr   F)_num_training_step_num_validation_step_num_test_step_num_predict_step_training_step_reached_end_validation_step_reached_end_test_step_reached_end_predict_step_reached_end_current_action_prev_schedule_action_start_action_namer%   r&   r&   r'   rO   q   s   
zScheduleWrapper.resetstart_action_namec                 C   
   || _ d S r!   )rZ   )r%   r\   r&   r&   r'   setup      
zScheduleWrapper.setupcurrent_actionc                 C   r]   r!   )rX   )r%   r`   r&   r&   r'   pre_step   r_   zScheduleWrapper.pre_stepc                 C      | j d usJ | j dS )Ntraining_steprX   endswithr[   r&   r&   r'   is_training      zScheduleWrapper.is_trainingc                 C   rb   )Nvalidation_steprd   r[   r&   r&   r'   is_validating   rg   zScheduleWrapper.is_validatingc                 C   rb   )N	test_steprd   r[   r&   r&   r'   
is_testing   rg   zScheduleWrapper.is_testingc                 C   rb   )Npredict_steprd   r[   r&   r&   r'   is_predicting   rg   zScheduleWrapper.is_predictingc                 C   4   | j r| jS | jr| jS | jr| jS | jr| jS dS )Nr   )rf   rP   ri   rQ   rk   rR   rm   rS   r[   r&   r&   r'   num_step      zScheduleWrapper.num_stepc                 C   s   | j r|  jd7  _d S | jr5| jd usJ | jdr,| jdkr*|  jd7  _d S d S |  jd7  _d S | jrA|  jd7  _d S | jrM|  j	d7  _	d S d S )N   on_fit_startr   )
rf   rP   ri   rZ   re   rQ   rk   rR   rm   rS   r[   r&   r&   r'   _step   s   
zScheduleWrapper._stepc                 C   rn   )NF)rf   rT   ri   rU   rk   rV   rm   rW   r[   r&   r&   r'   has_finished   rp   zScheduleWrapper.has_finishedro   r   c                 C   s   | j d u s| jrtjS |   | t| jd}| jtj	kr&|tj
kr&tj	}|tjkrF| jr2d| _n| jr9d| _n| jr@d| _n| jrFd| _|| _|S )Nr   T)rX   rt   r   NONErs   rN   maxro   rY   RECORDWARMUPRECORD_AND_SAVErf   rT   ri   rU   rk   rV   rm   rW   )r%   ro   actionr&   r&   r'   __call__   s"   
zScheduleWrapper.__call__rE   )r7   r6   rF   rG   r   r(   rO   rI   r^   ra   propertyboolrf   ri   rk   rm   intro   rs   rt   r{   r&   r&   r&   r'   rJ   g   s(    

rJ   c                       s  e Zd Zh dZh dZ									d3deeeef  dee d	e	d
e	de	de
dee de	deeeef  deddf fddZdeddfddZedee
ef fddZde	fddZeeddee fddZded fddZed eddfd!d"Zed eddfd#d$Zedefd%d&Zd4d'd(Zd)ee defd*d+Z d4d,d-Z!d4d.d/Z"ed0ee ddf fd1d2Z#  Z$S )5PyTorchProfiler>   rj   rl   rc   rh   >	   countcpu_time	cuda_timecpu_time_totalcuda_time_totalcpu_memory_usagecuda_memory_usageself_cpu_memory_usageself_cuda_memory_usageNFT   dirpathfilenamegroup_by_input_shapes	emit_nvtxexport_to_chrome	row_limitsort_by_keyrecord_module_namestable_kwargsprofiler_kwargsr    c
                    s(  t  j||d |o|
dd| _|| _|| _|| _|pt|
| _|| _	|
| _
|	dur-|	ni | _d| _d| _d| _d| _d| _i | _d| _d| _trO| |
 | j| jvrbtd| j d| j d| jD ],}|dv rstd	| d
tttjj h d }||vrtd	| d| dqedS )aA	  This profiler uses PyTorch's Autograd Profiler and lets you inspect the cost of
        different operators inside your model - both on the CPU and GPU.

        Args:
            dirpath: Directory path for the ``filename``. If ``dirpath`` is ``None`` but ``filename`` is present, the
                ``trainer.log_dir`` (from :class:`~lightning.pytorch.loggers.tensorboard.TensorBoardLogger`)
                will be used.

            filename: If present, filename where the profiler results will be saved instead of printing to stdout.
                The ``.txt`` extension will be used automatically.

            group_by_input_shapes: Include operator input shapes and group calls by shape.

            emit_nvtx: Context manager that makes every autograd operation emit an NVTX range
                Run::

                    nvprof --profile-from-start off -o trace_name.prof -- <regular command here>

                To visualize, you can either use::

                    nvvp trace_name.prof
                    torch.autograd.profiler.load_nvprof(path)

            export_to_chrome: Whether to export the sequence of profiled operators for Chrome.
                It will generate a ``.json`` file which can be read by Chrome.

            row_limit: Limit the number of rows in a table, ``-1`` is a special value that
                removes the limit completely.

            sort_by_key: Attribute used to sort entries. By default
                they are printed in the same order as they were registered.
                Valid keys include: ``cpu_time``, ``cuda_time``, ``cpu_time_total``,
                ``cuda_time_total``, ``cpu_memory_usage``, ``cuda_memory_usage``,
                ``self_cpu_memory_usage``, ``self_cuda_memory_usage``, ``count``.

            record_module_names: Whether to add module names while recording autograd operation.

            table_kwargs: Dictionary with keyword arguments for the summary table.

            \**profiler_kwargs: Keyword arguments for the PyTorch profiler. This depends on your PyTorch version

        Raises:
            MisconfigurationException:
                If arg ``sort_by_key`` is not present in ``AVAILABLE_SORT_KEYS``.
                If arg ``schedule`` is not a ``Callable``.
                If arg ``schedule`` does not return a ``torch.profiler.ProfilerAction``.

        )r   r   record_shapesFNzFound sort_by_key: z. Should be within z. >   sort_byr   z Found invalid table_kwargs key: z8. This is already a positional argument of the Profiler.>   r%   r   r   r3   ) superr(   get_group_by_input_shapes
_emit_nvtx_export_to_chrome
_row_limit_default_sort_by_key_sort_by_key_record_module_names_profiler_kwargs_table_kwargsprofilerfunction_events_lightning_module	_register_parent_profiler_recording_maprZ   rN   rL   _init_kinetoAVAILABLE_SORT_KEYSr   KeyErrorsetinspect	signaturer   table
parameterskeys)r%   r   r   r   r   r   r   r   r   r   r   keyvalid_table_keys	__class__r&   r'   r(      sB   =


zPyTorchProfiler.__init__c                 C   s   d|v }d|v | _ |dd }|d ur.t|std| |d}t|ts.td| |   |r6|n|  }|d urBt|n|| _| j| j	d< |dd }|pV| 
 | j	d< |dd| _|d	d
| _|ddpp| j}|| j	d< d S )NrK   on_trace_readyz&Schedule should be a callable. Found: r   zASchedule should return a `torch.profiler.ProfilerAction`. Found: 
activitiesexport_to_flame_graphFmetricself_cpu_time_total
with_stack)_has_on_trace_readyr   callabler   
isinstancer   _default_schedulerJ   rN   r   _default_activities_export_to_flame_graph_metric)r%   r   has_schedulerK   rz   r   r   r&   r&   r'   r   N  s*   

zPyTorchProfiler._init_kinetoc                 C   s   | j d usJ | jd usJ | jj}| j jr|jS | j jr=t|jtr(t	|jn|j}t|j
tr6t	|j
n|j
}|| S | j jrSt|jtrNt	|j}|S |j}|S | j jr\t	|jS td)NzUnsupported schedule)rN   r   trainerrf   num_training_batchesri   r   num_val_batcheslistsumnum_sanity_val_batchesrk   num_test_batchesrm   num_predict_batchesNotImplementedError)r%   r   r   r   r   r&   r&   r'   _total_stepsg  s0   



zPyTorchProfiler._total_stepsc                 C   s.   | j d uo| jd uo| jdk o| jj|  kS )N   )r   rN   r   r   r[   r&   r&   r'   _should_override_schedule  s   
z)PyTorchProfiler._should_override_schedulerq   c                   C   s   t rtjjddddS d S )Nrq      )waitwarmupactive)rL   torchr   rK   r&   r&   r&   r'   r     s   z!PyTorchProfiler._default_scheduler   c                 C   sl   g }t s|S tr|tj t r|tj |S | jddr&|tj | jdt r4|tj |S )Nuse_cpuTuse_cuda)	rL   r   appendr   CPUr   CUDAr   r   )r%   r   r&   r&   r'   r     s   z#PyTorchProfiler._default_activitiesaction_namec                 C   s   | j d u r4tj rtj  | jd ur| j| |   | j  }|d ur*|| _ | j	d ur4| j	  | j
d urL| jd u rL| jrLt| j
| _| j  | j d urg|| jvritd| }|  || j|< d S d S d S )Nz[pl][profile])r   r   autograd_profiler_enabled_disable_profilerrN   r^   _create_profilersr,   r   r   r   r   r   r   r   )r%   r   r   	recordingr&   r&   r'   start  s&   







zPyTorchProfiler.startc                    s   j v rj   d d d  j  = trjrd S jd urt fddjD rtjtjj	s5J j
d ur@j
   rStd d _
tjjjj_dtdd f fdd}jsf|j_j
d urqj
jj_j  jdd	 d S d S d S )
Nc                 3   s    | ]}  |V  qd S r!   )re   ).0func)r   r&   r'   	<genexpr>  s    z'PyTorchProfiler.stop.<locals>.<genexpr>zpThe PyTorch Profiler default schedule will be overridden as there is not enough steps to properly record traces.r   r    c                    s   j d ur?jrttj j dd}||  jr=tjj j dd}t	| t
jjjs3J | j|jd d S d S td d S )N )r   	extensionz.stack)r   z?The PyTorchProfiler failed to export trace as `dirpath` is None)r   r   r   rI   _prepare_filenamer   ospathjoinr   r   r   r   profileexport_stacksr   r   )r   handlerr   r   r%   r&   r'   r     s   
z,PyTorchProfiler.stop.<locals>.on_trace_ready	Frameworkzpytorch-lightning)r   r1   rL   r   r   anySTEP_FUNCTIONSr   r   r   rN   ra   r   warning_cachewarn_default_schedule_fnrK   	_PROFILERr   r   ro   step_numstepadd_metadata)r%   r   r   r&   r   r'   stop  s.   

"


zPyTorchProfiler.stopc                 C   s   | j ddr
| jrdS |   | jsdS | jr5ts5| j d}| jd u r'|nt	j
| j|}| j| | jj| jd}|jd| j| jd| j}d|i}| |S )	NenabledTr   z_trace.json)r   )r   r   recordsr&   )r   r   r   _delete_profilersr   r   rL   
local_rankr   r   r   r   export_chrome_tracekey_averagesr   r   r   r   r   _stats_to_str)r%   r   path_to_tracedatar   recorded_statsr&   r&   r'   summary  s   

zPyTorchProfiler.summaryc                 C   sj   | j d urd S | jr!| jd u rtjj  | _| tjj j| _ d S d | _| t	r,tj jntjj j| _ d S r!   )
r   r   r   r   cudar   _create_profilerr   r   rL   r[   r&   r&   r'   r     s   


z!PyTorchProfiler._create_profilersr   c                    s4   t |jj  fdd| j D }|di |S )Nc                    s   i | ]\}}| v r||qS r&   r&   )r   kvinit_parametersr&   r'   
<dictcomp>  s    z4PyTorchProfiler._create_profiler.<locals>.<dictcomp>r&   )r   r   r(   r   r   items)r%   r   kwargsr&   r  r'   r    s   z PyTorchProfiler._create_profilerc                 C   sV   | j rd S trt| jtjjsJ | j | _d S t| jtjjjs$J | jj| _d S r!   )	r   rL   r   r   r   r   eventsr   r   r[   r&   r&   r'   _cache_functions_events  s   z'PyTorchProfiler._cache_functions_eventsc                 C   s   | j d ur| j d d d  |   d | _ | jd ur| j  | jd ur.| jd d d  d | _| jd ur@| jd d d  d | _d S d S r!   )r   r1   r  rN   rO   r   r   r[   r&   r&   r'   r   %  s   





z!PyTorchProfiler._delete_profilersstagec                    s:   |    t| jD ]}| | q	i | _t j|d d S )N)r  )r   r   r   r   r   teardown)r%   r  r  r   r&   r'   r  6  s
   zPyTorchProfiler.teardown)	NNFFTr   NTNrE   )%r7   r6   rF   r   r   r   r   rI   r   r}   r~   r	   r   r(   r   r|   floatr   r   staticmethodr   r   r   r
   r   r   r   r   r  r   r   r   r  r  r   r  __classcell__r&   r&   r   r'   r      sn    	
f0


$r   r   r    c                 C   sJ   |  dg }|  ddp|otj|v p| ot }|r d dS d dS )Nr   r   Fr  cpu_time_total)r   r   r   r   )r   r   is_cudar&   r&   r'   r   A  s   
r   )?rG   r   loggingr   	functoolsr   r   pathlibr   typingr   r   r   r   r	   r
   r   r   r   r   r   r   torch.autograd.profilerr   r   torch.profilerr   r   r   torch.utils.hooksr   typing_extensionsr   "lightning.fabric.accelerators.cudar   "lightning.fabric.utilities.importsr   $lightning.pytorch.profilers.profilerr   &lightning.pytorch.utilities.exceptionsr   %lightning.pytorch.utilities.rank_zeror   r   lightning.pytorch.core.moduler   	getLoggerr7   logr   r   r   r   r   r   kineto_availablerL   r   rJ   r   dictrI   r   r&   r&   r&   r'   <module>   s<   ,

8s  i