o
    }oi                     @   s   d dl mZmZ d dlZd dlmZ z
d dlmZ dZW n e	y'   dZY nw d dl
mZ d dlmZ d d	lmZ d
efddZG dd deZdS )    )ListOptionalN)Callback)configure_nvtx_profilingTF)logging)AppState)get_rankreturnc                 C   s:   t | jdr
| jjS t| jjjjjj	j
j| jjjjj
jS )z0
    Get the value of step within an epoch.
    current_epoch_step)hasattrstrategyr
   maxfit_loop
epoch_loopautomatic_optimizationoptim_progress	optimizerstepcurrent	completedmanual_optimizationoptim_step_progress)trainer r   Y/home/ubuntu/.local/lib/python3.10/site-packages/nemo/lightning/pytorch/callbacks/nsys.pyget_current_epoch_step    s   r   c                   @   sl   e Zd ZdZdgddfdededee dedef
d	d
Zdd Zdede	e fddZ
deddfddZdS )NsysCallbacka  
    A PyTorch Lightning callback for NVIDIA Nsight Systems (Nsys) profiling.

    This callback enables profiling of specific steps during training using NVIDIA Nsys.
    It allows for precise control over when profiling starts and ends, which ranks are profiled,
    and whether to generate detailed shape information.

    More info about nsys can be found [here](https://developer.nvidia.com/nsight-systems).

    Args:
        start_step (int): Global batch to start profiling
        end_step (int): Global batch to end profiling
        ranks (List[int]): Global rank IDs to profile
        gen_shape (bool): Generate model and kernel details including input shapes
        nvtx_ranges (bool): Insert NVTX ranges to categorize execution

    Example:
        >>> callback = NsysCallback(start_step=100, end_step=200, ranks=[0, 1], gen_shape=True, nvtx_ranges=False)
        >>> trainer = Trainer(callbacks=[callback])
    r   F
start_stepend_stepranks	gen_shapenvtx_rangesc                 C   s   t |tu sJ dt | || _t |tu s!J dt | || _| j| jks.J d|| _|| _t }||_|rBtrBt	d t
d| j d| j  d| _d S )Nz,Nsys start_step must be of type int. Found: z*Nsys end_step must be of type int. Found: z>Nsys end_step must be greater than or equal to nsys start_stepTz&Nsys profiling setup with start_step: z,and end_step: F)typeint_nsys_profile_start_step_nsys_profile_end_step_nsys_profile_ranks_nsys_profile_gen_shaper   _nvtx_rangesHAVE_MCORE_UTILSr   r   info_has_nsys_enabled)selfr   r   r   r    r!   	app_stater   r   r   __init__B   s&   

zNsysCallback.__init__c                 C   s6   ddl m} t|j|rdS tj sdS t | jv S )Nr   )SingleDeviceStrategyT)	lightning.pytorch.strategiesr/   
isinstancer   torchdistributedis_initializedr   r&   )r,   r   r/   r   r   r   _rank_is_activeb   s   
zNsysCallback._rank_is_active	batch_idxr	   c                 C   s   |  |r|jjjdkrdS t|}|| jkr=| js?d| _tj	 
  | jr3tjjjdd  dS tjj   dS dS dS )zPyTorch Lightning hook:
        https://pytorch-lightning.readthedocs.io/en/stable/common/lightning_module.html#on-train-batch-start
        We use it here to enable nsys profiling.
        cudaNT)record_shapes)r5   r   root_devicer"   r   r$   r+   r2   r7   cudartcudaProfilerStartr'   autogradprofiler	emit_nvtx	__enter__)r,   r   	pl_modulebatchr6   current_stepr   r   r   on_train_batch_startl   s   z!NsysCallback.on_train_batch_startNc                 C   sj   |  |r|jjjdkrdS t|}|| jkr1| jr3tj	 
  tjj ddd d| _dS dS dS )zPyTorch Lightning hook:
        https://pytorch-lightning.readthedocs.io/en/stable/common/lightning_module.html#on-train-batch-end
        We use it here to enable nsys profiling.
        r7   NF)r5   r   r9   r"   r   r%   r+   r2   r7   r:   cudaProfilerStopr<   r=   r>   __exit__)r,   r   r@   outputsrA   r6   rB   r   r   r   on_train_batch_end}   s   
zNsysCallback.on_train_batch_end)__name__
__module____qualname____doc__r#   r   boolr.   r5   r   rC   rG   r   r   r   r   r   ,   s&    
 
r   )typingr   r   r2   $lightning.pytorch.callbacks.callbackr   megatron.core.utilsr   r)   ImportError
nemo.utilsr   nemo.utils.app_stater   nemo.utils.get_rankr   r#   r   r   r   r   r   r   <module>   s   