o
    }oiN                     @   s   d dl mZmZmZmZmZ d dlmZ d dl	Z	d dl
mZ d dlmZ d dlmZ de	jdeeef fdd	Zde	jdeeef fd
dZG dd deZdS )    )CallableDictListOptionalUnionN)Callback)MegatronOptimizerModule)loggingtensorreturnc                 C   s"   t | tjrdt| jiS ddiS )zReturns tensor's precision	Precisionnot-a-tensor)
isinstancetorchTensorstrdtyper
    r   ^/home/ubuntu/.local/lib/python3.10/site-packages/nemo/lightning/pytorch/callbacks/debugging.pycollect_precision   s   r   c                 C   s,   t | tjrt| jt| jdS dddS )z"Returns tensor's shape & precision)Shaper   r   )r   r   r   r   shaper   r   r   r   r   collect_precision_and_shape!   s   
r   c                   @   s   e Zd ZdZeedfdeeej	gee
eef  f  deeej	gee
eef  f  deee ef fddZdejd	ejd
dfddZdS )ParameterDebuggera  
    Debugging tool to help inspect parameters and gradients at any callback event.

    This callback handles the boilerplate needed to iterate over the model parameters and gradients,
    and applies user specified functions to them. These functions can be used to log attributes or
    apply asserts on the param and grad tensors. Attributes are logged in a table, with a row for each parameter name.
    Default behavior is to log the precision and shapes of each parameter and its gradient.

    Args:
        param_fn: Function to apply to model parameters. Can be used to apply assertions on the tensor,
            or return a mapping of labels and values to log for each parameter.
        grad_fn: Function to apply to model gradients. Can be used to apply assertions on the tensor,
            or return a mapping of labels and values to log for each gradient.
        log_on_hooks: PTL callback hook name or list of hook names on which to apply param_fn and grad_fn.
            See `PTL docs <https://lightning.ai/docs/pytorch/stable/extensions/callbacks.html#hooks>`_ for more info
            on callback hooks. Note that some hooks that occur before the model is constructed are invalid.

    Example:
        >>> fn = lambda x: {"Norm": str(x.norm(2).item())}
        >>> callback = ParameterDebugger(param_fn=fn, log_on_hooks=["on_train_start", "on_train_end"])
        >>> trainer = Trainer(callbacks=[callback])
    on_train_startparam_fngrad_fnlog_on_hooksc                 C   s\   || _ || _tg d}t|tr|g}|D ]}||v s$J d||t| || j qd S )N)#teardown
on_fit_endon_sanity_check_starton_sanity_check_endon_train_batch_starton_train_batch_endon_train_epoch_starton_train_epoch_endon_validation_epoch_starton_validation_epoch_endon_test_epoch_starton_test_epoch_endon_predict_epoch_starton_predict_epoch_endon_validation_batch_starton_validation_batch_endon_test_batch_starton_test_batch_endon_predict_batch_starton_predict_batch_endr   on_train_endon_validation_starton_validation_endon_test_starton_test_endon_predict_starton_predict_endon_exceptionon_save_checkpointon_load_checkpointon_before_backwardon_after_backwardon_before_optimizer_stepon_before_zero_gradzTHook {} supplied to log_on_hooks is not valid or can not be used. Valid hooks are {})r   r   setr   r   formatsetattr_apply_user_funcs)selfr   r   r   valid_hooks	hook_namer   r   r   __init__A   s   
(
zParameterDebugger.__init__trainer	pl_moduler   Nc              	      s  dt jdtt j f fdd}g g g }}}  D ]@\}	}
||
}|	dddd}|| t|
|g| j| jg||gD ]\}}}|durZ|durU||| q@|i  q@qt	g t	g }}|D ]}|durt|
|  qg|D ]}|dur|
|  qwt|st|rd	d
lm} | }|d| tddg||g||gD ]0\}}}|D ](}g }|D ]}|dur|||d q|d q|g kr||| | qqd|_td|   dS dS )z
        Iterate over model parameters, find gradient tensor, apply and collect outputs of
        param_fn and grad_fn, and log outputs in a table.
        paramr   c                    sB   t t ddts| jS  jD ]}| |jv r|j|  j  S qdS )zJIf using MCore optimizer, search the grad buckets for param's grad tensor.optimN)r   getattrr   gradbuffersparam_to_bucket	grad_data)rK   bufrJ   r   r   find_grad_tensor   s   

z=ParameterDebugger._apply_user_funcs.<locals>.find_grad_tensorzmodule. z.weightNr   )PrettyTable	ParameterzParam zGrad l
)r   r   r   named_parametersreplaceappendzipr   r   rA   updatekeysanyprettytablerV   
add_columngetalignr	   info
get_string)rE   rI   rJ   argskwargsrT   	names_colparams_outputgrads_output
param_nameparam_tensorgrad_tensor
short_namer
   fnout_col
param_keys	grad_keysoutputrV   debug_tableprefixr_   output_listk
col_to_logr   rS   r   rD   z   sX   



z#ParameterDebugger._apply_user_funcs)__name__
__module____qualname____doc__r   r   r   r   r   r   r   r   r   r   rH   plTrainerLightningModulerD   r   r   r   r   r   )   s    
9r   )typingr   r   r   r   r   lightning.pytorchpytorchr~   r   lightning.pytorch.callbacksr   %nemo.lightning.pytorch.optim.megatronr   
nemo.utilsr	   r   r   r   r   r   r   r   r   r   <module>   s   