o
    wil                     @   sb   d dl mZmZmZmZ d dlZd dlmZ d dl	m
Z
 d dlmZmZ dgZG dd deZdS )    )AnyDictListOptionalN)Callback)read_tb_log)flops_formulasloggingFLOPsMeasurementCallbackc                   @   sj   e Zd ZdZdZ		ddeeef dee dee fddZ	d	d
 Z
deeB eB defddZdd ZdS )r
   a  
    Calculate FLOPs per second after last train step for a given job run.

    Args:
        model_config (Dict[str, Any]): params for running the experiment/job.
        Expects a nested dictionary with parent keys
            1. run- for assessing model name (Eg. 'gpt3', 'llama2', etc.) from sub-key 'name'.
                'name' usually has value like- train_gpt3_5b_*, which is matched to model name 'gpt3'.
            2. exp_manager- for accessing 'explicit_log_dir'. tensorboard log file is stored here,
                used for accessing step time needed for calculating TFLOPs per sec per GPU
            3. trainer- for accessing 'num_nodes' and 'devices' needed for calculating
                TFLOPs per sec per GPU
            4. model- Hyperparams for the model. Specifically- global batch size, sequence length,
                hidden size,  ffn hidden size, num_layers, num_attention_heads, num_query_groups,
                moe_router_topk. (list might increase with new models as required)
        log_dir (Optional[str]): Directory with tenbsorboard log file. If present, will overrride
            'explicit_log_dir' in model_config. Defaults to None.
        model_name (Optional[str]): If present, will override 'name' under 'run' in model_config.
            Defaults to None.
    TNmodel_configlog_dir
model_namec              
   C   s`  || _ | j di | _| j di pi | _| j di | _| j di | _|d u r0| jddn|| _|d u r>| jdd n|| _| jdd | _| jd	d | _	| jd
d }| jdd }| jdd }| jdd }| jdd }| jdd }	| jdd }
| jdd }|d u r|	}t
j||||||	|
|d| _| jd ur| j | _d S | j| _d S )Nrunexp_managertrainermodelname explicit_log_dir	num_nodesdevicesglobal_batch_sizeencoder_seq_lengthhidden_size
num_layersffn_hidden_sizenum_attention_headsmoe_router_topknum_query_groups)gbsenc_seq_lenhslayersffn_hsattention_headsr   query_groups)cfggetrun_cfgexp_cfg	train_cfg	model_cfgr   r   r   num_gpus_per_noder   FLOPSConfigflops_configlower)selfr   r   r   r   r    r!   r"   r#   r$   r   r%    r1   i/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/nemo/collections/common/metrics/perf_metrics.py__init__2   s<   &z!FLOPsMeasurementCallback.__init__c              
   C   s   d}zd| j d v rtdt| jd}| |}W n ty4 } ztd|  W Y d}~nd}~ww td|d	 |j	rK|j	j
d
| dS dS )zd
        PyTorch Lightning callback hook to calculate TFLOPs per sec per GPU after training
        peftr   z3FLOPs measurement not supported for finetuning jobsztrain_step_timing in sz,Failed to calculate TFLOPs per sec per GPU.
NzTFLOPs per sec per GPU=z.2ftflops_per_sec_per_gpu)r&   NotImplementedErrorr   r   eval_tflops_per_sec_per_gpu	Exceptionr	   errorinfologger
experiment
add_scalar)r0   r   	pl_moduler6   step_time_listexcr1   r1   r2   on_train_enda   s   z%FLOPsMeasurementCallback.on_train_endtrain_step_timereturnc                 C   sL   |   \}}t|ts|g}t|}t|t|d d }|d|  S )ae  
        Args:
            train_step_time (Any[List, float, int]): Train step time (in seconds).
            Step time will be less stable for initial steps (~10 steps)- less
            accurate measurement
            Use average step time over several steps for higher accuracy
        Returns:
            (float): Model TFLOPs per sec per gpu
           Ng   mB)eval_model_flops
isinstancelistnparraymeanlen)r0   rC   total_flopsflops_per_gpustep_time_arrr1   r1   r2   r8   t   s   


z4FLOPsMeasurementCallback.eval_tflops_per_sec_per_gpuc                    s   t jt jt jt jt jt jd} jdur+ fdd|D }t|dkr'|d n j _ j|vrDt	
dt|   td j | j  j}| j j  }||fS )z9
        Calculate model FLOPs for a given model
        )gpt3llama2llama3nemotronmixtralbertNc                    s   g | ]	}| j v r|qS r1   )r   ).0r   r0   r1   r2   
<listcomp>   s    z=FLOPsMeasurementCallback.eval_model_flops.<locals>.<listcomp>r   z FLOPs measurement supported for zJFailed to extract valid model name from or missing FLOPs calculations for )r   rP   rQ   rR   rS   rT   rU   r   rL   r	   r;   rH   keysKeyErrorr.   r   r,   )r0   model_flops_mapmodel_matchesrM   rN   r1   rW   r2   rF      s    
	
z)FLOPsMeasurementCallback.eval_model_flops)NN)__name__
__module____qualname____doc__higher_is_betterr   strr   r   r3   rB   r   floatintr8   rF   r1   r1   r1   r2   r
      s    

/)typingr   r   r   r   numpyrI   lightning.pytorch.callbacksr   0nemo.collections.common.parts.perf_metrics_utilsr   
nemo.utilsr   r	   __all__r
   r1   r1   r1   r2   <module>   s   