import lightning.pytorch as pl
import torch
from lightning.pytorch.callbacks.callback import Callback
from megatron.core.transformer import TransformerConfig


class DeepEPCallback(Callback):
    """
    A PyTorch Lightning callback to enable DeepEP if the hardware is supported.
    Per the official documentation (https://github.com/deepseek-ai/DeepEP),
    DeepEP is supported on Ampere (SM80) and Hopper (SM90) GPUs.

    Adding this callback is equivalent to setting the following flags in the recipe function:

    recipe.model.config.moe_token_dispatcher_type = "flex"
    recipe.model.config.moe_enable_deepep = True
    recipe.model.config.moe_shared_expert_overlap = False

    Since the recipe function may be run on a different machine, this callback is needed
    so that the configs are set at run time.
    ÚtrainerÚ	pl_moduleÚstageÚreturnNc                 C   sj   t j d¡jdvrdS t|jdƒr/t|jjtƒr1|  	|jj¡ t|jdƒr3|  	|jj
j¡ dS dS dS dS )z(Enable DeepEP if GPU is Ampere or Hopperr   )é   é	   NÚconfigÚ__io__)ÚtorchÚcudaÚget_device_propertiesÚmajorÚhasattrÚmodelÚ
isinstancer   r   Ú_apply_deepep_cfgsr   )Úselfr   r   r   © r   ú[/home/ubuntu/.local/lib/python3.10/site-packages/nemo/lightning/pytorch/callbacks/deepep.pyÚsetup%   s   ýzDeepEPCallback.setupc                 C   s   d|_ d|_d|_d S )NÚflexTF)Úmoe_token_dispatcher_typeÚmoe_enable_deepepÚmoe_shared_expert_overlap)r   Údest_cfgr   r   r   r   /   s   
z!DeepEPCallback._apply_deepep_cfgs)
Ú__name__Ú
__module__Ú__qualname__Ú__doc__ÚplÚTrainerÚLightningModuleÚstrr   r   r   r   r   r   r      s    
r   )	Úlightning.pytorchÚpytorchr"   r   Ú$lightning.pytorch.callbacks.callbackr   Úmegatron.core.transformerr   r   r   r   r   r   Ú<module>   s
   
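
# Usage sketch (illustrative, not executed on import): the callback is typically
# attached to a recipe's trainer so the DeepEP flags are applied at run time on
# the machine that actually has the GPUs. The recipe factory named below
# (llm.deepseek_v3.pretrain_recipe) and its arguments are assumptions for
# illustration; any recipe whose model config is a megatron TransformerConfig
# would be wired up the same way.
#
#   import nemo_run as run
#   from nemo.collections import llm
#   from nemo.lightning.pytorch.callbacks.deepep import DeepEPCallback
#
#   recipe = llm.deepseek_v3.pretrain_recipe(num_nodes=8)  # assumed factory
#   recipe.trainer.callbacks.append(run.Config(DeepEPCallback))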