o
    TÃi"  ã                   @   sŒ   d dl mZ d dlmZmZmZmZ d dlZd dlm	Z	 ddl
mZ ddlmZ ddlmZ d	d
lmZ G dd„ deƒZG dd„ deƒZdS )é    )Úabstractmethod)ÚAnyÚDictÚOptionalÚTypeN)ÚDeepSpeedConfigModelé   )ÚDSModuleBase)ÚDSModuleRegistryBase©ÚDSMoEConfigé   )ÚInferenceParameterc                       sî   e Zd ZdZedee fdd„ƒZdede	e
ef ddf‡ fdd	„Zed
ejdefdd„ƒZed
ejdefdd„ƒZed
ejdefdd„ƒZ		ddejdejdejdejdeej deej dejfdd„Zeedejfdd„ƒƒZ‡  ZS )Ú	DSMoEBasea  
    Base mixing for MoE modules. The interface represented by this module is:

    expert_assignments = gate(hidden_states)
    intermediate = ragged_linear(hidden_states, expert_assignments)
    output = ragged_linear(intermediate, expert_assignments)
    Úreturnc                   C   ó   t S ©Nr   © r   r   úf/home/ubuntu/.local/lib/python3.10/site-packages/deepspeed/inference/v2/modules/interfaces/moe_base.pyÚconfig_class   ó   zDSMoEBase.config_classÚconfigÚimplementation_configNc                    s   t ƒ  ||¡ d S r   )ÚsuperÚ__init__)Úselfr   r   ©Ú	__class__r   r   r      s   zDSMoEBase.__init__Úparamc                 C   ó   dS )z¨
        Perform any necessary transformations of the gate parameter.

        Args:
            param (torch.Tensor): gate_w (shape: [num_experts, model_dim])
        Nr   ©r   r   r   r   r   Útransform_gate_param"   s   zDSMoEBase.transform_gate_paramc                 C   r   )zþ
        Perform any necessary transformations of the parameter. The specific component
        being transformed should be inferred from the shape of the parameter.

        Args:
            param (torch.Tensor): One of either mlp_1_w, mlp_1_b
        Nr   r    r   r   r   Útransform_moe_mlp_1_param,   s   	z#DSMoEBase.transform_moe_mlp_1_paramc                 C   r   )a}  
        Perform any necessary transformations of the parameter. The specified component being
        transformed should be inferred from the shape of the parameter. This interface is
        separate from transform_moe_1_param because the two components may have identical
        shapes.

        Args:
            param (torch.Tensor): One of either mlp_2_w or mlp_2_b
        Nr   r    r   r   r   Útransform_moe_mlp_2_param7   s   z#DSMoEBase.transform_moe_mlp_2_paramÚhidden_statesÚgate_wÚmlp_1_wÚmlp_2_wÚmlp_1_bÚmlp_2_bc                 C   s   t ƒ ‚r   )ÚNotImplementedError)r   r$   r%   r&   r'   r(   r)   r   r   r   ÚforwardD   s   zDSMoEBase.forwardc                 C   r   )zB
        Returns the pre-allocated, padded output Tensor.
        Nr   )r   r   r   r   ÚoutputM   s   zDSMoEBase.output)NN)Ú__name__Ú
__module__Ú__qualname__Ú__doc__Ústaticmethodr   r   r   r   r   Ústrr   r   r   ÚtorchÚTensorr   r!   r"   r#   r   r+   Úpropertyr,   Ú__classcell__r   r   r   r   r      s>    "	
úÿþýüûú
ú	r   c                   @   s0   e Zd ZU i Zeed< edee fdd„ƒZ	dS )ÚDSMoERegistryÚregistryr   c                   C   r   r   )r   r   r   r   r   Úassociated_classY   r   zDSMoERegistry.associated_classN)
r-   r.   r/   r8   r   Ú__annotations__r1   r   r	   r9   r   r   r   r   r7   V   s   
 r7   )Úabcr   Útypingr   r   r   r   r3   Údeepspeed.runtime.config_utilsr   Ú	ds_moduler	   Úmodule_registryr
   Úconfigsr   Úinference_parameterr   r   r7   r   r   r   r   Ú<module>   s   D