o
    Ti*                     @   s"  d dl mZ d dlmZ ddlmZ d dlmZmZm	Z	m
Z
mZmZ d dlmZmZmZmZmZmZmZmZmZmZmZmZmZmZ deded	efd
dZdeded	efddZdeded	efddZde	ded	efddZde
ded	efddZ de
ded	efddZ!deded	efddZ"dS )   )RaggedInferenceEngineConfig)NormTypeEnum   )ConfigBundle)DSEmbeddingsConfigDSLinearConfigDSMoEConfigDSNormConfigDSSelfAttentionConfigDSUnembedConfig)DSEmbeddingBaseDSEmbeddingRegistryDSLinearBaseDSLinearRegistry	DSMoEBaseDSMoERegistryDSPostNormBaseDSPostNormRegistryDSPreNormBaseDSPreNormRegistryDSSelfAttentionBaseDSSelfAttentionRegistryDSUnembedBaseDSUnembedRegistryattention_configengine_configreturnc                 C      t d| d}t|S )a  
    Choose an appropriate attention implementation based on the given configurations. This
    method is currently a stub, but as more implementations may be developed  we can centralize
    the logic for choosing between them here.

    Arguments:
        attention_config (DSSelfAttentionConfig): Configuration for the attention module.
        engine_config (RaggedInferenceEngineConfig): Configuration for the inference engine.

    Returns:
        An attention module implementing the given configuration.
    dense_blocked_attentionnameconfig)r   r   instantiate_config)r   r   r!    r#   ]/home/ubuntu/.local/lib/python3.10/site-packages/deepspeed/inference/v2/modules/heuristics.pyinstantiate_attention$   s   
r%   embed_configc                 C   r   )a  
    Choose an appropriate embedding implementation based on the given configurations. This
    method is currently a stub, but as more implementations may be developed  we can centralize
    the logic for choosing between them here.

    Arguments:
        embed_config (DSEmbeddingsConfig): Configuration for the embedding module.
        engine_config (RaggedInferenceEngineConfig): Configuration for the inference engine.

    Returns:
        An embedding module implementing the given configuration.
    ragged_embeddingr   )r   r   r"   )r&   r   r!   r#   r#   r$   instantiate_embed8      
r(   linear_configc                 C   s   |j j}|du rtd| d}n>|dkrFddl}|j s tdt|jdo+|jj	du}|r2td|j
djd	kr?td
td| d}ntd| t|S )a  
    Choose an appropriate linear implementation based on the given configurations. This
    method is currently a stub, but as more implementations may be developed  we can centralize
    the logic for choosing between them here.

    Arguments:
        linear_config (DSLinearConfig): Configuration for the linear module.
        engine_config (RaggedInferenceEngineConfig): Configuration for the inference engine.

    Returns:
        A linear module implementing the given configuration.
    Nblas_fp_linearr   wf6af16    z.WF6AF16 quantization is only supported on CUDAhipz5WF6AF16 quantization is only supported on NVIDIA GPUs   z>WF6AF16 quantization is only supported on Ampere architecturesquantized_wf6af16_linearzUnsupported quantization mode: )quantizationquantization_moder   torchcudais_available
ValueErrorhasattrversionr.   get_device_propertiesmajorr   r"   )r*   r   r2   r!   r3   is_rocm_pytorchr#   r#   r$   instantiate_linearK   s   

r<   
moe_configc                 C   s.   d}|dkrd| j i}td| |d}t|S )a  
    Choose an appropriate MoE implementation based on the given configurations. This
    method is currently a stub, but as more implementations may be developed  we can centralize
    the logic for choosing between them here.

    Arguments:
        moe_config (DSMoEConfig): Configuration for the MoE module.
        engine_config (RaggedInferenceEngineConfig): Configuration for the inference engine.

    Returns:
        A MoE module implementing the given configuration.
    cutlass_multi_gemm_moeweight_dtype)r    r!   implementation_config)input_dtyper   r   r"   )r=   r   moe_typer@   r!   r#   r#   r$   instantiate_moen   s   
rC   norm_configc                 C   r   )a  
    Choose an appropriate post-norm implementation based on the given configurations. This
    method is currently a stub, but as more implementations may be developed  we can centralize
    the logic for choosing between them here.

    Arguments:
        norm_config (DSNormConfig): Configuration for the post-norm module.
        engine_config (RaggedInferenceEngineConfig): Configuration for the inference engine.

    Returns:
        A post-norm module implementing the given configuration.
    cuda_post_lnr   )r   r   r"   )rD   r   r!   r#   r#   r$   instantiate_post_norm   r)   rF   c                 C   s@   t | jt jkrd}n
t | jt jkrd}t|| d}t|S )a  
    Choose an appropriate pre-norm implementation based on the given configurations. Currently,
    this will select between two CUDA implementations, one for LayerNorm and one for RMSNorm.

    Arguments:
        norm_config (DSNormConfig): Configuration for the pre-norm module.
        engine_config (RaggedInferenceEngineConfig): Configuration for the inference engine.

    Returns:
        A pre-norm module implementing the given configuration.
    cuda_pre_lncuda_pre_rmsr   )r   type	LayerNormRMSNormr   r   r"   )rD   r   module_namer!   r#   r#   r$   instantiate_pre_norm   s   
rM   unembed_configc                 C   r   )a  
    Choose an appropriate unembedding implementation based on the given configurations. This
    method is currently a stub, but as more implementations may be developed  we can centralize
    the logic for choosing between them here.

    Arguments:
        unembed_config (DSUnembedConfig): Configuration for the unembed module.
        engine_config (RaggedInferenceEngineConfig): Configuration for the inference engine.

    Returns:
        An unembed module implementing the given configuration.
    ragged_unembedr   )r   r   r"   )rN   r   r!   r#   r#   r$   instantiate_unembed   r)   rP   N)#	config_v2r   inference_utilsr   module_registryr   modules.configsr   r   r   r	   r
   r   modules.interfacesr   r   r   r   r   r   r   r   r   r   r   r   r   r   r%   r(   r<   rC   rF   rM   rP   r#   r#   r#   r$   <module>   s     @
#