o
    Ti                  	   @   s   d dl Z d dlZd dlZd dlZd dlmZ ddlmZ ddlm	Z	 ddl
mZ ddlmZ ddlmZmZmZmZmZmZmZmZmZmZ dd	lmZmZ dd
lmZmZ ejfdede	de defddZ!ejfdede	de defddZ"dS )    N)version   )InferenceEngineV2)RaggedInferenceEngineConfig)HuggingFaceCheckpointEngine)inference_logger)
	OPTPolicyLlama2PolicyMistralPolicyMixtralPolicyFalconPolicy	PhiPolicy
Phi3Policy
QwenPolicyQwen2PolicyQwen2MoePolicy)POLICIESInferenceV2Policy)make_metadata_filenameModelMetadatapathengine_configdebug_levelreturnc                 C   s   t |d t| d|jj}tt|d}t|}zt	|j
 }W n ty3   td|j
 d|  w tttj| dd}||| d}t||S )	a-  
    Creates an engine from a checkpoint saved by ``InferenceEngineV2``.

    Arguments:
        path: Path to the checkpoint. This does not need to point to any files in particular,
            just the directory containing the checkpoint.
        engine_config: Engine configuration. See ``RaggedInferenceEngineConfig`` for details.
        debug_level: Logging level to use. Unless you are actively seeing issues, the recommended
            value is ``logging.INFO``.

    Returns:
        Fully initialized inference engine ready to serve queries.
    levelr   rzUnknown policy z for model ds_model_config.pklrb)inf_checkpoint_path)r   r   tensor_paralleltp_sizejsonloadopenr   	parse_rawr   policyKeyError
ValueErrorpickleosr   joinr   )r   r   r   metadata_filenamemetadata
policy_clsmodel_configr&    r0   Y/home/ubuntu/.local/lib/python3.10/site-packages/deepspeed/inference/v2/engine_factory.pybuild_engine_from_ds_checkpoint    s   


r2   c                 C   s  t jt j| drt| ||dS t|d t| }|j}|jdkr1|j	s*t
dt||d}n|jdkr=t||d}n|jdkr`d	d
l}t|jtdksYJ d|j t||d}ns|jdkrd	d
l}t|jtdks|J d|j t||d}nP|jdkrt||d}nD|jdkrt||d}n8|jdkrt||d}n,|jdkrt||d}n |jdkrt||d}n|jdkrt||d}nt
d|j t||S )ax  
    Build an InferenceV2 engine for HuggingFace models. This can accept both a HuggingFace
    model name or a path to an Inference-V2 checkpoint.

    Arguments:
        path: Path to the checkpoint. This does not need to point to any files in particular,
            just the directory containing the checkpoint.
        engine_config: Engine configuration. See ``RaggedInferenceEngineConfig`` for details.
        debug_level: Logging level to use. Unless you are actively seeing issues, the recommended
            value is ``logging.INFO``.

    Returns:
        Fully initialized inference engine ready to serve queries.
    r   )r   r   optzDetected OPT-350m model. This model is not currently supported. If this is not the 350m model, please open an issue: https://github.com/deepspeedai/DeepSpeed-MII/issues)checkpoint_enginellamamistralr   Nz4.34.0z:Mistral requires transformers >= 4.34.0, you have version mixtralz4.36.1z:Mistral requires transformers >= 4.36.1, you have version falconphiphi3qwenqwen2	qwen2_moezUnsupported model type )r*   r   existsr+   r2   r   r   r/   
model_typedo_layer_norm_beforer(   r   r	   transformersr   parse__version__r
   r   r   r   r   r   r   r   r   )r   r   r   r4   r/   r&   rA   r0   r0   r1   build_hf_engineE   sN   













rD   )#r"   loggingr*   r)   	packagingr   	engine_v2r   	config_v2r   
checkpointr   r   model_implementationsr   r	   r
   r   r   r   r   r   r   r   +model_implementations.inference_policy_baser   r   (model_implementations.flat_model_helpersr   r   INFOstrintr2   rD   r0   r0   r0   r1   <module>   s8   0
'