o
    Ti,                     @   s   d dl Z d dlZd dlmZmZ d dlmZ d dlmZ d dl	m
Z
mZmZ d dlmZ G dd deZG d	d
 d
eeZG dd deZG dd deZG dd deeZG dd deZG dd deZG dd deZG dd deZG dd deZG dd deZG dd deZdS )    N)Fieldfield_validator)DeepSpeedConfigModel)DeepSpeedZeroConfig)DictUnionOptional)Enumc                   @   sZ   e Zd ZejddddfZejddddfZejd	d
ddfZ	ej
ddfZ
edefddZdS )	DtypeEnumztorch.float16fp16float16halfztorch.float32fp32float32floatztorch.bfloat16bf16bfloat16bfloatz
torch.int8int8valuec                 C   s,   | D ]}||j v r|  S qtd| d)N'z' is not a valid DtypeEnum)r   
ValueError)clsr   dtype r   N/home/ubuntu/.local/lib/python3.10/site-packages/deepspeed/inference/config.pyfrom_str   s
   
zDtypeEnum.from_strN)__name__
__module____qualname__torchr   r   r   r   r   r   r   classmethodstrr   r   r   r   r   r
      s    r
   c                   @      e Zd ZdZdZdS )MoETypeEnumresidualstandardN)r   r   r   r%   r&   r   r   r   r   r$          r$   c                   @   sV   e Zd ZU dZdZeed< 	 dZeed< 	 dZ	eed< 	 dZ
eed	< 	 dZeed
< dS )DeepSpeedTPConfigz' Configure tensor parallelism settings Tenabled   tp_size@   tp_grain_sizeNmputp_group)r   r   r   __doc__r)   bool__annotations__r+   intr-   r.   objectr/   r   r   r   r   r(   "   s   
 r(   c                   @   st   e Zd ZU dZdZeed< dZeed< 	 e	dgddZ
eed< 	 ejZeed	< 	 d
Zeed< e	d
ddZeed< d
S )DeepSpeedMoEConfigz Sets parameters for MoE Tr)   r*   ep_sizenum_expertsaliasmoe_expertstypeNep_mp_groupexpert_groupep_group)r   r   r   r0   r)   r1   r2   r6   r3   r   r:   listr$   r&   r;   r<   r4   r>   r   r   r   r   r5   7   s   
 r5   c                   @   r#   )QuantTypeEnum
asymmetric	symmetricN)r   r   r   asymsymr   r   r   r   r@   N   r'   r@   c                   @   s@   e Zd ZU dZeed< dZeed< ej	Z
eed< dZeed< dS )	BaseQuantConfigTr)      num_bitsq_typer*   q_groupsN)r   r   r   r)   r1   r2   rG   r3   r@   rD   rH   rI   r   r   r   r   rE   S   s
   
 rE   c                   @   s2   e Zd ZU dZeed< i Zeed< i Zeed< dS )WeightQuantConfigTr)   quantized_initializationpost_init_quantN)	r   r   r   r)   r1   r2   rK   r   rL   r   r   r   r   rJ   Z   s   
 rJ   c                   @      e Zd ZU dZeed< dS )ActivationQuantConfigTr)   Nr   r   r   r)   r1   r2   r   r   r   r   rN   `      
 rN   c                   @   rM   )QKVQuantConfigTr)   NrO   r   r   r   r   rQ   d   rP   rQ   c                   @   sD   e Zd ZU dZeed< e Zeed< e Z	eed< e
 Ze
ed< dS )QuantizationConfigTr)   
activationweightqkvN)r   r   r   r)   r1   r2   rN   rS   rJ   rT   rQ   rU   r   r   r   r   rR   h   s
   
 rR   c                   @   s>   e Zd ZU dZee ed< dZee ed< dZee ed< dS )InferenceCheckpointConfigNcheckpoint_dirsave_mp_checkpoint_pathbase_dir)	r   r   r   rW   r   r"   r2   rX   rY   r   r   r   r   rV   p   s   
 rV   c                   @   s  e Zd ZU dZedddZeed< 	 ej	Z
ej
ed< 	 ei ddZeed< 	 dZeed	< 	 dZeed
< 	 dZeed< 	 i Zeed< 	 edddZeed< 	 i Zeeef ed< 	 dZeed< 	 i Zeed< 	 dZeeeef  ed< 	 dZeed< 	 dZeed< 	 dZee ed< 	 ei ddZ e!ed< 	 dZ"eed< 	 dZ#e$ed< 	 edddd d!Z%eed"< edd#dZ&ee ed$< 	 dZ'ee( ed%< 	 edd&dZ)ee ed'< ed(d)dZ*e$ed*< 	 edd+dZ+e$ed,< 	 edd-dZ,eed-< eddd.d/d!Z-e$ed0< 	 eddd1d/d!Z.e/ed2< eddd3d/d!Z0e$ed4< edd5dd6d/d7Z1e/ed8< edd9dd:d/d7Z2e/ed;< edgdd<d/d!Z3e4ed=< ee5j6dd>d/d!Z7e5ed?< e8dd@dAdBdC Z9e8ddDdE Z:e8d
dFdG Z;dS )HDeepSpeedInferenceConfigz1 Sets parameters for DeepSpeed Inference Engine. Fkernel_injectr8   replace_with_kernel_injectr   tptensor_parallelenable_cuda_graph
use_tritontriton_autotunezeroTtmtriangular_maskingmoekeep_module_on_hostquantN
checkpoint rY   set_empty_paramsrX   ckpt_configcheckpoint_configreturn_tupler*   training_mp_sizeautozWThis parameter is no longer needed, please remove from your call to DeepSpeed-inference)
deprecateddeprecated_msg)json_schema_extrareplace_methodinjection_dictinjection_policyinjection_policy_tupleargsconfigi   
max_tokensmax_out_tokens
min_tokensmin_out_tokenstransposed_modeztensor_parallel.tp_size)rp   	new_parammp_sizeztensor_parallel.mpur.   zmoe.ep_sizer6   r=   zmoe.ep_group)r9   rr   r>   expert_mp_groupzmoe.ep_mp_groupr<   zmoe.moe_expertsr:   zmoe.typemoe_typebefore)modec                 C   s<   t |trt|jd S t |tjr|S tdt| )Nr   zInvalid type for dtype: )	
isinstancer"   r
   r   r   r    r   	TypeErrorr;   r   field_valuevaluesr   r   r   validate_dtype1  s
   
z'DeepSpeedInferenceConfig.validate_dtypec                 C   s   t |tr
t|dS |S )N)re   )r   r1   r5   r   r   r   r   moe_backward_compat9  s   

z,DeepSpeedInferenceConfig.moe_backward_compatc                 C   s   |r	t js	td|S )NzATriton needs to be installed to use deepspeed with triton kernels)	deepspeed
HAS_TRITONr   r   r   r   r   
has_triton?  s   
z#DeepSpeedInferenceConfig.has_triton)<r   r   r   r0   r   r\   r1   r2   r    r   r   r^   r(   r_   r`   ra   rb   r   rd   re   r   r5   rf   rg   rR   rh   r   r"   r   rY   rj   rX   rl   rV   rm   rn   r3   rs   ru   rv   tuplerx   rz   r|   r}   r   r.   r4   r6   r>   r<   r:   r?   r$   r&   r   r   r   r   r   r   r   r   r   rZ   v   s   
 


rZ   )r    r   pydanticr   r   deepspeed.runtime.config_utilsr   deepspeed.runtime.zero.configr   typingr   r   r   enumr	   r
   r"   r$   r(   r5   r@   rE   rJ   rN   rQ   rR   rV   rZ   r   r   r   r   <module>   s&   