o
    ߥiB                     @   s<   d dl Z d dlmZ d dlmZ e ZG dd deZdS )    N)PretrainedConfig)loggingc                $       sx   e Zd ZdZdddddddddd	d
ddddddddddddddddddddgdddddf$ fdd	Zedd Z  ZS )GPTMoEConfigzgpt-moei d  i   N   i   gelug?i      g-q=TFg{Gz?   d   r   g?standardc%           (         s  t  jdd|i|% || _|| _|d u rd| n|| _|| _|| _|| _|| _|| _	|	| _
|
| _|| _|| _|| _|| _|| _|| _|| _|rL|rLJ || _|| _|d u rc|| dks^J || | _|| _|| _|| _|| _|| _|| _|| _|| _|| _|| _|| _ | | _!|!| _"|"| _#|#| _$|$| _%| j d t&j'( krt&j'( | _)n| j d | _)t*t&j+,dd }&t*t&j+,dd }'|&dk p|&dko|'dk | _-d S )Nlayer_norm_eps   r   .       ).super__init__
vocab_sizehidden_sizeffn_hidden_sizenum_hidden_layersnum_attention_heads
hidden_actintermediate_sizehidden_dropout_probattention_probs_dropout_probmax_position_embeddingstype_vocab_sizelayernorm_epsilonbias_gelu_fusionfp32_residual_connectionsequence_parallelfp16bf16apply_query_key_layer_scalingattention_softmax_in_fp32kv_channelsmasked_softmax_fusionattention_dropoutbias_dropout_fusion(apply_residual_connection_post_layernormhidden_dropoutinit_method_stdeod_idtokens_to_generatetop_ktop_pnum_experts	use_tuteltop_k_linear_strategyuse_expert_residual_networkload_ds_ckpts	model_dirtorchcudadevice_countmoe_expert_parallel_sizeint__version__splitno_persist_layer_norm)(selfr   r   r   r   r   r   r   r   r   r   r   r   r   r    r!   r"   r#   r$   r%   r&   r'   r(   r)   r*   r+   r,   r-   r.   r/   r0   r1   r2   r3   r4   r5   r6   kwargsTORCH_MAJORTORCH_MINOR	__class__r   _/home/ubuntu/.local/lib/python3.10/site-packages/modelscope/models/nlp/gpt_moe/configuration.pyr      sf   (
zGPTMoEConfig.__init__c                 C   s   | j rtjS | jrtjS tjS )N)r"   r7   halfr#   bfloat16float)r?   r   r   rE   params_dtypey   s
   zGPTMoEConfig.params_dtype)__name__
__module____qualname__
model_typer   propertyrI   __classcell__r   r   rC   rE   r      sR    ^r   )r7    transformers.configuration_utilsr   transformers.utilsr   
get_loggerloggerr   r   r   r   rE   <module>   s
   