o
    }oi                     @   sV   d dl Z d dlm  mZ d dlmZmZmZm	Z	 dd Z
dd Zdd Zd	d
 ZdS )    N)MixtralConfigMixtralConfig8x3BMixtralConfig8x7BMixtralConfig8x22Bc                  C   s  t  } | jdks
J | jtjksJ | jdksJ | jdu s J | jdu s'J | jdks.J | j	dks5J | j
dks<J | jdksCJ | jdksJJ | jdksQJ | jdksXJ | jd	ks_J | jd	ksfJ | jdu smJ | jdkstJ | jd
ks{J | jdksJ | jdu sJ | jdksJ | jdksJ | jdksJ | jdksJ | jdksJ | jdksJ | jdu sJ | jtjksJ d S )NRMSNormropeFT           8  g        g{Gz?   alltoallaux_lossg{Gz?gh㈵>g      ?g    .A) r   normalizationactivation_funcFsiluposition_embedding_typeadd_bias_lineargated_linear_unit
num_layershidden_sizenum_attention_headsnum_query_groupsffn_hidden_sizemax_position_embeddings
seq_lengthattention_dropouthidden_dropout#share_embeddings_and_output_weightsnum_moe_expertsmoe_aux_loss_coeffmoe_router_topkmoe_router_pre_softmaxmoe_token_dispatcher_typemoe_router_load_balancing_typeinit_method_stdlayernorm_epsilonrotary_percentrotary_basebf16params_dtypetorchbfloat16config r0   `/home/ubuntu/.local/lib/python3.10/site-packages/tests/collections/llm/gpt/model/test_mixtral.pytest_mixtral_config   s8   r2   c                  C   s^   t  } | jdks
J | jdksJ | jdksJ | jdksJ | jdks&J | jdks-J d S )Nr   i 
  i #  r	   )r   r   r   r   r   r   r   r.   r0   r0   r1   test_mixtral_config_8x3b9      r3   c                  C   sP   t  } | jdks
J | jdksJ | jdksJ | jdksJ | jdks&J d S )Nr   r	   r   )r   r   r   r   r   r   r.   r0   r0   r1   test_mixtral_config_8x7bC   s   r5   c                  C   s^   t  } | jdks
J | jdksJ | jdksJ | jdksJ | jdks&J | jdks-J d S )N8   i   0   i @  r	   )r   r   r   r   r   r   r   r.   r0   r0   r1   test_mixtral_config_8x22bL   r4   r8   )r,   torch.nn.functionalnn
functionalr   &nemo.collections.llm.gpt.model.mixtralr   r   r   r   r2   r3   r5   r8   r0   r0   r0   r1   <module>   s   
	