o
    i                     @  sL   d dl mZ d dlmZ ddlmZmZmZmZ G dd dZ	dddZ
dS )    )annotations)Sequence   )
MODEL_ARCHMODEL_TENSORMODEL_TENSORSTENSOR_NAMESc                   @  s  e Zd ZU ejdejdejdejdejdej	dej
dejdejdejd	i
Zd
ed< i ejdejdejdejdejdejdejdejdejdejdejdejdejdejdejdejdej di ej!dej"dej#dej$d ej%d!ej&d"ej'd#ej(d$ej)d%ej*d&ej+d'ej
d(ej,d)ej-d*ej.d+ej/d,ej0d-i ej1d.ej2d/ej3d0ej4d1ej5d2ej6d3ej7d4ej8d5ej9d6ej:d7ej;d8ej<d9ej=d:ej>d;ej?d<ej@d=ejAd>i ejBd?ejCd@ejDdAejEdBejFdCejGdDejHdEejIdFejJdGejKdHejLdIejMdJejNdKejOdLejPdMejQdNejRdOi ejSdPejTdQejUdRejVdSejWdTejXdUejYdVejZdWej[dXej\dYej]dZej^d[ej_d\ej`d]ejad^ejbd_ejcd`i ejddaejedbejfdcejgddejhdeejidfejjdgejkdhejldiejmdjejndkejodlejpdmejqdnejrdoejsdpejtdqi ejudrejvdsejwdtejxduejydvejzdwej{dxej|dyej}dzej~d{ejd|ejd}ejd~ejdejdejdejdi ejdejdejdejdejdejdejdejdejdejdejdejdejdejdejdejdejdi ejdejdejdejdejdejdejdejdejdejdejdejdejdejdejdejdejdi ejdejdejdejdejdejdejdejdejdejdejdejdejdejdejdejdejdejdejdejdejdejdejdejdejdejdejdejdejdiZd
ed< ejejdejdiiZded< ded< ddd˄ZddddӄZddddքZddddلZdddۄZdddބZdddZdS )TensorNameMap)zgpt_neox.embed_inztransformer.wteztransformer.word_embeddingsword_embeddingszmodel.embed_tokenstok_embeddingszembeddings.word_embeddingsz(language_model.embedding.word_embeddingswteztransformer.embd.wtezmodel.tok_embeddingszmodel.embeddingzbackbone.embeddingzbackbone.embeddingsztransformer.in_out_embedzembedding.word_embeddingsztransformer.token_embeddingssharedzrwkv.embeddingszmodel.embeddingszmodel.word_embeddingsz!language_model.model.embed_tokensencoder)z embeddings.token_type_embeddings)	word_embeddings_layernormzembeddings.LayerNormemb_lntransformer.normrwkv.blocks.0.pre_lnr   zmodel.pre_lnzmodel.layers.0.pre_normzbackbone.norm)ztransformer.wpezembeddings.position_embeddingswpe)		embed_outlm_headoutputword_embeddings_for_headzlm_head.linearoutput_layerheadzhead.outr   )zgpt_neox.final_layer_normztransformer.ln_f
model.normnormztransformer.norm_fln_fz&language_model.encoder.final_layernormzmodel.final_layernormz
lm_head.lnzmodel.norm_fzbackbone.norm_fztransformer.rms_normzencoder.final_layernormr   r   zrwkv.ln_outzmodel.ln_outzbackbone.final_layer_normr   )z
rope.freqszrotary_pos_emb.inv_freq )zbackbone.embedz#dict[MODEL_TENSOR, tuple[str, ...]]mappings_cfg)z%gpt_neox.layers.{bid}.input_layernormztransformer.h.{bid}.ln_1ztransformer.blocks.{bid}.norm_1z#transformer.h.{bid}.input_layernormzh.{bid}.input_layernormztransformer.h.{bid}.ln_mlp"model.layers.{bid}.input_layernormzlayers.{bid}.attention_normz3language_model.encoder.layers.{bid}.input_layernormmodel.layers.{bid}.ln1zh.{bid}.ln_1ztransformer.h.{bid}.lnzmodel.layers.layers.{bid}.normz!model.layers.{bid}.attention_normzmodel.layers.{bid}.normzbackbone.layers.{bid}.normz(transformer.decoder_layer.{bid}.rms_normz.transformer.blocks.{bid}.norm_attn_norm.norm_1z$encoder.layers.{bid}.input_layernormz"transformer.layers.{bid}.attn_normzrwkv.blocks.{bid}.ln1r    r   z(transformer_encoder.{bid}.attention_norm)ztransformer.h.{bid}.ln_attnz encoder.layer.{bid}.layer_norm_1zrwkv.blocks.{bid}.ln2model.layers.{bid}.ln2)z/gpt_neox.layers.{bid}.attention.query_key_valueztransformer.h.{bid}.attn.c_attnz"transformer.blocks.{bid}.attn.Wqkvz1transformer.blocks.{bid}.norm_attn_norm.attn.Wqkvz2transformer.h.{bid}.self_attention.query_key_valuez&h.{bid}.self_attention.query_key_valuezBlanguage_model.encoder.layers.{bid}.self_attention.query_key_valuez,model.layers.{bid}.self_attn.query_key_valuezh.{bid}.attn.c_attnztransformer.h.{bid}.mixer.Wqkvzencoder.layers.{bid}.attn.Wqkvzencoder.layers.{bid}.mixer.Wqkvz%model.layers.{bid}.self_attn.qkv_projz3encoder.layers.{bid}.self_attention.query_key_valuez&transformer.layers.{bid}.attn.qkv_projztransformer_encoder.{bid}.qkv)#model.layers.{bid}.self_attn.q_projz+model.layers.{bid}.self_attn.q_proj_no_permzlayers.{bid}.attention.wqz(encoder.layer.{bid}.attention.self.queryz'transformer.layer.{bid}.attention.q_linztransformer.h.{bid}.attn.q_projz*model.layers.layers.{bid}.self_attn.q_projzmodel.layers.{bid}.attention.wqz:transformer.decoder_layer.{bid}.multi_head_attention.queryz)transformer.h.{bid}.attn.attention.q_projr"   )#model.layers.{bid}.self_attn.k_projz+model.layers.{bid}.self_attn.k_proj_no_permzlayers.{bid}.attention.wkz&encoder.layer.{bid}.attention.self.keyz'transformer.layer.{bid}.attention.k_linztransformer.h.{bid}.attn.k_projztransformer.h.{bid}.attn.kz*model.layers.layers.{bid}.self_attn.k_projzmodel.layers.{bid}.attention.wkz8transformer.decoder_layer.{bid}.multi_head_attention.keyz)transformer.h.{bid}.attn.attention.k_projr#   )#model.layers.{bid}.self_attn.v_projzlayers.{bid}.attention.wvz(encoder.layer.{bid}.attention.self.valuez'transformer.layer.{bid}.attention.v_linztransformer.h.{bid}.attn.v_projztransformer.h.{bid}.attn.vz*model.layers.layers.{bid}.self_attn.v_projzmodel.layers.{bid}.attention.wvz:transformer.decoder_layer.{bid}.multi_head_attention.valuez)transformer.h.{bid}.attn.attention.v_projr$   )z%gpt_neox.layers.{bid}.attention.denseztransformer.h.{bid}.attn.c_projz&transformer.blocks.{bid}.attn.out_projz(transformer.h.{bid}.self_attention.densezh.{bid}.self_attention.dense#model.layers.{bid}.self_attn.o_projz(model.layers.{bid}.self_attn.linear_attnzlayers.{bid}.attention.woz*encoder.layer.{bid}.attention.output.densez)transformer.layer.{bid}.attention.out_linz!transformer.h.{bid}.attn.out_projz8language_model.encoder.layers.{bid}.self_attention.densez"model.layers.{bid}.self_attn.densezh.{bid}.attn.c_projz"transformer.h.{bid}.mixer.out_projz*model.layers.layers.{bid}.self_attn.o_projzmodel.layers.{bid}.attention.woz"encoder.layers.{bid}.attn.out_projz#encoder.layers.{bid}.mixer.out_projz;transformer.decoder_layer.{bid}.multi_head_attention.linearz5transformer.blocks.{bid}.norm_attn_norm.attn.out_projz)encoder.layers.{bid}.self_attention.densez&transformer.layers.{bid}.attn.out_projz+transformer.h.{bid}.attn.attention.out_projr%   ztransformer_encoder.{bid}.wo)z.encoder.layer.{bid}.attention.output.LayerNormz%transformer.layer.{bid}.sa_layer_normzencoder.layers.{bid}.norm1z*transformer.decoder_layer.{bid}.rms_norm_1z.transformer.blocks.{bid}.norm_attn_norm.norm_2)+model.layers.{bid}.post_attention_layernormz+model.layers.{bid}.post_self_attn_layernorm)z0model.layers.{bid}.self_attn.rotary_emb.inv_freqz1layers.{bid}.attention.inner_attention.rope.freqsz7model.layers.layers.{bid}.self_attn.rotary_emb.inv_freqz,transformer.h.{bid}.attn.rotary_emb.inv_freq)z.gpt_neox.layers.{bid}.post_attention_layernormztransformer.h.{bid}.ln_2z h.{bid}.post_attention_layernormztransformer.blocks.{bid}.norm_2r&   zlayers.{bid}.ffn_normz<language_model.encoder.layers.{bid}.post_attention_layernormr!   zh.{bid}.ln_2zmodel.layers.{bid}.ffn_normz*transformer.decoder_layer.{bid}.rms_norm_2z-encoder.layers.{bid}.post_attention_layernormz!transformer.layers.{bid}.ffn_normr&   z"transformer_encoder.{bid}.ffn_norm)z,model.layers.{bid}.pre_feedforward_layernorm)z-model.layers.{bid}.post_feedforward_layernormz%model.layers.{bid}.post_mlp_layernorm)zlayers.{bid}.feed_forward.gatez(model.layers.{bid}.block_sparse_moe.gatezmodel.layers.{bid}.mlp.gatez&transformer.decoder_layer.{bid}.routerz)transformer.blocks.{bid}.ffn.router.layerz0model.layers.{bid}.block_sparse_moe.router.layerz&model.layers.{bid}.feed_forward.routerz%encoder.layers.{bid}.mlp.router.layer)z)model.layers.{bid}.mlp.shared_expert_gate)z.model.layers.{bid}.mlp.gate.e_score_correction)z'gpt_neox.layers.{bid}.mlp.dense_h_to_4hztransformer.h.{bid}.mlp.c_fcz$transformer.blocks.{bid}.ffn.up_projz%transformer.h.{bid}.mlp.dense_h_to_4hzh.{bid}.mlp.dense_h_to_4hzmodel.layers.{bid}.mlp.up_projzlayers.{bid}.feed_forward.w3z&encoder.layer.{bid}.intermediate.densez transformer.layer.{bid}.ffn.lin1ztransformer.h.{bid}.mlp.fc_inz transformer.h.{bid}.mlp.linear_3z5language_model.encoder.layers.{bid}.mlp.dense_h_to_4hz$model.layers.{bid}.mlp.dense_h_to_4hztransformer.h.{bid}.mlp.w1zh.{bid}.mlp.c_fcztransformer.h.{bid}.mlp.fc1zmodel.layers.{bid}.mlp.fc1z#model.layers.{bid}.mlp.gate_up_projz%model.layers.layers.{bid}.mlp.up_projz"model.layers.{bid}.feed_forward.w3zencoder.layers.{bid}.mlp.fc11zencoder.layers.{bid}.mlp.fc1zmodel.layers.{bid}.mlp.c_fcz&encoder.layer.{bid}.mlp.gated_layers_vz$encoder.layer.{bid}.mlp.gated_layersz&encoder.layer.{bid}.mlp.up_gated_layerz"model.layers.{bid}.residual_mlp.w3z&encoder.layers.{bid}.mlp.dense_h_to_4hztransformer.h.{bid}.mlp.c_fc_1z'model.layers.{bid}.feed_forward.up_projz!transformer_encoder.{bid}.ffn.w12)z$layers.{bid}.feed_forward.experts.w3z,transformer.decoder_layer.{bid}.moe.linear_vz+transformer.blocks.{bid}.ffn.experts.mlp.v1z&model.layers.{bid}.mlp.experts.up_projz.model.layers.{bid}.block_sparse_moe.experts.w3z/model.layers.{bid}.feed_forward.experts.up_projz'encoder.layers.{bid}.mlp.experts.mlp.w1)z,model.layers.{bid}.mlp.shared_expert.up_projz-model.layers.{bid}.mlp.shared_experts.up_projz5model.layers.{bid}.feed_forward.shared_expert.up_proj)z transformer.blocks.{bid}.ffn.act)z model.layers.{bid}.mlp.gate_projzlayers.{bid}.feed_forward.w1ztransformer.h.{bid}.mlp.w2ztransformer.h.{bid}.mlp.c_fc2z'model.layers.layers.{bid}.mlp.gate_projz"model.layers.{bid}.feed_forward.w1zencoder.layers.{bid}.mlp.fc12z&encoder.layer.{bid}.mlp.gated_layers_wz transformer.h.{bid}.mlp.linear_1z"model.layers.{bid}.residual_mlp.w1ztransformer.h.{bid}.mlp.c_fc_0z)model.layers.{bid}.feed_forward.gate_proj)z$layers.{bid}.feed_forward.experts.w1z*transformer.decoder_layer.{bid}.moe.linearz+transformer.blocks.{bid}.ffn.experts.mlp.w1z(model.layers.{bid}.mlp.experts.gate_projz.model.layers.{bid}.block_sparse_moe.experts.w1z1model.layers.{bid}.feed_forward.experts.gate_proj)z.model.layers.{bid}.mlp.shared_expert.gate_projz/model.layers.{bid}.mlp.shared_experts.gate_projz7model.layers.{bid}.feed_forward.shared_expert.gate_proj)z'gpt_neox.layers.{bid}.mlp.dense_4h_to_hztransformer.h.{bid}.mlp.c_projz&transformer.blocks.{bid}.ffn.down_projz%transformer.h.{bid}.mlp.dense_4h_to_hzh.{bid}.mlp.dense_4h_to_hz model.layers.{bid}.mlp.down_projzlayers.{bid}.feed_forward.w2z encoder.layer.{bid}.output.densez transformer.layer.{bid}.ffn.lin2ztransformer.h.{bid}.mlp.fc_outz5language_model.encoder.layers.{bid}.mlp.dense_4h_to_hz$model.layers.{bid}.mlp.dense_4h_to_hzh.{bid}.mlp.c_projztransformer.h.{bid}.mlp.fc2zmodel.layers.{bid}.mlp.fc2z'model.layers.layers.{bid}.mlp.down_projz"model.layers.{bid}.feed_forward.w2zencoder.layers.{bid}.mlp.fc2zmodel.layers.{bid}.mlp.c_projzencoder.layer.{bid}.mlp.woz#transformer.layers.{bid}.ffn.proj_2z"model.layers.{bid}.residual_mlp.w2z"encoder.layer.{bid}.mlp.down_layerz&encoder.layers.{bid}.mlp.dense_4h_to_hzmodel.layers.h.{bid}.mlp.c_projz)model.layers.{bid}.feed_forward.down_projz transformer_encoder.{bid}.ffn.w3)z$layers.{bid}.feed_forward.experts.w2z,transformer.decoder_layer.{bid}.moe.linear_1z+transformer.blocks.{bid}.ffn.experts.mlp.w2z(model.layers.{bid}.mlp.experts.down_projz1model.layers.{bid}.block_sparse_moe.output_linearz.model.layers.{bid}.block_sparse_moe.experts.w2z1model.layers.{bid}.feed_forward.experts.down_projz'encoder.layers.{bid}.mlp.experts.mlp.w2)z.model.layers.{bid}.mlp.shared_expert.down_projz/model.layers.{bid}.mlp.shared_experts.down_projz7model.layers.{bid}.feed_forward.shared_expert.down_projz+model.layers.{bid}.shared_mlp.output_linear)z>language_model.encoder.layers.{bid}.self_attention.q_layernormz(model.layers.{bid}.self_attn.q_layernormz#model.layers.{bid}.self_attn.q_normz"transformer.blocks.{bid}.attn.q_lnz/encoder.layer.{bid}.attention.self.layer_norm_qz$transformer.layers.{bid}.attn.q_norm)z>language_model.encoder.layers.{bid}.self_attention.k_layernormz(model.layers.{bid}.self_attn.k_layernormz#model.layers.{bid}.self_attn.k_normz"transformer.blocks.{bid}.attn.k_lnz/encoder.layer.{bid}.attention.self.layer_norm_kz$transformer.layers.{bid}.attn.k_norm)zFlanguage_model.encoder.layers.{bid}.self_attention.rotary_emb.inv_freq)z$encoder.layer.{bid}.output.LayerNormz)transformer.layer.{bid}.output_layer_normzencoder.layers.{bid}.norm2z*transformer.decoder_layer.{bid}.rms_norm_3z!encoder.layer.{bid}.mlp.layernormz encoder.layer.{bid}.layer_norm_2)zmodel.layers.{bid}.in_projz#backbone.layers.{bid}.mixer.in_proj)zmodel.layers.{bid}.conv1dz"backbone.layers.{bid}.mixer.conv1d)zmodel.layers.{bid}.x_projz"backbone.layers.{bid}.mixer.x_proj)zmodel.layers.{bid}.dt_projz#backbone.layers.{bid}.mixer.dt_proj)zmodel.layers.{bid}.A_logz!backbone.layers.{bid}.mixer.A_log)zmodel.layers.{bid}.Dzbackbone.layers.{bid}.mixer.D)zmodel.layers.{bid}.out_projz$backbone.layers.{bid}.mixer.out_proj)zmodel.layers.{bid}.attention.w0)z'rwkv.blocks.{bid}.attention.time_maa_w1z(model.layers.{bid}.self_attn.time_maa_w1zmodel.layers.{bid}.attention.w1)z'rwkv.blocks.{bid}.attention.time_maa_w2z(model.layers.{bid}.self_attn.time_maa_w2zmodel.layers.{bid}.attention.w2)zmodel.layers.{bid}.attention.a0)zmodel.layers.{bid}.attention.a1)zmodel.layers.{bid}.attention.a2)zmodel.layers.{bid}.attention.v0)zmodel.layers.{bid}.attention.v1)zmodel.layers.{bid}.attention.v2)zmodel.layers.{bid}.attention.g1)zmodel.layers.{bid}.attention.g2)z model.layers.{bid}.attention.k_k)z model.layers.{bid}.attention.k_a)z model.layers.{bid}.attention.r_k)z&rwkv.blocks.{bid}.attention.time_maa_xz'model.layers.{bid}.self_attn.time_maa_x)z&rwkv.blocks.{bid}.attention.time_maa_kz'model.layers.{bid}.self_attn.time_maa_k)z&rwkv.blocks.{bid}.attention.time_maa_vz'model.layers.{bid}.self_attn.time_maa_v)z&rwkv.blocks.{bid}.attention.time_maa_rz'model.layers.{bid}.self_attn.time_maa_r)z&rwkv.blocks.{bid}.attention.time_maa_gz'model.layers.{bid}.self_attn.time_maa_g)z&rwkv.blocks.{bid}.attention.time_maa_wz'model.layers.{bid}.self_attn.time_maa_w)z&rwkv.blocks.{bid}.attention.time_faaaa)z&rwkv.blocks.{bid}.attention.time_decayz'model.layers.{bid}.self_attn.time_decay)z)rwkv.blocks.{bid}.attention.time_decay_w1z*model.layers.{bid}.self_attn.time_decay_w1)z)rwkv.blocks.{bid}.attention.time_decay_w2z*model.layers.{bid}.self_attn.time_decay_w2)zrwkv.blocks.{bid}.attention.keyr#   z model.layers.{bid}.attention.keyz#model.layers.{bid}.attention.k_proj)z!rwkv.blocks.{bid}.attention.valuer$   z"model.layers.{bid}.attention.valuez#model.layers.{bid}.attention.v_proj)z&rwkv.blocks.{bid}.attention.receptancer"   z'model.layers.{bid}.attention.receptancez#model.layers.{bid}.attention.r_proj)z rwkv.blocks.{bid}.attention.gatez!model.layers.{bid}.self_attn.gate)z rwkv.blocks.{bid}.attention.ln_xz!model.layers.{bid}.attention.ln_x)z"rwkv.blocks.{bid}.attention.outputr%   z#model.layers.{bid}.attention.outputz#model.layers.{bid}.attention.o_proj)z)rwkv.blocks.{bid}.feed_forward.time_maa_kz#model.layers.{bid}.feed_forward.x_k)z)rwkv.blocks.{bid}.feed_forward.time_maa_r)z"rwkv.blocks.{bid}.feed_forward.keyz#model.layers.{bid}.feed_forward.key)z)rwkv.blocks.{bid}.feed_forward.receptance)z$rwkv.blocks.{bid}.feed_forward.valuez%model.layers.{bid}.feed_forward.value)z%model.layers.{bid}.self_attn.q_a_proj)z%model.layers.{bid}.self_attn.q_b_proj)z/model.layers.{bid}.self_attn.kv_a_proj_with_mqa)z&model.layers.{bid}.self_attn.kv_b_proj)z%model.layers.{bid}.self_attn.k_b_proj)z%model.layers.{bid}.self_attn.v_b_proj)z*model.layers.{bid}.self_attn.q_a_layernorm)z+model.layers.{bid}.self_attn.kv_a_layernorm)z*model.layers.{bid}.self_attn.inner_attn_ln)z$model.layers.{bid}.mlp.ffn_layernorm)z&decoder.block.{bid}.layer.0.layer_norm)z+decoder.block.{bid}.layer.0.SelfAttention.q)z+decoder.block.{bid}.layer.0.SelfAttention.k)z+decoder.block.{bid}.layer.0.SelfAttention.v)z+decoder.block.{bid}.layer.0.SelfAttention.o)zAdecoder.block.{bid}.layer.0.SelfAttention.relative_attention_bias)z&decoder.block.{bid}.layer.1.layer_norm)z-decoder.block.{bid}.layer.1.EncDecAttention.q)z-decoder.block.{bid}.layer.1.EncDecAttention.k)z-decoder.block.{bid}.layer.1.EncDecAttention.v)z-decoder.block.{bid}.layer.1.EncDecAttention.o)zCdecoder.block.{bid}.layer.1.EncDecAttention.relative_attention_bias)z&decoder.block.{bid}.layer.2.layer_norm)z/decoder.block.{bid}.layer.2.DenseReluDense.wi_0)z-decoder.block.{bid}.layer.2.DenseReluDense.wiz/decoder.block.{bid}.layer.2.DenseReluDense.wi_1)z-decoder.block.{bid}.layer.2.DenseReluDense.wo)zdecoder.final_layer_norm)z&encoder.block.{bid}.layer.0.layer_norm)z+encoder.block.{bid}.layer.0.SelfAttention.q)z+encoder.block.{bid}.layer.0.SelfAttention.k)z+encoder.block.{bid}.layer.0.SelfAttention.v)z+encoder.block.{bid}.layer.0.SelfAttention.o)zAencoder.block.{bid}.layer.0.SelfAttention.relative_attention_bias)z&encoder.block.{bid}.layer.1.layer_norm)z/encoder.block.{bid}.layer.1.DenseReluDense.wi_0)z-encoder.block.{bid}.layer.1.DenseReluDense.wiz/encoder.block.{bid}.layer.1.DenseReluDense.wi_1)z-encoder.block.{bid}.layer.1.DenseReluDense.wo)zencoder.final_layer_norm
layer_norm)
classifierzclassifier.densepre_classifierdense)zclassifier.out_proj)zbackbone.convnext.{bid}.dwconv)zbackbone.convnext.{bid}.norm)zbackbone.convnext.{bid}.pwconv1)zbackbone.convnext.{bid}.pwconv2)zbackbone.convnext.{bid}.gamma)zbackbone.posnet.{bid}.conv1)zbackbone.posnet.{bid}.conv2)zbackbone.posnet.{bid}.norm)zbackbone.posnet.{bid}.norm1)zbackbone.posnet.{bid}.norm2)zbackbone.posnet.{bid}.q)zbackbone.posnet.{bid}.k)zbackbone.posnet.{bid}.v)zbackbone.posnet.{bid}.proj_out)z"multi_modal_projector.linear_{bid}zvisual.merger.mlp.{bid})z(model.connector.modality_projection.proj)z model.mm_projector.mlp.mlp.{bid}z'vision_model.vision_adapter.mlp.fc{bid}z
mlp1.{bid})z model.mm_projector.peg.peg.{bid})z4vision_tower.vision_model.embeddings.class_embeddingzvision_model.class_embedding)z4vision_tower.vision_model.embeddings.patch_embeddingzvpm.embeddings.patch_embeddingz-model.vision_model.embeddings.patch_embeddingzvision_tower.patch_convz#vision_model.patch_embedding.linearzvisual.patch_embed.proj)z7vision_tower.vision_model.embeddings.position_embeddingz!vpm.embeddings.position_embeddingz0model.vision_model.embeddings.position_embeddingz%vision_model.positional_embedding_vlm)z?vision_tower.vision_model.encoder.layers.{bid}.self_attn.q_projz)vpm.encoder.layers.{bid}.self_attn.q_projz8model.vision_model.encoder.layers.{bid}.self_attn.q_projz0vision_model.model.layers.{bid}.self_attn.q_projz6vision_tower.transformer.layers.{bid}.attention.q_projzvisual.blocks.{bid}.attn.q)z:vision_tower.vision_model.encoder.layers.{bid}.attn.q_norm)z?vision_tower.vision_model.encoder.layers.{bid}.self_attn.k_projz)vpm.encoder.layers.{bid}.self_attn.k_projz8model.vision_model.encoder.layers.{bid}.self_attn.k_projz0vision_model.model.layers.{bid}.self_attn.k_projz6vision_tower.transformer.layers.{bid}.attention.k_projzvisual.blocks.{bid}.attn.k)z:vision_tower.vision_model.encoder.layers.{bid}.attn.k_norm)z?vision_tower.vision_model.encoder.layers.{bid}.self_attn.v_projz)vpm.encoder.layers.{bid}.self_attn.v_projz8model.vision_model.encoder.layers.{bid}.self_attn.v_projz0vision_model.model.layers.{bid}.self_attn.v_projz6vision_tower.transformer.layers.{bid}.attention.v_projzvisual.blocks.{bid}.attn.v)z:vision_tower.vision_model.encoder.layers.{bid}.layer_norm1z4vision_tower.vision_model.encoder.layers.{bid}.norm1z$vpm.encoder.layers.{bid}.layer_norm1z3model.vision_model.encoder.layers.{bid}.layer_norm1z4vision_tower.transformer.layers.{bid}.attention_normz/vision_model.model.layers.{bid}.input_layernormzvisual.blocks.{bid}.norm1)zAvision_tower.vision_model.encoder.layers.{bid}.self_attn.out_projz8vision_tower.vision_model.encoder.layers.{bid}.attn.projz+vpm.encoder.layers.{bid}.self_attn.out_projz:model.vision_model.encoder.layers.{bid}.self_attn.out_projz0vision_model.model.layers.{bid}.self_attn.o_projz6vision_tower.transformer.layers.{bid}.attention.o_projzvisual.blocks.{bid}.attn.proj)z:vision_tower.vision_model.encoder.layers.{bid}.layer_norm2z4vision_tower.vision_model.encoder.layers.{bid}.norm2z$vpm.encoder.layers.{bid}.layer_norm2z3model.vision_model.encoder.layers.{bid}.layer_norm2z8vision_model.model.layers.{bid}.post_attention_layernormz.vision_tower.transformer.layers.{bid}.ffn_normzvisual.blocks.{bid}.norm2)z6vision_tower.vision_model.encoder.layers.{bid}.mlp.fc1z vpm.encoder.layers.{bid}.mlp.fc1z/model.vision_model.encoder.layers.{bid}.mlp.fc1z:vision_tower.transformer.layers.{bid}.feed_forward.up_projz'vision_model.model.layers.{bid}.mlp.fc1zvisual.blocks.{bid}.mlp.fc1zvisual.blocks.{bid}.mlp.up_proj)z<vision_tower.transformer.layers.{bid}.feed_forward.gate_projz!visual.blocks.{bid}.mlp.gate_proj)z6vision_tower.vision_model.encoder.layers.{bid}.mlp.fc2z vpm.encoder.layers.{bid}.mlp.fc2z/model.vision_model.encoder.layers.{bid}.mlp.fc2z<vision_tower.transformer.layers.{bid}.feed_forward.down_projz'vision_model.model.layers.{bid}.mlp.fc2zvisual.blocks.{bid}.mlp.fc2z!visual.blocks.{bid}.mlp.down_proj)z2vision_tower.vision_model.encoder.layers.{bid}.ls1)z2vision_tower.vision_model.encoder.layers.{bid}.ls2)z&vision_tower.vision_model.pre_layrnormzvision_tower.ln_prezvision_model.layernorm_pre)z(vision_tower.vision_model.post_layernormz!model.vision_model.post_layernormzvision_model.layernorm_postzvisual.merger.ln_q)z)multi_modal_projector.mm_input_projection)zmulti_modal_projector.norm)z&multi_modal_projector.mm_soft_emb_norm)zresampler.pos_embed_k)zresampler.attn.in_proj_q)zresampler.attn.in_proj_k)zresampler.attn.in_proj_v)zresampler.attn.out_proj)zresampler.kv_proj)zresampler.ln_post)zresampler.ln_kv)zresampler.ln_q)zresampler.proj)zresampler.query)zv.token_embd.img_break)z0multi_modal_projector.patch_merger.merging_layer)zaudio_tower.embed_positions)zaudio_tower.conv{bid})zaudio_tower.layer_normzaudio_tower.ln_post)z)audio_tower.layers.{bid}.self_attn.q_proj)z)audio_tower.layers.{bid}.self_attn.k_proj)z)audio_tower.layers.{bid}.self_attn.v_proj)z-audio_tower.layers.{bid}.self_attn_layer_norm)z+audio_tower.layers.{bid}.self_attn.out_proj)z)audio_tower.layers.{bid}.final_layer_norm)zaudio_tower.layers.{bid}.fc1)zaudio_tower.layers.{bid}.fc2)z(audio.multi_modal_projector.linear_{bid})z"audio.multi_modal_projector.linearzaudio_tower.proj)z"audio.multi_modal_projector.ln_pre)z"audio.multi_modal_projector.ln_midblock_mappings_cfg)z%model.layers.{bid}.residual_layernorm)r&   z5dict[MODEL_ARCH, dict[MODEL_TENSOR, tuple[str, ...]]]arch_block_mappings_cfgz#dict[str, tuple[MODEL_TENSOR, str]]mappingarchr   n_blocksintc                 C  s   i | _ | j D ]"\}}|t| vrqt| }||f| j |< |D ]	}||f| j |< q q|| jv r9| j| j|  t|D ]4}| j D ],\}}|t| vrOqDt| j	|d}||f| j |< |D ]}|j	|d}||f| j |< q`qDq=d S )N)bid)
r-   r   itemsr   r   r,   r+   updaterangeformat)selfr.   r/   tensorkeystensor_namekeyr1   r   r   P/home/ubuntu/veenaModal/venv/lib/python3.10/site-packages/gguf/tensor_mapping.py__init__  s.   
zTensorNameMap.__init__r:   strtry_suffixesSequence[str]returntuple[MODEL_TENSOR, str] | Nonec                 C  sj   | j |}|d ur|S |D ]$}||r2| j |d t|  }|d ur2|d |d | f  S qd S )Nr   r   )r-   getendswithlen)r6   r:   r>   resultsuffixr   r   r;   get_type_and_name  s   
zTensorNameMap.get_type_and_name
str | Nonec                 C  "   | j ||d}|d u rd S |d S )Nr>   r   rG   r6   r:   r>   rE   r   r   r;   get_name     zTensorNameMap.get_nameMODEL_TENSOR | Nonec                 C  rI   )NrJ   r   rK   rL   r   r   r;   get_type  rN   zTensorNameMap.get_typec                 C  s(   z| j | d W S  ty   t|w )Nr   )r-   KeyErrorr6   r:   r   r   r;   __getitem__  s
   zTensorNameMap.__getitem__boolc                 C  s
   || j v S N)r-   rR   r   r   r;   __contains__     
zTensorNameMap.__contains__c                 C  s
   t | jS rU   )reprr-   )r6   r   r   r;   __repr__  rW   zTensorNameMap.__repr__N)r.   r   r/   r0   )r   )r:   r=   r>   r?   r@   rA   )r:   r=   r>   r?   r@   rH   )r:   r=   r>   r?   r@   rO   )r:   r=   r@   r=   )r:   r=   r@   rT   )r@   r=   )__name__
__module____qualname__r   
TOKEN_EMBDTOKEN_TYPESTOKEN_EMBD_NORMPOS_EMBDOUTPUTOUTPUT_NORM
ROPE_FREQSROPE_FACTORS_LONGROPE_FACTORS_SHORTCONV1Dr   __annotations__	ATTN_NORMATTN_NORM_2ATTN_QKVATTN_QATTN_KATTN_VATTN_OUTATTN_OUT_NORMATTN_POST_NORMATTN_ROT_EMBDFFN_NORMFFN_PRE_NORMFFN_POST_NORMFFN_GATE_INPFFN_GATE_INP_SHEXPFFN_EXP_PROBS_BFFN_UP
FFN_UP_EXPFFN_UP_SHEXPFFN_ACTFFN_GATEFFN_GATE_EXPFFN_GATE_SHEXPFFN_DOWNFFN_DOWN_EXPFFN_DOWN_SHEXPATTN_Q_NORMATTN_K_NORMLAYER_OUT_NORMSSM_IN
SSM_CONV1DSSM_XSSM_DTSSM_ASSM_DSSM_OUTTIME_MIX_W0TIME_MIX_W1TIME_MIX_W2TIME_MIX_A0TIME_MIX_A1TIME_MIX_A2TIME_MIX_V0TIME_MIX_V1TIME_MIX_V2TIME_MIX_G1TIME_MIX_G2TIME_MIX_K_KTIME_MIX_K_ATIME_MIX_R_KTIME_MIX_LERP_XTIME_MIX_LERP_KTIME_MIX_LERP_VTIME_MIX_LERP_RTIME_MIX_LERP_GTIME_MIX_LERP_WTIME_MIX_FIRSTTIME_MIX_DECAYTIME_MIX_DECAY_W1TIME_MIX_DECAY_W2TIME_MIX_KEYTIME_MIX_VALUETIME_MIX_RECEPTANCETIME_MIX_GATETIME_MIX_LNTIME_MIX_OUTPUTCHANNEL_MIX_LERP_KCHANNEL_MIX_LERP_RCHANNEL_MIX_KEYCHANNEL_MIX_RECEPTANCECHANNEL_MIX_VALUEATTN_Q_AATTN_Q_BATTN_KV_A_MQA	ATTN_KV_BATTN_K_BATTN_V_BATTN_Q_A_NORMATTN_KV_A_NORMATTN_SUB_NORMFFN_SUB_NORMDEC_ATTN_NORM
DEC_ATTN_Q
DEC_ATTN_K
DEC_ATTN_VDEC_ATTN_OUTDEC_ATTN_REL_BDEC_CROSS_ATTN_NORMDEC_CROSS_ATTN_QDEC_CROSS_ATTN_KDEC_CROSS_ATTN_VDEC_CROSS_ATTN_OUTDEC_CROSS_ATTN_REL_BDEC_FFN_NORMDEC_FFN_GATE
DEC_FFN_UPDEC_FFN_DOWNDEC_OUTPUT_NORMENC_ATTN_NORM
ENC_ATTN_Q
ENC_ATTN_K
ENC_ATTN_VENC_ATTN_OUTENC_ATTN_REL_BENC_FFN_NORMENC_FFN_GATE
ENC_FFN_UPENC_FFN_DOWNENC_OUTPUT_NORMCLSCLS_OUTCONVNEXT_DWCONVNEXT_NORMCONVNEXT_PW1CONVNEXT_PW2CONVNEXT_GAMMAPOSNET_CONV1POSNET_CONV2POSNET_NORMPOSNET_NORM1POSNET_NORM2POSNET_ATTN_NORMPOSNET_ATTN_QPOSNET_ATTN_KPOSNET_ATTN_VPOSNET_ATTN_OUTV_MMPROJV_MMPROJ_FCV_MMPROJ_MLPV_MMPROJ_PEGV_ENC_EMBD_CLSV_ENC_EMBD_PATCHV_ENC_EMBD_POSV_ENC_ATTN_QV_ENC_ATTN_Q_NORMV_ENC_ATTN_KV_ENC_ATTN_K_NORMV_ENC_ATTN_VV_ENC_INPUT_NORMV_ENC_ATTN_OV_ENC_POST_ATTN_NORMV_ENC_FFN_UPV_ENC_FFN_GATEV_ENC_FFN_DOWNV_LAYER_SCALE_1V_LAYER_SCALE_2
V_PRE_NORMV_POST_NORMV_MM_INP_PROJV_MM_INP_NORMV_MM_SOFT_EMB_NORMV_RESMPL_POS_EMBD_KV_RESMPL_ATTN_QV_RESMPL_ATTN_KV_RESMPL_ATTN_VV_RESMPL_ATTN_OUTV_RESMPL_KVV_RESMPL_POST_NORMV_RESMPL_KV_NORMV_RESMPL_Q_NORMV_RESMPL_PROJV_RESMPL_QUERYV_TOK_EMBD_IMG_BREAKV_MM_PATCH_MERGERA_ENC_EMBD_POSA_ENC_CONV1D
A_PRE_NORMA_POST_NORMA_ENC_ATTN_QA_ENC_ATTN_KA_ENC_ATTN_VA_ENC_INPUT_NORMA_ENC_OUTPUTA_ENC_OUTPUT_NORMA_ENC_FFN_UPA_ENC_FFN_GATEA_ENC_FFN_DOWNA_MMPROJA_MMPROJ_FCA_MM_NORM_PREA_MM_NORM_MIDr+   r   ARCTICFFN_NORM_EXPr,   r<   rG   rM   rP   rS   rV   rY   r   r   r   r;   r	      s  
 g&:IYh         0  5  :  E  I  N  p  z                   &    D    O    V    _    h    l    u    z               
                              #      )      -      1      5      9      =      A      E      I      M      Q      U      Z      _      d      i      n      s      w      |                                               !        &        -        2        6        ;        ?        D        H        L        P        T        X        \        `        d        h        l        p        t        x        |                             	                                                            !          %          *          .          2          6          :          >          B          F          J          N          R          W          ]          b          i          n          r          v          z          ~                                                                                                           #            '            .            3            7            =            A            F            O            V            _            c            l            p            y                                                       "              '              1              5              9              ?              F              J              N              R              V              Z              ^              b              f              j              n              r              v              z              ~                               	                                                                                                Q


r	   r.   r   r/   r0   r@   c                 C  s
   t | |S rU   )r	   )r.   r/   r   r   r;   get_tensor_name_map  rW   r  N)r.   r   r/   r0   r@   r	   )
__future__r   typingr   	constantsr   r   r   r   r	   r  r   r   r   r;   <module>   s              