gguf-py : avoid adding duplicate tensor mappings for Jamba

Some of the tensor names are common with Llama4
Author: Francis Couture-Harpin
Date: 2025-07-09 11:58:35 -04:00
parent 452207f318
commit 4d6a179c68


@@ -304,9 +304,8 @@ class TensorNameMap:
             "model.layers.{bid}.mlp.gate",                       # qwen2moe olmoe
             "transformer.decoder_layer.{bid}.router",            # Grok
             "transformer.blocks.{bid}.ffn.router.layer",         # dbrx
-            "model.layers.{bid}.feed_forward.router",            # jamba
             "model.layers.{bid}.block_sparse_moe.router.layer",  # granitemoe
-            "model.layers.{bid}.feed_forward.router",            # llama4
+            "model.layers.{bid}.feed_forward.router",            # llama4 jamba
             "encoder.layers.{bid}.mlp.router.layer",             # nomic-bert-moe
             "model.layers.{bid}.mlp.gate.wg",                    # hunyuan
         ),
@@ -348,10 +347,9 @@ class TensorNameMap:
             "encoder.layer.{bid}.mlp.gated_layers",       # jina-bert-v2 (GEGLU)
             "encoder.layer.{bid}.mlp.up_gated_layer",     # jina-v2-code (GEGLU)
             "model.layers.{bid}.residual_mlp.w3",         # arctic
-            "model.layers.{bid}.feed_forward.up_proj",    # jamba
             "encoder.layers.{bid}.mlp.dense_h_to_4h",     # chatglm
             "transformer.h.{bid}.mlp.c_fc_1",             # exaone
-            "model.layers.{bid}.feed_forward.up_proj",    # llama4
+            "model.layers.{bid}.feed_forward.up_proj",    # llama4 jamba
             "transformer_encoder.{bid}.ffn.w12",          # neobert
         ),
 
@@ -390,9 +388,8 @@ class TensorNameMap:
             "encoder.layer.{bid}.mlp.gated_layers_w",     # jina-bert-v2 (split up/gate, no longer used)
             "transformer.h.{bid}.mlp.linear_1",           # refact
             "model.layers.{bid}.residual_mlp.w1",         # arctic
-            "model.layers.{bid}.feed_forward.gate_proj",  # jamba
             "transformer.h.{bid}.mlp.c_fc_0",             # exaone
-            "model.layers.{bid}.feed_forward.gate_proj",  # llama4
+            "model.layers.{bid}.feed_forward.gate_proj",  # llama4 jamba
         ),
 
         MODEL_TENSOR.FFN_GATE_EXP: (
@@ -436,10 +433,9 @@ class TensorNameMap:
             "transformer.layers.{bid}.ffn.proj_2",        # openelm
             "model.layers.{bid}.residual_mlp.w2",         # arctic
             "encoder.layer.{bid}.mlp.down_layer",         # jina-bert-v2
-            "model.layers.{bid}.feed_forward.down_proj",  # jamba
             "encoder.layers.{bid}.mlp.dense_4h_to_h",     # chatglm
             "model.layers.h.{bid}.mlp.c_proj",            # exaone
-            "model.layers.{bid}.feed_forward.down_proj",  # llama4
+            "model.layers.{bid}.feed_forward.down_proj",  # llama4 jamba
             "transformer_encoder.{bid}.ffn.w3",           # neobert
         ),
 
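The removed lines were redundant rather than wrong: TensorNameMap expands every source-name pattern into one lookup key per block, so listing "model.layers.{bid}.feed_forward.*" once for Llama4 and again for Jamba produces the same keys twice. Below is a minimal sketch of that lookup build, assuming a plain dict and a made-up build_mapping() helper; the real class in gguf-py/gguf/tensor_mapping.py also filters patterns by architecture and handles .weight/.bias suffixes.

# Simplified sketch, not the actual gguf-py class: shows why a duplicate
# source pattern adds nothing.  The helper name build_mapping() is made up.

block_mappings = {
    "FFN_GATE_INP": (
        "model.layers.{bid}.mlp.gate",             # qwen2moe olmoe
        "model.layers.{bid}.feed_forward.router",  # llama4 jamba (shared pattern)
    ),
}

def build_mapping(n_blocks: int) -> dict[str, str]:
    mapping: dict[str, str] = {}
    for tensor, patterns in block_mappings.items():
        for bid in range(n_blocks):
            for pattern in patterns:
                key = pattern.format(bid=bid)
                # A second identical pattern would only rewrite the same key,
                # which is why the Jamba-specific entries were dropped in
                # favour of a shared "# llama4 jamba" comment.
                mapping[key] = f"blk.{bid}.{tensor.lower()}"
    return mapping

print(build_mapping(2)["model.layers.0.feed_forward.router"])  # blk.0.ffn_gate_inp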