gguf-py : avoid adding duplicate tensor mappings for Jamba
Some of the tensor names are shared with Llama4.
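TensorNameMap expands each `{bid}` name template once per block id into a flat lookup dict, so listing the same template twice under one tensor kind just rewrites the same keys. The sketch below is a simplified stand-in, not the actual gguf-py code (the `ModelTensor` enum and `build_mapping` helper here are illustrative); it shows why the duplicated jamba/llama4 entries were redundant:

```python
from enum import Enum, auto


class ModelTensor(Enum):
    # Hypothetical stand-in for gguf-py's MODEL_TENSOR enum.
    FFN_GATE_INP = auto()


# Before this commit: the same template appeared twice in the tuple,
# once tagged "jamba" and once tagged "llama4".
BLOCK_MAPPINGS = {
    ModelTensor.FFN_GATE_INP: (
        "model.layers.{bid}.feed_forward.router",  # jamba
        "model.layers.{bid}.feed_forward.router",  # llama4 (same key)
    ),
}


def build_mapping(n_blocks: int) -> dict[str, ModelTensor]:
    """Expand {bid} templates into concrete tensor names, roughly as
    TensorNameMap does at construction time (simplified)."""
    mapping: dict[str, ModelTensor] = {}
    for tensor, templates in BLOCK_MAPPINGS.items():
        for template in templates:
            for bid in range(n_blocks):
                # The second identical template silently overwrites the
                # entry the first one created: no error, but no gain.
                mapping[template.format(bid=bid)] = tensor
    return mapping


print(len(build_mapping(n_blocks=4)))  # 4, not 8: duplicates collapse
```

Merging each pair into a single entry with a combined "# llama4 jamba" comment therefore leaves the resulting mapping identical while making the shared name explicit.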
@@ -304,9 +304,8 @@ class TensorNameMap:
             "model.layers.{bid}.mlp.gate",                      # qwen2moe olmoe
             "transformer.decoder_layer.{bid}.router",           # Grok
             "transformer.blocks.{bid}.ffn.router.layer",        # dbrx
-            "model.layers.{bid}.feed_forward.router",           # jamba
             "model.layers.{bid}.block_sparse_moe.router.layer", # granitemoe
-            "model.layers.{bid}.feed_forward.router",           # llama4
+            "model.layers.{bid}.feed_forward.router",           # llama4 jamba
             "encoder.layers.{bid}.mlp.router.layer",            # nomic-bert-moe
             "model.layers.{bid}.mlp.gate.wg",                   # hunyuan
         ),
@@ -348,10 +347,9 @@ class TensorNameMap:
             "encoder.layer.{bid}.mlp.gated_layers",    # jina-bert-v2 (GEGLU)
             "encoder.layer.{bid}.mlp.up_gated_layer",  # jina-v2-code (GEGLU)
             "model.layers.{bid}.residual_mlp.w3",      # arctic
-            "model.layers.{bid}.feed_forward.up_proj", # jamba
             "encoder.layers.{bid}.mlp.dense_h_to_4h",  # chatglm
             "transformer.h.{bid}.mlp.c_fc_1",          # exaone
-            "model.layers.{bid}.feed_forward.up_proj", # llama4
+            "model.layers.{bid}.feed_forward.up_proj", # llama4 jamba
             "transformer_encoder.{bid}.ffn.w12",       # neobert
         ),
 
@@ -390,9 +388,8 @@ class TensorNameMap:
             "encoder.layer.{bid}.mlp.gated_layers_w",    # jina-bert-v2 (split up/gate, no longer used)
             "transformer.h.{bid}.mlp.linear_1",          # refact
             "model.layers.{bid}.residual_mlp.w1",        # arctic
-            "model.layers.{bid}.feed_forward.gate_proj", # jamba
             "transformer.h.{bid}.mlp.c_fc_0",            # exaone
-            "model.layers.{bid}.feed_forward.gate_proj", # llama4
+            "model.layers.{bid}.feed_forward.gate_proj", # llama4 jamba
         ),
 
         MODEL_TENSOR.FFN_GATE_EXP: (
@@ -436,10 +433,9 @@ class TensorNameMap:
             "transformer.layers.{bid}.ffn.proj_2",       # openelm
             "model.layers.{bid}.residual_mlp.w2",        # arctic
             "encoder.layer.{bid}.mlp.down_layer",        # jina-bert-v2
-            "model.layers.{bid}.feed_forward.down_proj", # jamba
             "encoder.layers.{bid}.mlp.dense_4h_to_h",    # chatglm
             "model.layers.h.{bid}.mlp.c_proj",           # exaone
-            "model.layers.{bid}.feed_forward.down_proj", # llama4
+            "model.layers.{bid}.feed_forward.down_proj", # llama4 jamba
             "transformer_encoder.{bid}.ffn.w3",          # neobert
         ),
 
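A quick sanity check, sketched against gguf-py's public helpers (this assumes a gguf-py checkout from this branch, i.e. one where MODEL_ARCH.JAMBA exists): the Jamba HF tensor names must still resolve through the surviving "llama4 jamba" entries after the duplicates are removed.

```python
# Assumes gguf-py with Jamba support; get_tensor_name_map and
# TensorNameMap.get_type are existing gguf-py helpers.
from gguf.constants import MODEL_ARCH, MODEL_TENSOR
from gguf.tensor_mapping import get_tensor_name_map

tmap = get_tensor_name_map(MODEL_ARCH.JAMBA, n_blocks=1)
t = tmap.get_type("model.layers.0.feed_forward.up_proj.weight",
                  try_suffixes=(".weight", ".bias"))
assert t == MODEL_TENSOR.FFN_UP  # still maps despite the dedup
```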