mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-10-31 08:51:55 +00:00 
			
		
		
		
	gguf-py : avoid adding duplicate tensor mappings for Jamba
Some of the tensor names are shared with Llama4
This commit is contained in:
		| @@ -304,9 +304,8 @@ class TensorNameMap: | ||||
|             "model.layers.{bid}.mlp.gate",                      # qwen2moe olmoe | ||||
|             "transformer.decoder_layer.{bid}.router",           # Grok | ||||
|             "transformer.blocks.{bid}.ffn.router.layer",        # dbrx | ||||
|             "model.layers.{bid}.feed_forward.router",           # jamba | ||||
|             "model.layers.{bid}.block_sparse_moe.router.layer", # granitemoe | ||||
|             "model.layers.{bid}.feed_forward.router",           # llama4 | ||||
|             "model.layers.{bid}.feed_forward.router",           # llama4 jamba | ||||
|             "encoder.layers.{bid}.mlp.router.layer",            # nomic-bert-moe | ||||
|             "model.layers.{bid}.mlp.gate.wg",                   # hunyuan | ||||
|         ), | ||||
| @@ -348,10 +347,9 @@ class TensorNameMap: | ||||
|             "encoder.layer.{bid}.mlp.gated_layers",                   # jina-bert-v2 (GEGLU) | ||||
|             "encoder.layer.{bid}.mlp.up_gated_layer",                 # jina-v2-code (GEGLU) | ||||
|             "model.layers.{bid}.residual_mlp.w3",                     # arctic | ||||
|             "model.layers.{bid}.feed_forward.up_proj",                # jamba | ||||
|             "encoder.layers.{bid}.mlp.dense_h_to_4h",                 # chatglm | ||||
|             "transformer.h.{bid}.mlp.c_fc_1",                         # exaone | ||||
|             "model.layers.{bid}.feed_forward.up_proj",                # llama4 | ||||
|             "model.layers.{bid}.feed_forward.up_proj",                # llama4 jamba | ||||
|             "transformer_encoder.{bid}.ffn.w12",                      # neobert | ||||
|         ), | ||||
|  | ||||
| @@ -390,9 +388,8 @@ class TensorNameMap: | ||||
|             "encoder.layer.{bid}.mlp.gated_layers_w",     # jina-bert-v2 (split up/gate, no longer used) | ||||
|             "transformer.h.{bid}.mlp.linear_1",           # refact | ||||
|             "model.layers.{bid}.residual_mlp.w1",         # arctic | ||||
|             "model.layers.{bid}.feed_forward.gate_proj",  # jamba | ||||
|             "transformer.h.{bid}.mlp.c_fc_0",             # exaone | ||||
|             "model.layers.{bid}.feed_forward.gate_proj",  # llama4 | ||||
|             "model.layers.{bid}.feed_forward.gate_proj",  # llama4 jamba | ||||
|         ), | ||||
|  | ||||
|         MODEL_TENSOR.FFN_GATE_EXP: ( | ||||
| @@ -436,10 +433,9 @@ class TensorNameMap: | ||||
|             "transformer.layers.{bid}.ffn.proj_2",                    # openelm | ||||
|             "model.layers.{bid}.residual_mlp.w2",                     # arctic | ||||
|             "encoder.layer.{bid}.mlp.down_layer",                     # jina-bert-v2 | ||||
|             "model.layers.{bid}.feed_forward.down_proj",              # jamba | ||||
|             "encoder.layers.{bid}.mlp.dense_4h_to_h",                 # chatglm | ||||
|             "model.layers.h.{bid}.mlp.c_proj",                        # exaone | ||||
|             "model.layers.{bid}.feed_forward.down_proj",              # llama4 | ||||
|             "model.layers.{bid}.feed_forward.down_proj",              # llama4 jamba | ||||
|             "transformer_encoder.{bid}.ffn.w3",                       # neobert | ||||
|         ), | ||||
|  | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Francis Couture-Harpin
					Francis Couture-Harpin