mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-10-30 08:42:00 +00:00 
			
		
		
		
	Merge branch 'master' into compilade/refactor-kv-cache
This commit is contained in:
		| @@ -288,12 +288,14 @@ class TensorNameMap: | ||||
|         # Pre feed-forward norm | ||||
|         MODEL_TENSOR.FFN_PRE_NORM: ( | ||||
|             "model.layers.{bid}.pre_feedforward_layernorm", # gemma2 | ||||
|             "model.layers.{bid}.pre_ff_layernorm.weight", | ||||
|         ), | ||||
|  | ||||
|         # Post feed-forward norm | ||||
|         MODEL_TENSOR.FFN_POST_NORM: ( | ||||
|             "model.layers.{bid}.post_feedforward_layernorm", # gemma2 olmo2 | ||||
|             "model.layers.{bid}.post_mlp_layernorm", # glm-4-0414 | ||||
|             "model.layers.{bid}.feed_forward.up_proj", | ||||
|         ), | ||||
|  | ||||
|         MODEL_TENSOR.FFN_GATE_INP: ( | ||||
| @@ -367,6 +369,7 @@ class TensorNameMap: | ||||
|             "model.layers.{bid}.mlp.shared_expert.up_proj",          # qwen2moe | ||||
|             "model.layers.{bid}.mlp.shared_experts.up_proj",         # deepseek deepseek2 | ||||
|             "model.layers.{bid}.feed_forward.shared_expert.up_proj", # llama4 | ||||
|             "model.layers.{bid}.feed_forward.down_proj", | ||||
|             "model.layers.{bid}.mlp.shared_mlp.up_proj",             # hunyuan | ||||
|         ), | ||||
|  | ||||
| @@ -559,13 +562,13 @@ class TensorNameMap: | ||||
|         MODEL_TENSOR.SSM_IN: ( | ||||
|             "model.layers.{bid}.in_proj",           # mamba-hf | ||||
|             "backbone.layers.{bid}.mixer.in_proj",  # mamba | ||||
|             "model.layers.{bid}.mamba.in_proj",     # jamba | ||||
|             "model.layers.{bid}.mamba.in_proj",     # jamba falcon-h1 | ||||
|         ), | ||||
|  | ||||
|         MODEL_TENSOR.SSM_CONV1D: ( | ||||
|             "model.layers.{bid}.conv1d",           # mamba-hf | ||||
|             "backbone.layers.{bid}.mixer.conv1d",  # mamba | ||||
|             "model.layers.{bid}.mamba.conv1d",     # jamba | ||||
|             "model.layers.{bid}.mamba.conv1d",     # jamba falcon-h1 | ||||
|         ), | ||||
|  | ||||
|         MODEL_TENSOR.SSM_X: ( | ||||
| @@ -577,7 +580,7 @@ class TensorNameMap: | ||||
|         MODEL_TENSOR.SSM_DT: ( | ||||
|             "model.layers.{bid}.dt_proj",           # mamba-hf | ||||
|             "backbone.layers.{bid}.mixer.dt_proj",  # mamba | ||||
|             "model.layers.{bid}.mamba.dt_proj",     # jamba | ||||
|             "model.layers.{bid}.mamba.dt_proj",     # jamba falcon-h1 | ||||
|         ), | ||||
|  | ||||
|         MODEL_TENSOR.SSM_DT_NORM: ( | ||||
| @@ -587,7 +590,7 @@ class TensorNameMap: | ||||
|         MODEL_TENSOR.SSM_A: ( | ||||
|             "model.layers.{bid}.A_log",           # mamba-hf | ||||
|             "backbone.layers.{bid}.mixer.A_log",  # mamba | ||||
|             "model.layers.{bid}.mamba.A_log",     # jamba | ||||
|             "model.layers.{bid}.mamba.A_log",     # jamba falcon-h1 | ||||
|         ), | ||||
|  | ||||
|         MODEL_TENSOR.SSM_B_NORM: ( | ||||
| @@ -603,17 +606,18 @@ class TensorNameMap: | ||||
|         MODEL_TENSOR.SSM_D: ( | ||||
|             "model.layers.{bid}.D",           # mamba-hf | ||||
|             "backbone.layers.{bid}.mixer.D",  # mamba | ||||
|             "model.layers.{bid}.mamba.D",     # jamba | ||||
|             "model.layers.{bid}.mamba.D",     # jamba falcon-h1 | ||||
|         ), | ||||
|  | ||||
|         MODEL_TENSOR.SSM_NORM: ( | ||||
|             "model.layers.{bid}.mamba.norm", # falcon-h1 | ||||
|             "backbone.layers.{bid}.mixer.norm",  # mamba2 | ||||
|         ), | ||||
|  | ||||
|         MODEL_TENSOR.SSM_OUT: ( | ||||
|             "model.layers.{bid}.out_proj",           # mamba-hf | ||||
|             "backbone.layers.{bid}.mixer.out_proj",  # mamba | ||||
|             "model.layers.{bid}.mamba.out_proj",     # jamba | ||||
|             "model.layers.{bid}.mamba.out_proj",     # jamba falcon-h1 | ||||
|         ), | ||||
|  | ||||
|         MODEL_TENSOR.TIME_MIX_W0: ( | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Francis Couture-Harpin
					Francis Couture-Harpin