	llama : Support llama 4 text-only (#12791)
* llama4 conversion
* initial support, no chat template
* clean up a bit
* fix tokenizer conversion
* correct hparams
* try this
* fix shexp
* ffn_inp_normed
* chat template
* clean up model conversion
* add_bos
* add scale_before_ffn
* fix order
* weight_before_ffn
* llm_graph_input_attn_temp
* add chunk attn mask
* build_inp_attn_scale()
* add comment about ggml_repeat
* clarify comments
* fix build
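Several of the bullets above (llm_graph_input_attn_temp, build_inp_attn_scale(), "add chunk attn mask") relate to Llama 4's per-position attention temperature scaling on the non-RoPE layers. The following is a minimal, hedged sketch of the idea only; the function name is made up for illustration, and the floor_scale/attn_scale defaults are taken from the reference configuration rather than from this diff:

#include <cmath>
#include <cstdint>

// Illustrative helper (not the llama.cpp API): per-position temperature
// applied to the query before Q*K^T on Llama 4's non-RoPE layers.
// floor_scale = 8192 and attn_scale = 0.1 are the reference defaults;
// in llama.cpp they would come from the model hyperparameters.
static float llama4_attn_temp_scale(int64_t pos,
                                    float floor_scale = 8192.0f,
                                    float attn_scale  = 0.1f) {
    return std::log(std::floor((float(pos) + 1.0f) / floor_scale) + 1.0f) * attn_scale + 1.0f;
}

For positions below floor_scale the result stays at 1.0 and then grows logarithmically with position, which is presumably what build_inp_attn_scale() precomputes as a per-token input tensor for the compute graph.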
@@ -6,6 +6,7 @@
 
 static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
     { LLM_ARCH_LLAMA,            "llama"            },
+    { LLM_ARCH_LLAMA4,           "llama4"           },
     { LLM_ARCH_DECI,             "deci"             },
     { LLM_ARCH_FALCON,           "falcon"           },
     { LLM_ARCH_GROK,             "grok"             },
@@ -114,6 +115,7 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
     { LLM_KV_RESIDUAL_SCALE,                    "%s.residual_scale"                    },
     { LLM_KV_EMBEDDING_SCALE,                   "%s.embedding_scale"                   },
     { LLM_KV_TOKEN_SHIFT_COUNT,                 "%s.token_shift_count"                 },
+    { LLM_KV_INTERLEAVE_MOE_LAYER_STEP,         "%s.interleave_moe_layer_step"         },
 
     { LLM_KV_ATTENTION_HEAD_COUNT,                   "%s.attention.head_count"                   },
     { LLM_KV_ATTENTION_HEAD_COUNT_KV,                "%s.attention.head_count_kv"                },
@@ -233,6 +235,35 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
             { LLM_TENSOR_FFN_UP_EXPS,     "blk.%d.ffn_up_exps" },
         },
     },
+    {
+        LLM_ARCH_LLAMA4,
+        {
+            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
+            { LLM_TENSOR_OUTPUT,          "output" },
+            { LLM_TENSOR_ROPE_FREQS,      "rope_freqs" },
+            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
+            { LLM_TENSOR_ATTN_ROT_EMBD,   "blk.%d.attn_rot_embd" },
+            { LLM_TENSOR_FFN_GATE_INP,    "blk.%d.ffn_gate_inp" },
+            { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
+            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
+            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
+            { LLM_TENSOR_FFN_GATE_EXP,    "blk.%d.ffn_gate.%d" },
+            { LLM_TENSOR_FFN_DOWN_EXP,    "blk.%d.ffn_down.%d" },
+            { LLM_TENSOR_FFN_UP_EXP,      "blk.%d.ffn_up.%d" },
+            { LLM_TENSOR_FFN_GATE_EXPS,   "blk.%d.ffn_gate_exps" },
+            { LLM_TENSOR_FFN_DOWN_EXPS,   "blk.%d.ffn_down_exps" },
+            { LLM_TENSOR_FFN_UP_EXPS,     "blk.%d.ffn_up_exps" },
+            { LLM_TENSOR_FFN_GATE_SHEXP,  "blk.%d.ffn_gate_shexp" },
+            { LLM_TENSOR_FFN_DOWN_SHEXP,  "blk.%d.ffn_down_shexp" },
+            { LLM_TENSOR_FFN_UP_SHEXP,    "blk.%d.ffn_up_shexp" },
+        },
+    },
     {
         LLM_ARCH_DECI,
         {
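The new interleave_moe_layer_step metadata key (second hunk) and the ffn_*_shexp tensor names (third hunk) reflect Llama 4's FFN layout: only every Nth layer uses the routed experts, and a MoE layer additionally runs a shared expert whose output is summed with the routed output. Below is a small, hedged sketch of how a loader might combine the two; the helper names and the `(il + 1) % step == 0` convention are illustrative assumptions, not the actual llama.cpp code:

#include <cstdio>
#include <string>

// Illustrative only: decide whether layer il uses the MoE FFN, assuming
// n_moe_layer_step was read from the "<arch>.interleave_moe_layer_step" key.
static bool layer_is_moe(int il, int n_moe_layer_step) {
    return n_moe_layer_step > 0 && ((il + 1) % n_moe_layer_step) == 0;
}

// Format a per-layer tensor name from a "blk.%d...." pattern.
static std::string blk_tensor_name(const char * pattern, int il) {
    char buf[128];
    std::snprintf(buf, sizeof(buf), pattern, il);
    return buf;
}

int main() {
    const int n_layer          = 8;  // toy values; the real ones come from the GGUF metadata
    const int n_moe_layer_step = 2;

    for (int il = 0; il < n_layer; ++il) {
        if (layer_is_moe(il, n_moe_layer_step)) {
            // routed experts (merged "_exps" tensors) + shared expert ("_shexp" tensors)
            std::printf("layer %2d: MoE   %s + %s\n", il,
                blk_tensor_name("blk.%d.ffn_gate_exps",  il).c_str(),
                blk_tensor_name("blk.%d.ffn_gate_shexp", il).c_str());
        } else {
            // dense FFN layer
            std::printf("layer %2d: dense %s\n", il,
                blk_tensor_name("blk.%d.ffn_gate", il).c_str());
        }
    }
    return 0;
}

With these toy values, layers 1, 3, 5 and 7 would be MoE and the rest dense; with a step of 1 every layer uses the routed experts plus the shared expert.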
Xuan-Son Nguyen