Mirror of https://github.com/ggml-org/llama.cpp.git, synced 2025-10-30 08:42:00 +00:00
	model : add support for SmallThinker series (#14898)
* support smallthinker
* support 20b softmax, 4b no sliding window
* new build_moe_ffn_from_probs, and can run 4b
* fix 4b rope bug
* fix python type check
* remove the is_moe check
* remove the set_dense_start_swa_pattern function and modify set_swa_pattern
* trim trailing whitespace
* remove get_vocab_base of SmallThinkerModel in convert_hf_to_gguf.py
* better whitespace (apply suggestions from code review)
* use GGML_ASSERT for expert count validation
* improve the null pointer check for probs
* use a template parameter for the SWA attention logic
* better whitespace
* move the creation of inp_out_ids before the layer loop
* remove redundant check for probs

---------

Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
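One of the bullets above replaces a runtime is-SWA branch with a template parameter, so each layer's attention path is fixed at compile time. A minimal standalone sketch of that pattern, assuming the window choice is known per layer; build_attn_mask and its signature are illustrative, not the llama.cpp API:

#include <cstdint>
#include <vector>

// Hypothetical sketch: the SWA decision is a template parameter, so the
// dense instantiation carries no window check at all.
template <bool swa>
static void build_attn_mask(std::vector<float> & mask, int64_t n_tokens, int64_t window) {
    mask.assign((size_t) (n_tokens * n_tokens), -1e9f);   // default: masked out
    for (int64_t q = 0; q < n_tokens; ++q) {
        for (int64_t k = 0; k <= q; ++k) {                // causal: keys up to the query
            if constexpr (swa) {
                if (q - k >= window) {
                    continue;                             // outside the sliding window
                }
            }
            mask[q*n_tokens + k] = 0.0f;                  // visible
        }
    }
}

// SmallThinker mixes layer types ("4b no sliding window"), so a loader could
// dispatch per layer:
//   build_attn_mask<true >(mask, n_tokens, 4096); // SWA layer (window size is an assumption)
//   build_attn_mask<false>(mask, n_tokens, 0);    // dense layer, window unused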
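The new build_moe_ffn_from_probs helper suggests the MoE FFN consumes router probabilities computed earlier in the graph rather than recomputing them. The real helper operates on ggml tensors inside the compute graph; the scalar routing logic it implies is sketched below under the usual top-k-then-renormalize convention (select_experts is hypothetical; n_expert_used mirrors the common llama.cpp hyperparameter name):

#include <algorithm>
#include <cassert>
#include <cstddef>
#include <numeric>
#include <utility>
#include <vector>

// Pick the n_expert_used highest-probability experts and renormalize their
// routing weights so they sum to 1.
static std::vector<std::pair<int, float>>
select_experts(const std::vector<float> & probs, int n_expert_used) {
    // cf. the GGML_ASSERT on the expert count added in this PR
    assert(n_expert_used > 0 && (size_t) n_expert_used <= probs.size());

    std::vector<int> idx(probs.size());
    std::iota(idx.begin(), idx.end(), 0);
    std::partial_sort(idx.begin(), idx.begin() + n_expert_used, idx.end(),
                      [&probs](int a, int b) { return probs[a] > probs[b]; });

    float sum = 0.0f;
    for (int i = 0; i < n_expert_used; ++i) {
        sum += probs[idx[i]];
    }
    std::vector<std::pair<int, float>> out;
    for (int i = 0; i < n_expert_used; ++i) {
        out.emplace_back(idx[i], probs[idx[i]] / sum); // (expert id, routing weight)
    }
    return out;
}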
@@ -88,6 +88,7 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
     { LLM_ARCH_SMOLLM3,          "smollm3"          },
     { LLM_ARCH_LFM2,             "lfm2"             },
     { LLM_ARCH_DREAM,            "dream"            },
+    { LLM_ARCH_SMALLTHINKER,     "smallthinker"     },
     { LLM_ARCH_UNKNOWN,          "(unknown)"        },
 };
 
@@ -1933,6 +1934,27 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
             { LLM_TENSOR_TOKEN_EMBD_NORM,   "token_embd_norm" },
         }
     },
+    {
+        LLM_ARCH_SMALLTHINKER,
+        {
+            { LLM_TENSOR_TOKEN_EMBD,         "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM,        "output_norm" },
+            { LLM_TENSOR_OUTPUT,             "output" },
+            { LLM_TENSOR_ATTN_NORM,          "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_Q,             "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_K,             "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_V,             "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_OUT,           "blk.%d.attn_output" },
+            { LLM_TENSOR_FFN_NORM,           "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_GATE,           "blk.%d.ffn_gate" },
+            { LLM_TENSOR_FFN_DOWN,           "blk.%d.ffn_down" },
+            { LLM_TENSOR_FFN_UP,             "blk.%d.ffn_up" },
+            { LLM_TENSOR_FFN_GATE_INP,       "blk.%d.ffn_gate_inp" },
+            { LLM_TENSOR_FFN_GATE_EXPS,      "blk.%d.ffn_gate_exps" },
+            { LLM_TENSOR_FFN_DOWN_EXPS,      "blk.%d.ffn_down_exps" },
+            { LLM_TENSOR_FFN_UP_EXPS,        "blk.%d.ffn_up_exps" }
+        },
+    },
     {
         LLM_ARCH_DREAM,
         {
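The %d placeholders in the new LLM_ARCH_SMALLTHINKER table are expanded with the layer index when tensors are looked up in the GGUF file. A minimal sketch of that expansion (plain snprintf; the actual lookup in llama.cpp goes through its tensor-loading helpers):

#include <cstdio>

int main() {
    const char * fmt = "blk.%d.ffn_gate_exps"; // an entry from the table above
    char name[64];
    for (int il = 0; il < 3; ++il) {
        snprintf(name, sizeof(name), fmt, il);
        printf("%s\n", name); // blk.0.ffn_gate_exps, blk.1.ffn_gate_exps, ...
    }
    return 0;
}

Per-layer tensors carry the blk.%d prefix, while model-wide tensors (token_embd, output_norm, output) are stored without a layer index.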
Author: Dongliang Wei