Mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-10-31 08:51:55 +00:00)
@@ -42,6 +42,7 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
     { LLM_ARCH_GEMMA,            "gemma"            },
     { LLM_ARCH_GEMMA2,           "gemma2"           },
     { LLM_ARCH_GEMMA3,           "gemma3"           },
+    { LLM_ARCH_GEMMA3N,          "gemma3n"          },
     { LLM_ARCH_STARCODER2,       "starcoder2"       },
     { LLM_ARCH_MAMBA,            "mamba"            },
     { LLM_ARCH_XVERSE,           "xverse"           },
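
The first hunk registers the string identifier "gemma3n" for the new LLM_ARCH_GEMMA3N enum value. A minimal sketch of how a map like this is typically consulted, with a fallback for unregistered architectures (the llm_arch_name helper shown here is illustrative, not necessarily the repository's exact signature):

#include <map>

enum llm_arch { LLM_ARCH_GEMMA3, LLM_ARCH_GEMMA3N /* ... */ };

// shape mirrors LLM_ARCH_NAMES from the hunk above
static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
    { LLM_ARCH_GEMMA3,  "gemma3"  },
    { LLM_ARCH_GEMMA3N, "gemma3n" },
};

// illustrative lookup: return the registered name, or "unknown"
// when an architecture has no entry in the map
static const char * llm_arch_name(llm_arch arch) {
    auto it = LLM_ARCH_NAMES.find(arch);
    return it == LLM_ARCH_NAMES.end() ? "unknown" : it->second;
}
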
@@ -932,6 +933,42 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
             { LLM_TENSOR_FFN_POST_NORM,   "blk.%d.post_ffw_norm" },
         },
     },
+    {
+        LLM_ARCH_GEMMA3N,
+        {
+            { LLM_TENSOR_TOKEN_EMBD,           "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM,          "output_norm" },
+            { LLM_TENSOR_ATTN_NORM,            "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_Q,               "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_Q_NORM,          "blk.%d.attn_q_norm" },
+            { LLM_TENSOR_ATTN_K,               "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_K_NORM,          "blk.%d.attn_k_norm" },
+            { LLM_TENSOR_ATTN_V,               "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_OUT,             "blk.%d.attn_output" },
+            { LLM_TENSOR_ATTN_POST_NORM,       "blk.%d.post_attention_norm" },
+            { LLM_TENSOR_FFN_NORM,             "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_GATE,             "blk.%d.ffn_gate" },
+            { LLM_TENSOR_FFN_DOWN,             "blk.%d.ffn_down" },
+            { LLM_TENSOR_FFN_UP,               "blk.%d.ffn_up" },
+            { LLM_TENSOR_FFN_POST_NORM,        "blk.%d.post_ffw_norm" },
+            { LLM_TENSOR_PER_LAYER_TOKEN_EMBD, "per_layer_token_embd" },
+            { LLM_TENSOR_PER_LAYER_MODEL_PROJ, "per_layer_model_proj" },
+            { LLM_TENSOR_PER_LAYER_PROJ_NORM,  "per_layer_proj_norm" },
+            { LLM_TENSOR_ALTUP_UNEMBD_PROJ,    "altup_unembd_proj" },
+            { LLM_TENSOR_ALTUP_PROJ,           "altup_proj" },
+            { LLM_TENSOR_PER_LAYER_INP_GATE,   "blk.%d.inp_gate" },
+            { LLM_TENSOR_PER_LAYER_PROJ,       "blk.%d.proj" },
+            { LLM_TENSOR_PER_LAYER_POST_NORM,  "blk.%d.post_norm" },
+            { LLM_TENSOR_ALTUP_CORRECT_COEF,   "blk.%d.altup_correct_coef" },
+            { LLM_TENSOR_ALTUP_CORRECT_SCALE,  "blk.%d.altup_correct_scale" },
+            { LLM_TENSOR_ALTUP_PREDICT_COEF,   "blk.%d.altup_predict_coef" },
+            { LLM_TENSOR_ALTUP_ROUTER,         "blk.%d.altup_router" },
+            { LLM_TENSOR_ALTUP_ROUTER_NORM,    "blk.%d.altup_router_norm" },
+            { LLM_TENSOR_LAUREL_L,             "blk.%d.laurel_l" },
+            { LLM_TENSOR_LAUREL_R,             "blk.%d.laurel_r" },
+            { LLM_TENSOR_LAUREL_POST_NORM,     "blk.%d.laurel_post_norm" },
+        },
+    },
     {
         LLM_ARCH_STARCODER2,
         {
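
The second hunk maps each Gemma 3n tensor to its GGUF base name. Per-layer tensors use a printf-style "blk.%d" pattern, and the AltUp/LAUREL additions (per-layer token embeddings, AltUp routers and coefficients, LAUREL low-rank projections) follow the same naming convention as the existing Gemma entries. A minimal sketch of how such a pattern could be expanded for a concrete layer (the tn helper and its ".weight" suffix handling are assumptions for illustration, not the repository's exact API):

#include <cstdio>
#include <string>

// hypothetical helper: substitute the layer index into a "blk.%d.*"
// pattern and append the ".weight" suffix used for GGUF tensor names
static std::string tn(const char * pattern, int layer) {
    char buf[256];
    snprintf(buf, sizeof(buf), pattern, layer);
    return std::string(buf) + ".weight";
}

For example, tn("blk.%d.altup_router", 7) would yield "blk.7.altup_router.weight".
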
@@ -1749,6 +1786,23 @@ static const std::map<llm_tensor, llm_tensor_info> LLM_TENSOR_INFOS = {
     {LLM_TENSOR_FFN_GATE_EXPS,              {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT_ID}},
     {LLM_TENSOR_FFN_UP_EXPS,                {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT_ID}},
     {LLM_TENSOR_FFN_EXP_PROBS_B,            {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ADD}},
+    // altup / laurel (gemma 3n)
+    {LLM_TENSOR_PER_LAYER_TOKEN_EMBD,       {LLM_TENSOR_LAYER_OUTPUT,    GGML_OP_GET_ROWS}},
+    {LLM_TENSOR_PER_LAYER_MODEL_PROJ,       {LLM_TENSOR_LAYER_OUTPUT,    GGML_OP_MUL_MAT}},
+    {LLM_TENSOR_PER_LAYER_PROJ_NORM,        {LLM_TENSOR_LAYER_OUTPUT,    GGML_OP_MUL}},
+    {LLM_TENSOR_ALTUP_PROJ,                 {LLM_TENSOR_LAYER_OUTPUT,    GGML_OP_MUL_MAT}},
+    {LLM_TENSOR_ALTUP_UNEMBD_PROJ,          {LLM_TENSOR_LAYER_OUTPUT,    GGML_OP_MUL_MAT}},
+    {LLM_TENSOR_PER_LAYER_INP_GATE,         {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+    {LLM_TENSOR_PER_LAYER_PROJ,             {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+    {LLM_TENSOR_PER_LAYER_POST_NORM,        {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
+    {LLM_TENSOR_ALTUP_CORRECT_COEF,         {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+    {LLM_TENSOR_ALTUP_CORRECT_SCALE,        {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
+    {LLM_TENSOR_ALTUP_PREDICT_COEF,         {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+    {LLM_TENSOR_ALTUP_ROUTER,               {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+    {LLM_TENSOR_ALTUP_ROUTER_NORM,          {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
+    {LLM_TENSOR_LAUREL_L,                   {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+    {LLM_TENSOR_LAUREL_R,                   {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+    {LLM_TENSOR_LAUREL_POST_NORM,           {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
     // this tensor is loaded for T5, but never used
     {LLM_TENSOR_DEC_CROSS_ATTN_REL_B,       {LLM_TENSOR_LAYER_REPEATING, GGML_OP_NONE}},
     {LLM_TENSOR_CONV1D,                     {LLM_TENSOR_LAYER_INPUT,     GGML_OP_IM2COL}},
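
The third hunk classifies each new tensor for the loader: which section of the model it belongs to (input, repeating block, or output) and which ggml op consumes it as a weight operand (matrix multiply for the projections, element-wise multiply for the RMS-norm scales, row gather for the per-layer embedding table). A minimal sketch of consulting such a table, assuming the struct layout implied by the initializers above (the lookup helper and the stand-in ggml_op enum are illustrative):

#include <map>
#include <stdexcept>

enum llm_tensor { LLM_TENSOR_ALTUP_ROUTER /* ... */ };
enum llm_tensor_layer { LLM_TENSOR_LAYER_INPUT, LLM_TENSOR_LAYER_REPEATING, LLM_TENSOR_LAYER_OUTPUT };
enum ggml_op { GGML_OP_MUL, GGML_OP_MUL_MAT /* tiny stand-in for ggml's op enum */ };

struct llm_tensor_info {
    llm_tensor_layer layer; // which section of the model owns the tensor
    ggml_op          op;    // the op this tensor feeds as a weight
};

// shape mirrors LLM_TENSOR_INFOS from the hunk above
static const std::map<llm_tensor, llm_tensor_info> LLM_TENSOR_INFOS = {
    { LLM_TENSOR_ALTUP_ROUTER, { LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT } },
};

// illustrative lookup: fail loudly if a tensor was never classified
static const llm_tensor_info & llm_tensor_info_for(llm_tensor t) {
    auto it = LLM_TENSOR_INFOS.find(t);
    if (it == LLM_TENSOR_INFOS.end()) {
        throw std::runtime_error("missing llm_tensor_info entry");
    }
    return it->second;
}

Keeping the classification in one table lets generic loader code validate tensors and choose buffer placement per layer type without per-architecture special cases.
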
Author: Xuan-Son Nguyen