	llama : gemma3 : use output tensor if it exists in model weight (#12506)
* llama : gemma3 : use output tensor if it exists in model weight
* also add to the llm_tensor_names
@@ -1113,6 +1113,7 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
     ],
     MODEL_ARCH.GEMMA3: [
         MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT,
         MODEL_TENSOR.OUTPUT_NORM,
         MODEL_TENSOR.ATTN_Q,
         MODEL_TENSOR.ATTN_Q_NORM,

@@ -778,6 +778,7 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
         {
             { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
             { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
+            { LLM_TENSOR_OUTPUT,          "output" },
             { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
             { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
             { LLM_TENSOR_ATTN_Q_NORM,     "blk.%d.attn_q_norm" },
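
The table above maps each llm_tensor enum value to a GGUF tensor-name pattern; per-layer entries carry a %d placeholder for the block index, while global tensors such as "output" have none. Below is a minimal, self-contained sketch of how such a table can resolve to concrete names like "output.weight"; the tn() helper here is a simplified stand-in for llama.cpp's real name-building machinery, and the enum subset is illustrative only.

#include <cstdio>
#include <map>
#include <string>

enum llm_tensor { LLM_TENSOR_OUTPUT, LLM_TENSOR_ATTN_Q };

// Name patterns, as in the table above: "%d" marks per-layer tensors.
static const std::map<llm_tensor, const char *> TENSOR_NAMES = {
    { LLM_TENSOR_OUTPUT, "output"        },
    { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
};

// Build "<base>.<suffix>", substituting the layer index when the
// pattern contains %d (extra printf-style arguments are ignored otherwise).
static std::string tn(llm_tensor t, const char * suffix, int layer = -1) {
    char base[128];
    snprintf(base, sizeof(base), TENSOR_NAMES.at(t), layer);
    return std::string(base) + "." + suffix;
}

int main() {
    printf("%s\n", tn(LLM_TENSOR_OUTPUT, "weight").c_str());    // output.weight
    printf("%s\n", tn(LLM_TENSOR_ATTN_Q, "weight", 3).c_str()); // blk.3.attn_q.weight
}
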
@@ -2571,7 +2571,12 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
 
                     // output
                     output_norm = create_tensor(tn(LLM_TENSOR_OUTPUT_NORM, "weight"), {n_embd}, 0);
-                    output      = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD,  "weight"), {n_embd, n_vocab}, TENSOR_DUPLICATED); // same as tok_embd, duplicated to allow offloading
+                    output      = create_tensor(tn(LLM_TENSOR_OUTPUT,      "weight"), {n_embd, n_vocab}, TENSOR_NOT_REQUIRED);
 
+                    // if output is NULL, init from the input tok embed
+                    if (output == NULL) {
+                        output = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD,   "weight"), {n_embd, n_vocab}, TENSOR_DUPLICATED);
+                    }
+
                     for (int i = 0; i < n_layer; ++i) {
                         auto & layer = layers[i];
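
Taken together, the change makes the dedicated LM head optional: load "output.weight" when the GGUF file provides one (TENSOR_NOT_REQUIRED yields NULL instead of a hard failure), and otherwise fall back to duplicating the token embedding, since Gemma 3 checkpoints tie the output projection to the input embeddings. A hedged, self-contained sketch of that fallback pattern follows; gguf_file, load_optional(), and the tensor struct are illustrative stand-ins, not llama.cpp's actual loader API.

#include <cstdio>
#include <map>
#include <memory>
#include <string>

struct tensor { std::string name; /* weights omitted */ };

struct gguf_file {
    std::map<std::string, std::shared_ptr<tensor>> tensors;

    // Returns nullptr when the tensor is absent -- the analogue of
    // create_tensor(..., TENSOR_NOT_REQUIRED) in the diff above.
    std::shared_ptr<tensor> load_optional(const std::string & name) {
        auto it = tensors.find(name);
        return it == tensors.end() ? nullptr : it->second;
    }
};

std::shared_ptr<tensor> load_output(gguf_file & f) {
    // Prefer a real output head if the checkpoint ships one.
    auto output = f.load_optional("output.weight");
    if (!output) {
        // Tied weights: reuse the token embedding as the output
        // projection (the TENSOR_DUPLICATED path in the diff).
        output = f.load_optional("token_embd.weight");
    }
    return output;
}

int main() {
    gguf_file f;
    f.tensors["token_embd.weight"] = std::make_shared<tensor>(tensor{"token_embd.weight"});
    auto out = load_output(f);
    printf("using: %s\n", out ? out->name.c_str() : "(none)");
}
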
Author: Xuan-Son Nguyen