	llama : fix falcon arch for tied output embeddings (#4978)
* falcon arch fix for tied output embeddings
* Update llama.cpp
  Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
* Update llama.cpp
* Update llama.cpp
  Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
* Update llama.cpp
---------
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
@@ -3438,7 +3438,12 @@ static bool llm_load_tensors(
                     {
                         model.output_norm   = ml.create_tensor(ctx_output,       tn(LLM_TENSOR_OUTPUT_NORM, "weight"), {n_embd});
                         model.output_norm_b = ml.create_tensor(ctx_output,       tn(LLM_TENSOR_OUTPUT_NORM, "bias"),   {n_embd});
-                        model.output        = ml.create_tensor(ctx_output_split, tn(LLM_TENSOR_OUTPUT,      "weight"), {n_embd, n_vocab});
+                        if (gguf_find_tensor(ml.ctx_gguf, tn(LLM_TENSOR_OUTPUT, "weight").c_str()) >= 0) {
+                            model.output = ml.create_tensor(ctx_output_split, tn(LLM_TENSOR_OUTPUT,     "weight"), {n_embd, n_vocab});
+                        } else {
+                            model.output = ml.create_tensor(ctx_output_split, tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}); // needs to be on GPU
+                            ml.n_created--; // artificial tensor
+                        }
                     }

                     for (int i = 0; i < n_layer; ++i) {
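For context, the change lets the Falcon loader handle GGUF files that ship no separate `output.weight`: if `gguf_find_tensor` does not find the dedicated output projection, the token embedding matrix (`LLM_TENSOR_TOKEN_EMBD`) is reused as the output head (tied embeddings), and `ml.n_created` is decremented because no new tensor was actually created. Below is a minimal, self-contained sketch of the same fallback pattern; it is not llama.cpp API — the `resolve_output` helper and the `std::map` tensor table are hypothetical stand-ins for the GGUF tensor lookup.

```cpp
// Sketch of the tied-output-embedding fallback (hypothetical types, not llama.cpp API).
#include <cstdio>
#include <map>
#include <string>
#include <vector>

struct tensor { std::vector<float> data; };

// Return the dedicated output projection if the file provides one; otherwise
// fall back to the token embedding matrix (weights tied between input and output).
static const tensor * resolve_output(const std::map<std::string, tensor> & tensors) {
    auto it = tensors.find("output.weight");
    if (it != tensors.end()) {
        return &it->second;                          // model ships a separate lm_head
    }
    it = tensors.find("token_embd.weight");
    return it != tensors.end() ? &it->second : nullptr; // reuse tied embeddings
}

int main() {
    std::map<std::string, tensor> tensors;
    tensors["token_embd.weight"] = tensor{std::vector<float>(16, 0.1f)}; // no output.weight present

    const tensor * out = resolve_output(tensors);
    std::printf("output weights %s (tied: %s)\n",
                out ? "resolved" : "missing",
                tensors.count("output.weight") ? "no" : "yes");
    return 0;
}
```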