	mpt : do not duplicate token_embd.weight on disk (#5670)
@@ -509,7 +509,6 @@ static std::map<llm_arch, std::map<llm_tensor, std::string>> LLM_TENSOR_NAMES =
         {
             { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
             { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
-            { LLM_TENSOR_OUTPUT,          "output" },
             { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
             { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
             { LLM_TENSOR_ATTN_QKV,        "blk.%d.attn_qkv" },
@@ -4056,7 +4055,10 @@ static bool llm_load_tensors(
                         model.output_norm   = ml.create_tensor(ctx_output,       tn(LLM_TENSOR_OUTPUT_NORM, "weight"), {n_embd});
                         model.output_norm_b = ml.create_tensor(ctx_output,       tn(LLM_TENSOR_OUTPUT_NORM, "bias"),   {n_embd}, false);
 
-                        model.output        = ml.create_tensor(ctx_output_split, tn(LLM_TENSOR_OUTPUT,      "weight"), {n_embd, n_vocab});
+                        // same as tok_embd, duplicated to allow offloading
+                        model.output        = ml.create_tensor(ctx_output_split, tn(LLM_TENSOR_TOKEN_EMBD,  "weight"), {n_embd, n_vocab});
+                        ml.n_created--; // artificial tensor
+                        ml.size_data += ggml_nbytes(model.output);
                     }
 
                     for (int i = 0; i < n_layer; ++i) {
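For context, this is the standard weight-tying setup: MPT's output projection is the same matrix as its token embeddings, so the GGUF file only needs to store token_embd.weight once. The loader now creates model.output from that same on-disk tensor, decrements ml.n_created because two runtime tensors are backed by one stored tensor, and adds the duplicated bytes back to ml.size_data so the load-progress accounting stays correct. The in-diff comment "duplicated to allow offloading" explains why a separate model.output tensor still exists at runtime: it can be placed or split across backends independently of the embedding table.

Below is a minimal, self-contained C++ sketch of a tied output head; the names and values are illustrative only, not llama.cpp code. With tying, the logit for token t is just the dot product of the final hidden state with embedding row t, so no separate output.weight is needed.

    #include <cstdio>
    #include <vector>

    // Minimal sketch of a tied output head (illustrative, not llama.cpp code).
    // One [n_vocab x n_embd] matrix serves as both the token embedding table
    // and the output projection, so it is stored on disk exactly once.
    int main() {
        const int n_vocab = 4;
        const int n_embd  = 3;

        // Row-major token-embedding matrix: row t is the embedding of token t.
        std::vector<float> tok_embd = {
            0.1f, 0.2f, 0.3f,
            0.4f, 0.5f, 0.6f,
            0.7f, 0.8f, 0.9f,
            1.0f, 1.1f, 1.2f,
        };

        // Final hidden state of the transformer for one position.
        std::vector<float> hidden = { 1.0f, 0.0f, -1.0f };

        // Tied "output" matmul: logits[t] = dot(tok_embd[t], hidden).
        for (int t = 0; t < n_vocab; ++t) {
            float logit = 0.0f;
            for (int e = 0; e < n_embd; ++e) {
                logit += tok_embd[t * n_embd + e] * hidden[e];
            }
            std::printf("logit[%d] = %.3f\n", t, logit);
        }
        return 0;
    }

The change is backward-compatible at the architecture level because the removed LLM_TENSOR_OUTPUT entry only affects MPT's tensor-name map; models whose converters no longer emit output.weight simply load the embedding data twice under two tensor handles.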
Author: Jared Van Bortel