mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-11-03 09:22:01 +00:00 
			
		
		
		
	mpt : do not duplicate token_embd.weight on disk (#5670)
This commit is contained in:
		@@ -622,11 +622,6 @@ class MPTModel(Model):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
            self.gguf_writer.add_tensor(new_name, data)
 | 
					            self.gguf_writer.add_tensor(new_name, data)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            # note: MPT output is tied to (same as) wte in original model;
 | 
					 | 
				
			||||||
            # for easier implementation in llama.cpp it's duplicated in GGUF, though :/
 | 
					 | 
				
			||||||
            if new_name == "token_embd.weight":
 | 
					 | 
				
			||||||
                self.gguf_writer.add_tensor("output.weight", data)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
class OrionModel(Model):
 | 
					class OrionModel(Model):
 | 
				
			||||||
    def set_vocab(self):
 | 
					    def set_vocab(self):
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -509,7 +509,6 @@ static std::map<llm_arch, std::map<llm_tensor, std::string>> LLM_TENSOR_NAMES =
 | 
				
			|||||||
        {
 | 
					        {
 | 
				
			||||||
            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
 | 
					            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
 | 
				
			||||||
            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
 | 
					            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
 | 
				
			||||||
            { LLM_TENSOR_OUTPUT,          "output" },
 | 
					 | 
				
			||||||
            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
 | 
					            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
 | 
				
			||||||
            { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
 | 
					            { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
 | 
				
			||||||
            { LLM_TENSOR_ATTN_QKV,        "blk.%d.attn_qkv" },
 | 
					            { LLM_TENSOR_ATTN_QKV,        "blk.%d.attn_qkv" },
 | 
				
			||||||
@@ -4056,7 +4055,10 @@ static bool llm_load_tensors(
 | 
				
			|||||||
                        model.output_norm   = ml.create_tensor(ctx_output,       tn(LLM_TENSOR_OUTPUT_NORM, "weight"), {n_embd});
 | 
					                        model.output_norm   = ml.create_tensor(ctx_output,       tn(LLM_TENSOR_OUTPUT_NORM, "weight"), {n_embd});
 | 
				
			||||||
                        model.output_norm_b = ml.create_tensor(ctx_output,       tn(LLM_TENSOR_OUTPUT_NORM, "bias"),   {n_embd}, false);
 | 
					                        model.output_norm_b = ml.create_tensor(ctx_output,       tn(LLM_TENSOR_OUTPUT_NORM, "bias"),   {n_embd}, false);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                        model.output        = ml.create_tensor(ctx_output_split, tn(LLM_TENSOR_OUTPUT,      "weight"), {n_embd, n_vocab});
 | 
					                        // same as tok_embd, duplicated to allow offloading
 | 
				
			||||||
 | 
					                        model.output        = ml.create_tensor(ctx_output_split, tn(LLM_TENSOR_TOKEN_EMBD,  "weight"), {n_embd, n_vocab});
 | 
				
			||||||
 | 
					                        ml.n_created--; // artificial tensor
 | 
				
			||||||
 | 
					                        ml.size_data += ggml_nbytes(model.output);
 | 
				
			||||||
                    }
 | 
					                    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                    for (int i = 0; i < n_layer; ++i) {
 | 
					                    for (int i = 0; i < n_layer; ++i) {
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user