	llama : fix compatibility with old 2 expert models (#6735)
Author: slaren
@@ -4592,7 +4592,7 @@ static bool llm_load_tensors(
     size_t ctx_size = ggml_tensor_overhead()*(ml.n_tensors + 1); // +1 for models where tok_embd is duplicated as output
 
     // for moe merged tensors
-    ctx_size += ggml_tensor_overhead()*hparams.n_expert*n_layer;
+    ctx_size += ggml_tensor_overhead()*n_layer*3;
 
     std::map<ggml_backend_buffer_type_t, ggml_context *> ctx_map;
     for (auto & it : buft_layer_count) {
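For context, here is a minimal sketch (not code from the commit) of why this sizing matters. A ggml context created with no_alloc still needs its mem_size to cover the metadata of every tensor later created in it, at ggml_tensor_overhead() bytes per tensor. Merging per-expert MoE weights creates 3 new tensors per layer (ffn_gate_exps, ffn_down_exps, ffn_up_exps), so sizing by hparams.n_expert*n_layer under-allocates whenever n_expert < 3, as with the old 2-expert models this commit fixes. The helper name make_moe_merge_ctx and its standalone use below are assumptions for illustration.

// Minimal sketch, assuming the public ggml C API; the helper name and
// standalone setting are illustrative, not taken from llama.cpp.
#include "ggml.h"
#include <stddef.h>

static struct ggml_context * make_moe_merge_ctx(int n_layer) {
    // Reserve metadata space for the 3 merged MoE tensors per layer
    // (ffn_gate_exps, ffn_down_exps, ffn_up_exps). Using
    // hparams.n_expert*n_layer instead under-allocates when n_expert < 3,
    // e.g. the old 2-expert models (2*n_layer < 3*n_layer).
    size_t ctx_size = ggml_tensor_overhead()*n_layer*3;

    struct ggml_init_params params = {
        /*.mem_size   =*/ ctx_size,
        /*.mem_buffer =*/ NULL, // let ggml allocate the metadata buffer
        /*.no_alloc   =*/ true, // metadata only; tensor data lives in backend buffers
    };
    return ggml_init(params);
}

With no_alloc set, exceeding ctx_size when creating tensor metadata causes ggml_init'd contexts to run out of space, so the reserved count must be an upper bound on the tensors actually created; fixing it at n_layer*3 makes the bound independent of n_expert.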