mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-10-31 08:51:55 +00:00 
			
		
		
		
	llama : add support for larger Granite Code Models (20B, 34B) (#7324)
Tie the weights for ARCH_STARCODER to support the larger Granite code models. Partially addresses ggerganov/issues/7116. There still remain a few things to fix; currently this requires `--override-kv tokenizer.ggml.add_bos_token=bool:false`.
This commit is contained in:
		| @@ -5188,7 +5188,14 @@ static bool llm_load_tensors( | |||||||
|                     { |                     { | ||||||
|                         model.output_norm   = ml.create_tensor(ctx_output,       tn(LLM_TENSOR_OUTPUT_NORM, "weight"), {n_embd}); |                         model.output_norm   = ml.create_tensor(ctx_output,       tn(LLM_TENSOR_OUTPUT_NORM, "weight"), {n_embd}); | ||||||
|                         model.output_norm_b = ml.create_tensor(ctx_output,       tn(LLM_TENSOR_OUTPUT_NORM, "bias"),   {n_embd}); |                         model.output_norm_b = ml.create_tensor(ctx_output,       tn(LLM_TENSOR_OUTPUT_NORM, "bias"),   {n_embd}); | ||||||
|                         model.output        = ml.create_tensor(ctx_output_split, tn(LLM_TENSOR_OUTPUT,      "weight"), {n_embd, n_vocab}); |                         model.output        = ml.create_tensor(ctx_output_split, tn(LLM_TENSOR_OUTPUT,      "weight"), {n_embd, n_vocab}, false); | ||||||
|  |                         if (!model.output) { | ||||||
|  |                             // needs to be on GPU | ||||||
|  |                             model.output = ml.create_tensor(ctx_output_split, tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}); | ||||||
|  |                             ml.n_created--; // artificial tensor | ||||||
|  |                             ml.size_data += ggml_nbytes(model.output); | ||||||
|  |                         } | ||||||
|  |  | ||||||
|                     } |                     } | ||||||
|  |  | ||||||
|                     for (int i = 0; i < n_layer; ++i) { |                     for (int i = 0; i < n_layer; ++i) { | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Steffen Röcker
					Steffen Röcker