	mpt : add optional bias tensors (#5638)
Update MPT to support optional bias tensors, so that it works with PhoGPT and SEA-LION models that were pre-trained with 'bias'.
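The mechanism is the extra `false` argument passed to `ml.create_tensor(...)`: it marks the tensor as optional, so a checkpoint that does not contain it still loads and the bias pointer simply stays null. The graph code then either checks the pointer itself (as for `bqkv`) or hands it to helpers that tolerate a null bias. Below is a minimal, self-contained sketch of that pattern; the loader type, helper, and tensor names are simplified stand-ins for illustration, not the actual llama.cpp API.

// Sketch of the "optional tensor" pattern (hypothetical loader type and
// simplified tensor names; the real code uses llama.cpp's llama_model_loader
// and ggml tensors). A tensor requested with required = false may be absent
// from the model file, in which case the loader returns nullptr and every
// use site checks for that.
#include <cstdio>
#include <map>
#include <stdexcept>
#include <string>
#include <vector>

struct tensor { std::vector<float> data; };

struct loader {
    std::map<std::string, tensor> tensors; // tensors actually present in the model file

    // Return the named tensor, or nullptr when it is missing and not required.
    tensor * create_tensor(const std::string & name, bool required = true) {
        auto it = tensors.find(name);
        if (it == tensors.end()) {
            if (required) { throw std::runtime_error("missing tensor: " + name); }
            return nullptr; // optional bias not in this checkpoint
        }
        return &it->second;
    }
};

// Add the bias to an activation only if the model actually has one.
static void add_bias_if_present(std::vector<float> & cur, const tensor * b) {
    if (!b) return; // model (e.g. an original MPT checkpoint) was trained without this bias
    for (size_t i = 0; i < cur.size() && i < b->data.size(); ++i) {
        cur[i] += b->data[i];
    }
}

int main() {
    loader ml;
    ml.tensors["blk.0.attn_qkv.bias"] = tensor{{0.1f, 0.2f}}; // bias present

    tensor * bqkv = ml.create_tensor("blk.0.attn_qkv.bias",    /*required =*/ false); // found
    tensor * bo   = ml.create_tensor("blk.0.attn_output.bias", /*required =*/ false); // absent -> nullptr

    std::vector<float> cur = {1.0f, 2.0f};
    add_bias_if_present(cur, bqkv); // applied
    add_bias_if_present(cur, bo);   // no-op
    printf("%.1f %.1f\n", cur[0], cur[1]); // prints: 1.1 2.2
    return 0;
}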
 llama.cpp | 38 +++++++++++++++++++++++++++-----------
 1 file changed, 27 insertions(+), 11 deletions(-)
--- a/llama.cpp
+++ b/llama.cpp
@@ -4054,6 +4054,8 @@ static bool llm_load_tensors(
                     // output
                     {
                         model.output_norm   = ml.create_tensor(ctx_output,       tn(LLM_TENSOR_OUTPUT_NORM, "weight"), {n_embd});
+                        model.output_norm_b = ml.create_tensor(ctx_output,       tn(LLM_TENSOR_OUTPUT_NORM, "bias"),   {n_embd}, false);
+
                         model.output        = ml.create_tensor(ctx_output_split, tn(LLM_TENSOR_OUTPUT,      "weight"), {n_embd, n_vocab});
                     }
 
@@ -4063,14 +4065,23 @@
 
                         auto & layer = model.layers[i];
 
-                        layer.attn_norm = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_NORM, "weight", i), {n_embd});
+                        layer.attn_norm   = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_NORM, "weight", i), {n_embd});
+                        layer.attn_norm_b = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_NORM, "bias", i),   {n_embd}, false);
 
                         layer.wqkv = ml.create_tensor(ctx_split, tn(LLM_TENSOR_ATTN_QKV, "weight", i), {n_embd, n_embd + 2*n_embd_gqa});
-                        layer.wo   = ml.create_tensor(ctx_split, tn(LLM_TENSOR_ATTN_OUT, "weight", i), {n_embd, n_embd});
+                        layer.bqkv = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_QKV, "bias", i),   {n_embd + 2*n_embd_gqa}, false);
 
-                        layer.ffn_norm = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_FFN_NORM, "weight", i), {n_embd});
-                        layer.ffn_down = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_DOWN, "weight", i), {  n_ff, n_embd});
-                        layer.ffn_up   = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_UP,   "weight", i), {n_embd,   n_ff});
+                        layer.wo   = ml.create_tensor(ctx_split, tn(LLM_TENSOR_ATTN_OUT, "weight", i), {n_embd, n_embd});
+                        layer.bo   = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_OUT, "bias", i),   {n_embd}, false);
+
+                        layer.ffn_norm   = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_FFN_NORM, "weight", i), {n_embd});
+                        layer.ffn_norm_b = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_FFN_NORM, "bias", i),   {n_embd}, false);
+
+                        layer.ffn_down   = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_DOWN, "weight", i), {n_ff, n_embd});
+                        layer.ffn_down_b = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_FFN_DOWN, "bias", i),   {n_embd}, false);
+
+                        layer.ffn_up     = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_UP,   "weight", i), {n_embd,   n_ff});
+                        layer.ffn_up_b   = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_FFN_UP,   "bias", i),   {n_ff}, false);
 
                         // AWQ ScaleActivation layer
                         layer.ffn_act = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_FFN_ACT, "scales", i), {n_ff}, false);
@@ -6171,7 +6182,7 @@ struct llm_build_context {
 
             attn_norm = llm_build_norm(ctx0, inpL, hparams,
                     model.layers[il].attn_norm,
-                    NULL,
+                    model.layers[il].attn_norm_b,
                     LLM_NORM, cb, il);
             cb(attn_norm, "attn_norm", il);
 
@@ -6181,6 +6192,11 @@
 
                 cur = ggml_mul_mat(ctx0, model.layers[il].wqkv, cur);
                 cb(cur, "wqkv", il);
+
+                if (model.layers[il].bqkv){
+                    cur = ggml_add(ctx0, cur, model.layers[il].bqkv);
+                    cb(cur, "bqkv", il);
+                }
 
                 if (hparams.f_clamp_kqv > 0.0f) {
                     cur = ggml_clamp(ctx0, cur, -hparams.f_clamp_kqv, hparams.f_clamp_kqv);
@@ -6198,7 +6214,7 @@
                 Qcur = ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens);
 
                 cur = llm_build_kv(ctx0, model, hparams, kv_self, gf,
-                        model.layers[il].wo, NULL,
+                        model.layers[il].wo, model.layers[il].bo,
                         Kcur, Vcur, Qcur, KQ_mask, KQ_pos, n_ctx, n_tokens, kv_head, n_kv, 1.0f/sqrtf(float(n_embd_head)), cb, il);
                 cb(cur, "kqv_out", il);
             }
@@ -6211,13 +6227,13 @@
             {
                 cur = llm_build_norm(ctx0, ffn_inp, hparams,
                         model.layers[il].ffn_norm,
-                        NULL,
+                        model.layers[il].ffn_norm_b,
                         LLM_NORM, cb, il);
                 cb(cur, "ffn_norm", il);
                 cur = llm_build_ffn(ctx0, cur,
-                        model.layers[il].ffn_up,   NULL,
+                        model.layers[il].ffn_up,   model.layers[il].ffn_up_b,
                         NULL,                      NULL,
-                        model.layers[il].ffn_down, NULL,
+                        model.layers[il].ffn_down, model.layers[il].ffn_down_b,
                         model.layers[il].ffn_act,
                         LLM_FFN_GELU, LLM_FFN_SEQ, cb, il);
                 cb(cur, "ffn_out", il);
@@ -6234,7 +6250,7 @@
 
         cur = llm_build_norm(ctx0, cur, hparams,
                 model.output_norm,
-                NULL,
+                model.output_norm_b,
                 LLM_NORM, cb, -1);
         cb(cur, "result_norm", -1);
 
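For the remaining bias tensors, the change simply swaps `NULL` for the (possibly null) bias pointer in the existing calls, so `llm_build_norm`, `llm_build_kv`, and `llm_build_ffn` evidently treat a null bias as "no bias"; they were already being passed `NULL` at those positions before this commit. A small self-contained sketch of that convention follows, using a hypothetical norm helper rather than the real llama.cpp one.

// Sketch (hypothetical helper, not llama.cpp's llm_build_norm): a single
// code path covers both bias and bias-free checkpoints by treating a null
// bias pointer as "nothing to add".
#include <cmath>
#include <cstdio>
#include <vector>

// Layer norm of x with scale w and an optional bias b (b may be nullptr).
static std::vector<float> norm_with_optional_bias(const std::vector<float> & x,
                                                  const std::vector<float> & w,
                                                  const std::vector<float> * b) {
    float mean = 0.0f;
    for (float v : x) mean += v;
    mean /= x.size();

    float var = 0.0f;
    for (float v : x) var += (v - mean) * (v - mean);
    var /= x.size();

    std::vector<float> y(x.size());
    for (size_t i = 0; i < x.size(); ++i) {
        y[i] = (x[i] - mean) / std::sqrt(var + 1e-5f) * w[i];
        if (b) { y[i] += (*b)[i]; } // bias term only when the checkpoint provides one
    }
    return y;
}

int main() {
    std::vector<float> x = {1.0f, 2.0f, 3.0f};
    std::vector<float> w = {1.0f, 1.0f, 1.0f};
    std::vector<float> b = {0.5f, 0.5f, 0.5f};

    auto with_bias    = norm_with_optional_bias(x, w, &b);      // e.g. PhoGPT / SEA-LION
    auto without_bias = norm_with_optional_bias(x, w, nullptr); // original MPT checkpoints
    printf("%.3f vs %.3f\n", with_bias[0], without_bias[0]);
    return 0;
}

Checkpoints exported without bias tensors therefore build exactly the same graph as before, while PhoGPT and SEA-LION get their bias terms applied.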
Author: Dat Quoc Nguyen