Mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-11-04 09:32:00 +00:00)
			
		
		
		
	llama : extend llm_build_ffn() to support _scale tensors (#8103)
This commit is contained in:
		
							
								
								
									
										255
									
								
								llama.cpp
									
									
									
									
									
								
							
							
						
						
									
										255
									
								
								llama.cpp
									
									
									
									
									
								
							@@ -7212,10 +7212,13 @@ static struct ggml_tensor * llm_build_ffn(
 | 
				
			|||||||
         struct ggml_tensor * cur,
 | 
					         struct ggml_tensor * cur,
 | 
				
			||||||
         struct ggml_tensor * up,
 | 
					         struct ggml_tensor * up,
 | 
				
			||||||
         struct ggml_tensor * up_b,
 | 
					         struct ggml_tensor * up_b,
 | 
				
			||||||
 | 
					         struct ggml_tensor * up_s,
 | 
				
			||||||
         struct ggml_tensor * gate,
 | 
					         struct ggml_tensor * gate,
 | 
				
			||||||
         struct ggml_tensor * gate_b,
 | 
					         struct ggml_tensor * gate_b,
 | 
				
			||||||
 | 
					         struct ggml_tensor * gate_s,
 | 
				
			||||||
         struct ggml_tensor * down,
 | 
					         struct ggml_tensor * down,
 | 
				
			||||||
         struct ggml_tensor * down_b,
 | 
					         struct ggml_tensor * down_b,
 | 
				
			||||||
 | 
					         struct ggml_tensor * down_s,
 | 
				
			||||||
         struct ggml_tensor * act_scales,
 | 
					         struct ggml_tensor * act_scales,
 | 
				
			||||||
            llm_ffn_op_type   type_op,
 | 
					            llm_ffn_op_type   type_op,
 | 
				
			||||||
          llm_ffn_gate_type   type_gate,
 | 
					          llm_ffn_gate_type   type_gate,
 | 
				
			||||||
@@ -7229,6 +7232,11 @@ static struct ggml_tensor * llm_build_ffn(
 | 
				
			|||||||
        cb(tmp, "ffn_up_b", il);
 | 
					        cb(tmp, "ffn_up_b", il);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if (up_s) {
 | 
				
			||||||
 | 
					        tmp = ggml_mul(ctx, tmp, up_s);
 | 
				
			||||||
 | 
					        cb(tmp, "ffn_up_s", il);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if (gate) {
 | 
					    if (gate) {
 | 
				
			||||||
        switch (type_gate) {
 | 
					        switch (type_gate) {
 | 
				
			||||||
            case LLM_FFN_SEQ:
 | 
					            case LLM_FFN_SEQ:
 | 
				
			||||||
@@ -7247,6 +7255,12 @@ static struct ggml_tensor * llm_build_ffn(
 | 
				
			|||||||
            cur = ggml_add(ctx, cur, gate_b);
 | 
					            cur = ggml_add(ctx, cur, gate_b);
 | 
				
			||||||
            cb(cur, "ffn_gate_b", il);
 | 
					            cb(cur, "ffn_gate_b", il);
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        if (gate_s) {
 | 
				
			||||||
 | 
					            cur = ggml_mul(ctx, cur, gate_s);
 | 
				
			||||||
 | 
					            cb(cur, "ffn_gate_s", il);
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    } else {
 | 
					    } else {
 | 
				
			||||||
        cur = tmp;
 | 
					        cur = tmp;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
@@ -7286,7 +7300,10 @@ static struct ggml_tensor * llm_build_ffn(
 | 
				
			|||||||
        cb(cur, "ffn_gate_par", il);
 | 
					        cb(cur, "ffn_gate_par", il);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    cur = ggml_mul_mat(ctx, down, cur);
 | 
					    if (down) {
 | 
				
			||||||
 | 
					        cur = ggml_mul_mat(ctx, down, cur);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if (down_b) {
 | 
					    if (down_b) {
 | 
				
			||||||
        cb(cur, "ffn_down", il);
 | 
					        cb(cur, "ffn_down", il);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
@@ -7295,6 +7312,11 @@ static struct ggml_tensor * llm_build_ffn(
 | 
				
			|||||||
        cur = ggml_add(ctx, cur, down_b);
 | 
					        cur = ggml_add(ctx, cur, down_b);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if (down_s) {
 | 
				
			||||||
 | 
					        cur = ggml_mul(ctx, cur, down_s);
 | 
				
			||||||
 | 
					        cb(cur, "ffn_down_s", il);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    return cur;
 | 
					    return cur;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -8003,9 +8025,9 @@ struct llm_build_context {
 | 
				
			|||||||
                cb(cur, "ffn_norm", il);
 | 
					                cb(cur, "ffn_norm", il);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                cur = llm_build_ffn(ctx0, cur,
 | 
					                cur = llm_build_ffn(ctx0, cur,
 | 
				
			||||||
                        model.layers[il].ffn_up,   model.layers[il].ffn_up_b,
 | 
					                        model.layers[il].ffn_up,   model.layers[il].ffn_up_b,   NULL,
 | 
				
			||||||
                        model.layers[il].ffn_gate, model.layers[il].ffn_gate_b,
 | 
					                        model.layers[il].ffn_gate, model.layers[il].ffn_gate_b, NULL,
 | 
				
			||||||
                        model.layers[il].ffn_down, model.layers[il].ffn_down_b,
 | 
					                        model.layers[il].ffn_down, model.layers[il].ffn_down_b, NULL,
 | 
				
			||||||
                        NULL,
 | 
					                        NULL,
 | 
				
			||||||
                        LLM_FFN_SILU, LLM_FFN_PAR, cb, il);
 | 
					                        LLM_FFN_SILU, LLM_FFN_PAR, cb, il);
 | 
				
			||||||
                cb(cur, "ffn_out", il);
 | 
					                cb(cur, "ffn_out", il);
 | 
				
			||||||
@@ -8137,9 +8159,9 @@ struct llm_build_context {
 | 
				
			|||||||
                cb(cur, "ffn_norm", il);
 | 
					                cb(cur, "ffn_norm", il);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                cur = llm_build_ffn(ctx0, cur,
 | 
					                cur = llm_build_ffn(ctx0, cur,
 | 
				
			||||||
                        model.layers[il].ffn_up,   NULL,
 | 
					                        model.layers[il].ffn_up,   NULL, NULL,
 | 
				
			||||||
                        model.layers[il].ffn_gate, NULL,
 | 
					                        model.layers[il].ffn_gate, NULL, NULL,
 | 
				
			||||||
                        model.layers[il].ffn_down, NULL,
 | 
					                        model.layers[il].ffn_down, NULL, NULL,
 | 
				
			||||||
                        NULL,
 | 
					                        NULL,
 | 
				
			||||||
                        LLM_FFN_SILU, LLM_FFN_PAR, cb, il);
 | 
					                        LLM_FFN_SILU, LLM_FFN_PAR, cb, il);
 | 
				
			||||||
                cb(cur, "ffn_out", il);
 | 
					                cb(cur, "ffn_out", il);
 | 
				
			||||||
@@ -8242,9 +8264,9 @@ struct llm_build_context {
 | 
				
			|||||||
                cb(cur, "ffn_norm", il);
 | 
					                cb(cur, "ffn_norm", il);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                cur = llm_build_ffn(ctx0, cur,
 | 
					                cur = llm_build_ffn(ctx0, cur,
 | 
				
			||||||
                        model.layers[il].ffn_up,   NULL,
 | 
					                        model.layers[il].ffn_up,   NULL, NULL,
 | 
				
			||||||
                        model.layers[il].ffn_gate, NULL,
 | 
					                        model.layers[il].ffn_gate, NULL, NULL,
 | 
				
			||||||
                        model.layers[il].ffn_down, NULL,
 | 
					                        model.layers[il].ffn_down, NULL, NULL,
 | 
				
			||||||
                        NULL,
 | 
					                        NULL,
 | 
				
			||||||
                        LLM_FFN_SILU, LLM_FFN_PAR, cb, il);
 | 
					                        LLM_FFN_SILU, LLM_FFN_PAR, cb, il);
 | 
				
			||||||
                cb(cur, "ffn_out", il);
 | 
					                cb(cur, "ffn_out", il);
 | 
				
			||||||
@@ -8358,9 +8380,9 @@ struct llm_build_context {
 | 
				
			|||||||
            // feed forward
 | 
					            // feed forward
 | 
				
			||||||
            {
 | 
					            {
 | 
				
			||||||
                cur = llm_build_ffn(ctx0, attn_norm, // !! use the attn norm, not the result
 | 
					                cur = llm_build_ffn(ctx0, attn_norm, // !! use the attn norm, not the result
 | 
				
			||||||
                        model.layers[il].ffn_up,   NULL,
 | 
					                        model.layers[il].ffn_up,   NULL, NULL,
 | 
				
			||||||
                        NULL,                      NULL,
 | 
					                        NULL,                      NULL, NULL,
 | 
				
			||||||
                        model.layers[il].ffn_down, NULL,
 | 
					                        model.layers[il].ffn_down, NULL, NULL,
 | 
				
			||||||
                        NULL,
 | 
					                        NULL,
 | 
				
			||||||
                        LLM_FFN_GELU, LLM_FFN_SEQ, cb, il);
 | 
					                        LLM_FFN_GELU, LLM_FFN_SEQ, cb, il);
 | 
				
			||||||
                cb(cur, "ffn_out", il);
 | 
					                cb(cur, "ffn_out", il);
 | 
				
			||||||
@@ -8749,9 +8771,9 @@ struct llm_build_context {
 | 
				
			|||||||
                cb(cur, "ffn_norm", il);
 | 
					                cb(cur, "ffn_norm", il);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                cur = llm_build_ffn(ctx0, cur,
 | 
					                cur = llm_build_ffn(ctx0, cur,
 | 
				
			||||||
                        model.layers[il].ffn_up,   model.layers[il].ffn_up_b,
 | 
					                        model.layers[il].ffn_up,   model.layers[il].ffn_up_b,   NULL,
 | 
				
			||||||
                        NULL,                      NULL,
 | 
					                        NULL,                      NULL,                        NULL,
 | 
				
			||||||
                        model.layers[il].ffn_down, model.layers[il].ffn_down_b,
 | 
					                        model.layers[il].ffn_down, model.layers[il].ffn_down_b, NULL,
 | 
				
			||||||
                        NULL,
 | 
					                        NULL,
 | 
				
			||||||
                        LLM_FFN_GELU, LLM_FFN_SEQ, cb, il);
 | 
					                        LLM_FFN_GELU, LLM_FFN_SEQ, cb, il);
 | 
				
			||||||
                cb(cur, "ffn_out", il);
 | 
					                cb(cur, "ffn_out", il);
 | 
				
			||||||
@@ -8841,9 +8863,9 @@ struct llm_build_context {
 | 
				
			|||||||
                cb(cur, "ffn_norm", il);
 | 
					                cb(cur, "ffn_norm", il);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                cur = llm_build_ffn(ctx0, cur,
 | 
					                cur = llm_build_ffn(ctx0, cur,
 | 
				
			||||||
                        model.layers[il].ffn_up,   NULL,
 | 
					                        model.layers[il].ffn_up,   NULL, NULL,
 | 
				
			||||||
                        model.layers[il].ffn_gate, NULL,
 | 
					                        model.layers[il].ffn_gate, NULL, NULL,
 | 
				
			||||||
                        model.layers[il].ffn_down, NULL,
 | 
					                        model.layers[il].ffn_down, NULL, NULL,
 | 
				
			||||||
                        NULL,
 | 
					                        NULL,
 | 
				
			||||||
                        LLM_FFN_SILU, LLM_FFN_PAR, cb, il);
 | 
					                        LLM_FFN_SILU, LLM_FFN_PAR, cb, il);
 | 
				
			||||||
                cb(cur, "ffn_out", il);
 | 
					                cb(cur, "ffn_out", il);
 | 
				
			||||||
@@ -9026,23 +9048,23 @@ struct llm_build_context {
 | 
				
			|||||||
            // feed-forward network
 | 
					            // feed-forward network
 | 
				
			||||||
            if (model.arch == LLM_ARCH_BERT) {
 | 
					            if (model.arch == LLM_ARCH_BERT) {
 | 
				
			||||||
                cur = llm_build_ffn(ctx0, cur,
 | 
					                cur = llm_build_ffn(ctx0, cur,
 | 
				
			||||||
                        model.layers[il].ffn_up,   model.layers[il].ffn_up_b,
 | 
					                        model.layers[il].ffn_up,   model.layers[il].ffn_up_b,   NULL,
 | 
				
			||||||
                        NULL,                      NULL,
 | 
					                        NULL,                      NULL,                        NULL,
 | 
				
			||||||
                        model.layers[il].ffn_down, model.layers[il].ffn_down_b,
 | 
					                        model.layers[il].ffn_down, model.layers[il].ffn_down_b, NULL,
 | 
				
			||||||
                        NULL,
 | 
					                        NULL,
 | 
				
			||||||
                        LLM_FFN_GELU, LLM_FFN_SEQ, cb, il);
 | 
					                        LLM_FFN_GELU, LLM_FFN_SEQ, cb, il);
 | 
				
			||||||
            } else if (model.arch == LLM_ARCH_JINA_BERT_V2) {
 | 
					            } else if (model.arch == LLM_ARCH_JINA_BERT_V2) {
 | 
				
			||||||
                cur = llm_build_ffn(ctx0, cur,
 | 
					                cur = llm_build_ffn(ctx0, cur,
 | 
				
			||||||
                        model.layers[il].ffn_up,   NULL,
 | 
					                        model.layers[il].ffn_up,   NULL,                        NULL,
 | 
				
			||||||
                        model.layers[il].ffn_gate, NULL,
 | 
					                        model.layers[il].ffn_gate, NULL,                        NULL,
 | 
				
			||||||
                        model.layers[il].ffn_down, model.layers[il].ffn_down_b,
 | 
					                        model.layers[il].ffn_down, model.layers[il].ffn_down_b, NULL,
 | 
				
			||||||
                        NULL,
 | 
					                        NULL,
 | 
				
			||||||
                        LLM_FFN_GELU, LLM_FFN_PAR, cb, il);
 | 
					                        LLM_FFN_GELU, LLM_FFN_PAR, cb, il);
 | 
				
			||||||
            } else {
 | 
					            } else {
 | 
				
			||||||
                cur = llm_build_ffn(ctx0, cur,
 | 
					                cur = llm_build_ffn(ctx0, cur,
 | 
				
			||||||
                        model.layers[il].ffn_up,   NULL,
 | 
					                        model.layers[il].ffn_up,   NULL, NULL,
 | 
				
			||||||
                        model.layers[il].ffn_gate, NULL,
 | 
					                        model.layers[il].ffn_gate, NULL, NULL,
 | 
				
			||||||
                        model.layers[il].ffn_down, NULL,
 | 
					                        model.layers[il].ffn_down, NULL, NULL,
 | 
				
			||||||
                        NULL,
 | 
					                        NULL,
 | 
				
			||||||
                        LLM_FFN_SILU, LLM_FFN_PAR, cb, il);
 | 
					                        LLM_FFN_SILU, LLM_FFN_PAR, cb, il);
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
@@ -9138,9 +9160,9 @@ struct llm_build_context {
 | 
				
			|||||||
                cb(cur, "ffn_norm", il);
 | 
					                cb(cur, "ffn_norm", il);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                cur = llm_build_ffn(ctx0, cur,
 | 
					                cur = llm_build_ffn(ctx0, cur,
 | 
				
			||||||
                        model.layers[il].ffn_up,   model.layers[il].ffn_up_b,
 | 
					                        model.layers[il].ffn_up,   model.layers[il].ffn_up_b,   NULL,
 | 
				
			||||||
                        NULL,                      NULL,
 | 
					                        NULL,                      NULL,                        NULL,
 | 
				
			||||||
                        model.layers[il].ffn_down, model.layers[il].ffn_down_b,
 | 
					                        model.layers[il].ffn_down, model.layers[il].ffn_down_b, NULL,
 | 
				
			||||||
                        NULL,
 | 
					                        NULL,
 | 
				
			||||||
                        LLM_FFN_GELU, LLM_FFN_SEQ, cb, il);
 | 
					                        LLM_FFN_GELU, LLM_FFN_SEQ, cb, il);
 | 
				
			||||||
                cb(cur, "ffn_out", il);
 | 
					                cb(cur, "ffn_out", il);
 | 
				
			||||||
@@ -9276,9 +9298,9 @@ struct llm_build_context {
 | 
				
			|||||||
                        LLM_NORM, cb, il);
 | 
					                        LLM_NORM, cb, il);
 | 
				
			||||||
                cb(cur, "ffn_norm", il);
 | 
					                cb(cur, "ffn_norm", il);
 | 
				
			||||||
                cur = llm_build_ffn(ctx0, cur,
 | 
					                cur = llm_build_ffn(ctx0, cur,
 | 
				
			||||||
                        model.layers[il].ffn_up,   model.layers[il].ffn_up_b,
 | 
					                        model.layers[il].ffn_up,   model.layers[il].ffn_up_b,   NULL,
 | 
				
			||||||
                        NULL,                      NULL,
 | 
					                        NULL,                      NULL,                        NULL,
 | 
				
			||||||
                        model.layers[il].ffn_down, model.layers[il].ffn_down_b,
 | 
					                        model.layers[il].ffn_down, model.layers[il].ffn_down_b, NULL,
 | 
				
			||||||
                        model.layers[il].ffn_act,
 | 
					                        model.layers[il].ffn_act,
 | 
				
			||||||
                        LLM_FFN_GELU, LLM_FFN_SEQ, cb, il);
 | 
					                        LLM_FFN_GELU, LLM_FFN_SEQ, cb, il);
 | 
				
			||||||
                cb(cur, "ffn_out", il);
 | 
					                cb(cur, "ffn_out", il);
 | 
				
			||||||
@@ -9425,9 +9447,9 @@ struct llm_build_context {
 | 
				
			|||||||
                    cur = inpSA;
 | 
					                    cur = inpSA;
 | 
				
			||||||
                }
 | 
					                }
 | 
				
			||||||
                cur = llm_build_ffn(ctx0, cur,
 | 
					                cur = llm_build_ffn(ctx0, cur,
 | 
				
			||||||
                        model.layers[il].ffn_up,   NULL,
 | 
					                        model.layers[il].ffn_up,   NULL, NULL,
 | 
				
			||||||
                        model.layers[il].ffn_gate, NULL,
 | 
					                        model.layers[il].ffn_gate, NULL, NULL,
 | 
				
			||||||
                        model.layers[il].ffn_down, NULL,
 | 
					                        model.layers[il].ffn_down, NULL, NULL,
 | 
				
			||||||
                        NULL,
 | 
					                        NULL,
 | 
				
			||||||
                        LLM_FFN_SILU, LLM_FFN_PAR, cb, il);
 | 
					                        LLM_FFN_SILU, LLM_FFN_PAR, cb, il);
 | 
				
			||||||
                cb(cur, "ffn_out", il);
 | 
					                cb(cur, "ffn_out", il);
 | 
				
			||||||
@@ -9538,9 +9560,9 @@ struct llm_build_context {
 | 
				
			|||||||
                cb(cur, "ffn_norm", il);
 | 
					                cb(cur, "ffn_norm", il);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                cur = llm_build_ffn(ctx0, cur,
 | 
					                cur = llm_build_ffn(ctx0, cur,
 | 
				
			||||||
                        model.layers[il].ffn_up,   NULL,
 | 
					                        model.layers[il].ffn_up,   NULL, NULL,
 | 
				
			||||||
                        model.layers[il].ffn_gate, NULL,
 | 
					                        model.layers[il].ffn_gate, NULL, NULL,
 | 
				
			||||||
                        model.layers[il].ffn_down, NULL,
 | 
					                        model.layers[il].ffn_down, NULL, NULL,
 | 
				
			||||||
                        NULL,
 | 
					                        NULL,
 | 
				
			||||||
                        LLM_FFN_SILU, LLM_FFN_PAR, cb, il);
 | 
					                        LLM_FFN_SILU, LLM_FFN_PAR, cb, il);
 | 
				
			||||||
                cb(cur, "ffn_out", il);
 | 
					                cb(cur, "ffn_out", il);
 | 
				
			||||||
@@ -9651,9 +9673,9 @@ struct llm_build_context {
 | 
				
			|||||||
            cb(cur, "ffn_norm", il);
 | 
					            cb(cur, "ffn_norm", il);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            cur = llm_build_ffn(ctx0, cur,
 | 
					            cur = llm_build_ffn(ctx0, cur,
 | 
				
			||||||
                    model.layers[il].ffn_up,   NULL,
 | 
					                    model.layers[il].ffn_up,   NULL, NULL,
 | 
				
			||||||
                    model.layers[il].ffn_gate, NULL,
 | 
					                    model.layers[il].ffn_gate, NULL, NULL,
 | 
				
			||||||
                    model.layers[il].ffn_down, NULL,
 | 
					                    model.layers[il].ffn_down, NULL, NULL,
 | 
				
			||||||
                    NULL,
 | 
					                    NULL,
 | 
				
			||||||
                    LLM_FFN_SILU, LLM_FFN_PAR, cb, il);
 | 
					                    LLM_FFN_SILU, LLM_FFN_PAR, cb, il);
 | 
				
			||||||
            cb(cur, "ffn_out", il);
 | 
					            cb(cur, "ffn_out", il);
 | 
				
			||||||
@@ -9788,9 +9810,9 @@ struct llm_build_context {
 | 
				
			|||||||
                cb(cur_gate, "ffn_shexp_gate", il);
 | 
					                cb(cur_gate, "ffn_shexp_gate", il);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                ggml_tensor * cur_ffn = llm_build_ffn(ctx0, cur,
 | 
					                ggml_tensor * cur_ffn = llm_build_ffn(ctx0, cur,
 | 
				
			||||||
                        model.layers[il].ffn_up_shexp,   NULL,
 | 
					                        model.layers[il].ffn_up_shexp,   NULL, NULL,
 | 
				
			||||||
                        model.layers[il].ffn_gate_shexp, NULL,
 | 
					                        model.layers[il].ffn_gate_shexp, NULL, NULL,
 | 
				
			||||||
                        model.layers[il].ffn_down_shexp, NULL,
 | 
					                        model.layers[il].ffn_down_shexp, NULL, NULL,
 | 
				
			||||||
                        NULL,
 | 
					                        NULL,
 | 
				
			||||||
                        LLM_FFN_SILU, LLM_FFN_PAR, cb, il);
 | 
					                        LLM_FFN_SILU, LLM_FFN_PAR, cb, il);
 | 
				
			||||||
                cb(cur_ffn, "ffn_shexp", il);
 | 
					                cb(cur_ffn, "ffn_shexp", il);
 | 
				
			||||||
@@ -9917,9 +9939,9 @@ struct llm_build_context {
 | 
				
			|||||||
            // FF
 | 
					            // FF
 | 
				
			||||||
            {
 | 
					            {
 | 
				
			||||||
                ffn_output = llm_build_ffn(ctx0, attn_norm_output,
 | 
					                ffn_output = llm_build_ffn(ctx0, attn_norm_output,
 | 
				
			||||||
                        model.layers[il].ffn_up,   model.layers[il].ffn_up_b,
 | 
					                        model.layers[il].ffn_up,   model.layers[il].ffn_up_b,   NULL,
 | 
				
			||||||
                        NULL,                      NULL,
 | 
					                        NULL,                      NULL,                        NULL,
 | 
				
			||||||
                        model.layers[il].ffn_down, model.layers[il].ffn_down_b,
 | 
					                        model.layers[il].ffn_down, model.layers[il].ffn_down_b, NULL,
 | 
				
			||||||
                        NULL,
 | 
					                        NULL,
 | 
				
			||||||
                        LLM_FFN_GELU, LLM_FFN_SEQ, cb, il);
 | 
					                        LLM_FFN_GELU, LLM_FFN_SEQ, cb, il);
 | 
				
			||||||
                cb(ffn_output, "ffn_out", il);
 | 
					                cb(ffn_output, "ffn_out", il);
 | 
				
			||||||
@@ -10155,9 +10177,9 @@ struct llm_build_context {
 | 
				
			|||||||
            // feed-forward network
 | 
					            // feed-forward network
 | 
				
			||||||
            {
 | 
					            {
 | 
				
			||||||
                cur = llm_build_ffn(ctx0, cur,
 | 
					                cur = llm_build_ffn(ctx0, cur,
 | 
				
			||||||
                        model.layers[il].ffn_up, NULL,
 | 
					                        model.layers[il].ffn_up,   NULL, NULL,
 | 
				
			||||||
                        model.layers[il].ffn_gate, NULL,
 | 
					                        model.layers[il].ffn_gate, NULL, NULL,
 | 
				
			||||||
                        model.layers[il].ffn_down, NULL,
 | 
					                        model.layers[il].ffn_down, NULL, NULL,
 | 
				
			||||||
                        NULL,
 | 
					                        NULL,
 | 
				
			||||||
                        LLM_FFN_SILU, LLM_FFN_PAR, cb, il);
 | 
					                        LLM_FFN_SILU, LLM_FFN_PAR, cb, il);
 | 
				
			||||||
                cb(cur, "ffn_out", il);
 | 
					                cb(cur, "ffn_out", il);
 | 
				
			||||||
@@ -10263,9 +10285,9 @@ struct llm_build_context {
 | 
				
			|||||||
                cb(cur, "ffn_norm", il);
 | 
					                cb(cur, "ffn_norm", il);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                cur = llm_build_ffn(ctx0, cur,
 | 
					                cur = llm_build_ffn(ctx0, cur,
 | 
				
			||||||
                        model.layers[il].ffn_up,   model.layers[il].ffn_up_b,
 | 
					                        model.layers[il].ffn_up,   model.layers[il].ffn_up_b,   NULL,
 | 
				
			||||||
                        NULL,                      NULL,
 | 
					                        NULL,                      NULL,                        NULL,
 | 
				
			||||||
                        model.layers[il].ffn_down, model.layers[il].ffn_down_b,
 | 
					                        model.layers[il].ffn_down, model.layers[il].ffn_down_b, NULL,
 | 
				
			||||||
                        NULL,
 | 
					                        NULL,
 | 
				
			||||||
                        LLM_FFN_GELU, LLM_FFN_SEQ, cb, il);
 | 
					                        LLM_FFN_GELU, LLM_FFN_SEQ, cb, il);
 | 
				
			||||||
                cb(cur, "ffn_out", il);
 | 
					                cb(cur, "ffn_out", il);
 | 
				
			||||||
@@ -10374,9 +10396,9 @@ struct llm_build_context {
 | 
				
			|||||||
                cb(cur, "ffn_norm", il);
 | 
					                cb(cur, "ffn_norm", il);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                cur = llm_build_ffn(ctx0, cur,
 | 
					                cur = llm_build_ffn(ctx0, cur,
 | 
				
			||||||
                        model.layers[il].ffn_up,   model.layers[il].ffn_up_b,
 | 
					                        model.layers[il].ffn_up,   model.layers[il].ffn_up_b,   NULL,
 | 
				
			||||||
                        NULL,                      NULL,
 | 
					                        NULL,                      NULL,                        NULL,
 | 
				
			||||||
                        model.layers[il].ffn_down, model.layers[il].ffn_down_b,
 | 
					                        model.layers[il].ffn_down, model.layers[il].ffn_down_b, NULL,
 | 
				
			||||||
                        NULL,
 | 
					                        NULL,
 | 
				
			||||||
                        LLM_FFN_GELU, LLM_FFN_SEQ, cb, il);
 | 
					                        LLM_FFN_GELU, LLM_FFN_SEQ, cb, il);
 | 
				
			||||||
                cb(cur, "ffn_out", il);
 | 
					                cb(cur, "ffn_out", il);
 | 
				
			||||||
@@ -10491,9 +10513,9 @@ struct llm_build_context {
 | 
				
			|||||||
            cb(cur, "ffn_norm", il);
 | 
					            cb(cur, "ffn_norm", il);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            cur = llm_build_ffn(ctx0, cur,
 | 
					            cur = llm_build_ffn(ctx0, cur,
 | 
				
			||||||
                    model.layers[il].ffn_up,   NULL,
 | 
					                    model.layers[il].ffn_up,   NULL, NULL,
 | 
				
			||||||
                    model.layers[il].ffn_gate, NULL,
 | 
					                    model.layers[il].ffn_gate, NULL, NULL,
 | 
				
			||||||
                    model.layers[il].ffn_down, NULL,
 | 
					                    model.layers[il].ffn_down, NULL, NULL,
 | 
				
			||||||
                    NULL,
 | 
					                    NULL,
 | 
				
			||||||
                    LLM_FFN_SILU, LLM_FFN_PAR, cb, il);
 | 
					                    LLM_FFN_SILU, LLM_FFN_PAR, cb, il);
 | 
				
			||||||
            cb(cur, "ffn_out", il);
 | 
					            cb(cur, "ffn_out", il);
 | 
				
			||||||
@@ -10609,9 +10631,9 @@ struct llm_build_context {
 | 
				
			|||||||
            cb(cur, "ffn_norm", il);
 | 
					            cb(cur, "ffn_norm", il);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            cur = llm_build_ffn(ctx0, cur,
 | 
					            cur = llm_build_ffn(ctx0, cur,
 | 
				
			||||||
                    model.layers[il].ffn_up,   NULL,
 | 
					                    model.layers[il].ffn_up,   NULL, NULL,
 | 
				
			||||||
                    model.layers[il].ffn_gate, NULL,
 | 
					                    model.layers[il].ffn_gate, NULL, NULL,
 | 
				
			||||||
                    model.layers[il].ffn_down, NULL,
 | 
					                    model.layers[il].ffn_down, NULL, NULL,
 | 
				
			||||||
                    NULL,
 | 
					                    NULL,
 | 
				
			||||||
                    LLM_FFN_SILU, LLM_FFN_PAR, cb, il);
 | 
					                    LLM_FFN_SILU, LLM_FFN_PAR, cb, il);
 | 
				
			||||||
            cb(cur, "ffn_out", il);
 | 
					            cb(cur, "ffn_out", il);
 | 
				
			||||||
@@ -10746,9 +10768,9 @@ struct llm_build_context {
 | 
				
			|||||||
                cb(cur, "ffn_norm", il);
 | 
					                cb(cur, "ffn_norm", il);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                cur = llm_build_ffn(ctx0, cur,
 | 
					                cur = llm_build_ffn(ctx0, cur,
 | 
				
			||||||
                        model.layers[il].ffn_up,   NULL,
 | 
					                        model.layers[il].ffn_up,   NULL, NULL,
 | 
				
			||||||
                        model.layers[il].ffn_gate, NULL,
 | 
					                        model.layers[il].ffn_gate, NULL, NULL,
 | 
				
			||||||
                        model.layers[il].ffn_down, NULL,
 | 
					                        model.layers[il].ffn_down, NULL, NULL,
 | 
				
			||||||
                        NULL,
 | 
					                        NULL,
 | 
				
			||||||
                        LLM_FFN_SILU, LLM_FFN_PAR, cb, il);
 | 
					                        LLM_FFN_SILU, LLM_FFN_PAR, cb, il);
 | 
				
			||||||
                cb(cur, "ffn_out", il);
 | 
					                cb(cur, "ffn_out", il);
 | 
				
			||||||
@@ -10863,9 +10885,9 @@ struct llm_build_context {
 | 
				
			|||||||
            // feed-forward network
 | 
					            // feed-forward network
 | 
				
			||||||
            {
 | 
					            {
 | 
				
			||||||
                cur = llm_build_ffn(ctx0, cur,
 | 
					                cur = llm_build_ffn(ctx0, cur,
 | 
				
			||||||
                        model.layers[il].ffn_up, NULL,
 | 
					                        model.layers[il].ffn_up,   NULL, NULL,
 | 
				
			||||||
                        model.layers[il].ffn_gate, NULL,
 | 
					                        model.layers[il].ffn_gate, NULL, NULL,
 | 
				
			||||||
                        model.layers[il].ffn_down, NULL,
 | 
					                        model.layers[il].ffn_down, NULL, NULL,
 | 
				
			||||||
                        NULL,
 | 
					                        NULL,
 | 
				
			||||||
                        LLM_FFN_GELU, LLM_FFN_PAR, cb, il);
 | 
					                        LLM_FFN_GELU, LLM_FFN_PAR, cb, il);
 | 
				
			||||||
                cb(cur, "ffn_out", il);
 | 
					                cb(cur, "ffn_out", il);
 | 
				
			||||||
@@ -10983,9 +11005,9 @@ struct llm_build_context {
 | 
				
			|||||||
            cb(cur, "ffn_norm", il);
 | 
					            cb(cur, "ffn_norm", il);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            cur = llm_build_ffn(ctx0, cur,
 | 
					            cur = llm_build_ffn(ctx0, cur,
 | 
				
			||||||
                        model.layers[il].ffn_up,   model.layers[il].ffn_up_b,
 | 
					                        model.layers[il].ffn_up,   model.layers[il].ffn_up_b,   NULL,
 | 
				
			||||||
                        NULL,                      NULL,
 | 
					                        NULL,                      NULL,                        NULL,
 | 
				
			||||||
                        model.layers[il].ffn_down, model.layers[il].ffn_down_b,
 | 
					                        model.layers[il].ffn_down, model.layers[il].ffn_down_b, NULL,
 | 
				
			||||||
                        NULL,
 | 
					                        NULL,
 | 
				
			||||||
                        LLM_FFN_GELU, LLM_FFN_SEQ, cb, il);
 | 
					                        LLM_FFN_GELU, LLM_FFN_SEQ, cb, il);
 | 
				
			||||||
            cb(cur, "ffn_out", il);
 | 
					            cb(cur, "ffn_out", il);
 | 
				
			||||||
@@ -11271,9 +11293,9 @@ struct llm_build_context {
 | 
				
			|||||||
            // feed-forward network
 | 
					            // feed-forward network
 | 
				
			||||||
            {
 | 
					            {
 | 
				
			||||||
                cur = llm_build_ffn(ctx0, ffn_inp,
 | 
					                cur = llm_build_ffn(ctx0, ffn_inp,
 | 
				
			||||||
                        model.layers[il].ffn_up,   NULL,
 | 
					                        model.layers[il].ffn_up,   NULL, NULL,
 | 
				
			||||||
                        model.layers[il].ffn_gate, NULL,
 | 
					                        model.layers[il].ffn_gate, NULL, NULL,
 | 
				
			||||||
                        model.layers[il].ffn_down, NULL,
 | 
					                        model.layers[il].ffn_down, NULL, NULL,
 | 
				
			||||||
                        NULL,
 | 
					                        NULL,
 | 
				
			||||||
                        LLM_FFN_SILU, LLM_FFN_PAR, cb, il);
 | 
					                        LLM_FFN_SILU, LLM_FFN_PAR, cb, il);
 | 
				
			||||||
                cb(cur, "ffn_out", il);
 | 
					                cb(cur, "ffn_out", il);
 | 
				
			||||||
@@ -11408,9 +11430,9 @@ struct llm_build_context {
 | 
				
			|||||||
            cb(cur, "ffn_norm", il);
 | 
					            cb(cur, "ffn_norm", il);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            cur = llm_build_ffn(ctx0, cur,
 | 
					            cur = llm_build_ffn(ctx0, cur,
 | 
				
			||||||
                    model.layers[il].ffn_up,   NULL,
 | 
					                    model.layers[il].ffn_up,   NULL, NULL,
 | 
				
			||||||
                    model.layers[il].ffn_gate, NULL,
 | 
					                    model.layers[il].ffn_gate, NULL, NULL,
 | 
				
			||||||
                    model.layers[il].ffn_down, NULL,
 | 
					                    model.layers[il].ffn_down, NULL, NULL,
 | 
				
			||||||
                    NULL,
 | 
					                    NULL,
 | 
				
			||||||
                    LLM_FFN_SILU, LLM_FFN_PAR, cb, il);
 | 
					                    LLM_FFN_SILU, LLM_FFN_PAR, cb, il);
 | 
				
			||||||
            cb(cur, "ffn_out", il);
 | 
					            cb(cur, "ffn_out", il);
 | 
				
			||||||
@@ -11522,9 +11544,9 @@ struct llm_build_context {
 | 
				
			|||||||
                cb(cur, "ffn_norm", il);
 | 
					                cb(cur, "ffn_norm", il);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                cur = llm_build_ffn(ctx0, cur,
 | 
					                cur = llm_build_ffn(ctx0, cur,
 | 
				
			||||||
                        model.layers[il].ffn_up,   model.layers[il].ffn_up_b,
 | 
					                        model.layers[il].ffn_up,   model.layers[il].ffn_up_b,   NULL,
 | 
				
			||||||
                        NULL,                      NULL,
 | 
					                        NULL,                      NULL,                        NULL,
 | 
				
			||||||
                        model.layers[il].ffn_down, model.layers[il].ffn_down_b,
 | 
					                        model.layers[il].ffn_down, model.layers[il].ffn_down_b, NULL,
 | 
				
			||||||
                        NULL,
 | 
					                        NULL,
 | 
				
			||||||
                        LLM_FFN_GELU, LLM_FFN_SEQ, cb, il);
 | 
					                        LLM_FFN_GELU, LLM_FFN_SEQ, cb, il);
 | 
				
			||||||
                cb(cur, "ffn_out", il);
 | 
					                cb(cur, "ffn_out", il);
 | 
				
			||||||
@@ -11553,9 +11575,9 @@ struct llm_build_context {
 | 
				
			|||||||
                cb(cur, "ffn_norm", il);
 | 
					                cb(cur, "ffn_norm", il);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                cur = llm_build_ffn(ctx0, cur,
 | 
					                cur = llm_build_ffn(ctx0, cur,
 | 
				
			||||||
                        model.layers[il].ffn_up,   model.layers[il].ffn_up_b,
 | 
					                        model.layers[il].ffn_up,   model.layers[il].ffn_up_b,   NULL,
 | 
				
			||||||
                        NULL,                      NULL,
 | 
					                        NULL,                      NULL,                        NULL,
 | 
				
			||||||
                        model.layers[il].ffn_down, model.layers[il].ffn_down_b,
 | 
					                        model.layers[il].ffn_down, model.layers[il].ffn_down_b, NULL,
 | 
				
			||||||
                        NULL,
 | 
					                        NULL,
 | 
				
			||||||
                        LLM_FFN_GELU, LLM_FFN_SEQ, cb, il);
 | 
					                        LLM_FFN_GELU, LLM_FFN_SEQ, cb, il);
 | 
				
			||||||
                cb(cur, "ffn_out", il);
 | 
					                cb(cur, "ffn_out", il);
 | 
				
			||||||
@@ -11662,9 +11684,9 @@ struct llm_build_context {
 | 
				
			|||||||
            cb(cur, "ffn_norm", il);
 | 
					            cb(cur, "ffn_norm", il);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            cur = llm_build_ffn(ctx0, cur,
 | 
					            cur = llm_build_ffn(ctx0, cur,
 | 
				
			||||||
                    model.layers[il].ffn_up,   NULL,
 | 
					                    model.layers[il].ffn_up,   NULL, NULL,
 | 
				
			||||||
                    model.layers[il].ffn_gate, NULL,
 | 
					                    model.layers[il].ffn_gate, NULL, NULL,
 | 
				
			||||||
                    model.layers[il].ffn_down, NULL,
 | 
					                    model.layers[il].ffn_down, NULL, NULL,
 | 
				
			||||||
                    NULL,
 | 
					                    NULL,
 | 
				
			||||||
                    LLM_FFN_SILU, LLM_FFN_PAR, cb, il);
 | 
					                    LLM_FFN_SILU, LLM_FFN_PAR, cb, il);
 | 
				
			||||||
            cb(cur, "ffn_out", il);
 | 
					            cb(cur, "ffn_out", il);
 | 
				
			||||||
@@ -11884,9 +11906,9 @@ struct llm_build_context {
 | 
				
			|||||||
                cb(cur, "ffn_norm", il);
 | 
					                cb(cur, "ffn_norm", il);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                cur = llm_build_ffn(ctx0, cur,
 | 
					                cur = llm_build_ffn(ctx0, cur,
 | 
				
			||||||
                        model.layers[il].ffn_up,   NULL,
 | 
					                        model.layers[il].ffn_up,   NULL, NULL,
 | 
				
			||||||
                        model.layers[il].ffn_gate, NULL,
 | 
					                        model.layers[il].ffn_gate, NULL, NULL,
 | 
				
			||||||
                        model.layers[il].ffn_down, NULL,
 | 
					                        model.layers[il].ffn_down, NULL, NULL,
 | 
				
			||||||
                        NULL,
 | 
					                        NULL,
 | 
				
			||||||
                        LLM_FFN_SILU, LLM_FFN_PAR, cb, il);
 | 
					                        LLM_FFN_SILU, LLM_FFN_PAR, cb, il);
 | 
				
			||||||
                cb(cur, "ffn_out", il);
 | 
					                cb(cur, "ffn_out", il);
 | 
				
			||||||
@@ -11912,9 +11934,9 @@ struct llm_build_context {
 | 
				
			|||||||
                // FFN shared expert
 | 
					                // FFN shared expert
 | 
				
			||||||
                {
 | 
					                {
 | 
				
			||||||
                    ggml_tensor * ffn_shexp = llm_build_ffn(ctx0, cur,
 | 
					                    ggml_tensor * ffn_shexp = llm_build_ffn(ctx0, cur,
 | 
				
			||||||
                            model.layers[il].ffn_up_shexp,   NULL,
 | 
					                            model.layers[il].ffn_up_shexp,   NULL, NULL,
 | 
				
			||||||
                            model.layers[il].ffn_gate_shexp, NULL,
 | 
					                            model.layers[il].ffn_gate_shexp, NULL, NULL,
 | 
				
			||||||
                            model.layers[il].ffn_down_shexp, NULL,
 | 
					                            model.layers[il].ffn_down_shexp, NULL, NULL,
 | 
				
			||||||
                            NULL,
 | 
					                            NULL,
 | 
				
			||||||
                            LLM_FFN_SILU, LLM_FFN_PAR, cb, il);
 | 
					                            LLM_FFN_SILU, LLM_FFN_PAR, cb, il);
 | 
				
			||||||
                    cb(ffn_shexp, "ffn_shexp", il);
 | 
					                    cb(ffn_shexp, "ffn_shexp", il);
 | 
				
			||||||
@@ -12017,7 +12039,7 @@ struct llm_build_context {
 | 
				
			|||||||
                cb(Kcur, "Kcur", il);
 | 
					                cb(Kcur, "Kcur", il);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                cur = llm_build_kv(ctx0, model, hparams, cparams, kv_self, gf,
 | 
					                cur = llm_build_kv(ctx0, model, hparams, cparams, kv_self, gf,
 | 
				
			||||||
                        nullptr, nullptr,
 | 
					                        NULL, NULL,
 | 
				
			||||||
                        Kcur, Vcur, Qcur, KQ_mask, n_tokens, kv_head, n_kv, 1.0f/sqrtf(float(n_embd_head)), cb, il);
 | 
					                        Kcur, Vcur, Qcur, KQ_mask, n_tokens, kv_head, n_kv, 1.0f/sqrtf(float(n_embd_head)), cb, il);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                cur = llm_build_norm(ctx0, cur, hparams,
 | 
					                cur = llm_build_norm(ctx0, cur, hparams,
 | 
				
			||||||
@@ -12044,35 +12066,28 @@ struct llm_build_context {
 | 
				
			|||||||
            cb(ffn_inp, "ffn_inp", il);
 | 
					            cb(ffn_inp, "ffn_inp", il);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            // feed-forward forward
 | 
					            // feed-forward forward
 | 
				
			||||||
            if (model.layers[il].ffn_gate_inp == nullptr) {
 | 
					            cur = llm_build_norm(ctx0, ffn_inp, hparams,
 | 
				
			||||||
                cur = llm_build_norm(ctx0, ffn_inp, hparams,
 | 
					                    model.layers[il].ffn_norm, NULL,
 | 
				
			||||||
                        model.layers[il].ffn_norm, NULL,
 | 
					                    LLM_NORM_RMS, cb, il);
 | 
				
			||||||
                        LLM_NORM_RMS, cb, il);
 | 
					            cb(cur, "ffn_norm", il);
 | 
				
			||||||
                cb(cur, "ffn_norm", il);
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
                struct ggml_tensor *tmp = ggml_mul_mat(ctx0, model.layers[il].ffn_up, cur);
 | 
					            cur = llm_build_ffn(ctx0, cur,
 | 
				
			||||||
                tmp = ggml_mul(ctx0, tmp, model.layers[il].ffn_up_scale);
 | 
					                    model.layers[il].ffn_up,   NULL, model.layers[il].ffn_up_scale,
 | 
				
			||||||
                cb(tmp, "ffn_up", il);
 | 
					                    model.layers[il].ffn_gate, NULL, model.layers[il].ffn_gate_scale,
 | 
				
			||||||
 | 
					                    NULL,                      NULL, NULL,
 | 
				
			||||||
 | 
					                    NULL,
 | 
				
			||||||
 | 
					                    LLM_FFN_SILU, LLM_FFN_PAR, cb, il);
 | 
				
			||||||
 | 
					            cb(cur, "ffn_sub_out", il);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                cur = ggml_mul_mat(ctx0, model.layers[il].ffn_gate, cur);
 | 
					            cur = llm_build_norm(ctx0, cur, hparams,
 | 
				
			||||||
                cur = ggml_mul(ctx0, cur, model.layers[il].ffn_gate_scale);
 | 
					                            model.layers[il].ffn_sub_norm, NULL,
 | 
				
			||||||
                cb(cur, "ffn_gate", il);
 | 
					                            LLM_NORM_RMS, cb, il);
 | 
				
			||||||
 | 
					            cb(cur, "ffn_sub_norm", il);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                cur = ggml_silu(ctx0, cur);
 | 
					            cur = ggml_mul_mat(ctx0, model.layers[il].ffn_down, cur);
 | 
				
			||||||
                cb(cur, "ffn_silu", il);
 | 
					            cur = ggml_mul(ctx0, cur, model.layers[il].ffn_down_scale);
 | 
				
			||||||
 | 
					            cb(cur, "ffn_down", il);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                cur = ggml_mul(ctx0, cur, tmp);
 | 
					 | 
				
			||||||
                cb(cur, "ffn_gate_par", il);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
                cur = llm_build_norm(ctx0, cur, hparams,
 | 
					 | 
				
			||||||
                                model.layers[il].ffn_sub_norm, NULL,
 | 
					 | 
				
			||||||
                                LLM_NORM_RMS, cb, il);
 | 
					 | 
				
			||||||
                cb(cur, "ffn_sub_norm", il);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
                cur = ggml_mul_mat(ctx0, model.layers[il].ffn_down, cur);
 | 
					 | 
				
			||||||
                cur = ggml_mul(ctx0, cur, model.layers[il].ffn_down_scale);
 | 
					 | 
				
			||||||
                cb(cur, "ffn_down", il);
 | 
					 | 
				
			||||||
            }
 | 
					 | 
				
			||||||
            cur = ggml_add(ctx0, cur, ffn_inp);
 | 
					            cur = ggml_add(ctx0, cur, ffn_inp);
 | 
				
			||||||
            cb(cur, "l_out", il);
 | 
					            cb(cur, "l_out", il);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user