mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-11-16 11:27:03 +00:00
injected mup
This commit is contained in:
@@ -545,10 +545,6 @@ ggml_tensor * llm_graph_context::build_ffn(
|
||||
case LLM_FFN_PAR:
|
||||
{
|
||||
cur = build_lora_mm(gate, cur);
|
||||
if (arch == LLM_ARCH_FALCON_H1) {
|
||||
cur = ggml_scale(ctx0, cur, hparams.mlp_gate_multiplier);
|
||||
}
|
||||
|
||||
cb(cur, "ffn_gate", il);
|
||||
} break;
|
||||
}
|
||||
@@ -635,9 +631,6 @@ ggml_tensor * llm_graph_context::build_ffn(
|
||||
// GLM4 seems to have numerical issues with half-precision accumulators
|
||||
ggml_mul_mat_set_prec(cur, GGML_PREC_F32);
|
||||
}
|
||||
if (arch == LLM_ARCH_FALCON_H1) {
|
||||
cur = ggml_scale(ctx0, cur, hparams.mlp_down_multiplier);
|
||||
}
|
||||
}
|
||||
|
||||
if (down_b) {
|
||||
|
||||
Reference in New Issue
Block a user