diff --git a/src/llama-model.cpp b/src/llama-model.cpp
index 421ebe9628..cdc753965c 100644
--- a/src/llama-model.cpp
+++ b/src/llama-model.cpp
@@ -14868,7 +14868,7 @@ struct llm_build_falcon_h1 : public llm_graph_context {
         // TODO: skip computing output earlier for unused tokens

         y = ggml_add(ctx0, y, ggml_mul(ctx0, x, model.layers[il].ssm_d));
-        y = ggml_mul(ctx0, y, ggml_silu(ctx0, ggml_cont(ctx0, z)));
+        y = ggml_swiglu_split(ctx0, ggml_cont(ctx0, z), y);

         // grouped RMS norm
         if (model.layers[il].ssm_norm) {