ggml : fix GGML_F32_VEC_FMA argument order in ggml_vec_mad1_f32 (#16307)

* fix GGML_F32_VEC_FMA argument order in ggml_vec_mad1_f32
* add test that fails on SIMD
2025-10-27 08:21:30 +00:00 · 2025-09-28 23:15:03 +02:00
parent bd0af02fc9
commit b887d2f341
2 changed files with 2 additions and 1 deletion
--- a/ggml/src/ggml-cpu/vec.h
+++ b/ggml/src/ggml-cpu/vec.h
@@ -610,7 +610,7 @@ inline static void ggml_vec_mad1_f32(const int n, float * y, const float * x, co
        for (int i = 0; i < np; i += GGML_F32_STEP) {
            for (int j = 0; j < GGML_F32_ARR; j++) {
                ay[j] = GGML_F32_VEC_LOAD(x + i + j*GGML_F32_EPR);
-                ay[j] = GGML_F32_VEC_FMA(ay[j], vs, vb);
+                ay[j] = GGML_F32_VEC_FMA(vb, ay[j], vs);

                GGML_F32_VEC_STORE(y + i + j*GGML_F32_EPR, ay[j]);
            }
--- a/tests/test-backend-ops.cpp
+++ b/tests/test-backend-ops.cpp
@@ -6155,6 +6155,7 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
    test_cases.emplace_back(new test_add1());
    test_cases.emplace_back(new test_scale());
    test_cases.emplace_back(new test_scale(GGML_TYPE_F32, {10, 10, 10, 10}, 2.0f, 1.0f));
+    test_cases.emplace_back(new test_scale(GGML_TYPE_F32, {100, 10, 10, 10}, 2.0f, 1.0f));
    test_cases.emplace_back(new test_softcap(GGML_TYPE_F32, {10, 10, 10, 10}, 50.0f));
    test_cases.emplace_back(new test_silu_back());