From ebbad7796df3caf794587a168372cf589d665930 Mon Sep 17 00:00:00 2001 From: Xuan Son Nguyen Date: Wed, 9 Jul 2025 14:11:53 +0200 Subject: [PATCH] add x param to ggml_vec_mad1_f32 --- ggml/src/ggml-cpu/ops.cpp | 10 +++++----- ggml/src/ggml-cpu/vec.h | 12 ++++++------ 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/ggml/src/ggml-cpu/ops.cpp b/ggml/src/ggml-cpu/ops.cpp index 5a07819038..fd77e9a6ab 100644 --- a/ggml/src/ggml-cpu/ops.cpp +++ b/ggml/src/ggml-cpu/ops.cpp @@ -4670,17 +4670,17 @@ static void ggml_compute_forward_scale_f32( for (int i1 = ir0; i1 < ir1; i1++) { if (dst->data != src0->data) { // src0 is same shape as dst => same indices + // TODO: add x parameter to ggml_vec_scale_f32 and remove this memcpy memcpy((char *)dst->data + i1*nb1, (char *)src0->data + i1*nb01, nc * sizeof(float)); } ggml_vec_scale_f32(nc, (float *) ((char *) dst->data + i1*nb1), s); } } else { for (int i1 = ir0; i1 < ir1; i1++) { - if (dst->data != src0->data) { - // src0 is same shape as dst => same indices - memcpy((char *)dst->data + i1*nb1, (char *)src0->data + i1*nb01, nc * sizeof(float)); - } - ggml_vec_mad1_f32(nc, (float *) ((char *) dst->data + i1*nb1), s, b); + ggml_vec_mad1_f32(nc, + (float *) ((char *) dst->data + i1*nb1), + (float *) ((char *) src0->data + i1*nb1), + s, b); } } } diff --git a/ggml/src/ggml-cpu/vec.h b/ggml/src/ggml-cpu/vec.h index 4652598ead..d18783a00a 100644 --- a/ggml/src/ggml-cpu/vec.h +++ b/ggml/src/ggml-cpu/vec.h @@ -351,14 +351,14 @@ inline static void ggml_vec_mad_f32_unroll(const int n, const int xs, const int #endif } -inline static void ggml_vec_mad1_f32(const int n, float * y, const float s, const float b) { +inline static void ggml_vec_mad1_f32(const int n, float * y, const float * x, const float s, const float b) { #if defined(GGML_USE_ACCELERATE) - vDSP_vsmsa(y, 1, &s, &b, y, 1, n); + vDSP_vsmsa(x, 1, &s, &b, y, 1, n); #elif defined(GGML_SIMD) #if defined(__ARM_FEATURE_SVE) // scalar ; TODO: Write SVE code for (int i = 0; i < n; ++i) { - y[i] = y[i]*s + b; + y[i] = x[i]*s + b; } #else const int np = (n & ~(GGML_F32_STEP - 1)); @@ -370,7 +370,7 @@ inline static void ggml_vec_mad1_f32(const int n, float * y, const float s, cons for (int i = 0; i < np; i += GGML_F32_STEP) { for (int j = 0; j < GGML_F32_ARR; j++) { - ay[j] = GGML_F32_VEC_LOAD(y + i + j*GGML_F32_EPR); + ay[j] = GGML_F32_VEC_LOAD(x + i + j*GGML_F32_EPR); ay[j] = GGML_F32_VEC_FMA(ay[j], vs, vb); GGML_F32_VEC_STORE(y + i + j*GGML_F32_EPR, ay[j]); @@ -379,13 +379,13 @@ inline static void ggml_vec_mad1_f32(const int n, float * y, const float s, cons // leftovers for (int i = np; i < n; ++i) { - y[i] = y[i]*s + b; + y[i] = x[i]*s + b; } #endif #else // scalar for (int i = 0; i < n; ++i) { - y[i] = y[i]*s + b; + y[i] = x[i]*s + b; } #endif }