ggml_vec_mad1_f32

This commit is contained in:
Xuan Son Nguyen
2025-07-08 23:13:42 +02:00
parent 7af3fd98a1
commit a5ccf168f1
2 changed files with 31 additions and 4 deletions

View File

@@ -4671,10 +4671,7 @@ static void ggml_compute_forward_scale_f32(
// src0 is same shape as dst => same indices // src0 is same shape as dst => same indices
memcpy((char *)dst->data + i1*nb1, (char *)src0->data + i1*nb01, nc * sizeof(float)); memcpy((char *)dst->data + i1*nb1, (char *)src0->data + i1*nb01, nc * sizeof(float));
} }
ggml_vec_scale_f32(nc, (float *) ((char *) dst->data + i1*nb1), s); ggml_vec_mad1_f32(nc, (float *) ((char *) dst->data + i1*nb1), s, b);
if (b != 0.0f) {
ggml_vec_acc1_f32(nc, (float *) ((char *) dst->data + i1*nb1), b);
}
} }
} }

View File

@@ -351,6 +351,36 @@ inline static void ggml_vec_mad_f32_unroll(const int n, const int xs, const int
#endif #endif
} }
// In-place affine transform of a float vector: y[i] = y[i]*s + b for i in [0, n).
//
//   n - number of elements in y (a non-positive n leaves y untouched)
//   y - vector that is read and overwritten
//   s - scale applied to each element
//   b - bias added after scaling
inline static void ggml_vec_mad1_f32(const int n, float * y, const float s, const float b) {
#if defined(GGML_SIMD)
    // largest multiple of GGML_F32_STEP that fits in n (the mask assumes STEP is a power of two)
    const int np = (n & ~(GGML_F32_STEP - 1));

    GGML_F32_VEC vs = GGML_F32_VEC_SET1(s);
    GGML_F32_VEC vb = GGML_F32_VEC_SET1(b);

    GGML_F32_VEC ay[GGML_F32_ARR];

    for (int i = 0; i < np; i += GGML_F32_STEP) {
        for (int j = 0; j < GGML_F32_ARR; j++) {
            ay[j] = GGML_F32_VEC_LOAD(y + i + j*GGML_F32_EPR);
            // GGML_F32_VEC_FMA(a, b, c) is accumulator-first: a + b*c.
            // The bias must therefore be the first operand to obtain b + y*s;
            // passing (y, s, b) would compute y + s*b instead.
            ay[j] = GGML_F32_VEC_FMA(vb, ay[j], vs);
            GGML_F32_VEC_STORE(y + i + j*GGML_F32_EPR, ay[j]);
        }
    }

    // leftovers
    for (int i = np; i < n; ++i) {
        y[i] = y[i]*s + b;
    }
#else
    // scalar
    for (int i = 0; i < n; ++i) {
        // plain assignment: a compound `*=` here would yield y*(y*s + b)
        y[i] = y[i]*s + b;
    }
#endif
}
//inline static void ggml_vec_scale_f32(const int n, float * y, const float v) { for (int i = 0; i < n; ++i) y[i] *= v; } //inline static void ggml_vec_scale_f32(const int n, float * y, const float v) { for (int i = 0; i < n; ++i) y[i] *= v; }
inline static void ggml_vec_scale_f32(const int n, float * y, const float v) { inline static void ggml_vec_scale_f32(const int n, float * y, const float v) {
#if defined(GGML_USE_ACCELERATE) #if defined(GGML_USE_ACCELERATE)