mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-10-29 08:41:22 +00:00
Merge commit 'refs/pull/14417/head' of github.com:ggerganov/llama.cpp into xsn/ggml_scale_bias
This commit is contained in:
@@ -4670,17 +4670,17 @@ static void ggml_compute_forward_scale_f32(
|
|||||||
for (int i1 = ir0; i1 < ir1; i1++) {
|
for (int i1 = ir0; i1 < ir1; i1++) {
|
||||||
if (dst->data != src0->data) {
|
if (dst->data != src0->data) {
|
||||||
// src0 is same shape as dst => same indices
|
// src0 is same shape as dst => same indices
|
||||||
|
// TODO: add x parameter to ggml_vec_scale_f32 and remove this memcpy
|
||||||
memcpy((char *)dst->data + i1*nb1, (char *)src0->data + i1*nb01, nc * sizeof(float));
|
memcpy((char *)dst->data + i1*nb1, (char *)src0->data + i1*nb01, nc * sizeof(float));
|
||||||
}
|
}
|
||||||
ggml_vec_scale_f32(nc, (float *) ((char *) dst->data + i1*nb1), s);
|
ggml_vec_scale_f32(nc, (float *) ((char *) dst->data + i1*nb1), s);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
for (int i1 = ir0; i1 < ir1; i1++) {
|
for (int i1 = ir0; i1 < ir1; i1++) {
|
||||||
if (dst->data != src0->data) {
|
ggml_vec_mad1_f32(nc,
|
||||||
// src0 is same shape as dst => same indices
|
(float *) ((char *) dst->data + i1*nb1),
|
||||||
memcpy((char *)dst->data + i1*nb1, (char *)src0->data + i1*nb01, nc * sizeof(float));
|
(float *) ((char *) src0->data + i1*nb1),
|
||||||
}
|
s, b);
|
||||||
ggml_vec_mad1_f32(nc, (float *) ((char *) dst->data + i1*nb1), s, b);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -351,14 +351,14 @@ inline static void ggml_vec_mad_f32_unroll(const int n, const int xs, const int
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
inline static void ggml_vec_mad1_f32(const int n, float * y, const float s, const float b) {
|
inline static void ggml_vec_mad1_f32(const int n, float * y, const float * x, const float s, const float b) {
|
||||||
#if defined(GGML_USE_ACCELERATE)
|
#if defined(GGML_USE_ACCELERATE)
|
||||||
vDSP_vsmsa(y, 1, &s, &b, y, 1, n);
|
vDSP_vsmsa(x, 1, &s, &b, y, 1, n);
|
||||||
#elif defined(GGML_SIMD)
|
#elif defined(GGML_SIMD)
|
||||||
#if defined(__ARM_FEATURE_SVE)
|
#if defined(__ARM_FEATURE_SVE)
|
||||||
// scalar ; TODO: Write SVE code
|
// scalar ; TODO: Write SVE code
|
||||||
for (int i = 0; i < n; ++i) {
|
for (int i = 0; i < n; ++i) {
|
||||||
y[i] = y[i]*s + b;
|
y[i] = x[i]*s + b;
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
const int np = (n & ~(GGML_F32_STEP - 1));
|
const int np = (n & ~(GGML_F32_STEP - 1));
|
||||||
@@ -370,7 +370,7 @@ inline static void ggml_vec_mad1_f32(const int n, float * y, const float s, cons
|
|||||||
|
|
||||||
for (int i = 0; i < np; i += GGML_F32_STEP) {
|
for (int i = 0; i < np; i += GGML_F32_STEP) {
|
||||||
for (int j = 0; j < GGML_F32_ARR; j++) {
|
for (int j = 0; j < GGML_F32_ARR; j++) {
|
||||||
ay[j] = GGML_F32_VEC_LOAD(y + i + j*GGML_F32_EPR);
|
ay[j] = GGML_F32_VEC_LOAD(x + i + j*GGML_F32_EPR);
|
||||||
ay[j] = GGML_F32_VEC_FMA(ay[j], vs, vb);
|
ay[j] = GGML_F32_VEC_FMA(ay[j], vs, vb);
|
||||||
|
|
||||||
GGML_F32_VEC_STORE(y + i + j*GGML_F32_EPR, ay[j]);
|
GGML_F32_VEC_STORE(y + i + j*GGML_F32_EPR, ay[j]);
|
||||||
@@ -379,13 +379,13 @@ inline static void ggml_vec_mad1_f32(const int n, float * y, const float s, cons
|
|||||||
|
|
||||||
// leftovers
|
// leftovers
|
||||||
for (int i = np; i < n; ++i) {
|
for (int i = np; i < n; ++i) {
|
||||||
y[i] = y[i]*s + b;
|
y[i] = x[i]*s + b;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
#else
|
#else
|
||||||
// scalar
|
// scalar
|
||||||
for (int i = 0; i < n; ++i) {
|
for (int i = 0; i < n; ++i) {
|
||||||
y[i] = y[i]*s + b;
|
y[i] = x[i]*s + b;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user