ggml-cpu : add RISC-V vector intrinsic support for silu and cvar operations (#17227)

Signed-off-by: Wang Yang <yangwang@iscas.ac.cn>
This commit is contained in:
ixgbe
2025-11-13 20:13:32 +08:00
committed by GitHub
parent 0cfb19166b
commit 1215dde7b0

View File

@@ -360,6 +360,13 @@ void ggml_vec_silu_f32(const int n, float * y, const float * x) {
for (; i + 3 < n; i += 4) {
vst1q_f32(y + i, ggml_v_silu(vld1q_f32(x + i)));
}
#elif defined(__riscv_v_intrinsic)
for (int vl; i < n; i += vl) {
vl = __riscv_vsetvl_e32m2(n - i);
vfloat32m2_t vx = __riscv_vle32_v_f32m2(&x[i], vl);
vfloat32m2_t vy = ggml_v_silu_m2(vx, vl);
__riscv_vse32_v_f32m2(&y[i], vy, vl);
}
#endif
for (; i < n; ++i) {
y[i] = ggml_silu_f32(x[i]);
@@ -460,6 +467,16 @@ ggml_float ggml_vec_cvar_f32(const int n, float * y, const float * x, const floa
val = vec_mul(val, val);
sum += (ggml_float)vec_hsum_f32x4(val);
}
#elif defined(__riscv_v_intrinsic)
vfloat64m1_t vsum = __riscv_vfmv_v_f_f64m1(0, 1);
for (int vl; i < n; i += vl) {
vl = __riscv_vsetvl_e32m2(n - i);
vfloat32m2_t val = __riscv_vfsub_vf_f32m2(__riscv_vle32_v_f32m2(&x[i], vl), mean, vl);
__riscv_vse32_v_f32m2(&y[i], val, vl);
val = __riscv_vfmul_vv_f32m2(val, val, vl);
vsum = __riscv_vfwredusum_vs_f32m2_f64m1(val, vsum, vl);
}
sum = (ggml_float)__riscv_vfmv_f_s_f64m1_f64(vsum);
#endif
for (; i < n; ++i) {
float val = x[i] - mean;