ggml: move ggml_table_f32_f16 to ggml-cpu

ref: https://github.com/ggml-org/llama.cpp/pull/14317#discussion_r2164775006

Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>
Author: Aaron Teo <aaron.teo1@ibm.com>
Date: 2025-06-25 14:57:58 +08:00
parent 1b23fec005
commit 9e40d984ad
4 changed files with 5 additions and 25 deletions


@@ -3479,6 +3479,7 @@ void ggml_cpu_init(void) {
             ggml_fp16_t fp16;
         } u = {i};
         float f = GGML_CPU_FP16_TO_FP32(u.fp16);
+        ggml_table_f32_f16[i] = GGML_COMPUTE_FP16_TO_FP32(u.fp16);
         ggml_table_gelu_f16[i] = GGML_CPU_FP32_TO_FP16(ggml_gelu_f32(f));
         ggml_table_gelu_quick_f16[i] = GGML_CPU_FP32_TO_FP16(ggml_gelu_quick_f32(f));
     }
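For context, the added line sits inside the table-building loop of ggml_cpu_init(). A rough sketch of the whole loop, reconstructed around the context lines above (the loop header and the first union member are assumptions, not part of this diff):

    for (int i = 0; i < (1 << 16); ++i) {
        union {
            uint16_t u16;
            ggml_fp16_t fp16;
        } u = {i}; // reinterpret the loop index as an f16 bit pattern
        float f = GGML_CPU_FP16_TO_FP32(u.fp16);
        // cache the computed conversion once so later lookups are a single array load
        ggml_table_f32_f16[i] = GGML_COMPUTE_FP16_TO_FP32(u.fp16);
        ggml_table_gelu_f16[i] = GGML_CPU_FP32_TO_FP16(ggml_gelu_f32(f));
        ggml_table_gelu_quick_f16[i] = GGML_CPU_FP32_TO_FP16(ggml_gelu_quick_f32(f));
    }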


@@ -137,6 +137,10 @@
 }
 #endif
 
+// precomputed f32 table for f16 (256 KB)
+// defined in ggml-cpu.c, initialized in ggml_cpu_init()
+GGML_API float ggml_table_f32_f16[1 << 16];
+
 // On ARM NEON, it's quicker to directly convert x -> x instead of calling into ggml_lookup_fp16_to_fp32,
 // so we define GGML_CPU_FP16_TO_FP32 and GGML_CPU_FP32_TO_FP16 elsewhere for NEON.
 // This is also true for POWER9.
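The 1 << 16 float entries account for the 256 KB noted in the comment (65536 * 4 bytes). For reference, a minimal sketch of the lookup helper that consumes the table; the name ggml_lookup_fp16_to_fp32 comes from the comment above, but this body is an illustrative assumption, not part of the diff:

    #include <stdint.h>
    #include <string.h>

    // ggml_fp16_t and ggml_table_f32_f16 come from the ggml-cpu headers.
    // Illustrative body (assumed): the f16 bit pattern indexes straight
    // into the 65536-entry table filled in ggml_cpu_init().
    static inline float ggml_lookup_fp16_to_fp32(ggml_fp16_t f) {
        uint16_t s;
        memcpy(&s, &f, sizeof(uint16_t)); // type-pun the bits without strict-aliasing issues
        return ggml_table_f32_f16[s];
    }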