From a88843aa10fcc220be21c9e430311062c56f6ef7 Mon Sep 17 00:00:00 2001 From: Aaron Teo Date: Fri, 20 Jun 2025 21:01:37 +0800 Subject: [PATCH] ggml-cpu: switch fp16->fp32 to inline asm and test Signed-off-by: Aaron Teo --- ggml/src/ggml-impl.h | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/ggml/src/ggml-impl.h b/ggml/src/ggml-impl.h index d28b3cce55..9b2c54c0b1 100644 --- a/ggml/src/ggml-impl.h +++ b/ggml/src/ggml-impl.h @@ -428,9 +428,17 @@ GGML_API void ggml_aligned_free(void * ptr, size_t size); // TODO: Determine if inline assembly is faster static inline float ggml_compute_fp16_to_fp32(ggml_fp16_t h) { - uint16x8_t v_h = vec_splats(h); - uint16x8_t nnpa_dlf16 = vec_convert_from_fp16(v_h, 0); - return vec_extend_to_fp32_hi(nnpa_dlf16, 0)[0]; + float f; /* Receives the result of the inline-asm sequence below, which replaces the removed vec_splats -> vec_convert_from_fp16 -> vec_extend_to_fp32_hi intrinsic chain. NOTE(review): the "TODO: Determine if inline assembly is faster" context comment still precedes this function although it now uses inline asm -- revisit in a follow-up. */ + __asm__ ( + "vlvgp %%v0, %1, %1\n" /* build v0 from input operand %1 (h), supplied as both source registers */ + "vreph %%v0, %%v0, 3\n" /* presumably replicates halfword element 3 across v0, matching the removed vec_splats(h) -- TODO confirm */ + "vcnf %%v0, %%v0, 0, 1\n" /* presumably the instruction behind the removed vec_convert_from_fp16(v_h, 0) -- TODO confirm mask operands 0, 1 */ + "vclfnh %%v0, %%v0, 2, 0\n" /* presumably the instruction behind the removed vec_extend_to_fp32_hi(nnpa_dlf16, 0) -- TODO confirm mask operands 2, 0 */ + "ler %0, %%f0\n" : /* moves the value in f0 to output operand %0; assumes f0 aliases the leading part of v0 -- TODO confirm */ + /* out: f, constrained to a floating-point register ("=f") */ "=f"(f) : + /* in: h, constrained to a general-purpose register ("r") */ "r"(h) : + /* clobber: v0 and f0 are hard-coded in the template above, so both are declared here */ "v0", "f0"); + return f; } // TODO: Determine if inline assembly is faster