mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-10-30 08:42:00 +00:00 
			
		
		
		
	ggml : simplify Arm fp16 CPU logic (ggml/1177)
* ggml : simplify Arm fp16 CPU logic ggml-ci * cont : bring back CUDA/MUSA checks ggml-ci
This commit is contained in:
		| @@ -16,14 +16,6 @@ | ||||
| #include <arm_sve.h> | ||||
| #endif // __ARM_FEATURE_SVE | ||||
|  | ||||
| #if defined(__ARM_NEON) && !defined(__CUDACC__) && !defined(__MUSACC__) | ||||
| // if YCM cannot find <arm_neon.h>, make a symbolic link to it, for example: | ||||
| // | ||||
| //   $ ln -sfn /Library/Developer/CommandLineTools/usr/lib/clang/13.1.6/include/arm_neon.h ./src/ | ||||
| // | ||||
| #include <arm_neon.h> | ||||
| #endif | ||||
|  | ||||
| #if defined(__F16C__) | ||||
| #include <immintrin.h> | ||||
| #endif | ||||
| @@ -311,29 +303,35 @@ GGML_API void ggml_aligned_free(void * ptr, size_t size); | ||||
|  | ||||
| // FP16 to FP32 conversion | ||||
|  | ||||
| #if defined(__ARM_NEON) | ||||
|     #if defined(_MSC_VER) || (defined(__CUDACC__) && __CUDACC_VER_MAJOR__ <= 11) | ||||
|         typedef uint16_t ggml_fp16_internal_t; | ||||
|     #else | ||||
|         typedef __fp16 ggml_fp16_internal_t; | ||||
|     #endif | ||||
| #endif | ||||
| // 16-bit float | ||||
| // on Arm, we use __fp16 | ||||
| // on x86, we use uint16_t | ||||
| // | ||||
| // for old CUDA compilers (<= 11), we use uint16_t: ref https://github.com/ggml-org/llama.cpp/pull/10616 | ||||
| // for     MUSA compilers        , we use uint16_t: ref https://github.com/ggml-org/llama.cpp/pull/11843 | ||||
| // | ||||
| #if defined(__ARM_NEON) && !(defined(__CUDACC__) && __CUDACC_VER_MAJOR__ <= 11) && !defined(__MUSACC__) | ||||
|  | ||||
|     // if YCM cannot find <arm_neon.h>, make a symbolic link to it, for example: | ||||
|     // | ||||
|     //   $ ln -sfn /Library/Developer/CommandLineTools/usr/lib/clang/13.1.6/include/arm_neon.h ./src/ | ||||
|     // | ||||
|     #include <arm_neon.h> | ||||
|  | ||||
| #if defined(__ARM_NEON) && !defined(_MSC_VER) && !(defined(__CUDACC__) && __CUDACC_VER_MAJOR__ <= 11) | ||||
|     #define GGML_COMPUTE_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x) | ||||
|     #define GGML_COMPUTE_FP32_TO_FP16(x) ggml_compute_fp32_to_fp16(x) | ||||
|  | ||||
|     #define GGML_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x) | ||||
|  | ||||
|     static inline float ggml_compute_fp16_to_fp32(ggml_fp16_t h) { | ||||
|         ggml_fp16_internal_t tmp; | ||||
|         __fp16 tmp; | ||||
|         memcpy(&tmp, &h, sizeof(ggml_fp16_t)); | ||||
|         return (float)tmp; | ||||
|     } | ||||
|  | ||||
|     static inline ggml_fp16_t ggml_compute_fp32_to_fp16(float f) { | ||||
|         ggml_fp16_t res; | ||||
|         ggml_fp16_internal_t tmp = f; | ||||
|         __fp16 tmp = f; | ||||
|         memcpy(&res, &tmp, sizeof(ggml_fp16_t)); | ||||
|         return res; | ||||
|     } | ||||
| @@ -485,7 +483,7 @@ GGML_API void ggml_aligned_free(void * ptr, size_t size); | ||||
|     #define GGML_COMPUTE_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x) | ||||
|     #define GGML_COMPUTE_FP32_TO_FP16(x) ggml_compute_fp32_to_fp16(x) | ||||
|  | ||||
| #endif // defined(__ARM_NEON) && (!defined(__MSC_VER) | ||||
| #endif // defined(__ARM_NEON) && !(defined(__CUDACC__) && __CUDACC_VER_MAJOR__ <= 11) && !defined(__MUSACC__) | ||||
|  | ||||
| // precomputed f32 table for f16 (256 KB) | ||||
| // defined in ggml.c, initialized in ggml_init() | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Georgi Gerganov
					Georgi Gerganov