	ggml: move ggml_table_f32_f16 to ggml-cpu
ref: https://github.com/ggml-org/llama.cpp/pull/14317#discussion_r2164775006

Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>
@@ -3479,6 +3479,7 @@ void ggml_cpu_init(void) {
                     ggml_fp16_t fp16;
                 } u = {i};
                 float f = GGML_CPU_FP16_TO_FP32(u.fp16);
+                ggml_table_f32_f16[i] = GGML_COMPUTE_FP16_TO_FP32(u.fp16);
                 ggml_table_gelu_f16[i] = GGML_CPU_FP32_TO_FP16(ggml_gelu_f32(f));
                 ggml_table_gelu_quick_f16[i] = GGML_CPU_FP32_TO_FP16(ggml_gelu_quick_f32(f));
             }
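A note on the idiom this hunk extends: the surrounding loop enumerates all 2^16 bit patterns and reinterprets each one as fp16 through a union, which is defined type punning in C. A minimal sketch of the idiom, under the assumption that ggml_fp16_t is a 16-bit storage type (the typedef below is illustrative, not ggml's):

#include <stdint.h>

typedef uint16_t example_fp16_t; // stand-in for ggml_fp16_t (assumption for illustration)

// Reinterpret an integer bit pattern as fp16 storage. Reading a union
// member other than the one last written is defined behavior in C,
// which is why a plain loop counter can enumerate every fp16 value.
static example_fp16_t bits_to_fp16(uint16_t i) {
    union {
        uint16_t       u16;
        example_fp16_t fp16;
    } u = { i };
    return u.fp16;
}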
@@ -137,6 +137,10 @@
     }
 #endif
 
+// precomputed f32 table for f16 (256 KB)
+// defined in ggml.c, initialized in ggml_init()
+GGML_API float ggml_table_f32_f16[1 << 16];
+
 // On ARM NEON, it's quicker to directly convert x -> x instead of calling into ggml_lookup_fp16_to_fp32,
 // so we define GGML_CPU_FP16_TO_FP32 and GGML_CPU_FP32_TO_FP16 elsewhere for NEON.
 // This is also true for POWER9.
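The "256 KB" figure in the moved comment checks out against the declaration: (1 << 16) float entries at 4 bytes each is 65536 * 4 = 262144 bytes = 256 KiB. A compile-time sketch of that arithmetic (C11; not part of the change):

#include <assert.h>

// 2^16 fp16 bit patterns, one 4-byte float per pattern: 256 KiB total.
static_assert(sizeof(float[1 << 16]) == 256 * 1024, "fp16 -> f32 table should be 256 KiB");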
@@ -393,10 +393,6 @@ static inline ggml_fp16_t ggml_compute_fp32_to_fp16(float f) {
 #define GGML_FP16_TO_FP32(x) GGML_COMPUTE_FP16_TO_FP32(x)
 #define GGML_FP32_TO_FP16(x) GGML_COMPUTE_FP32_TO_FP16(x)
 
-// precomputed f32 table for f16 (256 KB)
-// defined in ggml.c, initialized in ggml_init()
-GGML_API float ggml_table_f32_f16[1 << 16];
-
 /**
  * Converts brain16 to float32.
  *
@@ -1414,27 +1414,6 @@ static inline bool ggml_can_repeat_rows(const struct ggml_tensor * t0, const str
 ////////////////////////////////////////////////////////////////////////////////
 
 struct ggml_context * ggml_init(struct ggml_init_params params) {
-    static bool is_first_call = true;
-
-    ggml_critical_section_start();
-
-    if (is_first_call) {
-        // initialize time system (required on Windows)
-        ggml_time_init();
-
-        for (int i = 0; i < (1 << 16); ++i) {
-            union {
-                uint16_t u16;
-                ggml_fp16_t fp16;
-            } u = {i};
-            ggml_table_f32_f16[i] = GGML_COMPUTE_FP16_TO_FP32(u.fp16);
-        }
-
-        is_first_call = false;
-    }
-
-    ggml_critical_section_end();
-
     struct ggml_context * ctx = GGML_MALLOC(sizeof(struct ggml_context));
 
     // allow to call ggml_init with 0 size
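Taken together, the hunks move ggml_table_f32_f16 out of ggml.c, where ggml_init() filled it, and into the CPU backend, where ggml_cpu_init() fills it alongside the existing GELU tables. For illustration, here is a self-contained sketch of the underlying fp16 -> f32 lookup-table technique; all names are hypothetical, and fp16_bits_to_f32 is a plain software decoder standing in for GGML_COMPUTE_FP16_TO_FP32, which may compile to a hardware conversion:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

// Illustrative software decoder for an IEEE-754 binary16 bit pattern.
static float fp16_bits_to_f32(uint16_t h) {
    uint32_t sign = (uint32_t)(h >> 15) << 31;
    uint32_t exp  = (h >> 10) & 0x1F;
    uint32_t mant = h & 0x3FF;
    uint32_t bits;

    if (exp == 0x1F) {                 // inf / NaN
        bits = sign | 0x7F800000u | (mant << 13);
    } else if (exp != 0) {             // normal number
        bits = sign | ((exp - 15 + 127) << 23) | (mant << 13);
    } else if (mant != 0) {            // subnormal: renormalize the mantissa
        int e = -1;
        do { mant <<= 1; e++; } while ((mant & 0x400) == 0);
        bits = sign | ((uint32_t)(127 - 15 - e) << 23) | ((mant & 0x3FF) << 13);
    } else {                           // +/- zero
        bits = sign;
    }

    float f;
    memcpy(&f, &bits, sizeof f);       // bit cast, avoids strict-aliasing issues
    return f;
}

// One float per fp16 bit pattern: 65536 * 4 bytes = 256 KiB,
// mirroring ggml_table_f32_f16.
static float table_f32_f16[1 << 16];

static void init_table(void) {
    for (uint32_t i = 0; i < (1u << 16); ++i) {
        table_f32_f16[i] = fp16_bits_to_f32((uint16_t)i);
    }
}

// After initialization, conversion is a single indexed load, which is
// why the table belongs next to the CPU backend's hot paths.
static inline float lookup_fp16_to_fp32(uint16_t h) {
    return table_f32_f16[h];
}

int main(void) {
    init_table();
    printf("%f\n", lookup_fp16_to_fp32(0x3C00)); //  1.000000
    printf("%f\n", lookup_fp16_to_fp32(0xC000)); // -2.000000
    return 0;
}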