mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-10-31 08:51:55 +00:00 
			
		
		
		
	ggml : refactor forward_dup for cpu backend (#16062)
* ggml : refactor forward_dup for cpu backend
* clean up a bit
* add quant/dequant perf test
This commit is contained in:
		| @@ -28,6 +28,14 @@ static inline float bf16_to_f32(ggml_bf16_t x) { | ||||
|     return GGML_BF16_TO_FP32(x); | ||||
| } | ||||
|  | ||||
| static inline float i32_to_f32(int32_t x) { | ||||
|     return x; | ||||
| } | ||||
|  | ||||
| static inline int32_t f32_to_i32(float x) { | ||||
|     return x; | ||||
| } | ||||
|  | ||||
| static inline float f32_to_f32(float x) { | ||||
|     return x; | ||||
| } | ||||
| @@ -54,6 +62,12 @@ struct type_conversion_table<ggml_bf16_t> { | ||||
|     static constexpr ggml_bf16_t (*from_f32)(float) = f32_to_bf16; | ||||
| }; | ||||
|  | ||||
| template <> | ||||
| struct type_conversion_table<int32_t> { | ||||
|     static constexpr float (*to_f32)(int32_t) = i32_to_f32; | ||||
|     static constexpr int32_t (*from_f32)(float) = f32_to_i32; | ||||
| }; | ||||
|  | ||||
| static std::pair<int64_t, int64_t> get_thread_range(const struct ggml_compute_params * params, const struct ggml_tensor * src0) { | ||||
|     const int64_t ith = params->ith; | ||||
|     const int64_t nth = params->nth; | ||||
|   | ||||
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| @@ -6632,6 +6632,8 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_perf() { | ||||
|     test_cases.emplace_back(new test_cpy(GGML_TYPE_F32,  GGML_TYPE_F16,  {512, 3072, 1, 1})); | ||||
|     test_cases.emplace_back(new test_cpy(GGML_TYPE_F32,  GGML_TYPE_F32,  {8192, 512, 2, 1}, {0, 2, 1, 3})); | ||||
|     test_cases.emplace_back(new test_cpy(GGML_TYPE_F32,  GGML_TYPE_F32,  {3072, 512, 2, 1}, {0, 2, 1, 3})); | ||||
|     test_cases.emplace_back(new test_cpy(GGML_TYPE_F32,  GGML_TYPE_Q4_0, {8192, 512, 2, 1})); | ||||
|     test_cases.emplace_back(new test_cpy(GGML_TYPE_Q4_0, GGML_TYPE_F32,  {8192, 512, 2, 1})); | ||||
|  | ||||
|     test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {4096, 4096, 5, 1}, false, false, GGML_TYPE_F32, {1, 1}, 1.0f, 0.0f)); | ||||
|     test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {12888, 256, 5, 1}, false, false, GGML_TYPE_F32, {1, 1}, 1.0f, 0.0f)); | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Xuan-Son Nguyen
					Xuan-Son Nguyen