mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-11-03 09:22:01 +00:00 
			
		
		
		
	cpu: de-duplicate some of the operators and refactor (ggml/1144)
* cpu: de-duplicate some of the operators and refactor * Fix PR comments * Fix PR comments
This commit is contained in:
		
							
								
								
									
										72
									
								
								ggml/src/ggml-cpu/common.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										72
									
								
								ggml/src/ggml-cpu/common.h
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,72 @@
 | 
			
		||||
#pragma once
 | 
			
		||||
 | 
			
		||||
#include "ggml.h"
 | 
			
		||||
#include "ggml-cpu-traits.h"
 | 
			
		||||
#include "ggml-cpu-impl.h"
 | 
			
		||||
#include "ggml-impl.h"
 | 
			
		||||
 | 
			
		||||
#ifdef __cplusplus
 | 
			
		||||
 | 
			
		||||
#include <utility>
 | 
			
		||||
 | 
			
		||||
// convenience functions/macros for use in template calls
 | 
			
		||||
// note: these won't be required after the 'traits' lookup table is used.
 | 
			
		||||
// Scalar conversion: fp32 -> fp16 (thin wrapper over the ggml macro).
static inline ggml_fp16_t f32_to_f16(float value) {
    const ggml_fp16_t half = GGML_FP32_TO_FP16(value);
    return half;
}
 | 
			
		||||
 | 
			
		||||
// Scalar conversion: fp16 -> fp32 (thin wrapper over the ggml macro).
static inline float f16_to_f32(ggml_fp16_t value) {
    const float single = GGML_FP16_TO_FP32(value);
    return single;
}
 | 
			
		||||
 | 
			
		||||
// Scalar conversion: fp32 -> bf16 (thin wrapper over the ggml macro).
static inline ggml_bf16_t f32_to_bf16(float value) {
    const ggml_bf16_t brain = GGML_FP32_TO_BF16(value);
    return brain;
}
 | 
			
		||||
 | 
			
		||||
// Scalar conversion: bf16 -> fp32 (thin wrapper over the ggml macro).
static inline float bf16_to_f32(ggml_bf16_t value) {
    const float single = GGML_BF16_TO_FP32(value);
    return single;
}
 | 
			
		||||
 | 
			
		||||
// Identity conversion for fp32, so float can participate in the same
// template machinery as the half-precision types.
static inline float f32_to_f32(float value) { return value; }
 | 
			
		||||
 | 
			
		||||
// TODO - merge this into the traits table, after using row-based conversions
 | 
			
		||||
// Primary template, intentionally left undefined: only the explicit
// specializations below (fp16, fp32, bf16) provide conversion function
// pointers, so instantiating it with any other type fails to compile.
// TODO - merge this into the traits table, after using row-based conversions
template <typename T>
struct type_conversion_table;
 | 
			
		||||
 | 
			
		||||
template <>
 | 
			
		||||
struct type_conversion_table<ggml_fp16_t> {
 | 
			
		||||
    static constexpr float (*to_f32)(ggml_fp16_t) = f16_to_f32;
 | 
			
		||||
    static constexpr ggml_fp16_t (*from_f32)(float) = f32_to_f16;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
template <>
 | 
			
		||||
struct type_conversion_table<float> {
 | 
			
		||||
    static constexpr float (*to_f32)(float) = f32_to_f32;
 | 
			
		||||
    static constexpr float (*from_f32)(float) = f32_to_f32;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
template <>
 | 
			
		||||
struct type_conversion_table<ggml_bf16_t> {
 | 
			
		||||
    static constexpr float (*to_f32)(ggml_bf16_t) = bf16_to_f32;
 | 
			
		||||
    static constexpr ggml_bf16_t (*from_f32)(float) = f32_to_bf16;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
static std::pair<int64_t, int64_t> get_thread_range(const struct ggml_compute_params * params, const struct ggml_tensor * src0) {
 | 
			
		||||
    const int64_t ith = params->ith;
 | 
			
		||||
    const int64_t nth = params->nth;
 | 
			
		||||
 | 
			
		||||
    const int64_t nr  = ggml_nrows(src0);
 | 
			
		||||
 | 
			
		||||
    // rows per thread
 | 
			
		||||
    const int64_t dr = (nr + nth - 1)/nth;
 | 
			
		||||
 | 
			
		||||
    // row range for this thread
 | 
			
		||||
    const int64_t ir0 = dr*ith;
 | 
			
		||||
    const int64_t ir1 = MIN(ir0 + dr, nr);
 | 
			
		||||
 | 
			
		||||
    return {ir0, ir1};
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
		Reference in New Issue
	
	Block a user