Mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-10-31 08:51:55 +00:00)
			
		
		
		
	ggml : fix q4xx mat mul, increase ggml_aligned_malloc alignment (#10167)
This commit is contained in:
		| @@ -304,6 +304,7 @@ static const struct ggml_type_traits_cpu type_traits_cpu[GGML_TYPE_COUNT] = { | |||||||
|         .nrows                    = 1, |         .nrows                    = 1, | ||||||
|     }, |     }, | ||||||
|     [GGML_TYPE_Q8_0] = { |     [GGML_TYPE_Q8_0] = { | ||||||
|  |         .from_float_to_mat        = quantize_mat_q8_0, | ||||||
|         .vec_dot                  = ggml_vec_dot_q8_0_q8_0, |         .vec_dot                  = ggml_vec_dot_q8_0_q8_0, | ||||||
|         .vec_dot_type             = GGML_TYPE_Q8_0, |         .vec_dot_type             = GGML_TYPE_Q8_0, | ||||||
| #if defined (__ARM_FEATURE_MATMUL_INT8) | #if defined (__ARM_FEATURE_MATMUL_INT8) | ||||||
| @@ -13692,9 +13693,7 @@ void ggml_cpu_init(void) { | |||||||
|                     uint16_t u16; |                     uint16_t u16; | ||||||
|                     ggml_fp16_t fp16; |                     ggml_fp16_t fp16; | ||||||
|                 } u = {i}; |                 } u = {i}; | ||||||
|                 // FIXME: this table is used in conversion functions outside of compute |                 float f = GGML_FP16_TO_FP32(u.fp16); | ||||||
|                 // current code depends on ggml_init initializing this table |  | ||||||
|                 float f = ggml_table_f32_f16[i] = GGML_COMPUTE_FP16_TO_FP32(u.fp16); |  | ||||||
|                 ggml_table_gelu_f16[i] = GGML_FP32_TO_FP16(ggml_gelu_f32(f)); |                 ggml_table_gelu_f16[i] = GGML_FP32_TO_FP16(ggml_gelu_f32(f)); | ||||||
|                 ggml_table_gelu_quick_f16[i] = GGML_FP32_TO_FP16(ggml_gelu_quick_f32(f)); |                 ggml_table_gelu_quick_f16[i] = GGML_FP32_TO_FP16(ggml_gelu_quick_f32(f)); | ||||||
|             } |             } | ||||||
|   | |||||||
| @@ -220,8 +220,10 @@ void ggml_log_callback_default(enum ggml_log_level level, const char * text, voi | |||||||
|  |  | ||||||
|  |  | ||||||
| void * ggml_aligned_malloc(size_t size) { | void * ggml_aligned_malloc(size_t size) { | ||||||
|  |     const int alignment = 64; | ||||||
|  |  | ||||||
| #if defined(_MSC_VER) || defined(__MINGW32__) | #if defined(_MSC_VER) || defined(__MINGW32__) | ||||||
|     return _aligned_malloc(size, TENSOR_ALIGNMENT); |     return _aligned_malloc(size, alignment); | ||||||
| #else | #else | ||||||
|     if (size == 0) { |     if (size == 0) { | ||||||
|         GGML_LOG_WARN("Behavior may be unexpected when allocating 0 bytes for ggml_aligned_malloc!\n"); |         GGML_LOG_WARN("Behavior may be unexpected when allocating 0 bytes for ggml_aligned_malloc!\n"); | ||||||
| @@ -229,8 +231,9 @@ void * ggml_aligned_malloc(size_t size) { | |||||||
|     } |     } | ||||||
|     void * aligned_memory = NULL; |     void * aligned_memory = NULL; | ||||||
|   #ifdef GGML_USE_CPU_HBM |   #ifdef GGML_USE_CPU_HBM | ||||||
|     int result = hbw_posix_memalign(&aligned_memory, TENSOR_ALIGNMENT, size); |     int result = hbw_posix_memalign(&aligned_memory, alignment, size); | ||||||
|   #elif TARGET_OS_OSX |   #elif TARGET_OS_OSX | ||||||
|  |     GGML_UNUSED(alignment); | ||||||
|     kern_return_t alloc_status = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t *) &aligned_memory, size, VM_FLAGS_ANYWHERE); |     kern_return_t alloc_status = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t *) &aligned_memory, size, VM_FLAGS_ANYWHERE); | ||||||
|     int result = EFAULT; |     int result = EFAULT; | ||||||
|     switch (alloc_status) { |     switch (alloc_status) { | ||||||
| @@ -248,7 +251,7 @@ void * ggml_aligned_malloc(size_t size) { | |||||||
|             break; |             break; | ||||||
|     } |     } | ||||||
|   #else |   #else | ||||||
|     int result = posix_memalign(&aligned_memory, TENSOR_ALIGNMENT, size); |     int result = posix_memalign(&aligned_memory, alignment, size); | ||||||
|   #endif |   #endif | ||||||
|     if (result != 0) { |     if (result != 0) { | ||||||
|         // Handle allocation failure |         // Handle allocation failure | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Diego Devesa
					Diego Devesa