mirror of https://github.com/ggml-org/llama.cpp.git, synced 2025-10-31 08:51:55 +00:00
			
		
		
		
	ggml-cpu : remove the weak alias trick (#14221)
This commit is contained in:
		| @@ -1,88 +0,0 @@ | |||||||
| #pragma once |  | ||||||
|  |  | ||||||
| // Solve alias issue for Apple targets (currently PowerPC, x86, and ARM64). |  | ||||||
| // Mach-O has a weak alias equivalent but no practical compiler support can |  | ||||||
| // be found, so we need to do it manually. |  | ||||||
| // ref: https://stackoverflow.com/questions/42757744 |  | ||||||
| // |  | ||||||
| // This file is a complement to native implementations in the `arch` folder. |  | ||||||
| // A kernel in quants.c or repack.cpp is either: |  | ||||||
| // - implemented in the `arch` folder, or |  | ||||||
| // - defined in this file to remove the `_generic` suffix |  | ||||||
|  |  | ||||||
| #if defined(GGML_CPU_GENERIC) |  | ||||||
| // quants.c |  | ||||||
| #define quantize_row_q8_0_generic quantize_row_q8_0 |  | ||||||
| #define quantize_row_q8_1_generic quantize_row_q8_1 |  | ||||||
| #define quantize_row_q8_K_generic quantize_row_q8_K |  | ||||||
| #define ggml_vec_dot_q4_0_q8_0_generic ggml_vec_dot_q4_0_q8_0 |  | ||||||
| #define ggml_vec_dot_q4_1_q8_1_generic ggml_vec_dot_q4_1_q8_1 |  | ||||||
| #define ggml_vec_dot_q5_0_q8_0_generic ggml_vec_dot_q5_0_q8_0 |  | ||||||
| #define ggml_vec_dot_q5_1_q8_1_generic ggml_vec_dot_q5_1_q8_1 |  | ||||||
| #define ggml_vec_dot_q8_0_q8_0_generic ggml_vec_dot_q8_0_q8_0 |  | ||||||
| #define ggml_vec_dot_tq1_0_q8_K_generic ggml_vec_dot_tq1_0_q8_K |  | ||||||
| #define ggml_vec_dot_tq2_0_q8_K_generic ggml_vec_dot_tq2_0_q8_K |  | ||||||
| #define ggml_vec_dot_q2_K_q8_K_generic ggml_vec_dot_q2_K_q8_K |  | ||||||
| #define ggml_vec_dot_q3_K_q8_K_generic ggml_vec_dot_q3_K_q8_K |  | ||||||
| #define ggml_vec_dot_q4_K_q8_K_generic ggml_vec_dot_q4_K_q8_K |  | ||||||
| #define ggml_vec_dot_q5_K_q8_K_generic ggml_vec_dot_q5_K_q8_K |  | ||||||
| #define ggml_vec_dot_q6_K_q8_K_generic ggml_vec_dot_q6_K_q8_K |  | ||||||
| #define ggml_vec_dot_iq2_xxs_q8_K_generic ggml_vec_dot_iq2_xxs_q8_K |  | ||||||
| #define ggml_vec_dot_iq2_xs_q8_K_generic ggml_vec_dot_iq2_xs_q8_K |  | ||||||
| #define ggml_vec_dot_iq2_s_q8_K_generic ggml_vec_dot_iq2_s_q8_K |  | ||||||
| #define ggml_vec_dot_iq3_xxs_q8_K_generic ggml_vec_dot_iq3_xxs_q8_K |  | ||||||
| #define ggml_vec_dot_iq3_s_q8_K_generic ggml_vec_dot_iq3_s_q8_K |  | ||||||
| #define ggml_vec_dot_iq1_s_q8_K_generic ggml_vec_dot_iq1_s_q8_K |  | ||||||
| #define ggml_vec_dot_iq1_m_q8_K_generic ggml_vec_dot_iq1_m_q8_K |  | ||||||
| #define ggml_vec_dot_iq4_nl_q8_0_generic ggml_vec_dot_iq4_nl_q8_0 |  | ||||||
| #define ggml_vec_dot_iq4_xs_q8_K_generic ggml_vec_dot_iq4_xs_q8_K |  | ||||||
| // repack.cpp |  | ||||||
| #define ggml_quantize_mat_q8_0_4x4_generic ggml_quantize_mat_q8_0_4x4 |  | ||||||
| #define ggml_quantize_mat_q8_0_4x8_generic ggml_quantize_mat_q8_0_4x8 |  | ||||||
| #define ggml_quantize_mat_q8_K_4x8_generic ggml_quantize_mat_q8_K_4x8 |  | ||||||
| #define ggml_gemv_q4_0_4x4_q8_0_generic ggml_gemv_q4_0_4x4_q8_0 |  | ||||||
| #define ggml_gemv_q4_0_4x8_q8_0_generic ggml_gemv_q4_0_4x8_q8_0 |  | ||||||
| #define ggml_gemv_q4_0_8x8_q8_0_generic ggml_gemv_q4_0_8x8_q8_0 |  | ||||||
| #define ggml_gemv_q4_K_8x8_q8_K_generic ggml_gemv_q4_K_8x8_q8_K |  | ||||||
| #define ggml_gemv_iq4_nl_4x4_q8_0_generic ggml_gemv_iq4_nl_4x4_q8_0 |  | ||||||
| #define ggml_gemm_q4_0_4x4_q8_0_generic ggml_gemm_q4_0_4x4_q8_0 |  | ||||||
| #define ggml_gemm_q4_0_4x8_q8_0_generic ggml_gemm_q4_0_4x8_q8_0 |  | ||||||
| #define ggml_gemm_q4_0_8x8_q8_0_generic ggml_gemm_q4_0_8x8_q8_0 |  | ||||||
| #define ggml_gemm_q4_K_8x8_q8_K_generic ggml_gemm_q4_K_8x8_q8_K |  | ||||||
| #define ggml_gemm_iq4_nl_4x4_q8_0_generic ggml_gemm_iq4_nl_4x4_q8_0 |  | ||||||
| #elif defined(__aarch64__) || defined(__arm__) |  | ||||||
| // repack.cpp |  | ||||||
| #define ggml_quantize_mat_q8_K_4x8_generic ggml_quantize_mat_q8_K_4x8 |  | ||||||
| #define ggml_gemv_q4_K_8x8_q8_K_generic ggml_gemv_q4_K_8x8_q8_K |  | ||||||
| #define ggml_gemm_q4_K_8x8_q8_K_generic ggml_gemm_q4_K_8x8_q8_K |  | ||||||
| #elif defined(__x86_64__) || defined(__i386__) |  | ||||||
| // repack.cpp |  | ||||||
| #define ggml_quantize_mat_q8_0_4x4_generic ggml_quantize_mat_q8_0_4x4 |  | ||||||
| #define ggml_gemv_q4_0_4x4_q8_0_generic ggml_gemv_q4_0_4x4_q8_0 |  | ||||||
| #define ggml_gemv_q4_0_4x8_q8_0_generic ggml_gemv_q4_0_4x8_q8_0 |  | ||||||
| #define ggml_gemv_iq4_nl_4x4_q8_0_generic ggml_gemv_iq4_nl_4x4_q8_0 |  | ||||||
| #define ggml_gemm_q4_0_4x4_q8_0_generic ggml_gemm_q4_0_4x4_q8_0 |  | ||||||
| #define ggml_gemm_q4_0_4x8_q8_0_generic ggml_gemm_q4_0_4x8_q8_0 |  | ||||||
| #define ggml_gemm_iq4_nl_4x4_q8_0_generic ggml_gemm_iq4_nl_4x4_q8_0 |  | ||||||
| #elif defined(__POWERPC__) |  | ||||||
| // ref: https://github.com/ggml-org/llama.cpp/pull/14146#issuecomment-2972561679 |  | ||||||
| // quants.c |  | ||||||
| #define quantize_row_q8_K_generic quantize_row_q8_K |  | ||||||
| #define ggml_vec_dot_tq1_0_q8_K_generic ggml_vec_dot_tq1_0_q8_K |  | ||||||
| #define ggml_vec_dot_tq2_0_q8_K_generic ggml_vec_dot_tq2_0_q8_K |  | ||||||
| #define ggml_vec_dot_iq1_m_q8_K_generic ggml_vec_dot_iq1_m_q8_K |  | ||||||
| // repack.cpp |  | ||||||
| #define ggml_quantize_mat_q8_0_4x4_generic ggml_quantize_mat_q8_0_4x4 |  | ||||||
| #define ggml_quantize_mat_q8_0_4x8_generic ggml_quantize_mat_q8_0_4x8 |  | ||||||
| #define ggml_quantize_mat_q8_K_4x8_generic ggml_quantize_mat_q8_K_4x8 |  | ||||||
| #define ggml_gemv_q4_0_4x4_q8_0_generic ggml_gemv_q4_0_4x4_q8_0 |  | ||||||
| #define ggml_gemv_q4_0_4x8_q8_0_generic ggml_gemv_q4_0_4x8_q8_0 |  | ||||||
| #define ggml_gemv_q4_0_8x8_q8_0_generic ggml_gemv_q4_0_8x8_q8_0 |  | ||||||
| #define ggml_gemv_q4_K_8x8_q8_K_generic ggml_gemv_q4_K_8x8_q8_K |  | ||||||
| #define ggml_gemv_iq4_nl_4x4_q8_0_generic ggml_gemv_iq4_nl_4x4_q8_0 |  | ||||||
| #define ggml_gemm_q4_0_4x4_q8_0_generic ggml_gemm_q4_0_4x4_q8_0 |  | ||||||
| #define ggml_gemm_q4_0_4x8_q8_0_generic ggml_gemm_q4_0_4x8_q8_0 |  | ||||||
| #define ggml_gemm_q4_0_8x8_q8_0_generic ggml_gemm_q4_0_8x8_q8_0 |  | ||||||
| #define ggml_gemm_q4_K_8x8_q8_K_generic ggml_gemm_q4_K_8x8_q8_K |  | ||||||
| #define ggml_gemm_iq4_nl_4x4_q8_0_generic ggml_gemm_iq4_nl_4x4_q8_0 |  | ||||||
| #endif |  | ||||||
							
								
								
									
								184  ggml/src/ggml-cpu/arch-fallback.h  (Normal file)
									
								
							| @@ -0,0 +1,184 @@ | |||||||
|  | #pragma once | ||||||
|  |  | ||||||
|  | // Rename `_generic` functions if no native implementation is available. | ||||||
|  | // This effectively selects the generic implementation. | ||||||
|  |  | ||||||
|  | #if defined(GGML_CPU_GENERIC) | ||||||
|  | // quants.c | ||||||
|  | #define quantize_row_q8_0_generic quantize_row_q8_0 | ||||||
|  | #define quantize_row_q8_1_generic quantize_row_q8_1 | ||||||
|  | #define quantize_row_q8_K_generic quantize_row_q8_K | ||||||
|  | #define ggml_vec_dot_q4_0_q8_0_generic ggml_vec_dot_q4_0_q8_0 | ||||||
|  | #define ggml_vec_dot_q4_1_q8_1_generic ggml_vec_dot_q4_1_q8_1 | ||||||
|  | #define ggml_vec_dot_q5_0_q8_0_generic ggml_vec_dot_q5_0_q8_0 | ||||||
|  | #define ggml_vec_dot_q5_1_q8_1_generic ggml_vec_dot_q5_1_q8_1 | ||||||
|  | #define ggml_vec_dot_q8_0_q8_0_generic ggml_vec_dot_q8_0_q8_0 | ||||||
|  | #define ggml_vec_dot_tq1_0_q8_K_generic ggml_vec_dot_tq1_0_q8_K | ||||||
|  | #define ggml_vec_dot_tq2_0_q8_K_generic ggml_vec_dot_tq2_0_q8_K | ||||||
|  | #define ggml_vec_dot_q2_K_q8_K_generic ggml_vec_dot_q2_K_q8_K | ||||||
|  | #define ggml_vec_dot_q3_K_q8_K_generic ggml_vec_dot_q3_K_q8_K | ||||||
|  | #define ggml_vec_dot_q4_K_q8_K_generic ggml_vec_dot_q4_K_q8_K | ||||||
|  | #define ggml_vec_dot_q5_K_q8_K_generic ggml_vec_dot_q5_K_q8_K | ||||||
|  | #define ggml_vec_dot_q6_K_q8_K_generic ggml_vec_dot_q6_K_q8_K | ||||||
|  | #define ggml_vec_dot_iq2_xxs_q8_K_generic ggml_vec_dot_iq2_xxs_q8_K | ||||||
|  | #define ggml_vec_dot_iq2_xs_q8_K_generic ggml_vec_dot_iq2_xs_q8_K | ||||||
|  | #define ggml_vec_dot_iq2_s_q8_K_generic ggml_vec_dot_iq2_s_q8_K | ||||||
|  | #define ggml_vec_dot_iq3_xxs_q8_K_generic ggml_vec_dot_iq3_xxs_q8_K | ||||||
|  | #define ggml_vec_dot_iq3_s_q8_K_generic ggml_vec_dot_iq3_s_q8_K | ||||||
|  | #define ggml_vec_dot_iq1_s_q8_K_generic ggml_vec_dot_iq1_s_q8_K | ||||||
|  | #define ggml_vec_dot_iq1_m_q8_K_generic ggml_vec_dot_iq1_m_q8_K | ||||||
|  | #define ggml_vec_dot_iq4_nl_q8_0_generic ggml_vec_dot_iq4_nl_q8_0 | ||||||
|  | #define ggml_vec_dot_iq4_xs_q8_K_generic ggml_vec_dot_iq4_xs_q8_K | ||||||
|  | // repack.cpp | ||||||
|  | #define ggml_quantize_mat_q8_0_4x4_generic ggml_quantize_mat_q8_0_4x4 | ||||||
|  | #define ggml_quantize_mat_q8_0_4x8_generic ggml_quantize_mat_q8_0_4x8 | ||||||
|  | #define ggml_quantize_mat_q8_K_4x8_generic ggml_quantize_mat_q8_K_4x8 | ||||||
|  | #define ggml_gemv_q4_0_4x4_q8_0_generic ggml_gemv_q4_0_4x4_q8_0 | ||||||
|  | #define ggml_gemv_q4_0_4x8_q8_0_generic ggml_gemv_q4_0_4x8_q8_0 | ||||||
|  | #define ggml_gemv_q4_0_8x8_q8_0_generic ggml_gemv_q4_0_8x8_q8_0 | ||||||
|  | #define ggml_gemv_q4_K_8x8_q8_K_generic ggml_gemv_q4_K_8x8_q8_K | ||||||
|  | #define ggml_gemv_iq4_nl_4x4_q8_0_generic ggml_gemv_iq4_nl_4x4_q8_0 | ||||||
|  | #define ggml_gemm_q4_0_4x4_q8_0_generic ggml_gemm_q4_0_4x4_q8_0 | ||||||
|  | #define ggml_gemm_q4_0_4x8_q8_0_generic ggml_gemm_q4_0_4x8_q8_0 | ||||||
|  | #define ggml_gemm_q4_0_8x8_q8_0_generic ggml_gemm_q4_0_8x8_q8_0 | ||||||
|  | #define ggml_gemm_q4_K_8x8_q8_K_generic ggml_gemm_q4_K_8x8_q8_K | ||||||
|  | #define ggml_gemm_iq4_nl_4x4_q8_0_generic ggml_gemm_iq4_nl_4x4_q8_0 | ||||||
|  | #elif defined(__aarch64__) || defined(__arm__) || defined(_M_ARM) || defined(_M_ARM64) | ||||||
|  | // repack.cpp | ||||||
|  | #define ggml_quantize_mat_q8_K_4x8_generic ggml_quantize_mat_q8_K_4x8 | ||||||
|  | #define ggml_gemv_q4_K_8x8_q8_K_generic ggml_gemv_q4_K_8x8_q8_K | ||||||
|  | #define ggml_gemm_q4_K_8x8_q8_K_generic ggml_gemm_q4_K_8x8_q8_K | ||||||
|  | #elif defined(__x86_64__) || defined(__i386__) || defined(_M_IX86) || defined(_M_X64) | ||||||
|  | // repack.cpp | ||||||
|  | #define ggml_quantize_mat_q8_0_4x4_generic ggml_quantize_mat_q8_0_4x4 | ||||||
|  | #define ggml_gemv_q4_0_4x4_q8_0_generic ggml_gemv_q4_0_4x4_q8_0 | ||||||
|  | #define ggml_gemv_q4_0_4x8_q8_0_generic ggml_gemv_q4_0_4x8_q8_0 | ||||||
|  | #define ggml_gemv_iq4_nl_4x4_q8_0_generic ggml_gemv_iq4_nl_4x4_q8_0 | ||||||
|  | #define ggml_gemm_q4_0_4x4_q8_0_generic ggml_gemm_q4_0_4x4_q8_0 | ||||||
|  | #define ggml_gemm_q4_0_4x8_q8_0_generic ggml_gemm_q4_0_4x8_q8_0 | ||||||
|  | #define ggml_gemm_iq4_nl_4x4_q8_0_generic ggml_gemm_iq4_nl_4x4_q8_0 | ||||||
|  | #elif defined(__POWERPC__) || defined(__powerpc__) | ||||||
|  | // ref: https://github.com/ggml-org/llama.cpp/pull/14146#issuecomment-2972561679 | ||||||
|  | // quants.c | ||||||
|  | #define quantize_row_q8_K_generic quantize_row_q8_K | ||||||
|  | #define ggml_vec_dot_tq1_0_q8_K_generic ggml_vec_dot_tq1_0_q8_K | ||||||
|  | #define ggml_vec_dot_tq2_0_q8_K_generic ggml_vec_dot_tq2_0_q8_K | ||||||
|  | #define ggml_vec_dot_iq1_m_q8_K_generic ggml_vec_dot_iq1_m_q8_K | ||||||
|  | // repack.cpp | ||||||
|  | #define ggml_quantize_mat_q8_0_4x4_generic ggml_quantize_mat_q8_0_4x4 | ||||||
|  | #define ggml_quantize_mat_q8_0_4x8_generic ggml_quantize_mat_q8_0_4x8 | ||||||
|  | #define ggml_quantize_mat_q8_K_4x8_generic ggml_quantize_mat_q8_K_4x8 | ||||||
|  | #define ggml_gemv_q4_0_4x4_q8_0_generic ggml_gemv_q4_0_4x4_q8_0 | ||||||
|  | #define ggml_gemv_q4_0_4x8_q8_0_generic ggml_gemv_q4_0_4x8_q8_0 | ||||||
|  | #define ggml_gemv_q4_0_8x8_q8_0_generic ggml_gemv_q4_0_8x8_q8_0 | ||||||
|  | #define ggml_gemv_q4_K_8x8_q8_K_generic ggml_gemv_q4_K_8x8_q8_K | ||||||
|  | #define ggml_gemv_iq4_nl_4x4_q8_0_generic ggml_gemv_iq4_nl_4x4_q8_0 | ||||||
|  | #define ggml_gemm_q4_0_4x4_q8_0_generic ggml_gemm_q4_0_4x4_q8_0 | ||||||
|  | #define ggml_gemm_q4_0_4x8_q8_0_generic ggml_gemm_q4_0_4x8_q8_0 | ||||||
|  | #define ggml_gemm_q4_0_8x8_q8_0_generic ggml_gemm_q4_0_8x8_q8_0 | ||||||
|  | #define ggml_gemm_q4_K_8x8_q8_K_generic ggml_gemm_q4_K_8x8_q8_K | ||||||
|  | #define ggml_gemm_iq4_nl_4x4_q8_0_generic ggml_gemm_iq4_nl_4x4_q8_0 | ||||||
|  | #elif defined(__loongarch64) | ||||||
|  | // quants.c | ||||||
|  | #define quantize_row_q8_K_generic quantize_row_q8_K | ||||||
|  | #define ggml_vec_dot_tq1_0_q8_K_generic ggml_vec_dot_tq1_0_q8_K | ||||||
|  | #define ggml_vec_dot_tq2_0_q8_K_generic ggml_vec_dot_tq2_0_q8_K | ||||||
|  | #define ggml_vec_dot_iq1_m_q8_K_generic ggml_vec_dot_iq1_m_q8_K | ||||||
|  | // repack.cpp | ||||||
|  | #define ggml_quantize_mat_q8_0_4x4_generic ggml_quantize_mat_q8_0_4x4 | ||||||
|  | #define ggml_quantize_mat_q8_0_4x8_generic ggml_quantize_mat_q8_0_4x8 | ||||||
|  | #define ggml_quantize_mat_q8_K_4x8_generic ggml_quantize_mat_q8_K_4x8 | ||||||
|  | #define ggml_gemv_q4_0_4x4_q8_0_generic ggml_gemv_q4_0_4x4_q8_0 | ||||||
|  | #define ggml_gemv_q4_0_4x8_q8_0_generic ggml_gemv_q4_0_4x8_q8_0 | ||||||
|  | #define ggml_gemv_q4_0_8x8_q8_0_generic ggml_gemv_q4_0_8x8_q8_0 | ||||||
|  | #define ggml_gemv_q4_K_8x8_q8_K_generic ggml_gemv_q4_K_8x8_q8_K | ||||||
|  | #define ggml_gemv_iq4_nl_4x4_q8_0_generic ggml_gemv_iq4_nl_4x4_q8_0 | ||||||
|  | #define ggml_gemm_q4_0_4x4_q8_0_generic ggml_gemm_q4_0_4x4_q8_0 | ||||||
|  | #define ggml_gemm_q4_0_4x8_q8_0_generic ggml_gemm_q4_0_4x8_q8_0 | ||||||
|  | #define ggml_gemm_q4_0_8x8_q8_0_generic ggml_gemm_q4_0_8x8_q8_0 | ||||||
|  | #define ggml_gemm_q4_K_8x8_q8_K_generic ggml_gemm_q4_K_8x8_q8_K | ||||||
|  | #define ggml_gemm_iq4_nl_4x4_q8_0_generic ggml_gemm_iq4_nl_4x4_q8_0 | ||||||
|  | #elif defined(__riscv) | ||||||
|  | // quants.c | ||||||
|  | #define quantize_row_q8_K_generic quantize_row_q8_K | ||||||
|  | #define ggml_vec_dot_tq1_0_q8_K_generic ggml_vec_dot_tq1_0_q8_K | ||||||
|  | #define ggml_vec_dot_tq2_0_q8_K_generic ggml_vec_dot_tq2_0_q8_K | ||||||
|  | #define ggml_vec_dot_iq2_xxs_q8_K_generic ggml_vec_dot_iq2_xxs_q8_K | ||||||
|  | #define ggml_vec_dot_iq2_xs_q8_K_generic ggml_vec_dot_iq2_xs_q8_K | ||||||
|  | #define ggml_vec_dot_iq2_s_q8_K_generic ggml_vec_dot_iq2_s_q8_K | ||||||
|  | #define ggml_vec_dot_iq3_xxs_q8_K_generic ggml_vec_dot_iq3_xxs_q8_K | ||||||
|  | #define ggml_vec_dot_iq3_s_q8_K_generic ggml_vec_dot_iq3_s_q8_K | ||||||
|  | #define ggml_vec_dot_iq1_s_q8_K_generic ggml_vec_dot_iq1_s_q8_K | ||||||
|  | #define ggml_vec_dot_iq1_m_q8_K_generic ggml_vec_dot_iq1_m_q8_K | ||||||
|  | #define ggml_vec_dot_iq4_nl_q8_0_generic ggml_vec_dot_iq4_nl_q8_0 | ||||||
|  | #define ggml_vec_dot_iq4_xs_q8_K_generic ggml_vec_dot_iq4_xs_q8_K | ||||||
|  | // repack.cpp | ||||||
|  | #define ggml_quantize_mat_q8_0_4x4_generic ggml_quantize_mat_q8_0_4x4 | ||||||
|  | #define ggml_quantize_mat_q8_0_4x8_generic ggml_quantize_mat_q8_0_4x8 | ||||||
|  | #define ggml_quantize_mat_q8_K_4x8_generic ggml_quantize_mat_q8_K_4x8 | ||||||
|  | #define ggml_gemv_q4_0_4x4_q8_0_generic ggml_gemv_q4_0_4x4_q8_0 | ||||||
|  | #define ggml_gemv_q4_0_4x8_q8_0_generic ggml_gemv_q4_0_4x8_q8_0 | ||||||
|  | #define ggml_gemv_q4_K_8x8_q8_K_generic ggml_gemv_q4_K_8x8_q8_K | ||||||
|  | #define ggml_gemv_iq4_nl_4x4_q8_0_generic ggml_gemv_iq4_nl_4x4_q8_0 | ||||||
|  | #define ggml_gemm_q4_0_4x4_q8_0_generic ggml_gemm_q4_0_4x4_q8_0 | ||||||
|  | #define ggml_gemm_q4_0_4x8_q8_0_generic ggml_gemm_q4_0_4x8_q8_0 | ||||||
|  | #define ggml_gemm_q4_K_8x8_q8_K_generic ggml_gemm_q4_K_8x8_q8_K | ||||||
|  | #define ggml_gemm_iq4_nl_4x4_q8_0_generic ggml_gemm_iq4_nl_4x4_q8_0 | ||||||
|  | #elif defined(__s390x__) | ||||||
|  | // quants.c | ||||||
|  | #define quantize_row_q8_K_generic quantize_row_q8_K | ||||||
|  | #define ggml_vec_dot_q5_0_q8_0_generic ggml_vec_dot_q5_0_q8_0 | ||||||
|  | #define ggml_vec_dot_q5_1_q8_1_generic ggml_vec_dot_q5_1_q8_1 | ||||||
|  | #define ggml_vec_dot_tq1_0_q8_K_generic ggml_vec_dot_tq1_0_q8_K | ||||||
|  | #define ggml_vec_dot_tq2_0_q8_K_generic ggml_vec_dot_tq2_0_q8_K | ||||||
|  | #define ggml_vec_dot_q2_K_q8_K_generic ggml_vec_dot_q2_K_q8_K | ||||||
|  | #define ggml_vec_dot_iq2_xxs_q8_K_generic ggml_vec_dot_iq2_xxs_q8_K | ||||||
|  | #define ggml_vec_dot_iq2_xs_q8_K_generic ggml_vec_dot_iq2_xs_q8_K | ||||||
|  | #define ggml_vec_dot_iq2_s_q8_K_generic ggml_vec_dot_iq2_s_q8_K | ||||||
|  | #define ggml_vec_dot_iq3_xxs_q8_K_generic ggml_vec_dot_iq3_xxs_q8_K | ||||||
|  | #define ggml_vec_dot_iq3_s_q8_K_generic ggml_vec_dot_iq3_s_q8_K | ||||||
|  | #define ggml_vec_dot_iq1_s_q8_K_generic ggml_vec_dot_iq1_s_q8_K | ||||||
|  | #define ggml_vec_dot_iq1_m_q8_K_generic ggml_vec_dot_iq1_m_q8_K | ||||||
|  | // repack.cpp | ||||||
|  | #define ggml_quantize_mat_q8_0_4x4_generic ggml_quantize_mat_q8_0_4x4 | ||||||
|  | #define ggml_quantize_mat_q8_0_4x8_generic ggml_quantize_mat_q8_0_4x8 | ||||||
|  | #define ggml_quantize_mat_q8_K_4x8_generic ggml_quantize_mat_q8_K_4x8 | ||||||
|  | #define ggml_gemv_q4_0_4x4_q8_0_generic ggml_gemv_q4_0_4x4_q8_0 | ||||||
|  | #define ggml_gemv_q4_0_4x8_q8_0_generic ggml_gemv_q4_0_4x8_q8_0 | ||||||
|  | #define ggml_gemv_q4_0_8x8_q8_0_generic ggml_gemv_q4_0_8x8_q8_0 | ||||||
|  | #define ggml_gemv_q4_K_8x8_q8_K_generic ggml_gemv_q4_K_8x8_q8_K | ||||||
|  | #define ggml_gemv_iq4_nl_4x4_q8_0_generic ggml_gemv_iq4_nl_4x4_q8_0 | ||||||
|  | #define ggml_gemm_q4_0_4x4_q8_0_generic ggml_gemm_q4_0_4x4_q8_0 | ||||||
|  | #define ggml_gemm_q4_0_4x8_q8_0_generic ggml_gemm_q4_0_4x8_q8_0 | ||||||
|  | #define ggml_gemm_q4_0_8x8_q8_0_generic ggml_gemm_q4_0_8x8_q8_0 | ||||||
|  | #define ggml_gemm_q4_K_8x8_q8_K_generic ggml_gemm_q4_K_8x8_q8_K | ||||||
|  | #define ggml_gemm_iq4_nl_4x4_q8_0_generic ggml_gemm_iq4_nl_4x4_q8_0 | ||||||
|  | #elif defined(__wasm__) | ||||||
|  | // quants.c | ||||||
|  | #define ggml_vec_dot_q4_1_q8_1_generic ggml_vec_dot_q4_1_q8_1 | ||||||
|  | #define ggml_vec_dot_tq1_0_q8_K_generic ggml_vec_dot_tq1_0_q8_K | ||||||
|  | #define ggml_vec_dot_tq2_0_q8_K_generic ggml_vec_dot_tq2_0_q8_K | ||||||
|  | #define ggml_vec_dot_iq2_xxs_q8_K_generic ggml_vec_dot_iq2_xxs_q8_K | ||||||
|  | #define ggml_vec_dot_iq2_xs_q8_K_generic ggml_vec_dot_iq2_xs_q8_K | ||||||
|  | #define ggml_vec_dot_iq2_s_q8_K_generic ggml_vec_dot_iq2_s_q8_K | ||||||
|  | #define ggml_vec_dot_iq3_xxs_q8_K_generic ggml_vec_dot_iq3_xxs_q8_K | ||||||
|  | #define ggml_vec_dot_iq3_s_q8_K_generic ggml_vec_dot_iq3_s_q8_K | ||||||
|  | #define ggml_vec_dot_iq1_s_q8_K_generic ggml_vec_dot_iq1_s_q8_K | ||||||
|  | #define ggml_vec_dot_iq1_m_q8_K_generic ggml_vec_dot_iq1_m_q8_K | ||||||
|  | #define ggml_vec_dot_iq4_nl_q8_0_generic ggml_vec_dot_iq4_nl_q8_0 | ||||||
|  | #define ggml_vec_dot_iq4_xs_q8_K_generic ggml_vec_dot_iq4_xs_q8_K | ||||||
|  | // repack.cpp | ||||||
|  | #define ggml_quantize_mat_q8_0_4x4_generic ggml_quantize_mat_q8_0_4x4 | ||||||
|  | #define ggml_quantize_mat_q8_0_4x8_generic ggml_quantize_mat_q8_0_4x8 | ||||||
|  | #define ggml_quantize_mat_q8_K_4x8_generic ggml_quantize_mat_q8_K_4x8 | ||||||
|  | #define ggml_gemv_q4_0_4x4_q8_0_generic ggml_gemv_q4_0_4x4_q8_0 | ||||||
|  | #define ggml_gemv_q4_0_4x8_q8_0_generic ggml_gemv_q4_0_4x8_q8_0 | ||||||
|  | #define ggml_gemv_q4_0_8x8_q8_0_generic ggml_gemv_q4_0_8x8_q8_0 | ||||||
|  | #define ggml_gemv_q4_K_8x8_q8_K_generic ggml_gemv_q4_K_8x8_q8_K | ||||||
|  | #define ggml_gemv_iq4_nl_4x4_q8_0_generic ggml_gemv_iq4_nl_4x4_q8_0 | ||||||
|  | #define ggml_gemm_q4_0_4x4_q8_0_generic ggml_gemm_q4_0_4x4_q8_0 | ||||||
|  | #define ggml_gemm_q4_0_4x8_q8_0_generic ggml_gemm_q4_0_4x8_q8_0 | ||||||
|  | #define ggml_gemm_q4_0_8x8_q8_0_generic ggml_gemm_q4_0_8x8_q8_0 | ||||||
|  | #define ggml_gemm_q4_K_8x8_q8_K_generic ggml_gemm_q4_K_8x8_q8_K | ||||||
|  | #define ggml_gemm_iq4_nl_4x4_q8_0_generic ggml_gemm_iq4_nl_4x4_q8_0 | ||||||
|  | #endif | ||||||
| @@ -509,28 +509,3 @@ int  ggml_threadpool_chunk_add(struct ggml_threadpool * tp, int value); | |||||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||||
| } | } | ||||||
| #endif | #endif | ||||||
|  |  | ||||||
| #define GGML_DO_PRAGMA_(x) _Pragma (#x) |  | ||||||
| #define GGML_DO_PRAGMA(x) GGML_DO_PRAGMA_(x) |  | ||||||
| #if defined(GGML_CPU_GENERIC) || defined(__HIPCC__) || defined(__APPLE__) |  | ||||||
| // Note for Apple targets: |  | ||||||
| // - clang: aliases are not supported on darwin |  | ||||||
| // - all native kernels need to be implemented in both x86 and arm files |  | ||||||
| // - on iOS, tvOS, and visionOS, if cmake cannot determine the target architecture, all `_generic` names are replaced by defines |  | ||||||
| # define GGML_WEAK_ALIAS(name, alias) |  | ||||||
| #elif defined(__GNUC__) |  | ||||||
| // GCC/Clang on *nix |  | ||||||
| # define GGML_WEAK_ALIAS(name, alias) GGML_DO_PRAGMA(weak name = alias) // NOLINT |  | ||||||
| #elif defined(_MSC_VER) && defined(_WIN64) |  | ||||||
| // MSVC |  | ||||||
| // Note: C name mangling varies across different calling conventions |  | ||||||
| // see https://learn.microsoft.com/en-us/cpp/build/reference/decorated-names?view=msvc-170 |  | ||||||
| # define GGML_WEAK_ALIAS(name, alias) GGML_DO_PRAGMA(comment(linker, "/alternatename:" #name "=" #alias)) |  | ||||||
| #elif defined(_MSC_VER) && defined(WIN32) |  | ||||||
| // ref: https://github.com/ggml-org/whisper.cpp/pull/3239#issuecomment-2958224591 |  | ||||||
| # define GGML_WEAK_ALIAS(name, alias) GGML_DO_PRAGMA(comment(linker, "/alternatename:_" #name "=_" #alias)) |  | ||||||
| #else |  | ||||||
| # error "Unsupported compiler for GGML_WEAK_ALIAS" |  | ||||||
| #endif |  | ||||||
|  |  | ||||||
| #define GGML_CPU_NATIVE_IMPL(name) GGML_WEAK_ALIAS(name, name ## _generic) |  | ||||||
|   | |||||||
| @@ -5,9 +5,7 @@ | |||||||
| #include "ggml-quants.h" | #include "ggml-quants.h" | ||||||
| #include "quants.h" | #include "quants.h" | ||||||
|  |  | ||||||
| #if defined(__APPLE__) | #include "arch-fallback.h" | ||||||
| #include "apple-fallback.h" |  | ||||||
| #endif |  | ||||||
|  |  | ||||||
| #include <string.h> | #include <string.h> | ||||||
| #include <assert.h> | #include <assert.h> | ||||||
| @@ -42,12 +40,10 @@ void quantize_row_q5_1(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, in | |||||||
| void quantize_row_q8_0_generic(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k) { | void quantize_row_q8_0_generic(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k) { | ||||||
|     quantize_row_q8_0_ref(x, y, k); |     quantize_row_q8_0_ref(x, y, k); | ||||||
| } | } | ||||||
| GGML_CPU_NATIVE_IMPL(quantize_row_q8_0) |  | ||||||
|  |  | ||||||
| void quantize_row_q8_1_generic(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k) { | void quantize_row_q8_1_generic(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k) { | ||||||
|     quantize_row_q8_1_ref(x, y, k); |     quantize_row_q8_1_ref(x, y, k); | ||||||
| } | } | ||||||
| GGML_CPU_NATIVE_IMPL(quantize_row_q8_1) |  | ||||||
|  |  | ||||||
| // | // | ||||||
| // 2-6 bit quantization in super-blocks | // 2-6 bit quantization in super-blocks | ||||||
| @@ -108,7 +104,6 @@ void quantize_row_tq2_0(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, | |||||||
| void quantize_row_q8_K_generic(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k) { | void quantize_row_q8_K_generic(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k) { | ||||||
|     quantize_row_q8_K_ref(x, y, k); |     quantize_row_q8_K_ref(x, y, k); | ||||||
| } | } | ||||||
| GGML_CPU_NATIVE_IMPL(quantize_row_q8_K) |  | ||||||
|  |  | ||||||
| //===================================== Dot products ================================= | //===================================== Dot products ================================= | ||||||
|  |  | ||||||
| @@ -147,7 +142,6 @@ void ggml_vec_dot_q4_0_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs, c | |||||||
|  |  | ||||||
|     *s = sumf; |     *s = sumf; | ||||||
| } | } | ||||||
| GGML_CPU_NATIVE_IMPL(ggml_vec_dot_q4_0_q8_0) |  | ||||||
|  |  | ||||||
| // TODO: add WASM SIMD | // TODO: add WASM SIMD | ||||||
| void ggml_vec_dot_q4_1_q8_1_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) { | void ggml_vec_dot_q4_1_q8_1_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) { | ||||||
| @@ -185,7 +179,6 @@ void ggml_vec_dot_q4_1_q8_1_generic(int n, float * GGML_RESTRICT s, size_t bs, c | |||||||
|  |  | ||||||
|     *s = sumf; |     *s = sumf; | ||||||
| } | } | ||||||
| GGML_CPU_NATIVE_IMPL(ggml_vec_dot_q4_1_q8_1) |  | ||||||
|  |  | ||||||
| void ggml_vec_dot_q5_0_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) { | void ggml_vec_dot_q5_0_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) { | ||||||
|     const int qk = QK8_0; |     const int qk = QK8_0; | ||||||
| @@ -229,7 +222,6 @@ void ggml_vec_dot_q5_0_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs, c | |||||||
|  |  | ||||||
|     *s = sumf; |     *s = sumf; | ||||||
| } | } | ||||||
| GGML_CPU_NATIVE_IMPL(ggml_vec_dot_q5_0_q8_0) |  | ||||||
|  |  | ||||||
| void ggml_vec_dot_q5_1_q8_1_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) { | void ggml_vec_dot_q5_1_q8_1_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) { | ||||||
|     const int qk = QK8_1; |     const int qk = QK8_1; | ||||||
| @@ -273,7 +265,6 @@ void ggml_vec_dot_q5_1_q8_1_generic(int n, float * GGML_RESTRICT s, size_t bs, c | |||||||
|  |  | ||||||
|     *s = sumf; |     *s = sumf; | ||||||
| } | } | ||||||
| GGML_CPU_NATIVE_IMPL(ggml_vec_dot_q5_1_q8_1) |  | ||||||
|  |  | ||||||
| void ggml_vec_dot_q8_0_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) { | void ggml_vec_dot_q8_0_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) { | ||||||
|     const int qk = QK8_0; |     const int qk = QK8_0; | ||||||
| @@ -304,7 +295,6 @@ void ggml_vec_dot_q8_0_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs, c | |||||||
|  |  | ||||||
|     *s = sumf; |     *s = sumf; | ||||||
| } | } | ||||||
| GGML_CPU_NATIVE_IMPL(ggml_vec_dot_q8_0_q8_0) |  | ||||||
|  |  | ||||||
| void ggml_vec_dot_tq1_0_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) { | void ggml_vec_dot_tq1_0_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) { | ||||||
|     assert(nrc == 1); |     assert(nrc == 1); | ||||||
| @@ -357,7 +347,6 @@ void ggml_vec_dot_tq1_0_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, | |||||||
|  |  | ||||||
|     *s = sumf; |     *s = sumf; | ||||||
| } | } | ||||||
| GGML_CPU_NATIVE_IMPL(ggml_vec_dot_tq1_0_q8_K) |  | ||||||
|  |  | ||||||
| void ggml_vec_dot_tq2_0_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) { | void ggml_vec_dot_tq2_0_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) { | ||||||
|     assert(nrc == 1); |     assert(nrc == 1); | ||||||
| @@ -390,7 +379,6 @@ void ggml_vec_dot_tq2_0_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, | |||||||
|  |  | ||||||
|     *s = sumf; |     *s = sumf; | ||||||
| } | } | ||||||
| GGML_CPU_NATIVE_IMPL(ggml_vec_dot_tq2_0_q8_K) |  | ||||||
|  |  | ||||||
| void ggml_vec_dot_q2_K_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) { | void ggml_vec_dot_q2_K_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) { | ||||||
|     assert(nrc == 1); |     assert(nrc == 1); | ||||||
| @@ -443,7 +431,6 @@ void ggml_vec_dot_q2_K_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, c | |||||||
|     } |     } | ||||||
|     *s = sumf; |     *s = sumf; | ||||||
| } | } | ||||||
| GGML_CPU_NATIVE_IMPL(ggml_vec_dot_q2_K_q8_K) |  | ||||||
|  |  | ||||||
| void ggml_vec_dot_q3_K_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) { | void ggml_vec_dot_q3_K_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) { | ||||||
|     assert(n % QK_K == 0); |     assert(n % QK_K == 0); | ||||||
| @@ -523,7 +510,6 @@ void ggml_vec_dot_q3_K_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, c | |||||||
|     for (int l = 0; l < 8; ++l) sumf += sums[l]; |     for (int l = 0; l < 8; ++l) sumf += sums[l]; | ||||||
|     *s = sumf; |     *s = sumf; | ||||||
| } | } | ||||||
| GGML_CPU_NATIVE_IMPL(ggml_vec_dot_q3_K_q8_K) |  | ||||||
|  |  | ||||||
| void ggml_vec_dot_q4_K_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) { | void ggml_vec_dot_q4_K_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) { | ||||||
|     assert(n % QK_K == 0); |     assert(n % QK_K == 0); | ||||||
| @@ -599,7 +585,6 @@ void ggml_vec_dot_q4_K_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, c | |||||||
|     for (int l = 0; l < 8; ++l) sumf += sums[l]; |     for (int l = 0; l < 8; ++l) sumf += sums[l]; | ||||||
|     *s = sumf; |     *s = sumf; | ||||||
| } | } | ||||||
| GGML_CPU_NATIVE_IMPL(ggml_vec_dot_q4_K_q8_K) |  | ||||||
|  |  | ||||||
| void ggml_vec_dot_q5_K_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy,  size_t by, int nrc) { | void ggml_vec_dot_q5_K_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy,  size_t by, int nrc) { | ||||||
|     assert(n % QK_K == 0); |     assert(n % QK_K == 0); | ||||||
| @@ -680,7 +665,6 @@ void ggml_vec_dot_q5_K_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, c | |||||||
|     for (int l = 0; l < 8; ++l) sumf += sums[l]; |     for (int l = 0; l < 8; ++l) sumf += sums[l]; | ||||||
|     *s = sumf; |     *s = sumf; | ||||||
| } | } | ||||||
| GGML_CPU_NATIVE_IMPL(ggml_vec_dot_q5_K_q8_K) |  | ||||||
|  |  | ||||||
| void ggml_vec_dot_q6_K_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) { | void ggml_vec_dot_q6_K_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) { | ||||||
|     assert(n % QK_K == 0); |     assert(n % QK_K == 0); | ||||||
| @@ -736,7 +720,6 @@ void ggml_vec_dot_q6_K_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, c | |||||||
|     for (int l = 0; l < 8; ++l) sumf += sums[l]; |     for (int l = 0; l < 8; ++l) sumf += sums[l]; | ||||||
|     *s = sumf; |     *s = sumf; | ||||||
| } | } | ||||||
| GGML_CPU_NATIVE_IMPL(ggml_vec_dot_q6_K_q8_K) |  | ||||||
|  |  | ||||||
| void ggml_vec_dot_iq2_xxs_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) { | void ggml_vec_dot_iq2_xxs_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) { | ||||||
|     assert(n % QK_K == 0); |     assert(n % QK_K == 0); | ||||||
| @@ -779,7 +762,6 @@ void ggml_vec_dot_iq2_xxs_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs | |||||||
|     } |     } | ||||||
|     *s = 0.125f * sumf; |     *s = 0.125f * sumf; | ||||||
| } | } | ||||||
| GGML_CPU_NATIVE_IMPL(ggml_vec_dot_iq2_xxs_q8_K) |  | ||||||
|  |  | ||||||
| void ggml_vec_dot_iq2_xs_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) { | void ggml_vec_dot_iq2_xs_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) { | ||||||
|     assert(n % QK_K == 0); |     assert(n % QK_K == 0); | ||||||
| @@ -830,7 +812,6 @@ void ggml_vec_dot_iq2_xs_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, | |||||||
|     } |     } | ||||||
|     *s = 0.125f * sumf; |     *s = 0.125f * sumf; | ||||||
| } | } | ||||||
| GGML_CPU_NATIVE_IMPL(ggml_vec_dot_iq2_xs_q8_K) |  | ||||||
|  |  | ||||||
| void ggml_vec_dot_iq2_s_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) { | void ggml_vec_dot_iq2_s_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) { | ||||||
|     assert(n % QK_K == 0); |     assert(n % QK_K == 0); | ||||||
| @@ -883,7 +864,6 @@ void ggml_vec_dot_iq2_s_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, | |||||||
|  |  | ||||||
|     *s = 0.125f * sumf; |     *s = 0.125f * sumf; | ||||||
| } | } | ||||||
| GGML_CPU_NATIVE_IMPL(ggml_vec_dot_iq2_s_q8_K) |  | ||||||
|  |  | ||||||
| void ggml_vec_dot_iq3_xxs_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) { | void ggml_vec_dot_iq3_xxs_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) { | ||||||
|     assert(n % QK_K == 0); |     assert(n % QK_K == 0); | ||||||
| @@ -928,7 +908,6 @@ void ggml_vec_dot_iq3_xxs_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs | |||||||
|     } |     } | ||||||
|     *s = 0.25f * sumf; |     *s = 0.25f * sumf; | ||||||
| } | } | ||||||
| GGML_CPU_NATIVE_IMPL(ggml_vec_dot_iq3_xxs_q8_K) |  | ||||||
|  |  | ||||||
| void ggml_vec_dot_iq3_s_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) { | void ggml_vec_dot_iq3_s_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) { | ||||||
|     assert(n % QK_K == 0); |     assert(n % QK_K == 0); | ||||||
| @@ -985,7 +964,6 @@ void ggml_vec_dot_iq3_s_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, | |||||||
|     } |     } | ||||||
|     *s = sumf; |     *s = sumf; | ||||||
| } | } | ||||||
| GGML_CPU_NATIVE_IMPL(ggml_vec_dot_iq3_s_q8_K) |  | ||||||
|  |  | ||||||
| void ggml_vec_dot_iq1_s_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) { | void ggml_vec_dot_iq1_s_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) { | ||||||
|     assert(n % QK_K == 0); |     assert(n % QK_K == 0); | ||||||
| @@ -1029,7 +1007,6 @@ void ggml_vec_dot_iq1_s_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, | |||||||
|  |  | ||||||
|     *s = sumf; |     *s = sumf; | ||||||
| } | } | ||||||
| GGML_CPU_NATIVE_IMPL(ggml_vec_dot_iq1_s_q8_K) |  | ||||||
|  |  | ||||||
| void ggml_vec_dot_iq1_m_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) { | void ggml_vec_dot_iq1_m_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) { | ||||||
|     assert(n % QK_K == 0); |     assert(n % QK_K == 0); | ||||||
| @@ -1091,7 +1068,6 @@ void ggml_vec_dot_iq1_m_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, | |||||||
|  |  | ||||||
|     *s = sumf; |     *s = sumf; | ||||||
| } | } | ||||||
| GGML_CPU_NATIVE_IMPL(ggml_vec_dot_iq1_m_q8_K) |  | ||||||
|  |  | ||||||
| void ggml_vec_dot_iq4_nl_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) { | void ggml_vec_dot_iq4_nl_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) { | ||||||
|     assert(nrc == 1); |     assert(nrc == 1); | ||||||
| @@ -1121,7 +1097,6 @@ void ggml_vec_dot_iq4_nl_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs, | |||||||
|     } |     } | ||||||
|     *s = sumf; |     *s = sumf; | ||||||
| } | } | ||||||
| GGML_CPU_NATIVE_IMPL(ggml_vec_dot_iq4_nl_q8_0) |  | ||||||
|  |  | ||||||
| void ggml_vec_dot_iq4_xs_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) { | void ggml_vec_dot_iq4_xs_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) { | ||||||
|     assert(nrc == 1); |     assert(nrc == 1); | ||||||
| @@ -1168,7 +1143,6 @@ void ggml_vec_dot_iq4_xs_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, | |||||||
|     } |     } | ||||||
|     *s = sumf; |     *s = sumf; | ||||||
| } | } | ||||||
| GGML_CPU_NATIVE_IMPL(ggml_vec_dot_iq4_xs_q8_K) |  | ||||||
|  |  | ||||||
| // ============================ 4-bit non-linear quants | // ============================ 4-bit non-linear quants | ||||||
|  |  | ||||||
|   | |||||||
| @@ -8,9 +8,7 @@ | |||||||
| #include "ggml-cpu-impl.h" | #include "ggml-cpu-impl.h" | ||||||
| #include "traits.h" | #include "traits.h" | ||||||
|  |  | ||||||
| #if defined(__APPLE__) | #include "arch-fallback.h" | ||||||
| #include "apple-fallback.h" |  | ||||||
| #endif |  | ||||||
|  |  | ||||||
| #include <cmath> | #include <cmath> | ||||||
| #include <cstring> | #include <cstring> | ||||||
| @@ -87,7 +85,6 @@ void ggml_quantize_mat_q8_0_4x4_generic(const float * GGML_RESTRICT x, void * GG | |||||||
|         } |         } | ||||||
|     } |     } | ||||||
| } | } | ||||||
| GGML_CPU_NATIVE_IMPL(ggml_quantize_mat_q8_0_4x4) |  | ||||||
|  |  | ||||||
| void ggml_quantize_mat_q8_0_4x8_generic(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t k) { | void ggml_quantize_mat_q8_0_4x8_generic(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t k) { | ||||||
|     assert(QK8_0 == 32); |     assert(QK8_0 == 32); | ||||||
| @@ -126,7 +123,6 @@ void ggml_quantize_mat_q8_0_4x8_generic(const float * GGML_RESTRICT x, void * GG | |||||||
|         } |         } | ||||||
|     } |     } | ||||||
| } | } | ||||||
| GGML_CPU_NATIVE_IMPL(ggml_quantize_mat_q8_0_4x8) |  | ||||||
|  |  | ||||||
| void ggml_quantize_mat_q8_K_4x8_generic(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t k) { | void ggml_quantize_mat_q8_K_4x8_generic(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t k) { | ||||||
|     assert(QK_K == 256); |     assert(QK_K == 256); | ||||||
| @@ -178,7 +174,6 @@ void ggml_quantize_mat_q8_K_4x8_generic(const float * GGML_RESTRICT x, void * GG | |||||||
|         } |         } | ||||||
|     } |     } | ||||||
| } | } | ||||||
| GGML_CPU_NATIVE_IMPL(ggml_quantize_mat_q8_K_4x8) |  | ||||||
|  |  | ||||||
| } // extern "C" | } // extern "C" | ||||||
|  |  | ||||||
| @@ -248,7 +243,6 @@ void ggml_gemv_q4_0_4x4_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs, | |||||||
|         for (int j = 0; j < ncols_interleaved; j++) s[x * ncols_interleaved + j] = sumf[j]; |         for (int j = 0; j < ncols_interleaved; j++) s[x * ncols_interleaved + j] = sumf[j]; | ||||||
|     } |     } | ||||||
| } | } | ||||||
| GGML_CPU_NATIVE_IMPL(ggml_gemv_q4_0_4x4_q8_0) |  | ||||||
|  |  | ||||||
| void ggml_gemv_q4_0_4x8_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) { | void ggml_gemv_q4_0_4x8_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) { | ||||||
|     const int qk = QK8_0; |     const int qk = QK8_0; | ||||||
| @@ -293,7 +287,6 @@ void ggml_gemv_q4_0_4x8_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs, | |||||||
|         for (int j = 0; j < ncols_interleaved; j++) s[x * ncols_interleaved + j] = sumf[j]; |         for (int j = 0; j < ncols_interleaved; j++) s[x * ncols_interleaved + j] = sumf[j]; | ||||||
|     } |     } | ||||||
| } | } | ||||||
| GGML_CPU_NATIVE_IMPL(ggml_gemv_q4_0_4x8_q8_0) |  | ||||||
|  |  | ||||||
| void ggml_gemv_q4_0_8x8_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) { | void ggml_gemv_q4_0_8x8_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) { | ||||||
|     const int qk = QK8_0; |     const int qk = QK8_0; | ||||||
| @@ -340,7 +333,6 @@ void ggml_gemv_q4_0_8x8_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs, | |||||||
|         } |         } | ||||||
|     } |     } | ||||||
| } | } | ||||||
| GGML_CPU_NATIVE_IMPL(ggml_gemv_q4_0_8x8_q8_0) |  | ||||||
|  |  | ||||||
| void ggml_gemv_q4_K_8x8_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) { | void ggml_gemv_q4_K_8x8_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) { | ||||||
|     const int qk = QK_K; |     const int qk = QK_K; | ||||||
| @@ -419,7 +411,6 @@ void ggml_gemv_q4_K_8x8_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, | |||||||
|         } |         } | ||||||
|     } |     } | ||||||
| } | } | ||||||
| GGML_CPU_NATIVE_IMPL(ggml_gemv_q4_K_8x8_q8_K) |  | ||||||
|  |  | ||||||
| void ggml_gemv_iq4_nl_4x4_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) { | void ggml_gemv_iq4_nl_4x4_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) { | ||||||
|     const int qk = QK8_0; |     const int qk = QK8_0; | ||||||
| @@ -466,7 +457,6 @@ void ggml_gemv_iq4_nl_4x4_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs | |||||||
|         } |         } | ||||||
|     } |     } | ||||||
| } | } | ||||||
| GGML_CPU_NATIVE_IMPL(ggml_gemv_iq4_nl_4x4_q8_0) |  | ||||||
|  |  | ||||||
| void ggml_gemm_q4_0_4x4_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) { | void ggml_gemm_q4_0_4x4_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) { | ||||||
|     const int qk = QK8_0; |     const int qk = QK8_0; | ||||||
| @@ -523,7 +513,6 @@ void ggml_gemm_q4_0_4x4_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs, | |||||||
|         } |         } | ||||||
|     } |     } | ||||||
| } | } | ||||||
| GGML_CPU_NATIVE_IMPL(ggml_gemm_q4_0_4x4_q8_0) |  | ||||||
|  |  | ||||||
| void ggml_gemm_q4_0_4x8_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) { | void ggml_gemm_q4_0_4x8_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) { | ||||||
|     const int qk = QK8_0; |     const int qk = QK8_0; | ||||||
| @@ -578,7 +567,6 @@ void ggml_gemm_q4_0_4x8_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs, | |||||||
|         } |         } | ||||||
|     } |     } | ||||||
| } | } | ||||||
| GGML_CPU_NATIVE_IMPL(ggml_gemm_q4_0_4x8_q8_0) |  | ||||||
|  |  | ||||||
| void ggml_gemm_q4_0_8x8_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) { | void ggml_gemm_q4_0_8x8_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) { | ||||||
|     const int qk = QK8_0; |     const int qk = QK8_0; | ||||||
| @@ -633,7 +621,6 @@ void ggml_gemm_q4_0_8x8_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs, | |||||||
|         } |         } | ||||||
|     } |     } | ||||||
| } | } | ||||||
| GGML_CPU_NATIVE_IMPL(ggml_gemm_q4_0_8x8_q8_0) |  | ||||||
|  |  | ||||||
| void ggml_gemm_q4_K_8x8_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) { | void ggml_gemm_q4_K_8x8_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) { | ||||||
|     const int qk = QK_K; |     const int qk = QK_K; | ||||||
| @@ -723,7 +710,6 @@ void ggml_gemm_q4_K_8x8_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, | |||||||
|         } |         } | ||||||
|     } |     } | ||||||
| } | } | ||||||
| GGML_CPU_NATIVE_IMPL(ggml_gemm_q4_K_8x8_q8_K) |  | ||||||
|  |  | ||||||
| void ggml_gemm_iq4_nl_4x4_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) { | void ggml_gemm_iq4_nl_4x4_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) { | ||||||
|     const int qk = QK8_0; |     const int qk = QK8_0; | ||||||
| @@ -780,7 +766,6 @@ void ggml_gemm_iq4_nl_4x4_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs | |||||||
|         } |         } | ||||||
|     } |     } | ||||||
| } | } | ||||||
| GGML_CPU_NATIVE_IMPL(ggml_gemm_iq4_nl_4x4_q8_0) |  | ||||||
|  |  | ||||||
| } // extern "C" | } // extern "C" | ||||||
|  |  | ||||||
|   | |||||||
| @@ -64,10 +64,6 @@ static_assert(sizeof(block_iq4_nlx4) == 4 * sizeof(ggml_half) + QK4_NL * 2, "wro | |||||||
| extern "C" { | extern "C" { | ||||||
| #endif | #endif | ||||||
|  |  | ||||||
| // Workaround for clang: |  | ||||||
| // clang++ complains: ``error: call to 'ggml_gemm_q4_0_4x4_q8_0' is ambiguous'' |  | ||||||
| // repro: https://godbolt.org/z/oKdeWKonM (ICE), https://godbolt.org/z/1szq6P36v (ambiguous call) |  | ||||||
| #if defined(GGML_CPU_CLANG_WORKAROUND) || defined(__APPLE__) || !(defined(__GNUC__) && defined(__clang__)) || defined(__HIPCC__) |  | ||||||
| void ggml_quantize_mat_q8_0_4x4(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t k); | void ggml_quantize_mat_q8_0_4x4(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t k); | ||||||
| void ggml_quantize_mat_q8_0_4x8(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t k); | void ggml_quantize_mat_q8_0_4x8(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t k); | ||||||
| void ggml_quantize_mat_q8_K_4x8(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t k); | void ggml_quantize_mat_q8_K_4x8(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t k); | ||||||
| @@ -81,7 +77,6 @@ void ggml_gemm_q4_0_4x8_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const vo | |||||||
| void ggml_gemm_q4_0_8x8_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc); | void ggml_gemm_q4_0_8x8_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc); | ||||||
| void ggml_gemm_q4_K_8x8_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc); | void ggml_gemm_q4_K_8x8_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc); | ||||||
| void ggml_gemm_iq4_nl_4x4_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc); | void ggml_gemm_iq4_nl_4x4_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc); | ||||||
| #endif // !defined(__clang__) |  | ||||||
|  |  | ||||||
| // Native implementations | // Native implementations | ||||||
| void ggml_quantize_mat_q8_0_4x4_generic(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t k); | void ggml_quantize_mat_q8_0_4x4_generic(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t k); | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 xctan
					xctan