mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-11-06 09:46:50 +00:00
ggml-cpu : remove the weak alias trick (#14221)
This commit is contained in:
@@ -5,9 +5,7 @@
|
||||
#include "ggml-quants.h"
|
||||
#include "quants.h"
|
||||
|
||||
#if defined(__APPLE__)
|
||||
#include "apple-fallback.h"
|
||||
#endif
|
||||
#include "arch-fallback.h"
|
||||
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
@@ -42,12 +40,10 @@ void quantize_row_q5_1(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, in
|
||||
void quantize_row_q8_0_generic(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k) {
|
||||
quantize_row_q8_0_ref(x, y, k);
|
||||
}
|
||||
GGML_CPU_NATIVE_IMPL(quantize_row_q8_0)
|
||||
|
||||
void quantize_row_q8_1_generic(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k) {
|
||||
quantize_row_q8_1_ref(x, y, k);
|
||||
}
|
||||
GGML_CPU_NATIVE_IMPL(quantize_row_q8_1)
|
||||
|
||||
//
|
||||
// 2-6 bit quantization in super-blocks
|
||||
@@ -108,7 +104,6 @@ void quantize_row_tq2_0(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy,
|
||||
void quantize_row_q8_K_generic(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k) {
|
||||
quantize_row_q8_K_ref(x, y, k);
|
||||
}
|
||||
GGML_CPU_NATIVE_IMPL(quantize_row_q8_K)
|
||||
|
||||
//===================================== Dot products =================================
|
||||
|
||||
@@ -147,7 +142,6 @@ void ggml_vec_dot_q4_0_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs, c
|
||||
|
||||
*s = sumf;
|
||||
}
|
||||
GGML_CPU_NATIVE_IMPL(ggml_vec_dot_q4_0_q8_0)
|
||||
|
||||
// TODO: add WASM SIMD
|
||||
void ggml_vec_dot_q4_1_q8_1_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
|
||||
@@ -185,7 +179,6 @@ void ggml_vec_dot_q4_1_q8_1_generic(int n, float * GGML_RESTRICT s, size_t bs, c
|
||||
|
||||
*s = sumf;
|
||||
}
|
||||
GGML_CPU_NATIVE_IMPL(ggml_vec_dot_q4_1_q8_1)
|
||||
|
||||
void ggml_vec_dot_q5_0_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
|
||||
const int qk = QK8_0;
|
||||
@@ -229,7 +222,6 @@ void ggml_vec_dot_q5_0_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs, c
|
||||
|
||||
*s = sumf;
|
||||
}
|
||||
GGML_CPU_NATIVE_IMPL(ggml_vec_dot_q5_0_q8_0)
|
||||
|
||||
void ggml_vec_dot_q5_1_q8_1_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
|
||||
const int qk = QK8_1;
|
||||
@@ -273,7 +265,6 @@ void ggml_vec_dot_q5_1_q8_1_generic(int n, float * GGML_RESTRICT s, size_t bs, c
|
||||
|
||||
*s = sumf;
|
||||
}
|
||||
GGML_CPU_NATIVE_IMPL(ggml_vec_dot_q5_1_q8_1)
|
||||
|
||||
void ggml_vec_dot_q8_0_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
|
||||
const int qk = QK8_0;
|
||||
@@ -304,7 +295,6 @@ void ggml_vec_dot_q8_0_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs, c
|
||||
|
||||
*s = sumf;
|
||||
}
|
||||
GGML_CPU_NATIVE_IMPL(ggml_vec_dot_q8_0_q8_0)
|
||||
|
||||
void ggml_vec_dot_tq1_0_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
|
||||
assert(nrc == 1);
|
||||
@@ -357,7 +347,6 @@ void ggml_vec_dot_tq1_0_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs,
|
||||
|
||||
*s = sumf;
|
||||
}
|
||||
GGML_CPU_NATIVE_IMPL(ggml_vec_dot_tq1_0_q8_K)
|
||||
|
||||
void ggml_vec_dot_tq2_0_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
|
||||
assert(nrc == 1);
|
||||
@@ -390,7 +379,6 @@ void ggml_vec_dot_tq2_0_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs,
|
||||
|
||||
*s = sumf;
|
||||
}
|
||||
GGML_CPU_NATIVE_IMPL(ggml_vec_dot_tq2_0_q8_K)
|
||||
|
||||
void ggml_vec_dot_q2_K_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
|
||||
assert(nrc == 1);
|
||||
@@ -443,7 +431,6 @@ void ggml_vec_dot_q2_K_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, c
|
||||
}
|
||||
*s = sumf;
|
||||
}
|
||||
GGML_CPU_NATIVE_IMPL(ggml_vec_dot_q2_K_q8_K)
|
||||
|
||||
void ggml_vec_dot_q3_K_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
|
||||
assert(n % QK_K == 0);
|
||||
@@ -523,7 +510,6 @@ void ggml_vec_dot_q3_K_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, c
|
||||
for (int l = 0; l < 8; ++l) sumf += sums[l];
|
||||
*s = sumf;
|
||||
}
|
||||
GGML_CPU_NATIVE_IMPL(ggml_vec_dot_q3_K_q8_K)
|
||||
|
||||
void ggml_vec_dot_q4_K_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
|
||||
assert(n % QK_K == 0);
|
||||
@@ -599,7 +585,6 @@ void ggml_vec_dot_q4_K_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, c
|
||||
for (int l = 0; l < 8; ++l) sumf += sums[l];
|
||||
*s = sumf;
|
||||
}
|
||||
GGML_CPU_NATIVE_IMPL(ggml_vec_dot_q4_K_q8_K)
|
||||
|
||||
void ggml_vec_dot_q5_K_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
|
||||
assert(n % QK_K == 0);
|
||||
@@ -680,7 +665,6 @@ void ggml_vec_dot_q5_K_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, c
|
||||
for (int l = 0; l < 8; ++l) sumf += sums[l];
|
||||
*s = sumf;
|
||||
}
|
||||
GGML_CPU_NATIVE_IMPL(ggml_vec_dot_q5_K_q8_K)
|
||||
|
||||
void ggml_vec_dot_q6_K_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
|
||||
assert(n % QK_K == 0);
|
||||
@@ -736,7 +720,6 @@ void ggml_vec_dot_q6_K_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, c
|
||||
for (int l = 0; l < 8; ++l) sumf += sums[l];
|
||||
*s = sumf;
|
||||
}
|
||||
GGML_CPU_NATIVE_IMPL(ggml_vec_dot_q6_K_q8_K)
|
||||
|
||||
void ggml_vec_dot_iq2_xxs_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
|
||||
assert(n % QK_K == 0);
|
||||
@@ -779,7 +762,6 @@ void ggml_vec_dot_iq2_xxs_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs
|
||||
}
|
||||
*s = 0.125f * sumf;
|
||||
}
|
||||
GGML_CPU_NATIVE_IMPL(ggml_vec_dot_iq2_xxs_q8_K)
|
||||
|
||||
void ggml_vec_dot_iq2_xs_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
|
||||
assert(n % QK_K == 0);
|
||||
@@ -830,7 +812,6 @@ void ggml_vec_dot_iq2_xs_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs,
|
||||
}
|
||||
*s = 0.125f * sumf;
|
||||
}
|
||||
GGML_CPU_NATIVE_IMPL(ggml_vec_dot_iq2_xs_q8_K)
|
||||
|
||||
void ggml_vec_dot_iq2_s_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
|
||||
assert(n % QK_K == 0);
|
||||
@@ -883,7 +864,6 @@ void ggml_vec_dot_iq2_s_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs,
|
||||
|
||||
*s = 0.125f * sumf;
|
||||
}
|
||||
GGML_CPU_NATIVE_IMPL(ggml_vec_dot_iq2_s_q8_K)
|
||||
|
||||
void ggml_vec_dot_iq3_xxs_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
|
||||
assert(n % QK_K == 0);
|
||||
@@ -928,7 +908,6 @@ void ggml_vec_dot_iq3_xxs_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs
|
||||
}
|
||||
*s = 0.25f * sumf;
|
||||
}
|
||||
GGML_CPU_NATIVE_IMPL(ggml_vec_dot_iq3_xxs_q8_K)
|
||||
|
||||
void ggml_vec_dot_iq3_s_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
|
||||
assert(n % QK_K == 0);
|
||||
@@ -985,7 +964,6 @@ void ggml_vec_dot_iq3_s_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs,
|
||||
}
|
||||
*s = sumf;
|
||||
}
|
||||
GGML_CPU_NATIVE_IMPL(ggml_vec_dot_iq3_s_q8_K)
|
||||
|
||||
void ggml_vec_dot_iq1_s_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
|
||||
assert(n % QK_K == 0);
|
||||
@@ -1029,7 +1007,6 @@ void ggml_vec_dot_iq1_s_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs,
|
||||
|
||||
*s = sumf;
|
||||
}
|
||||
GGML_CPU_NATIVE_IMPL(ggml_vec_dot_iq1_s_q8_K)
|
||||
|
||||
void ggml_vec_dot_iq1_m_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
|
||||
assert(n % QK_K == 0);
|
||||
@@ -1091,7 +1068,6 @@ void ggml_vec_dot_iq1_m_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs,
|
||||
|
||||
*s = sumf;
|
||||
}
|
||||
GGML_CPU_NATIVE_IMPL(ggml_vec_dot_iq1_m_q8_K)
|
||||
|
||||
void ggml_vec_dot_iq4_nl_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
|
||||
assert(nrc == 1);
|
||||
@@ -1121,7 +1097,6 @@ void ggml_vec_dot_iq4_nl_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs,
|
||||
}
|
||||
*s = sumf;
|
||||
}
|
||||
GGML_CPU_NATIVE_IMPL(ggml_vec_dot_iq4_nl_q8_0)
|
||||
|
||||
void ggml_vec_dot_iq4_xs_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
|
||||
assert(nrc == 1);
|
||||
@@ -1168,7 +1143,6 @@ void ggml_vec_dot_iq4_xs_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs,
|
||||
}
|
||||
*s = sumf;
|
||||
}
|
||||
GGML_CPU_NATIVE_IMPL(ggml_vec_dot_iq4_xs_q8_K)
|
||||
|
||||
// ============================ 4-bit non-linear quants
|
||||
|
||||
|
||||
Reference in New Issue
Block a user