Implement GGML_CPU_ALL_VARIANTS for ARM (#14080)

* ggml-cpu: Factor out feature detection build from x86

* ggml-cpu: Add ARM feature detection and scoring

This is analogous to cpu-feats-x86.cpp. However, to detect compile-time
activation of features, we rely on GGML_USE_<FEAT> which need to be set
in cmake, instead of GGML_<FEAT> that users would set for x86.

This is because on ARM, users specify features with GGML_CPU_ARM_ARCH,
rather than with individual flags.

* ggml-cpu: Implement GGML_CPU_ALL_VARIANTS for ARM

Like x86, however to pass around arch flags within cmake, we use
GGML_INTERNAL_<FEAT> as we don't have GGML_<FEAT>.

Some features are optional, so we may need to build multiple backends
per arch version (armv8.2_1, armv8.2_2, ...), and let the scoring
function sort out which one can be used.

* ggml-cpu: Limit ARM GGML_CPU_ALL_VARIANTS to Linux for now

The other platforms will need their own specific variants.

This also fixes the bug that the the variant-building branch was always
being executed as the else-branch of GGML_NATIVE=OFF. The branch is
moved to an elseif-branch which restores the previous behavior.
This commit is contained in:
Christian Kastner
2025-06-11 19:07:44 +00:00
committed by GitHub
parent d4e0d95cf5
commit 532802f938
3 changed files with 183 additions and 23 deletions

View File

@@ -0,0 +1,94 @@
#include "ggml-backend-impl.h"
#if defined(__aarch64__)
#if defined(__linux__)
#include <sys/auxv.h>
#elif defined(__APPLE__)
#include <sys/sysctl.h>
#endif
#if !defined(HWCAP2_I8MM)
#define HWCAP2_I8MM (1 << 13)
#endif
#if !defined(HWCAP2_SME)
#define HWCAP2_SME (1 << 23)
#endif
struct aarch64_features {
// has_neon not needed, aarch64 has NEON guaranteed
bool has_dotprod = false;
bool has_fp16_va = false;
bool has_sve = false;
bool has_sve2 = false;
bool has_i8mm = false;
bool has_sme = false;
aarch64_features() {
#if defined(__linux__)
uint32_t hwcap = getauxval(AT_HWCAP);
uint32_t hwcap2 = getauxval(AT_HWCAP2);
has_dotprod = !!(hwcap & HWCAP_ASIMDDP);
has_fp16_va = !!(hwcap & HWCAP_FPHP);
has_sve = !!(hwcap & HWCAP_SVE);
has_sve2 = !!(hwcap2 & HWCAP2_SVE2);
has_i8mm = !!(hwcap2 & HWCAP2_I8MM);
has_sme = !!(hwcap2 & HWCAP2_SME);
#elif defined(__APPLE__)
int oldp = 0;
size_t size = sizeof(oldp);
if (sysctlbyname("hw.optional.arm.FEAT_DotProd", &oldp, &size, NULL, 0) == 0) {
has_dotprod = static_cast<bool>(oldp);
}
if (sysctlbyname("hw.optional.arm.FEAT_I8MM", &oldp, &size, NULL, 0) == 0) {
has_i8mm = static_cast<bool>(oldp);
}
if (sysctlbyname("hw.optional.arm.FEAT_SME", &oldp, &size, NULL, 0) == 0) {
has_sme = static_cast<bool>(oldp);
}
// Apple apparently does not implement SVE yet
#endif
}
};
static int ggml_backend_cpu_aarch64_score() {
int score = 1;
aarch64_features af;
#ifdef GGML_USE_DOTPROD
if (!af.has_dotprod) { return 0; }
score += 1<<1;
#endif
#ifdef GGML_USE_FP16_VECTOR_ARITHMETIC
if (!af.has_fp16_va) { return 0; }
score += 1<<2;
#endif
#ifdef GGML_USE_SVE
if (!af.has_sve) { return 0; }
score += 1<<3;
#endif
#ifdef GGML_USE_MATMUL_INT8
if (!af.has_i8mm) { return 0; }
score += 1<<4;
#endif
#ifdef GGML_USE_SVE2
if (!af.has_sve2) { return 0; }
score += 1<<5;
#endif
#ifdef GGML_USE_SME
if (!af.has_sme) { return 0; }
score += 1<<6;
#endif
return score;
}
GGML_BACKEND_DL_SCORE_IMPL(ggml_backend_cpu_aarch64_score)
# endif // defined(__aarch64__)