Mirror of https://github.com/ggml-org/llama.cpp.git
ggml : support AVX512VNNI (#6280)
This change makes some quantization formats (e.g. Q4_0, Q8_0) run faster on some architectures (e.g. AMD Zen 4).
@@ -132,7 +132,7 @@ static inline __m256 sum_i16_pairs_float(const __m256i x) {
 }
 
 static inline __m256 mul_sum_us8_pairs_float(const __m256i ax, const __m256i sy) {
-#if __AVXVNNI__
+#if defined(__AVXVNNI__) || defined(__AVX512VNNI__)
     const __m256i zero = _mm256_setzero_si256();
     const __m256i summed_pairs = _mm256_dpbusd_epi32(zero, ax, sy);
     return _mm256_cvtepi32_ps(summed_pairs);
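Why this helps: compilers targeting Zen 4 (e.g. GCC/Clang with -march=znver4) define __AVX512VNNI__ but not the VEX-encoded __AVXVNNI__, so the old guard fell through to the slower multiply-add fallback even though the hardware supports vpdpbusd. Below is a minimal sketch of both paths as a self-contained function. The fallback branch mirrors what ggml's non-VNNI code does via _mm256_maddubs_epi16 and _mm256_madd_epi16, but treat it as illustrative rather than the exact upstream code.

#include <immintrin.h>

// Sketch: horizontal dot product of unsigned bytes (ax) with signed
// bytes (sy), widened to 32-bit sums and converted to float.
static inline __m256 mul_sum_us8_pairs_float_sketch(const __m256i ax, const __m256i sy) {
#if defined(__AVXVNNI__) || defined(__AVX512VNNI__)
    // VNNI path: one vpdpbusd multiplies u8*s8 and accumulates each
    // group of four products directly into a 32-bit lane.
    const __m256i zero         = _mm256_setzero_si256();
    const __m256i summed_pairs = _mm256_dpbusd_epi32(zero, ax, sy);
    return _mm256_cvtepi32_ps(summed_pairs);
#else
    // Fallback path: vpmaddubsw produces saturated i16 pair sums,
    // then vpmaddwd against a vector of ones widens them to i32.
    const __m256i dot          = _mm256_maddubs_epi16(ax, sy);
    const __m256i ones         = _mm256_set1_epi16(1);
    const __m256i summed_pairs = _mm256_madd_epi16(ones, dot);
    return _mm256_cvtepi32_ps(summed_pairs);
#endif
}

On VNNI-capable parts the single vpdpbusd replaces the vpmaddubsw + vpmaddwd pair and sidesteps the i16 saturation in vpmaddubsw, which is why the Q4_0/Q8_0 dot-product kernels speed up once the preprocessor guard also accepts __AVX512VNNI__.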