ggml-quants : Q2_2 now faster than Q4_K with AVX2

2025-11-07 09:57:00 +00:00 · 2024-06-19 22:12:43 -04:00
parent 48b73b8498
commit ef1e345c85
3 changed files with 64 additions and 144 deletions
--- a/convert-hf-to-gguf.py
+++ b/convert-hf-to-gguf.py
@@ -296,6 +296,8 @@ class Model:
                ))

                if self.ftype != gguf.LlamaFileType.ALL_F32 and extra_f16 and not extra_f32:
+                    # TODO: cleaner model-specific per-tensor types
+                    # NOTE: Q1_3 is only relevant for BitNet 1.58b
                    if self.ftype == gguf.LlamaFileType.MOSTLY_Q1_3 and not any(
                        self.match_model_tensor_name(new_name, key, None)
                        for key in [