convert : support non-mxfp4 HF model (#15153)

* convert : support non-mxfp4 HF model
* rm redundant check
* disable debug check
2025-10-27 08:21:30 +00:00 · 2025-08-07 23:26:03 +02:00
parent c4f53563df
commit 50aa938901
2 changed files with 17 additions and 6 deletions
--- a/src/llama-quant.cpp
+++ b/src/llama-quant.cpp
@@ -999,7 +999,7 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std::
                new_size += llama_tensor_quantize_impl(new_type, f32_data_03, new_data_03, chunk_size, nrows, n_per_row, imatrix_03, workers, nthread_use);

                // TODO: temporary sanity check that the F16 -> MXFP4 is lossless
-#if 1
+#if 0
                if (new_type == GGML_TYPE_MXFP4) {
                    auto * x = f32_data_03;