imatrix : use GGUF by default

The old format is still used when the output filename ends with .dat, but this can be overridden with --output-format.
2025-11-03 09:22:01 +00:00 · 2025-07-23 21:33:53 -04:00
parent a12363bbf0
commit 53f65c354e
4 changed files with 24 additions and 5 deletions
--- a/common/arg.cpp
+++ b/common/arg.cpp
@@ -2627,6 +2627,15 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
            params.n_out_freq = value;
        }
    ).set_examples({LLAMA_EXAMPLE_IMATRIX}));
+    add_opt(common_arg(
+        {"--output-format"}, "{gguf,dat}",
+        string_format("output format for imatrix file (default: gguf except when output filename ends with .dat)"),
+        [](common_params & params, const std::string & value) { // an explicit value replaces the COMMON_IMATRIX_FORMAT_AUTO default of params.imat_out_type
+            /**/ if (value == "gguf") { params.imat_out_type = COMMON_IMATRIX_FORMAT_GGUF; } // the leading empty comment only lines this branch up with the "else if" below
+            else if (value == "dat")  { params.imat_out_type = COMMON_IMATRIX_FORMAT_DAT;  }
+            else { throw std::invalid_argument("invalid output format"); } // any value other than "gguf" or "dat" is rejected
+        }
+    ).set_examples({LLAMA_EXAMPLE_IMATRIX}));
    add_opt(common_arg(
        {"--save-frequency"}, "N",
        string_format("save an imatrix copy every N iterations (default: %d)", params.n_save_freq),
--- a/common/common.h
+++ b/common/common.h
@@ -233,6 +233,12 @@ enum common_reasoning_format {
    COMMON_REASONING_FORMAT_DEEPSEEK,        // Extract thinking tag contents and return as `message.reasoning_content`, including in streaming deltas.
 };

+enum common_imatrix_format_type {
+    COMMON_IMATRIX_FORMAT_AUTO, // default; per the --output-format help text: gguf unless the output filename ends with .dat
+    COMMON_IMATRIX_FORMAT_GGUF, // selected with --output-format gguf
+    COMMON_IMATRIX_FORMAT_DAT,  // legacy; selected with --output-format dat
+};
+
 struct common_params {
    int32_t n_predict             =    -1; // new tokens to predict
    int32_t n_ctx                 =  4096; // context size
@@ -431,6 +437,7 @@ struct common_params {
    int32_t n_out_freq  = 10; // output the imatrix every n_out_freq iterations
    int32_t n_save_freq =  0; // save the imatrix every n_save_freq iterations
    int32_t i_chunk     =  0; // start processing from this chunk
+    common_imatrix_format_type imat_out_type = COMMON_IMATRIX_FORMAT_AUTO; // format of the output imatrix

    bool process_output  = false; // collect data for the output tensor
    bool compute_ppl     = true;  // whether to compute perplexity