imatrix : use GGUF regardless of the output filename

The legacy format can only be produced with `--output-format dat`.
This commit is contained in:
Francis Couture-Harpin
2025-07-23 23:08:03 -04:00
parent 53f65c354e
commit 1ef3cc1a87
4 changed files with 14 additions and 17 deletions

View File

@@ -2629,10 +2629,10 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
).set_examples({LLAMA_EXAMPLE_IMATRIX}));
// Registers the `--output-format` option for the imatrix example; the accepted
// values are "gguf" and "dat", anything else throws std::invalid_argument.
// NOTE(review): this listing looks like a rendered diff with the +/- markers
// stripped, so two variants of the help text and of the handler appear back to
// back below. Presumably the `imat_out_type` lines are the pre-change version
// and the `imat_dat` lines the post-change version — confirm against the repo.
add_opt(common_arg(
{"--output-format"}, "{gguf,dat}",
// help-text variant describing a filename-dependent default
string_format("output format for imatrix file (default: gguf except when output filename ends with .dat)"),
// help-text variant reporting the default derived from params.imat_dat
string_format("output format for imatrix file (default: %s)", params.imat_dat ? "dat" : "gguf"),
[](common_params & params, const std::string & value) {
// handler variant storing the choice in the enum field imat_out_type
/**/ if (value == "gguf") { params.imat_out_type = COMMON_IMATRIX_FORMAT_GGUF; }
else if (value == "dat") { params.imat_out_type = COMMON_IMATRIX_FORMAT_DAT; }
// handler variant storing the choice in the boolean imat_dat (true selects "dat")
/**/ if (value == "gguf") { params.imat_dat = false; }
else if (value == "dat") { params.imat_dat = true; }
// any other value is rejected
else { throw std::invalid_argument("invalid output format"); }
}
).set_examples({LLAMA_EXAMPLE_IMATRIX}));

View File

@@ -233,12 +233,6 @@ enum common_reasoning_format {
COMMON_REASONING_FORMAT_DEEPSEEK, // Extract thinking tag contents and return as `message.reasoning_content`, including in streaming deltas.
};
// Selector for the format of the output imatrix file.
// NOTE(review): the hunk header (-233,12 +233,6) and the commit message suggest
// this enum is on the removed side of the diff, replaced by a boolean flag — confirm.
enum common_imatrix_format_type {
COMMON_IMATRIX_FORMAT_AUTO, // presumably: choose the format from the output filename — TODO confirm
COMMON_IMATRIX_FORMAT_GGUF,
COMMON_IMATRIX_FORMAT_DAT, // legacy
};
struct common_params {
int32_t n_predict = -1; // new tokens to predict
int32_t n_ctx = 4096; // context size
@@ -437,7 +431,7 @@ struct common_params {
int32_t n_out_freq = 10; // output the imatrix every n_out_freq iterations
int32_t n_save_freq = 0; // save the imatrix every n_save_freq iterations
int32_t i_chunk = 0; // start processing from this chunk
common_imatrix_format_type imat_out_type = COMMON_IMATRIX_FORMAT_AUTO; // format of the output imatrix
bool imat_dat = false; // whether the legacy imatrix.dat format should be output
bool process_output = false; // collect data for the output tensor
bool compute_ppl = true; // whether to compute perplexity