mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-11-03 09:22:01 +00:00
imatrix : use GGUF by default
The old (legacy) format is still used when the output filename ends with `.dat`, but this can be overridden with `--output-format`.
This commit is contained in:
@@ -2627,6 +2627,15 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
||||
params.n_out_freq = value;
|
||||
}
|
||||
).set_examples({LLAMA_EXAMPLE_IMATRIX}));
|
||||
add_opt(common_arg(
|
||||
{"--output-format"}, "{gguf,dat}",
|
||||
string_format("output format for imatrix file (default: gguf except when output filename ends with .dat)"),
|
||||
[](common_params & params, const std::string & value) {
|
||||
/**/ if (value == "gguf") { params.imat_out_type = COMMON_IMATRIX_FORMAT_GGUF; }
|
||||
else if (value == "dat") { params.imat_out_type = COMMON_IMATRIX_FORMAT_DAT; }
|
||||
else { throw std::invalid_argument("invalid output format"); }
|
||||
}
|
||||
).set_examples({LLAMA_EXAMPLE_IMATRIX}));
|
||||
add_opt(common_arg(
|
||||
{"--save-frequency"}, "N",
|
||||
string_format("save an imatrix copy every N iterations (default: %d)", params.n_save_freq),
|
||||
|
||||
@@ -233,6 +233,12 @@ enum common_reasoning_format {
|
||||
COMMON_REASONING_FORMAT_DEEPSEEK, // Extract thinking tag contents and return as `message.reasoning_content`, including in streaming deltas.
|
||||
};
|
||||
|
||||
// Output file format for the imatrix tool.
//
// Stored in common_params::imat_out_type and set explicitly via the
// `--output-format {gguf,dat}` command-line option.
enum common_imatrix_format_type {
    // No explicit choice was made. Per the `--output-format` help text the
    // result is GGUF, except when the output filename ends with ".dat".
    COMMON_IMATRIX_FORMAT_AUTO,
    // Selected with `--output-format gguf`.
    COMMON_IMATRIX_FORMAT_GGUF,
    // Selected with `--output-format dat`.
    COMMON_IMATRIX_FORMAT_DAT, // legacy
};
|
||||
|
||||
struct common_params {
|
||||
int32_t n_predict = -1; // new tokens to predict
|
||||
int32_t n_ctx = 4096; // context size
|
||||
@@ -431,6 +437,7 @@ struct common_params {
|
||||
int32_t n_out_freq = 10; // output the imatrix every n_out_freq iterations
|
||||
int32_t n_save_freq = 0; // save the imatrix every n_save_freq iterations
|
||||
int32_t i_chunk = 0; // start processing from this chunk
|
||||
common_imatrix_format_type imat_out_type = COMMON_IMATRIX_FORMAT_AUTO; // format of the output imatrix
|
||||
|
||||
bool process_output = false; // collect data for the output tensor
|
||||
bool compute_ppl = true; // whether to compute perplexity
|
||||
|
||||
Reference in New Issue
Block a user