imatrix : use GGUF regardless of the output filename
The legacy format can only be produced with --output-format dat
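To make the change concrete, here is a short usage sketch (file and model names are illustrative; the flags are the ones documented in the README hunks below). After this commit the output filename no longer selects the format, and the legacy binary format is only written when `--output-format dat` is passed explicitly:

```bash
# writes the GGUF imatrix format even though the output filename ends in .dat
./llama-imatrix -m ggml-model-f16.gguf -f calibration-data.txt -o imatrix.dat -ngl 99

# the legacy binary format now has to be requested explicitly
./llama-imatrix -m ggml-model-f16.gguf -f calibration-data.txt --output-format dat -o imatrix.dat -ngl 99
```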
@@ -2629,10 +2629,10 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
     ).set_examples({LLAMA_EXAMPLE_IMATRIX}));
     add_opt(common_arg(
         {"--output-format"}, "{gguf,dat}",
-        string_format("output format for imatrix file (default: gguf except when output filename ends with .dat)"),
+        string_format("output format for imatrix file (default: %s)", params.imat_dat ? "dat" : "gguf"),
         [](common_params & params, const std::string & value) {
-            /**/ if (value == "gguf") { params.imat_out_type = COMMON_IMATRIX_FORMAT_GGUF; }
-            else if (value == "dat") { params.imat_out_type = COMMON_IMATRIX_FORMAT_DAT; }
+            /**/ if (value == "gguf") { params.imat_dat = false; }
+            else if (value == "dat") { params.imat_dat = true; }
            else { throw std::invalid_argument("invalid output format"); }
         }
     ).set_examples({LLAMA_EXAMPLE_IMATRIX}));

@@ -233,12 +233,6 @@ enum common_reasoning_format {
     COMMON_REASONING_FORMAT_DEEPSEEK, // Extract thinking tag contents and return as `message.reasoning_content`, including in streaming deltas.
 };
 
-enum common_imatrix_format_type {
-    COMMON_IMATRIX_FORMAT_AUTO,
-    COMMON_IMATRIX_FORMAT_GGUF,
-    COMMON_IMATRIX_FORMAT_DAT, // legacy
-};
-
 struct common_params {
     int32_t n_predict = -1; // new tokens to predict
     int32_t n_ctx = 4096; // context size

@@ -437,7 +431,7 @@ struct common_params {
     int32_t n_out_freq = 10; // output the imatrix every n_out_freq iterations
     int32_t n_save_freq = 0; // save the imatrix every n_save_freq iterations
     int32_t i_chunk = 0; // start processing from this chunk
-    common_imatrix_format_type imat_out_type = COMMON_IMATRIX_FORMAT_AUTO; // format of the output imatrix
+    bool imat_dat = false; // whether the legacy imatrix.dat format should be output
 
     bool process_output = false; // collect data for the output tensor
     bool compute_ppl = true; // whether to compute perplexity

@@ -20,7 +20,7 @@ The parameters in square brackets are optional and have the following meaning:
 * `-lv | --verbosity` specifies the verbosity level. If set to `0`, no output other than the perplexity of the processed chunks will be generated. If set to `1`, each time the results are saved a message is written to `stderr`. If `>=2`, a message is output each time data is collected for any tensor. Default verbosity level is `1`.
 * `-o | --output-file` specifies the name of the file where the computed data will be stored. If missing `imatrix.gguf` is used.
 * `-ofreq | --output-frequency` specifies how often the so far computed result is saved to disk. Default is 10 (i.e., every 10 chunks)
-* `--output-format` specifies the output format of the generated imatrix file. Either "gguf", or "dat" (the legacy format). Defaults to "gguf" unless the output filename ends with `.dat`.
+* `--output-format` specifies the output format of the generated imatrix file. Either "gguf", or "dat" (the legacy format). Defaults to "gguf".
 * `--save-frequency` specifies how often to save a copy of the imatrix in a separate file. Default is 0 (i.e., never)
 * `--process-output` specifies if data will be collected for the `output.weight` tensor. Typically, it is better not to utilize the importance matrix when quantizing `output.weight`, so this is set to `false` by default.
 * `--in-file` one or more existing imatrix files to load and combine. Useful for merging files from multiple runs/datasets.

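For readers skimming the option list above, a combined invocation using those flags might look like the following sketch; the values are purely illustrative and not taken from the patch:

```bash
# compute an imatrix with GGUF output, writing progress every 10 chunks and
# keeping a separate snapshot every 50 chunks (values are illustrative)
./llama-imatrix -m ggml-model-f16.gguf -f calibration-data.txt \
    -o imatrix.gguf --output-frequency 10 --save-frequency 50 \
    --process-output -lv 1 -ngl 99
```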
@@ -46,14 +46,19 @@ Recent versions of `llama-imatrix` store data in GGUF format by default. For the
 
 ```bash
 # generate and save the imatrix using legacy format
-./llama-imatrix -m ggml-model-f16.gguf -f calibration-data.txt -o imatrix-legcy-format.dat -ngl 99
+./llama-imatrix -m ggml-model-f16.gguf -f calibration-data.txt --output-format dat -o imatrix-legcy-format.dat -ngl 99
 ```
 
 ```bash
-# covert legacy (binary) imatrix format to new (GGUF) format
+# convert legacy (binary) imatrix format to new (GGUF) format
 ./llama-imatrix --in-file imatrix-legacy-format.dat -o imatrix-new-format.gguf
 ```
 
+```bash
+# convert new (GGUF) imatrix format to legacy (binary) format
+./llama-imatrix --in-file imatrix-new-format.gguf --output-format dat -o imatrix-legacy-format.dat
+```
+
 ```bash
 # combine existing imatrices
 ./llama-imatrix --in-file imatrix-prev-0.gguf --in-file imatrix-prev-1.gguf -o imatrix-combined.gguf

@@ -492,11 +492,9 @@ void IMatrixCollector::save_imatrix_legacy(int32_t ncall) const {
 
 void IMatrixCollector::save_imatrix(int32_t n_chunk) const {
     auto fname = m_params.out_file;
-    auto imat_type = m_params.imat_out_type;
+    bool use_legacy_format = m_params.imat_dat;
 
-    if ((imat_type == COMMON_IMATRIX_FORMAT_AUTO && string_ends_with(fname, ".dat")) ||
-        (imat_type == COMMON_IMATRIX_FORMAT_DAT)) {
-        LOG_WRN("\n%s: saving to legacy imatrix format\n", __func__);
+    if (use_legacy_format) {
         this->save_imatrix_legacy(n_chunk);
         return;
     }