	imatrix : use GGUF by default (#14842)
* imatrix : use GGUF by default
* imatrix : use GGUF regardless of the output filename

The legacy format can only be produced with --output-format dat
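A minimal usage sketch of the new default behavior (the model and calibration file names below are placeholders, not part of the patch):

```bash
# with no --output-format given, the imatrix is written in GGUF format
./llama-imatrix -m model.gguf -f calibration.txt -o imatrix.gguf

# the legacy binary format is now opt-in
./llama-imatrix -m model.gguf -f calibration.txt --output-format dat -o imatrix.dat
```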
@@ -2647,6 +2647,15 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
             params.n_out_freq = value;
         }
     ).set_examples({LLAMA_EXAMPLE_IMATRIX}));
+    add_opt(common_arg(
+        {"--output-format"}, "{gguf,dat}",
+        string_format("output format for imatrix file (default: %s)", params.imat_dat ? "dat" : "gguf"),
+        [](common_params & params, const std::string & value) {
+            /**/ if (value == "gguf") { params.imat_dat = false; }
+            else if (value == "dat")  { params.imat_dat = true;  }
+            else { throw std::invalid_argument("invalid output format"); }
+        }
+    ).set_examples({LLAMA_EXAMPLE_IMATRIX}));
     add_opt(common_arg(
         {"--save-frequency"}, "N",
         string_format("save an imatrix copy every N iterations (default: %d)", params.n_save_freq),

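The new option accepts exactly two values, mapping `gguf` to `imat_dat = false` and `dat` to `imat_dat = true`; anything else throws `std::invalid_argument("invalid output format")`. A short sketch of both accepted forms (file names are placeholders):

```bash
# the two accepted values, spelled out explicitly
./llama-imatrix -m model.gguf -f calibration.txt --output-format gguf -o imatrix.gguf
./llama-imatrix -m model.gguf -f calibration.txt --output-format dat  -o imatrix.dat

# any other value (e.g. "json") is rejected by the argument handler
#./llama-imatrix -m model.gguf -f calibration.txt --output-format json
```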
@@ -439,6 +439,7 @@ struct common_params {
     int32_t n_out_freq  = 10; // output the imatrix every n_out_freq iterations
     int32_t n_save_freq =  0; // save the imatrix every n_save_freq iterations
     int32_t i_chunk     =  0; // start processing from this chunk
+    bool    imat_dat    = false; // whether the legacy imatrix.dat format should be output
 
     bool process_output  = false; // collect data for the output tensor
     bool compute_ppl     = true;  // whether to compute perplexity

@@ -7,7 +7,7 @@ More information is available in <https://github.com/ggml-org/llama.cpp/pull/486
 
 ```
 ./llama-imatrix \
-    -m model.gguf -f some-text.txt [-o imatrix.gguf] [--no-ppl] \
+    -m model.gguf -f some-text.txt [-o imatrix.gguf] [--output-format {gguf,dat}] [--no-ppl] \
     [--process-output] [--chunk 123] [--save-frequency 0] [--output-frequency 10] \
     [--in-file imatrix-prev-0.gguf --in-file imatrix-prev-1.gguf ...] [--parse-special] \
     [--show-statistics] [...]
@@ -20,6 +20,7 @@ The parameters in square brackets are optional and have the following meaning:
 * `-lv | --verbosity` specifies the verbosity level. If set to `0`, no output other than the perplexity of the processed chunks will be generated. If set to `1`, each time the results are saved a message is written to `stderr`. If `>=2`, a message is output each time data is collected for any tensor. Default verbosity level is `1`.
 * `-o | --output-file` specifies the name of the file where the computed data will be stored. If missing `imatrix.gguf` is used.
 * `-ofreq | --output-frequency` specifies how often the so far computed result is saved to disk. Default is 10 (i.e., every 10 chunks)
+* `--output-format` specifies the output format of the generated imatrix file. Either "gguf", or "dat" (the legacy format). Defaults to "gguf".
 * `--save-frequency` specifies how often to save a copy of the imatrix in a separate file. Default is 0 (i.e., never)
 * `--process-output` specifies if data will be collected for the `output.weight` tensor. Typically, it is better not to utilize the importance matrix when quantizing `output.weight`, so this is set to `false` by default.
 * `--in-file` one or more existing imatrix files to load and combine. Useful for merging files from multiple runs/datasets.
@@ -45,14 +46,19 @@ Recent versions of `llama-imatrix` store data in GGUF format by default. For the
 
 ```bash
 # generate and save the imatrix using legacy format
-./llama-imatrix -m ggml-model-f16.gguf -f calibration-data.txt -o imatrix-legcy-format.dat -ngl 99
+./llama-imatrix -m ggml-model-f16.gguf -f calibration-data.txt --output-format dat -o imatrix-legcy-format.dat -ngl 99
 ```
 
 ```bash
-# covert legacy (binary) imatrix format to new (GGUF) format
+# convert legacy (binary) imatrix format to new (GGUF) format
 ./llama-imatrix --in-file imatrix-legacy-format.dat -o imatrix-new-format.gguf
 ```
 
+```bash
+# convert new (GGUF) imatrix format to legacy (binary) format
+./llama-imatrix --in-file imatrix-new-format.gguf --output-format dat -o imatrix-legacy-format.dat
+```
+
 ```bash
 # combine existing imatrices
 ./llama-imatrix --in-file imatrix-prev-0.gguf --in-file imatrix-prev-1.gguf -o imatrix-combined.gguf

@@ -26,7 +26,7 @@
 static void print_usage(int, char ** argv) {
     LOG("\nexample usage:\n");
     LOG("\n    %s \\\n"
-            "       -m model.gguf -f some-text.txt [-o imatrix.gguf] [--no-ppl] \\\n"
+            "       -m model.gguf -f some-text.txt [-o imatrix.gguf] [--output-format {gguf,dat}] [--no-ppl] \\\n"
             "       [--process-output] [--chunk 123] [--save-frequency 0] [--output-frequency 10] \\\n"
             "       [--in-file imatrix-prev-0.gguf --in-file imatrix-prev-1.gguf ...] [--parse-special] \\\n"
             "       [--show-statistics] [...]\n" , argv[0]);
@@ -506,13 +506,13 @@ void IMatrixCollector::save_imatrix_legacy(int32_t ncall) const {
 
 void IMatrixCollector::save_imatrix(int32_t n_chunk) const {
     auto fname = m_params.out_file;
+    bool use_legacy_format = m_params.imat_dat;
 
-    // TODO: use the new format in more cases
-    if (!string_ends_with(fname, ".gguf")) {
-        LOG_WRN("\n%s: saving to legacy imatrix format because output suffix is not .gguf\n", __func__);
+    if (use_legacy_format) {
         this->save_imatrix_legacy(n_chunk);
         return;
     }
+    // else, default to GGUF imatrix
 
     if (n_chunk > 0) {
         fname += ".at_";

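With the filename-suffix heuristic removed, the output name alone no longer selects the legacy writer; a sketch of the resulting behavior (file names are placeholders):

```bash
# previously this fell back to the legacy format because the suffix is not .gguf;
# it now writes GGUF data even though the file is named imatrix.dat
./llama-imatrix -m model.gguf -f calibration.txt -o imatrix.dat
```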