gguf: gguf_writer refactor (#15691)

* gguf: split gguf writer into base and buf impl
* gguf: templated gguf write out
* gguf: file based writer (avoid writing everything to memory first!)
* examples(llama2c): fix log not being the same level and compiler nits
This commit is contained in:
Erik Scholz
2025-09-05 11:34:28 +02:00
committed by GitHub
parent c610b6c11b
commit a81283820a
2 changed files with 107 additions and 32 deletions

View File

@@ -333,17 +333,17 @@ static void print_params(struct my_llama_hparams * params) {
}
static void print_tensor_info(const struct ggml_context * ctx) {
for (auto t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
for (auto * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
LOG_INF("%s: Allocating ", __func__);
int64_t total = 1;
int i = 0;
for (; i < ggml_n_dims(t); ++i) {
if (i > 0) LOG("x ");
LOG("[%" PRId64 "] ", t->ne[i]);
if (i > 0) { LOG_INF("x "); }
LOG_INF("[%" PRId64 "] ", t->ne[i]);
total *= t->ne[i];
}
if (i > 1) LOG("= [%" PRId64 "] ", total);
LOG("float space for %s\n", ggml_get_name(t));
if (i > 1) { LOG_INF("= [%" PRId64 "] ", total); }
LOG_INF("float space for %s\n", ggml_get_name(t));
}
}