convert : use reflinks for faster conversion

This commit is contained in:
Francis Couture-Harpin
2025-09-01 20:45:57 -04:00
parent e582f1ac63
commit f7394cdaf4
6 changed files with 266 additions and 60 deletions

View File

@@ -42,8 +42,8 @@ void ggml_print_backtrace(void);
# define MAX(a, b) ((a) > (b) ? (a) : (b))
#endif
// required for mmap as gguf only guarantees 32-byte alignment
#define TENSOR_ALIGNMENT 32
// required for mmap as gguf converted with reflinks from safetensors only guarantees 8-byte alignment
#define TENSOR_ALIGNMENT 8
// static_assert should be a #define, but if it's not,
// fall back to the _Static_assert C11 keyword.

View File

@@ -624,6 +624,8 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
ctx->size = 0;
for (size_t i = 0; i < ctx->info.size(); ++i) {
const gguf_tensor_info & ti = ctx->info[i];
// HACK: bypass the continuity check
ctx->size = ti.offset;
if (ti.offset != ctx->size) {
GGML_LOG_ERROR("%s: tensor '%s' has offset %" PRIu64 ", expected %zu\n",
__func__, ti.t.name, ti.offset, ctx->size);