mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-11-03 09:22:01 +00:00 
			
		
		
		
	llama : refactor model loading code (#2620)
* llama : style formatting + remove helper methods * llama : fix quantization using gguf tool * llama : simplify gguf_file_saver * llama : fix method names * llama : simplify write_header() * llama : no need to pass full file loader to the file saver just gguf_ctx * llama : gguf_file_saver write I32 * llama : refactor tensor names (#2622) * gguf: update tensor names searched in quantization * gguf : define tensor names as constants * gguf : initial write API (not tested yet) * gguf : write to file API (not tested) * gguf : initial write API ready + example * gguf : fix header write * gguf : fixes + simplify example + add ggml_nbytes_pad() * gguf : minor * llama : replace gguf_file_saver with new gguf write API * gguf : streaming support when writing files * gguf : remove oboslete write methods * gguf : remove obosolete gguf_get_arr_xxx API * llama : simplify gguf_file_loader * llama : move hparams and vocab from gguf_file_loader to llama_model_loader * llama : merge gguf-util.h in llama.cpp * llama : reorder definitions in .cpp to match .h * llama : minor simplifications * llama : refactor llama_model_loader (WIP) wip : remove ggml_ctx from llama_model_loader wip : merge gguf_file_loader in llama_model_loader * llama : fix shape prints * llama : fix Windows build + fix norm_rms_eps key * llama : throw error on missing KV paris in model meta data * llama : improve printing + log meta data * llama : switch print order of meta data --------- Co-authored-by: M. Yusuf Sarıgöz <yusufsarigoz@gmail.com>
This commit is contained in:
		
							
								
								
									
										597
									
								
								ggml.c
									
									
									
									
									
								
							
							
						
						
									
										597
									
								
								ggml.c
									
									
									
									
									
								
							@@ -213,10 +213,10 @@ inline static void * ggml_aligned_malloc(size_t size) {
 | 
			
		||||
                error_desc = "insufficient memory";
 | 
			
		||||
                break;
 | 
			
		||||
        }
 | 
			
		||||
        GGML_PRINT("%s: %s (attempted to allocate %6.2f MB)\n",
 | 
			
		||||
            __func__, error_desc, size/(1024.0*1024.0));
 | 
			
		||||
        GGML_PRINT("%s: %s (attempted to allocate %6.2f MB)\n", __func__, error_desc, size/(1024.0*1024.0));
 | 
			
		||||
        return NULL;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    return aligned_memory;
 | 
			
		||||
}
 | 
			
		||||
#define GGML_ALIGNED_MALLOC(size)  ggml_aligned_malloc(size)
 | 
			
		||||
@@ -4109,7 +4109,11 @@ size_t ggml_nbytes(const struct ggml_tensor * tensor) {
 | 
			
		||||
    //
 | 
			
		||||
    // is enough, but just in case, adding the second part
 | 
			
		||||
 | 
			
		||||
    return GGML_PAD(MAX(tensor->ne[3]*tensor->nb[3], (ggml_nelements(tensor)*GGML_TYPE_SIZE[tensor->type])/GGML_BLCK_SIZE[tensor->type]), GGML_MEM_ALIGN);
 | 
			
		||||
    return MAX(tensor->ne[3]*tensor->nb[3], (ggml_nelements(tensor)*GGML_TYPE_SIZE[tensor->type])/GGML_BLCK_SIZE[tensor->type]);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
size_t ggml_nbytes_pad(const struct ggml_tensor * tensor) {
 | 
			
		||||
    return GGML_PAD(ggml_nbytes(tensor), GGML_MEM_ALIGN);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
size_t ggml_nbytes_split(const struct ggml_tensor * tensor, int nrows_split) {
 | 
			
		||||
@@ -16899,7 +16903,7 @@ void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname) {
 | 
			
		||||
    // compute size of intermediate results
 | 
			
		||||
    // TODO: does not take into account scratch buffers !!!!
 | 
			
		||||
    for (int i = 0; i < cgraph->n_nodes; ++i) {
 | 
			
		||||
        size_eval += ggml_nbytes(cgraph->nodes[i]);
 | 
			
		||||
        size_eval += ggml_nbytes_pad(cgraph->nodes[i]);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // print
 | 
			
		||||
@@ -18579,6 +18583,19 @@ static const size_t GGUF_TYPE_SIZE[GGUF_TYPE_COUNT] = {
 | 
			
		||||
};
 | 
			
		||||
static_assert(GGUF_TYPE_COUNT == 10, "GGUF_TYPE_COUNT != 10");
 | 
			
		||||
 | 
			
		||||
static const char * GGUF_TYPE_NAME[GGUF_TYPE_COUNT] = {
 | 
			
		||||
    [GGUF_TYPE_UINT8]   = "uint8",
 | 
			
		||||
    [GGUF_TYPE_INT8]    = "int8",
 | 
			
		||||
    [GGUF_TYPE_UINT16]  = "uint16",
 | 
			
		||||
    [GGUF_TYPE_INT16]   = "int16",
 | 
			
		||||
    [GGUF_TYPE_UINT32]  = "uint32",
 | 
			
		||||
    [GGUF_TYPE_INT32]   = "int32",
 | 
			
		||||
    [GGUF_TYPE_FLOAT32] = "float32",
 | 
			
		||||
    [GGUF_TYPE_BOOL]    = "bool",
 | 
			
		||||
    [GGUF_TYPE_STRING]  = "string",
 | 
			
		||||
    [GGUF_TYPE_ARRAY]   = "array",
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
union gguf_value {
 | 
			
		||||
    uint8_t  uint8;
 | 
			
		||||
    int8_t   int8;
 | 
			
		||||
@@ -18613,8 +18630,6 @@ struct gguf_header {
 | 
			
		||||
    uint32_t version;
 | 
			
		||||
    uint32_t n_tensors;
 | 
			
		||||
    uint32_t n_kv;
 | 
			
		||||
 | 
			
		||||
    struct gguf_kv * kv;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
struct gguf_tensor_info {
 | 
			
		||||
@@ -18622,44 +18637,69 @@ struct gguf_tensor_info {
 | 
			
		||||
 | 
			
		||||
    uint32_t n_dims;
 | 
			
		||||
    uint32_t ne[GGML_MAX_DIMS];
 | 
			
		||||
    uint32_t n_elms; // TODO: is this needed?
 | 
			
		||||
 | 
			
		||||
    enum ggml_type type;
 | 
			
		||||
 | 
			
		||||
    uint64_t offset; // offset from start of `data`, must be a multiple of `ALIGNMENT`
 | 
			
		||||
 | 
			
		||||
    // for writing API
 | 
			
		||||
    const void * data;
 | 
			
		||||
    size_t size;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
struct gguf_context {
 | 
			
		||||
    struct gguf_header        header;
 | 
			
		||||
    struct gguf_header header;
 | 
			
		||||
 | 
			
		||||
    struct gguf_kv          * kv;
 | 
			
		||||
    struct gguf_tensor_info * infos;
 | 
			
		||||
 | 
			
		||||
    size_t alignment;
 | 
			
		||||
    size_t offset;    // offset of `data` from beginning of file
 | 
			
		||||
    size_t size_data; // size of `data` in bytes
 | 
			
		||||
    size_t size;      // size of `data` in bytes
 | 
			
		||||
 | 
			
		||||
    //uint8_t * padding;
 | 
			
		||||
    uint8_t * data;
 | 
			
		||||
    void * data;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
static bool gguf_fread_el(void * dst, size_t size, FILE * file, size_t * offset) {
 | 
			
		||||
static bool gguf_fread_el(FILE * file, void * dst, size_t size, size_t * offset) {
 | 
			
		||||
    const size_t n = fread(dst, 1, size, file);
 | 
			
		||||
    *offset += n;
 | 
			
		||||
    return n == size;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static bool gguf_fread_str(struct gguf_str * p, FILE * file, size_t * offset) {
 | 
			
		||||
static bool gguf_fread_str(FILE * file, struct gguf_str * p, size_t * offset) {
 | 
			
		||||
    p->n    = 0;
 | 
			
		||||
    p->data = NULL;
 | 
			
		||||
 | 
			
		||||
    bool ok = true;
 | 
			
		||||
 | 
			
		||||
    // TODO: how to avoid mallocs for strings?
 | 
			
		||||
    ok = ok && gguf_fread_el(&p->n,    sizeof(p->n), file, offset); p->data = calloc(p->n + 1, 1);
 | 
			
		||||
    ok = ok && gguf_fread_el( p->data, p->n,         file, offset);
 | 
			
		||||
    ok = ok && gguf_fread_el(file, &p->n,    sizeof(p->n), offset); p->data = calloc(p->n + 1, 1);
 | 
			
		||||
    ok = ok && gguf_fread_el(file,  p->data, p->n,         offset);
 | 
			
		||||
 | 
			
		||||
    return ok;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
struct gguf_context * gguf_init_empty(void) {
 | 
			
		||||
    struct gguf_context * ctx = GGML_ALIGNED_MALLOC(sizeof(struct gguf_context));
 | 
			
		||||
 | 
			
		||||
    ctx->header.magic     = GGUF_MAGIC;
 | 
			
		||||
    ctx->header.version   = GGUF_VERSION;
 | 
			
		||||
    ctx->header.n_tensors = 0;
 | 
			
		||||
    ctx->header.n_kv      = 0;
 | 
			
		||||
 | 
			
		||||
    ctx->kv    = NULL;
 | 
			
		||||
    ctx->infos = NULL;
 | 
			
		||||
 | 
			
		||||
    ctx->alignment = GGUF_DEFAULT_ALIGNMENT;
 | 
			
		||||
    ctx->offset    = 0;
 | 
			
		||||
    ctx->size      = 0;
 | 
			
		||||
 | 
			
		||||
    ctx->data = NULL;
 | 
			
		||||
 | 
			
		||||
    return ctx;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_params params) {
 | 
			
		||||
    FILE * file = fopen(fname, "rb");
 | 
			
		||||
    if (!file) {
 | 
			
		||||
@@ -18673,7 +18713,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
 | 
			
		||||
 | 
			
		||||
    // check the magic before making allocations
 | 
			
		||||
    {
 | 
			
		||||
        gguf_fread_el(&magic, sizeof(magic), file, &offset);
 | 
			
		||||
        gguf_fread_el(file, &magic, sizeof(magic), &offset);
 | 
			
		||||
 | 
			
		||||
        if (magic != GGUF_MAGIC) {
 | 
			
		||||
            fprintf(stderr, "%s: invalid magic number %08x\n", __func__, magic);
 | 
			
		||||
@@ -18689,14 +18729,14 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
 | 
			
		||||
    // read the header
 | 
			
		||||
    {
 | 
			
		||||
        ctx->header.magic = magic;
 | 
			
		||||
        ctx->header.kv    = NULL;
 | 
			
		||||
 | 
			
		||||
        ctx->kv    = NULL;
 | 
			
		||||
        ctx->infos = NULL;
 | 
			
		||||
        ctx->data  = NULL;
 | 
			
		||||
 | 
			
		||||
        ok = ok && gguf_fread_el(&ctx->header.version,   sizeof(ctx->header.version),   file, &offset);
 | 
			
		||||
        ok = ok && gguf_fread_el(&ctx->header.n_tensors, sizeof(ctx->header.n_tensors), file, &offset);
 | 
			
		||||
        ok = ok && gguf_fread_el(&ctx->header.n_kv,      sizeof(ctx->header.n_kv),      file, &offset);
 | 
			
		||||
        ok = ok && gguf_fread_el(file, &ctx->header.version,   sizeof(ctx->header.version),   &offset);
 | 
			
		||||
        ok = ok && gguf_fread_el(file, &ctx->header.n_tensors, sizeof(ctx->header.n_tensors), &offset);
 | 
			
		||||
        ok = ok && gguf_fread_el(file, &ctx->header.n_kv,      sizeof(ctx->header.n_kv),      &offset);
 | 
			
		||||
 | 
			
		||||
        if (!ok) {
 | 
			
		||||
            fprintf(stderr, "%s: failed to read header\n", __func__);
 | 
			
		||||
@@ -18708,33 +18748,33 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
 | 
			
		||||
 | 
			
		||||
    // read the kv pairs
 | 
			
		||||
    {
 | 
			
		||||
        ctx->header.kv = GGML_ALIGNED_MALLOC(ctx->header.n_kv * sizeof(struct gguf_kv));
 | 
			
		||||
        ctx->kv = GGML_ALIGNED_MALLOC(ctx->header.n_kv * sizeof(struct gguf_kv));
 | 
			
		||||
 | 
			
		||||
        for (uint32_t i = 0; i < ctx->header.n_kv; ++i) {
 | 
			
		||||
            struct gguf_kv * kv = &ctx->header.kv[i];
 | 
			
		||||
            struct gguf_kv * kv = &ctx->kv[i];
 | 
			
		||||
 | 
			
		||||
            //fprintf(stderr, "%s: reading kv %d\n", __func__, i);
 | 
			
		||||
 | 
			
		||||
            ok = ok && gguf_fread_str(&kv->key,                          file, &offset);
 | 
			
		||||
          //ok = ok && gguf_fread_el (&kv->n_bytes, sizeof(kv->n_bytes), file, &offset);
 | 
			
		||||
            ok = ok && gguf_fread_el (&kv->type,    sizeof(kv->type),    file, &offset);
 | 
			
		||||
            ok = ok && gguf_fread_str(file, &kv->key,                          &offset);
 | 
			
		||||
          //ok = ok && gguf_fread_el (file, &kv->n_bytes, sizeof(kv->n_bytes), &offset);
 | 
			
		||||
            ok = ok && gguf_fread_el (file, &kv->type,    sizeof(kv->type),    &offset);
 | 
			
		||||
 | 
			
		||||
            //fprintf(stderr, "%s: reading kv with key %s\n", __func__, kv->key.data);
 | 
			
		||||
 | 
			
		||||
            switch (kv->type) {
 | 
			
		||||
                case GGUF_TYPE_UINT8:   ok = ok && gguf_fread_el (&kv->value.uint8,   sizeof(kv->value.uint8),   file, &offset); break;
 | 
			
		||||
                case GGUF_TYPE_INT8:    ok = ok && gguf_fread_el (&kv->value.int8,    sizeof(kv->value.int8),    file, &offset); break;
 | 
			
		||||
                case GGUF_TYPE_UINT16:  ok = ok && gguf_fread_el (&kv->value.uint16,  sizeof(kv->value.uint16),  file, &offset); break;
 | 
			
		||||
                case GGUF_TYPE_INT16:   ok = ok && gguf_fread_el (&kv->value.int16,   sizeof(kv->value.int16),   file, &offset); break;
 | 
			
		||||
                case GGUF_TYPE_UINT32:  ok = ok && gguf_fread_el (&kv->value.uint32,  sizeof(kv->value.uint32),  file, &offset); break;
 | 
			
		||||
                case GGUF_TYPE_INT32:   ok = ok && gguf_fread_el (&kv->value.int32,   sizeof(kv->value.int32),   file, &offset); break;
 | 
			
		||||
                case GGUF_TYPE_FLOAT32: ok = ok && gguf_fread_el (&kv->value.float32, sizeof(kv->value.float32), file, &offset); break;
 | 
			
		||||
                case GGUF_TYPE_BOOL:    ok = ok && gguf_fread_el (&kv->value.bool_,   sizeof(kv->value.bool_),   file, &offset); break;
 | 
			
		||||
                case GGUF_TYPE_STRING:  ok = ok && gguf_fread_str(&kv->value.str,                                file, &offset); break;
 | 
			
		||||
                case GGUF_TYPE_UINT8:   ok = ok && gguf_fread_el (file, &kv->value.uint8,   sizeof(kv->value.uint8),   &offset); break;
 | 
			
		||||
                case GGUF_TYPE_INT8:    ok = ok && gguf_fread_el (file, &kv->value.int8,    sizeof(kv->value.int8),    &offset); break;
 | 
			
		||||
                case GGUF_TYPE_UINT16:  ok = ok && gguf_fread_el (file, &kv->value.uint16,  sizeof(kv->value.uint16),  &offset); break;
 | 
			
		||||
                case GGUF_TYPE_INT16:   ok = ok && gguf_fread_el (file, &kv->value.int16,   sizeof(kv->value.int16),   &offset); break;
 | 
			
		||||
                case GGUF_TYPE_UINT32:  ok = ok && gguf_fread_el (file, &kv->value.uint32,  sizeof(kv->value.uint32),  &offset); break;
 | 
			
		||||
                case GGUF_TYPE_INT32:   ok = ok && gguf_fread_el (file, &kv->value.int32,   sizeof(kv->value.int32),   &offset); break;
 | 
			
		||||
                case GGUF_TYPE_FLOAT32: ok = ok && gguf_fread_el (file, &kv->value.float32, sizeof(kv->value.float32), &offset); break;
 | 
			
		||||
                case GGUF_TYPE_BOOL:    ok = ok && gguf_fread_el (file, &kv->value.bool_,   sizeof(kv->value.bool_),   &offset); break;
 | 
			
		||||
                case GGUF_TYPE_STRING:  ok = ok && gguf_fread_str(file, &kv->value.str,                                &offset); break;
 | 
			
		||||
                case GGUF_TYPE_ARRAY:
 | 
			
		||||
                    {
 | 
			
		||||
                        ok = ok && gguf_fread_el(&kv->value.arr.type, sizeof(kv->value.arr.type), file, &offset);
 | 
			
		||||
                        ok = ok && gguf_fread_el(&kv->value.arr.n,    sizeof(kv->value.arr.n),    file, &offset);
 | 
			
		||||
                        ok = ok && gguf_fread_el(file, &kv->value.arr.type, sizeof(kv->value.arr.type), &offset);
 | 
			
		||||
                        ok = ok && gguf_fread_el(file, &kv->value.arr.n,    sizeof(kv->value.arr.n),    &offset);
 | 
			
		||||
 | 
			
		||||
                        switch (kv->value.arr.type) {
 | 
			
		||||
                            case GGUF_TYPE_UINT8:
 | 
			
		||||
@@ -18747,17 +18787,17 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
 | 
			
		||||
                            case GGUF_TYPE_BOOL:
 | 
			
		||||
                                {
 | 
			
		||||
                                    kv->value.arr.data = malloc(kv->value.arr.n * GGUF_TYPE_SIZE[kv->value.arr.type]);
 | 
			
		||||
                                    ok = ok && gguf_fread_el(kv->value.arr.data, kv->value.arr.n * GGUF_TYPE_SIZE[kv->value.arr.type], file, &offset);
 | 
			
		||||
                                    ok = ok && gguf_fread_el(file, kv->value.arr.data, kv->value.arr.n * GGUF_TYPE_SIZE[kv->value.arr.type], &offset);
 | 
			
		||||
                                } break;
 | 
			
		||||
                            case GGUF_TYPE_STRING:
 | 
			
		||||
                                {
 | 
			
		||||
                                    kv->value.arr.data = malloc(kv->value.arr.n * sizeof(struct gguf_str));
 | 
			
		||||
                                    for (uint32_t j = 0; j < kv->value.arr.n; ++j) {
 | 
			
		||||
                                        ok = ok && gguf_fread_str(&((struct gguf_str *) kv->value.arr.data)[j], file, &offset);
 | 
			
		||||
                                        ok = ok && gguf_fread_str(file, &((struct gguf_str *) kv->value.arr.data)[j], &offset);
 | 
			
		||||
                                    }
 | 
			
		||||
                                } break;
 | 
			
		||||
                            case GGUF_TYPE_ARRAY:
 | 
			
		||||
                            case GGUF_TYPE_COUNT: GGML_ASSERT(false && "invalid type");
 | 
			
		||||
                            case GGUF_TYPE_COUNT: GGML_ASSERT(false && "invalid type"); break;
 | 
			
		||||
                        };
 | 
			
		||||
                    } break;
 | 
			
		||||
                case GGUF_TYPE_COUNT: GGML_ASSERT(false && "invalid type");
 | 
			
		||||
@@ -18787,14 +18827,13 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
 | 
			
		||||
                info->ne[j] = 1;
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            ok = ok && gguf_fread_str(&info->name,                          file, &offset);
 | 
			
		||||
            ok = ok && gguf_fread_el (&info->n_dims, sizeof(info->n_dims),  file, &offset);
 | 
			
		||||
            ok = ok && gguf_fread_str(file, &info->name,                          &offset);
 | 
			
		||||
            ok = ok && gguf_fread_el (file, &info->n_dims, sizeof(info->n_dims),  &offset);
 | 
			
		||||
            for (uint32_t j = 0; j < info->n_dims; ++j) {
 | 
			
		||||
                ok = ok && gguf_fread_el(&info->ne[j], sizeof(info->ne[j]), file, &offset);
 | 
			
		||||
                ok = ok && gguf_fread_el(file, &info->ne[j], sizeof(info->ne[j]), &offset);
 | 
			
		||||
            }
 | 
			
		||||
          //ok = ok && gguf_fread_el (&info->n_elms, sizeof(info->n_elms),  file, &offset);
 | 
			
		||||
            ok = ok && gguf_fread_el (&info->type,   sizeof(info->type),    file, &offset);
 | 
			
		||||
            ok = ok && gguf_fread_el (&info->offset, sizeof(info->offset),  file, &offset);
 | 
			
		||||
            ok = ok && gguf_fread_el (file, &info->type,   sizeof(info->type),    &offset);
 | 
			
		||||
            ok = ok && gguf_fread_el (file, &info->offset, sizeof(info->offset),  &offset);
 | 
			
		||||
 | 
			
		||||
            if (!ok) {
 | 
			
		||||
                fprintf(stderr, "%s: failed to read tensor info\n", __func__);
 | 
			
		||||
@@ -18827,8 +18866,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
 | 
			
		||||
 | 
			
		||||
    // compute the total size of the data section, taking into account the alignment
 | 
			
		||||
    {
 | 
			
		||||
 | 
			
		||||
        ctx->size_data = 0;
 | 
			
		||||
        ctx->size = 0;
 | 
			
		||||
        for (uint32_t i = 0; i < ctx->header.n_tensors; ++i) {
 | 
			
		||||
            struct gguf_tensor_info * info = &ctx->infos[i];
 | 
			
		||||
 | 
			
		||||
@@ -18848,7 +18886,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
 | 
			
		||||
 | 
			
		||||
            const size_t size_cur = (ne*ggml_type_size(info->type))/ggml_blck_size(info->type);
 | 
			
		||||
 | 
			
		||||
            ctx->size_data += GGML_PAD(size_cur, ctx->alignment);
 | 
			
		||||
            ctx->size += GGML_PAD(size_cur, ctx->alignment);
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
@@ -18862,7 +18900,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
 | 
			
		||||
        const size_t mem_size =
 | 
			
		||||
            params.no_alloc ?
 | 
			
		||||
            (ctx->header.n_tensors    )*ggml_tensor_overhead() :
 | 
			
		||||
            (ctx->header.n_tensors + 1)*ggml_tensor_overhead() + ctx->size_data;
 | 
			
		||||
            (ctx->header.n_tensors + 1)*ggml_tensor_overhead() + ctx->size;
 | 
			
		||||
 | 
			
		||||
        struct ggml_init_params pdata = {
 | 
			
		||||
            .mem_size   = mem_size,
 | 
			
		||||
@@ -18877,12 +18915,12 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
 | 
			
		||||
        struct ggml_tensor * data = NULL;
 | 
			
		||||
 | 
			
		||||
        if (params.no_alloc == false) {
 | 
			
		||||
            data = ggml_new_tensor_1d(ctx_data, GGML_TYPE_I8, ctx->size_data);
 | 
			
		||||
            data = ggml_new_tensor_1d(ctx_data, GGML_TYPE_I8, ctx->size);
 | 
			
		||||
 | 
			
		||||
            ok = ok && data != NULL;
 | 
			
		||||
 | 
			
		||||
            // read the binary blob with the tensor data
 | 
			
		||||
            ok = ok && gguf_fread_el(data->data, ctx->size_data, file, &offset);
 | 
			
		||||
            ok = ok && gguf_fread_el(file, data->data, ctx->size, &offset);
 | 
			
		||||
 | 
			
		||||
            if (!ok) {
 | 
			
		||||
                fprintf(stderr, "%s: failed to read tensor data\n", __func__);
 | 
			
		||||
@@ -18944,10 +18982,10 @@ void gguf_free(struct gguf_context * ctx) {
 | 
			
		||||
        return;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if (ctx->header.kv) {
 | 
			
		||||
    if (ctx->kv) {
 | 
			
		||||
        // free string memory - not great..
 | 
			
		||||
        for (uint32_t i = 0; i < ctx->header.n_kv; ++i) {
 | 
			
		||||
            struct gguf_kv * kv = &ctx->header.kv[i];
 | 
			
		||||
            struct gguf_kv * kv = &ctx->kv[i];
 | 
			
		||||
 | 
			
		||||
            if (kv->key.data) {
 | 
			
		||||
                free(kv->key.data);
 | 
			
		||||
@@ -18974,7 +19012,7 @@ void gguf_free(struct gguf_context * ctx) {
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        GGML_ALIGNED_FREE(ctx->header.kv);
 | 
			
		||||
        GGML_ALIGNED_FREE(ctx->kv);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if (ctx->infos) {
 | 
			
		||||
@@ -18992,6 +19030,10 @@ void gguf_free(struct gguf_context * ctx) {
 | 
			
		||||
    GGML_ALIGNED_FREE(ctx);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
const char * gguf_type_name(enum gguf_type type) {
 | 
			
		||||
    return GGUF_TYPE_NAME[type];
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
int gguf_get_version(struct gguf_context * ctx) {
 | 
			
		||||
    return ctx->header.version;
 | 
			
		||||
}
 | 
			
		||||
@@ -19014,8 +19056,9 @@ int gguf_get_n_kv(struct gguf_context * ctx) {
 | 
			
		||||
 | 
			
		||||
int gguf_find_key(struct gguf_context * ctx, const char * key) {
 | 
			
		||||
    // return -1 if key not found
 | 
			
		||||
    int keyfound = -1;
 | 
			
		||||
 | 
			
		||||
    const int n_kv = gguf_get_n_kv(ctx);
 | 
			
		||||
    int keyfound   = -1;
 | 
			
		||||
 | 
			
		||||
    for (int i = 0; i < n_kv; ++i) {
 | 
			
		||||
        if (strcmp(key, gguf_get_key(ctx, i)) == 0) {
 | 
			
		||||
@@ -19028,71 +19071,87 @@ int gguf_find_key(struct gguf_context * ctx, const char * key) {
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
const char * gguf_get_key(struct gguf_context * ctx, int i) {
 | 
			
		||||
    return ctx->header.kv[i].key.data;
 | 
			
		||||
    return ctx->kv[i].key.data;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
enum gguf_type gguf_get_kv_type(struct gguf_context * ctx, int i) {
 | 
			
		||||
    return ctx->header.kv[i].type;
 | 
			
		||||
    return ctx->kv[i].type;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
enum gguf_type gguf_get_arr_type(struct gguf_context * ctx, int i) {
 | 
			
		||||
    return ctx->header.kv[i].value.arr.type;
 | 
			
		||||
    return ctx->kv[i].value.arr.type;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
const void * gguf_get_arr_data(struct gguf_context * ctx, int i) {
 | 
			
		||||
    return ctx->kv[i].value.arr.data;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
const char * gguf_get_arr_str(struct gguf_context * ctx, int key_id, int i) {
 | 
			
		||||
    struct gguf_kv * kv = &ctx->header.kv[key_id];
 | 
			
		||||
    struct gguf_kv * kv = &ctx->kv[key_id];
 | 
			
		||||
    struct gguf_str * str = &((struct gguf_str *) kv->value.arr.data)[i];
 | 
			
		||||
    return str->data;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
float gguf_get_arr_f32(struct gguf_context * ctx, int key_id, int i) {
 | 
			
		||||
    return ((float *) ctx->header.kv[key_id].value.arr.data)[i];
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
int gguf_get_arr_n(struct gguf_context * ctx, int i) {
 | 
			
		||||
    return ctx->header.kv[i].value.arr.n;
 | 
			
		||||
    return ctx->kv[i].value.arr.n;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
uint8_t gguf_get_val_u8(struct gguf_context * ctx, int i) {
 | 
			
		||||
    return ctx->header.kv[i].value.uint8;
 | 
			
		||||
    return ctx->kv[i].value.uint8;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
int8_t gguf_get_val_i8(struct gguf_context * ctx, int i) {
 | 
			
		||||
    return ctx->header.kv[i].value.int8;
 | 
			
		||||
    return ctx->kv[i].value.int8;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
uint16_t gguf_get_val_u16(struct gguf_context * ctx, int i) {
 | 
			
		||||
    return ctx->header.kv[i].value.uint16;
 | 
			
		||||
    return ctx->kv[i].value.uint16;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
int16_t gguf_get_val_i16(struct gguf_context * ctx, int i) {
 | 
			
		||||
    return ctx->header.kv[i].value.int16;
 | 
			
		||||
    return ctx->kv[i].value.int16;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
uint32_t gguf_get_val_u32(struct gguf_context * ctx, int i) {
 | 
			
		||||
    return ctx->header.kv[i].value.uint32;
 | 
			
		||||
    return ctx->kv[i].value.uint32;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
int32_t gguf_get_val_i32(struct gguf_context * ctx, int i) {
 | 
			
		||||
    return ctx->header.kv[i].value.int32;
 | 
			
		||||
    return ctx->kv[i].value.int32;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
float gguf_get_val_f32(struct gguf_context * ctx, int i) {
 | 
			
		||||
    return ctx->header.kv[i].value.float32;
 | 
			
		||||
    return ctx->kv[i].value.float32;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
bool gguf_get_val_bool(struct gguf_context * ctx, int i) {
 | 
			
		||||
    return ctx->header.kv[i].value.bool_;
 | 
			
		||||
    return ctx->kv[i].value.bool_;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
const char * gguf_get_val_str (struct gguf_context * ctx, int i) {
 | 
			
		||||
    return ctx->header.kv[i].value.str.data;
 | 
			
		||||
    return ctx->kv[i].value.str.data;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
int gguf_get_n_tensors(struct gguf_context * ctx) {
 | 
			
		||||
    return ctx->header.n_tensors;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
int gguf_find_tensor(struct gguf_context * ctx, const char * name) {
 | 
			
		||||
    // return -1 if tensor not found
 | 
			
		||||
    int tensorfound = -1;
 | 
			
		||||
 | 
			
		||||
    const int n_tensors = gguf_get_n_tensors(ctx);
 | 
			
		||||
 | 
			
		||||
    for (int i = 0; i < n_tensors; ++i) {
 | 
			
		||||
        if (strcmp(name, gguf_get_tensor_name(ctx, i)) == 0) {
 | 
			
		||||
            tensorfound = i;
 | 
			
		||||
            break;
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    return tensorfound;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
size_t gguf_get_tensor_offset(struct gguf_context * ctx, int i) {
 | 
			
		||||
    return ctx->infos[i].offset;
 | 
			
		||||
}
 | 
			
		||||
@@ -19101,6 +19160,400 @@ char * gguf_get_tensor_name(struct gguf_context * ctx, int i) {
 | 
			
		||||
    return ctx->infos[i].name.data;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// returns the index
 | 
			
		||||
static int gguf_get_or_add_key(struct gguf_context * ctx, const char * key) {
 | 
			
		||||
    const int idx = gguf_find_key(ctx, key);
 | 
			
		||||
    if (idx >= 0) {
 | 
			
		||||
        return idx;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    const int n_kv = gguf_get_n_kv(ctx);
 | 
			
		||||
 | 
			
		||||
    ctx->kv = realloc(ctx->kv, (n_kv + 1) * sizeof(struct gguf_kv));
 | 
			
		||||
    ctx->kv[n_kv].key.n    = strlen(key) + 1;
 | 
			
		||||
    ctx->kv[n_kv].key.data = strdup(key);
 | 
			
		||||
    ctx->header.n_kv++;
 | 
			
		||||
 | 
			
		||||
    return n_kv;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void gguf_set_val_u8(struct gguf_context * ctx, const char * key, uint8_t val) {
 | 
			
		||||
    const int idx = gguf_get_or_add_key(ctx, key);
 | 
			
		||||
 | 
			
		||||
    ctx->kv[idx].type        = GGUF_TYPE_UINT8;
 | 
			
		||||
    ctx->kv[idx].value.uint8 = val;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void gguf_set_val_i8(struct gguf_context * ctx, const char * key, int8_t val) {
 | 
			
		||||
    const int idx = gguf_get_or_add_key(ctx, key);
 | 
			
		||||
 | 
			
		||||
    ctx->kv[idx].type       = GGUF_TYPE_INT8;
 | 
			
		||||
    ctx->kv[idx].value.int8 = val;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void gguf_set_val_u16(struct gguf_context * ctx, const char * key, uint16_t val) {
 | 
			
		||||
    const int idx = gguf_get_or_add_key(ctx, key);
 | 
			
		||||
 | 
			
		||||
    ctx->kv[idx].type         = GGUF_TYPE_UINT16;
 | 
			
		||||
    ctx->kv[idx].value.uint16 = val;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void gguf_set_val_i16(struct gguf_context * ctx, const char * key, int16_t val) {
 | 
			
		||||
    const int idx = gguf_get_or_add_key(ctx, key);
 | 
			
		||||
 | 
			
		||||
    ctx->kv[idx].type        = GGUF_TYPE_INT16;
 | 
			
		||||
    ctx->kv[idx].value.int16 = val;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void gguf_set_val_u32(struct gguf_context * ctx, const char * key, uint32_t val) {
 | 
			
		||||
    const int idx = gguf_get_or_add_key(ctx, key);
 | 
			
		||||
 | 
			
		||||
    ctx->kv[idx].type         = GGUF_TYPE_UINT32;
 | 
			
		||||
    ctx->kv[idx].value.uint32 = val;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void gguf_set_val_i32(struct gguf_context * ctx, const char * key, int32_t val) {
 | 
			
		||||
    const int idx = gguf_get_or_add_key(ctx, key);
 | 
			
		||||
 | 
			
		||||
    ctx->kv[idx].type        = GGUF_TYPE_INT32;
 | 
			
		||||
    ctx->kv[idx].value.int32 = val;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void gguf_set_val_f32(struct gguf_context * ctx, const char * key, float val) {
 | 
			
		||||
    const int idx = gguf_get_or_add_key(ctx, key);
 | 
			
		||||
 | 
			
		||||
    ctx->kv[idx].type          = GGUF_TYPE_FLOAT32;
 | 
			
		||||
    ctx->kv[idx].value.float32 = val;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void gguf_set_val_bool(struct gguf_context * ctx, const char * key, bool val) {
 | 
			
		||||
    const int idx = gguf_get_or_add_key(ctx, key);
 | 
			
		||||
 | 
			
		||||
    ctx->kv[idx].type        = GGUF_TYPE_BOOL;
 | 
			
		||||
    ctx->kv[idx].value.bool_ = val;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void gguf_set_val_str(struct gguf_context * ctx, const char * key, const char * val) {
 | 
			
		||||
    const int idx = gguf_get_or_add_key(ctx, key);
 | 
			
		||||
 | 
			
		||||
    ctx->kv[idx].type           = GGUF_TYPE_STRING;
 | 
			
		||||
    ctx->kv[idx].value.str.n    = strlen(val) + 1;
 | 
			
		||||
    ctx->kv[idx].value.str.data = strdup(val);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void gguf_set_arr_data(struct gguf_context * ctx, const char * key, enum gguf_type type, const void * data, int n) {
 | 
			
		||||
    const int idx = gguf_get_or_add_key(ctx, key);
 | 
			
		||||
 | 
			
		||||
    ctx->kv[idx].type           = GGUF_TYPE_ARRAY;
 | 
			
		||||
    ctx->kv[idx].value.arr.type = type;
 | 
			
		||||
    ctx->kv[idx].value.arr.n    = n;
 | 
			
		||||
    ctx->kv[idx].value.arr.data = malloc(n*GGUF_TYPE_SIZE[type]);
 | 
			
		||||
    memcpy(ctx->kv[idx].value.arr.data, data, n*GGUF_TYPE_SIZE[type]);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void gguf_set_arr_str(struct gguf_context * ctx, const char * key, const char ** data, int n) {
 | 
			
		||||
    const int idx = gguf_get_or_add_key(ctx, key);
 | 
			
		||||
 | 
			
		||||
    ctx->kv[idx].type           = GGUF_TYPE_ARRAY;
 | 
			
		||||
    ctx->kv[idx].value.arr.type = GGUF_TYPE_STRING;
 | 
			
		||||
    ctx->kv[idx].value.arr.n    = n;
 | 
			
		||||
    ctx->kv[idx].value.arr.data = malloc(n*sizeof(struct gguf_str));
 | 
			
		||||
    for (int i = 0; i < n; i++) {
 | 
			
		||||
        struct gguf_str * str = &((struct gguf_str *)ctx->kv[idx].value.arr.data)[i];
 | 
			
		||||
        str->n    = strlen(data[i]) + 1;
 | 
			
		||||
        str->data = strdup(data[i]);
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// set or add KV pairs from another context
 | 
			
		||||
void gguf_set_kv(struct gguf_context * ctx, struct gguf_context * src) {
 | 
			
		||||
    for (uint32_t i = 0; i < src->header.n_kv; i++) {
 | 
			
		||||
        switch (src->kv[i].type) {
 | 
			
		||||
            case GGUF_TYPE_UINT8:   gguf_set_val_u8  (ctx, src->kv[i].key.data, src->kv[i].value.uint8);    break;
 | 
			
		||||
            case GGUF_TYPE_INT8:    gguf_set_val_i8  (ctx, src->kv[i].key.data, src->kv[i].value.int8);     break;
 | 
			
		||||
            case GGUF_TYPE_UINT16:  gguf_set_val_u16 (ctx, src->kv[i].key.data, src->kv[i].value.uint16);   break;
 | 
			
		||||
            case GGUF_TYPE_INT16:   gguf_set_val_i16 (ctx, src->kv[i].key.data, src->kv[i].value.int16);    break;
 | 
			
		||||
            case GGUF_TYPE_UINT32:  gguf_set_val_u32 (ctx, src->kv[i].key.data, src->kv[i].value.uint32);   break;
 | 
			
		||||
            case GGUF_TYPE_INT32:   gguf_set_val_i32 (ctx, src->kv[i].key.data, src->kv[i].value.int32);    break;
 | 
			
		||||
            case GGUF_TYPE_FLOAT32: gguf_set_val_f32 (ctx, src->kv[i].key.data, src->kv[i].value.float32);  break;
 | 
			
		||||
            case GGUF_TYPE_BOOL:    gguf_set_val_bool(ctx, src->kv[i].key.data, src->kv[i].value.bool_);    break;
 | 
			
		||||
            case GGUF_TYPE_STRING:  gguf_set_val_str (ctx, src->kv[i].key.data, src->kv[i].value.str.data); break;
 | 
			
		||||
            case GGUF_TYPE_ARRAY:
 | 
			
		||||
                {
 | 
			
		||||
                    if (src->kv[i].value.arr.type == GGUF_TYPE_STRING) {
 | 
			
		||||
                        const char ** data = malloc(src->kv[i].value.arr.n*sizeof(char *));
 | 
			
		||||
                        for (uint32_t j = 0; j < src->kv[i].value.arr.n; j++) {
 | 
			
		||||
                            data[j] = ((struct gguf_str *)src->kv[i].value.arr.data)[j].data;
 | 
			
		||||
                        }
 | 
			
		||||
                        gguf_set_arr_str(ctx, src->kv[i].key.data, data, src->kv[i].value.arr.n);
 | 
			
		||||
                        free(data);
 | 
			
		||||
                    } if (src->kv[i].value.arr.type == GGUF_TYPE_ARRAY) {
 | 
			
		||||
                        GGML_ASSERT(false && "nested arrays not supported");
 | 
			
		||||
                    } else {
 | 
			
		||||
                        gguf_set_arr_data(ctx, src->kv[i].key.data, src->kv[i].value.arr.type, src->kv[i].value.arr.data, src->kv[i].value.arr.n);
 | 
			
		||||
                    }
 | 
			
		||||
                } break;
 | 
			
		||||
            case GGUF_TYPE_COUNT:  GGML_ASSERT(false && "invalid type"); break;
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void gguf_add_tensor(
 | 
			
		||||
             struct gguf_context * ctx,
 | 
			
		||||
        const struct ggml_tensor * tensor) {
 | 
			
		||||
    const int idx = ctx->header.n_tensors;
 | 
			
		||||
    ctx->infos = realloc(ctx->infos, (idx + 1)*sizeof(struct gguf_tensor_info));
 | 
			
		||||
 | 
			
		||||
    ctx->infos[idx].name.n    = strlen(tensor->name) + 1;
 | 
			
		||||
    ctx->infos[idx].name.data = strdup(tensor->name);
 | 
			
		||||
 | 
			
		||||
    for (int i = 0; i < GGML_MAX_DIMS; ++i) {
 | 
			
		||||
        ctx->infos[idx].ne[i] = 1;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    ctx->infos[idx].n_dims = tensor->n_dims;
 | 
			
		||||
    for (int i = 0; i < tensor->n_dims; i++) {
 | 
			
		||||
        ctx->infos[idx].ne[i] = tensor->ne[i];
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    ctx->infos[idx].type   = tensor->type;
 | 
			
		||||
    ctx->infos[idx].offset = 0;
 | 
			
		||||
    ctx->infos[idx].data   = tensor->data;
 | 
			
		||||
    ctx->infos[idx].size   = ggml_nbytes(tensor);
 | 
			
		||||
 | 
			
		||||
    if (ctx->header.n_tensors > 0) {
 | 
			
		||||
        ctx->infos[idx].offset = ctx->infos[idx - 1].offset + GGML_PAD(ctx->infos[idx - 1].size, ctx->alignment);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    ctx->header.n_tensors++;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void gguf_set_tensor_type(struct gguf_context * ctx, const char * name, enum ggml_type type) {
 | 
			
		||||
    const int idx = gguf_find_tensor(ctx, name);
 | 
			
		||||
    if (idx < 0) {
 | 
			
		||||
        GGML_ASSERT(false && "tensor not found");
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    ctx->infos[idx].type = type;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void gguf_set_tensor_data(struct gguf_context * ctx, const char * name, const void * data, size_t size) {
 | 
			
		||||
    const int idx = gguf_find_tensor(ctx, name);
 | 
			
		||||
    if (idx < 0) {
 | 
			
		||||
        GGML_ASSERT(false && "tensor not found");
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    ctx->infos[idx].data = data;
 | 
			
		||||
    ctx->infos[idx].size = size;
 | 
			
		||||
 | 
			
		||||
    // update offsets
 | 
			
		||||
    for (uint32_t i = idx + 1; i < ctx->header.n_tensors; ++i) {
 | 
			
		||||
        ctx->infos[i].offset = ctx->infos[i - 1].offset + GGML_PAD(ctx->infos[i - 1].size, ctx->alignment);
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
//static void gguf_fwrite_str(FILE * file, const struct gguf_str * val) {
 | 
			
		||||
//    fwrite(&val->n,   sizeof(val->n),    1, file);
 | 
			
		||||
//    fwrite(val->data, sizeof(char), val->n, file);
 | 
			
		||||
//}
 | 
			
		||||
//
 | 
			
		||||
//static void gguf_fwrite_el(FILE * file, const void * val, size_t size) {
 | 
			
		||||
//    fwrite(val, sizeof(char), size, file);
 | 
			
		||||
//}
 | 
			
		||||
 | 
			
		||||
// growable in-memory buffer used when serializing a gguf file
struct gguf_buf {
    void * data;   // backing storage; NULL in "measure only" mode (no bytes are stored)
    size_t size;   // current capacity in bytes
    size_t offset; // number of bytes written (or measured) so far
};
 | 
			
		||||
 | 
			
		||||
static struct gguf_buf gguf_buf_init(size_t size) {
 | 
			
		||||
    struct gguf_buf buf = {
 | 
			
		||||
        /*buf.data   =*/ size == 0 ? NULL : malloc(size),
 | 
			
		||||
        /*buf.size   =*/ size,
 | 
			
		||||
        /*buf.offset =*/ 0,
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    return buf;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// release the buffer's backing storage
static void gguf_buf_free(struct gguf_buf buf) {
    // free(NULL) is a no-op per the C standard, so the previous NULL guard was redundant
    free(buf.data);
}
 | 
			
		||||
 | 
			
		||||
static void gguf_buf_grow(struct gguf_buf * buf, size_t size) {
 | 
			
		||||
    if (buf->offset + size > buf->size) {
 | 
			
		||||
        buf->size = 1.5*(buf->offset + size);
 | 
			
		||||
        if (buf->data) {
 | 
			
		||||
            buf->data = realloc(buf->data, buf->size);
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static void gguf_bwrite_str(struct gguf_buf * buf, const struct gguf_str * val) {
 | 
			
		||||
    gguf_buf_grow(buf, sizeof(val->n) + val->n);
 | 
			
		||||
 | 
			
		||||
    buf->data && memcpy((char *) buf->data + buf->offset, &val->n, sizeof(val->n));
 | 
			
		||||
    buf->offset += sizeof(val->n);
 | 
			
		||||
 | 
			
		||||
    buf->data && memcpy((char *) buf->data + buf->offset, val->data, val->n);
 | 
			
		||||
    buf->offset += val->n;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static void gguf_bwrite_el(struct gguf_buf * buf, const void * val, size_t el_size) {
 | 
			
		||||
    gguf_buf_grow(buf, el_size);
 | 
			
		||||
 | 
			
		||||
    buf->data && memcpy((char *) buf->data + buf->offset, val, el_size);
 | 
			
		||||
    buf->offset += el_size;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// serialize the whole gguf context into buf: header, KV pairs, tensor infos,
// alignment padding, and (unless only_meta) the tensor data itself;
// with a measure-only buf (data == NULL) this computes sizes without storing bytes
static void gguf_write_to_buf(struct gguf_context * ctx, struct gguf_buf * buf, bool only_meta) {
    // write header
    gguf_bwrite_el(buf, &ctx->header.magic,     sizeof(ctx->header.magic));
    gguf_bwrite_el(buf, &ctx->header.version,   sizeof(ctx->header.version));
    gguf_bwrite_el(buf, &ctx->header.n_tensors, sizeof(ctx->header.n_tensors));
    gguf_bwrite_el(buf, &ctx->header.n_kv,      sizeof(ctx->header.n_kv));

    // write key-value pairs
    for (uint32_t i = 0; i < ctx->header.n_kv; ++i) {
        struct gguf_kv * kv = &ctx->kv[i];

        // each pair is: key string, type tag, then the type-specific payload
        gguf_bwrite_str(buf, &kv->key);
        gguf_bwrite_el (buf, &kv->type, sizeof(kv->type));

        switch (kv->type) {
            case GGUF_TYPE_UINT8:   gguf_bwrite_el( buf, &kv->value.uint8,   sizeof(kv->value.uint8)  ); break;
            case GGUF_TYPE_INT8:    gguf_bwrite_el (buf, &kv->value.int8,    sizeof(kv->value.int8)   ); break;
            case GGUF_TYPE_UINT16:  gguf_bwrite_el (buf, &kv->value.uint16,  sizeof(kv->value.uint16) ); break;
            case GGUF_TYPE_INT16:   gguf_bwrite_el (buf, &kv->value.int16,   sizeof(kv->value.int16)  ); break;
            case GGUF_TYPE_UINT32:  gguf_bwrite_el (buf, &kv->value.uint32,  sizeof(kv->value.uint32) ); break;
            case GGUF_TYPE_INT32:   gguf_bwrite_el (buf, &kv->value.int32,   sizeof(kv->value.int32)  ); break;
            case GGUF_TYPE_FLOAT32: gguf_bwrite_el (buf, &kv->value.float32, sizeof(kv->value.float32)); break;
            case GGUF_TYPE_BOOL:    gguf_bwrite_el (buf, &kv->value.bool_,   sizeof(kv->value.bool_)  ); break;
            case GGUF_TYPE_STRING:  gguf_bwrite_str(buf, &kv->value.str                               ); break;
            case GGUF_TYPE_ARRAY:
                {
                    // arrays are: element type tag, element count, then the elements
                    gguf_bwrite_el(buf, &kv->value.arr.type, sizeof(kv->value.arr.type));
                    gguf_bwrite_el(buf, &kv->value.arr.n,    sizeof(kv->value.arr.n)   );

                    switch (kv->value.arr.type) {
                        case GGUF_TYPE_UINT8:
                        case GGUF_TYPE_INT8:
                        case GGUF_TYPE_UINT16:
                        case GGUF_TYPE_INT16:
                        case GGUF_TYPE_UINT32:
                        case GGUF_TYPE_INT32:
                        case GGUF_TYPE_FLOAT32:
                        case GGUF_TYPE_BOOL:
                            {
                                // fixed-size elements are written as one contiguous blob
                                gguf_bwrite_el(buf, kv->value.arr.data, kv->value.arr.n * GGUF_TYPE_SIZE[kv->value.arr.type]);
                            } break;
                        case GGUF_TYPE_STRING:
                            {
                                // strings are variable-length, so each one is written individually
                                for (uint32_t j = 0; j < kv->value.arr.n; ++j) {
                                    gguf_bwrite_str(buf, &((struct gguf_str *) kv->value.arr.data)[j]);
                                }
                            } break;
                        case GGUF_TYPE_ARRAY:
                        case GGUF_TYPE_COUNT: GGML_ASSERT(false && "invalid type"); break;
                    };
                } break;
            case GGUF_TYPE_COUNT: GGML_ASSERT(false && "invalid type");
        };
    }

    // write tensor infos
    for (uint32_t i = 0; i < ctx->header.n_tensors; ++i) {
        struct gguf_tensor_info * info = &ctx->infos[i];

        gguf_bwrite_str(buf, &info->name);
        gguf_bwrite_el (buf, &info->n_dims, sizeof(info->n_dims));
        // only the used dimensions are stored
        for (uint32_t j = 0; j < info->n_dims; ++j) {
            gguf_bwrite_el(buf, &info->ne[j], sizeof(info->ne[j]));
        }
        gguf_bwrite_el(buf, &info->type,   sizeof(info->type));
        gguf_bwrite_el(buf, &info->offset, sizeof(info->offset));
    }

    // we require the data section to be aligned, so take into account any padding
    {
        const size_t offset     = buf->offset;
        const size_t offset_pad = GGML_PAD(offset, ctx->alignment);

        if (offset_pad != offset) {
            uint8_t pad = 0;
            for (size_t i = 0; i < offset_pad - offset; ++i) {
                gguf_bwrite_el(buf, &pad, sizeof(pad));
            }
        }
    }

    // note: the padding above is always emitted, so the meta size includes it
    if (only_meta) {
        return;
    }

    // running offset within the data section, used to cross-check info->offset
    size_t offset = 0;

    // write tensor data
    for (uint32_t i = 0; i < ctx->header.n_tensors; ++i) {
        struct gguf_tensor_info * info = &ctx->infos[i];

        const size_t size     = info->size;
        const size_t size_pad = GGML_PAD(size, ctx->alignment);

        gguf_bwrite_el(buf, info->data, size);

        // pad each tensor's data up to the alignment boundary
        if (size_pad != size) {
            uint8_t pad = 0;
            for (size_t j = 0; j < size_pad - size; ++j) {
                gguf_bwrite_el(buf, &pad, sizeof(pad));
            }
        }

        // the precomputed offset (see gguf_add_tensor) must match what we actually wrote
        GGML_ASSERT(offset == info->offset);

        offset += size_pad;
    }
}
 | 
			
		||||
 | 
			
		||||
void gguf_write_to_file(struct gguf_context * ctx, const char * fname, bool only_meta) {
 | 
			
		||||
    FILE * file = fopen(fname, "wb");
 | 
			
		||||
    if (!file) {
 | 
			
		||||
        GGML_ASSERT(false && "failed to open file for writing");
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    struct gguf_buf buf = gguf_buf_init(16*1024);
 | 
			
		||||
 | 
			
		||||
    gguf_write_to_buf(ctx, &buf, only_meta);
 | 
			
		||||
 | 
			
		||||
    fwrite(buf.data, 1, buf.offset, file);
 | 
			
		||||
 | 
			
		||||
    gguf_buf_free(buf);
 | 
			
		||||
 | 
			
		||||
    fclose(file);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
size_t gguf_get_meta_size(struct gguf_context * ctx) {
 | 
			
		||||
    // no allocs - only compute size
 | 
			
		||||
    struct gguf_buf buf = gguf_buf_init(0);
 | 
			
		||||
 | 
			
		||||
    gguf_write_to_buf(ctx, &buf, true);
 | 
			
		||||
 | 
			
		||||
    return buf.offset;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void gguf_get_meta_data(struct gguf_context * ctx, void * data) {
 | 
			
		||||
    struct gguf_buf buf = gguf_buf_init(16*1024);
 | 
			
		||||
 | 
			
		||||
    gguf_write_to_buf(ctx, &buf, true);
 | 
			
		||||
 | 
			
		||||
    memcpy(data, buf.data, buf.offset);
 | 
			
		||||
 | 
			
		||||
    gguf_buf_free(buf);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
int ggml_cpu_has_avx(void) {
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user