mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-10-31 08:51:55 +00:00 
			
		
		
		
	gguf : add array support
This commit is contained in:
		| @@ -29,7 +29,7 @@ void gguf_ex_write_u64(std::ofstream & fout, size_t val) { | ||||
| } | ||||
|  | ||||
| template<typename T> | ||||
| void gguf_ex_write_param(std::ofstream & fout, const std::string & key, enum gguf_type type, const T & val) { | ||||
| void gguf_ex_write_val(std::ofstream & fout, const std::string & key, enum gguf_type type, const T & val) { | ||||
|     gguf_ex_write_str(fout, key); | ||||
|     fout.write((const char *) &type, sizeof(type)); | ||||
|     fout.write((const char *) &val,  sizeof(val)); | ||||
| @@ -38,13 +38,65 @@ void gguf_ex_write_param(std::ofstream & fout, const std::string & key, enum ggu | ||||
| } | ||||
|  | ||||
| template<> | ||||
| void gguf_ex_write_param<std::string>(std::ofstream & fout, const std::string & key, enum gguf_type type, const std::string & val) { | ||||
| void gguf_ex_write_val<std::string>(std::ofstream & fout, const std::string & key, enum gguf_type type, const std::string & val) { | ||||
|     gguf_ex_write_str(fout, key); | ||||
|     fout.write((const char *) &type, sizeof(type)); | ||||
|  | ||||
|     const int32_t n = val.size(); | ||||
|     fout.write((const char *) &n, sizeof(n)); | ||||
|     fout.write(val.c_str(), n); | ||||
|  | ||||
|     fprintf(stdout, "%s: write param: %s = %s\n", __func__, key.c_str(), val.c_str()); | ||||
| } | ||||
|  | ||||
| template<typename T> | ||||
| void gguf_ex_write_arr(std::ofstream & fout, const std::string & key, enum gguf_type type, const std::vector<T> & val) { | ||||
|     gguf_ex_write_str(fout, key); | ||||
|     { | ||||
|         const enum gguf_type tarr = GGUF_TYPE_ARRAY; | ||||
|         fout.write((const char *) &tarr, sizeof(tarr)); | ||||
|     } | ||||
|  | ||||
|     const int32_t n = val.size(); | ||||
|     fout.write((const char *) &type, sizeof(type)); | ||||
|     fout.write((const char *) &n,    sizeof(n)); | ||||
|     fout.write((const char *) val.data(), n * sizeof(T)); | ||||
|  | ||||
|     fprintf(stdout, "%s: write param: %s = [", __func__, key.c_str()); | ||||
|     for (int i = 0; i < n; ++i) { | ||||
|         fprintf(stdout, "%s", to_string(val[i]).c_str()); | ||||
|         if (i < n - 1) { | ||||
|             fprintf(stdout, ", "); | ||||
|         } | ||||
|     } | ||||
|     fprintf(stdout, "]\n"); | ||||
| } | ||||
|  | ||||
| template<> | ||||
| void gguf_ex_write_arr<std::string>(std::ofstream & fout, const std::string & key, enum gguf_type type, const std::vector<std::string> & val) { | ||||
|     gguf_ex_write_str(fout, key); | ||||
|     { | ||||
|         const enum gguf_type tarr = GGUF_TYPE_ARRAY; | ||||
|         fout.write((const char *) &tarr, sizeof(tarr)); | ||||
|     } | ||||
|  | ||||
|     const int32_t n = val.size(); | ||||
|     fout.write((const char *) &type, sizeof(type)); | ||||
|     fout.write((const char *) &n,    sizeof(n)); | ||||
|     for (int i = 0; i < n; ++i) { | ||||
|         const int32_t nstr = val[i].size(); | ||||
|         fout.write((const char *) &nstr, sizeof(nstr)); | ||||
|         fout.write(val[i].c_str(), nstr); | ||||
|     } | ||||
|  | ||||
|     fprintf(stdout, "%s: write param: %s = [", __func__, key.c_str()); | ||||
|     for (int i = 0; i < n; ++i) { | ||||
|         fprintf(stdout, "%s", val[i].c_str()); | ||||
|         if (i < n - 1) { | ||||
|             fprintf(stdout, ", "); | ||||
|         } | ||||
|     } | ||||
|     fprintf(stdout, "]\n"); | ||||
| } | ||||
|  | ||||
| bool gguf_ex_write(const std::string & fname) { | ||||
| @@ -60,8 +112,9 @@ bool gguf_ex_write(const std::string & fname) { | ||||
|         fout.write((const char *) &version, sizeof(version)); | ||||
|     } | ||||
|  | ||||
|     // NOTE: these have to match the output below! | ||||
|     const int n_tensors = 10; | ||||
|     const int n_kv = 9; | ||||
|     const int n_kv      = 12; | ||||
|  | ||||
|     fout.write((const char*) &n_tensors, sizeof(n_tensors)); | ||||
|     fout.write((const char*) &n_kv, sizeof(n_kv)); | ||||
| @@ -70,17 +123,21 @@ bool gguf_ex_write(const std::string & fname) { | ||||
|  | ||||
|     // kv data | ||||
|     { | ||||
|         gguf_ex_write_param< uint8_t>(fout, "some.parameter.uint8",   GGUF_TYPE_UINT8,   0x12); | ||||
|         gguf_ex_write_param<  int8_t>(fout, "some.parameter.int8",    GGUF_TYPE_INT8,   -0x13); | ||||
|         gguf_ex_write_param<uint16_t>(fout, "some.parameter.uint16",  GGUF_TYPE_UINT16,  0x1234); | ||||
|         gguf_ex_write_param< int16_t>(fout, "some.parameter.int16",   GGUF_TYPE_INT16,  -0x1235); | ||||
|         gguf_ex_write_param<uint32_t>(fout, "some.parameter.uint32",  GGUF_TYPE_UINT32,  0x12345678); | ||||
|         gguf_ex_write_param< int32_t>(fout, "some.parameter.int32",   GGUF_TYPE_INT32,  -0x12345679); | ||||
|         gguf_ex_write_val< uint8_t>(fout, "some.parameter.uint8",   GGUF_TYPE_UINT8,   0x12); | ||||
|         gguf_ex_write_val<  int8_t>(fout, "some.parameter.int8",    GGUF_TYPE_INT8,   -0x13); | ||||
|         gguf_ex_write_val<uint16_t>(fout, "some.parameter.uint16",  GGUF_TYPE_UINT16,  0x1234); | ||||
|         gguf_ex_write_val< int16_t>(fout, "some.parameter.int16",   GGUF_TYPE_INT16,  -0x1235); | ||||
|         gguf_ex_write_val<uint32_t>(fout, "some.parameter.uint32",  GGUF_TYPE_UINT32,  0x12345678); | ||||
|         gguf_ex_write_val< int32_t>(fout, "some.parameter.int32",   GGUF_TYPE_INT32,  -0x12345679); | ||||
|  | ||||
|         gguf_ex_write_param<float>   (fout, "some.parameter.float32", GGUF_TYPE_FLOAT32, 0.123456789f); | ||||
|         gguf_ex_write_param<bool>    (fout, "some.parameter.bool",    GGUF_TYPE_BOOL,    true); | ||||
|         gguf_ex_write_val<float>   (fout, "some.parameter.float32", GGUF_TYPE_FLOAT32, 0.123456789f); | ||||
|         gguf_ex_write_val<bool>    (fout, "some.parameter.bool",    GGUF_TYPE_BOOL,    true); | ||||
|  | ||||
|         gguf_ex_write_param<std::string>(fout, "some.parameter.string",  GGUF_TYPE_STRING,  "hello world"); | ||||
|         gguf_ex_write_val<std::string>(fout, "some.parameter.string",  GGUF_TYPE_STRING,  "hello world"); | ||||
|  | ||||
|         gguf_ex_write_arr<int16_t>    (fout, "some.parameter.arr.i16", GGUF_TYPE_INT16,   { 1, 2, 3, 4, }); | ||||
|         gguf_ex_write_arr<float>      (fout, "some.parameter.arr.f32", GGUF_TYPE_FLOAT32, { 3.145f, 2.718f, 1.414f, }); | ||||
|         gguf_ex_write_arr<std::string>(fout, "some.parameter.arr.str", GGUF_TYPE_STRING,  { "hello", "world", "!" }); | ||||
|     } | ||||
|  | ||||
|     uint64_t offset_tensor = 0; | ||||
| @@ -203,13 +260,15 @@ bool gguf_ex_read_0(const std::string & fname) { | ||||
|         fprintf(stdout, "%s: n_tensors: %d\n", __func__, n_tensors); | ||||
|  | ||||
|         for (int i = 0; i < n_tensors; ++i) { | ||||
|             const char * name = gguf_get_tensor_name(ctx, i); | ||||
|             const char * name   = gguf_get_tensor_name  (ctx, i); | ||||
|             const size_t offset = gguf_get_tensor_offset(ctx, i); | ||||
|  | ||||
|             fprintf(stdout, "%s: tensor[%d]: name = %s, offset = %zu\n", __func__, i, name, offset); | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     gguf_free(ctx); | ||||
|  | ||||
|     return true; | ||||
| } | ||||
|  | ||||
| @@ -248,7 +307,7 @@ bool gguf_ex_read_1(const std::string & fname) { | ||||
|         fprintf(stdout, "%s: n_tensors: %d\n", __func__, n_tensors); | ||||
|  | ||||
|         for (int i = 0; i < n_tensors; ++i) { | ||||
|             const char * name = gguf_get_tensor_name(ctx, i); | ||||
|             const char * name   = gguf_get_tensor_name  (ctx, i); | ||||
|             const size_t offset = gguf_get_tensor_offset(ctx, i); | ||||
|  | ||||
|             fprintf(stdout, "%s: tensor[%d]: name = %s, offset = %zu\n", __func__, i, name, offset); | ||||
|   | ||||
							
								
								
									
										64
									
								
								ggml.c
									
									
									
									
									
								
							
							
						
						
									
										64
									
								
								ggml.c
									
									
									
									
									
								
							| @@ -3698,7 +3698,6 @@ static const size_t GGML_TYPE_SIZE[GGML_TYPE_COUNT] = { | ||||
| }; | ||||
| static_assert(GGML_TYPE_COUNT == 19, "GGML_TYPE_SIZE is outdated"); | ||||
|  | ||||
|  | ||||
| static const char * GGML_TYPE_NAME[GGML_TYPE_COUNT] = { | ||||
|     [GGML_TYPE_F32]  = "f32", | ||||
|     [GGML_TYPE_F16]  = "f16", | ||||
| @@ -18302,7 +18301,19 @@ struct gguf_str { | ||||
|     char * data; | ||||
| }; | ||||
|  | ||||
| union gguf_value; | ||||
| static const size_t GGUF_TYPE_SIZE[GGUF_TYPE_COUNT] = { | ||||
|     [GGUF_TYPE_UINT8]   = sizeof(uint8_t), | ||||
|     [GGUF_TYPE_INT8]    = sizeof(int8_t), | ||||
|     [GGUF_TYPE_UINT16]  = sizeof(uint16_t), | ||||
|     [GGUF_TYPE_INT16]   = sizeof(int16_t), | ||||
|     [GGUF_TYPE_UINT32]  = sizeof(uint32_t), | ||||
|     [GGUF_TYPE_INT32]   = sizeof(int32_t), | ||||
|     [GGUF_TYPE_FLOAT32] = sizeof(float), | ||||
|     [GGUF_TYPE_BOOL]    = sizeof(bool), | ||||
|     [GGUF_TYPE_STRING]  = sizeof(struct gguf_str), | ||||
|     [GGUF_TYPE_ARRAY]   = 0, // undefined | ||||
| }; | ||||
| static_assert(GGUF_TYPE_COUNT == 10, "GGUF_TYPE_COUNT != 10"); | ||||
|  | ||||
| union gguf_value { | ||||
|     uint8_t  uint8; | ||||
| @@ -18320,7 +18331,7 @@ union gguf_value { | ||||
|         enum gguf_type type; | ||||
|  | ||||
|         uint32_t n; | ||||
|         union gguf_value * arr; | ||||
|         void * data; | ||||
|     } arr; | ||||
| }; | ||||
|  | ||||
| @@ -18457,8 +18468,35 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p | ||||
|                 case GGUF_TYPE_BOOL:    ok = ok && gguf_fread_el (&kv->value.bool_,   sizeof(kv->value.bool_),   file, &offset); break; | ||||
|                 case GGUF_TYPE_STRING:  ok = ok && gguf_fread_str(&kv->value.str,                                file, &offset); break; | ||||
|                 case GGUF_TYPE_ARRAY: | ||||
|                                         GGML_ASSERT("gguf: array type not implemented"); | ||||
|                                         break; | ||||
|                     { | ||||
|                         ok = ok && gguf_fread_el(&kv->value.arr.type, sizeof(kv->value.arr.type), file, &offset); | ||||
|                         ok = ok && gguf_fread_el(&kv->value.arr.n,    sizeof(kv->value.arr.n),    file, &offset); | ||||
|  | ||||
|                         switch (kv->value.arr.type) { | ||||
|                             case GGUF_TYPE_UINT8: | ||||
|                             case GGUF_TYPE_INT8: | ||||
|                             case GGUF_TYPE_UINT16: | ||||
|                             case GGUF_TYPE_INT16: | ||||
|                             case GGUF_TYPE_UINT32: | ||||
|                             case GGUF_TYPE_INT32: | ||||
|                             case GGUF_TYPE_FLOAT32: | ||||
|                             case GGUF_TYPE_BOOL: | ||||
|                                 { | ||||
|                                     kv->value.arr.data = malloc(kv->value.arr.n * GGUF_TYPE_SIZE[kv->value.arr.type]); | ||||
|                                     ok = ok && gguf_fread_el(kv->value.arr.data, kv->value.arr.n * GGUF_TYPE_SIZE[kv->value.arr.type], file, &offset); | ||||
|                                 } break; | ||||
|                             case GGUF_TYPE_STRING: | ||||
|                                 { | ||||
|                                     kv->value.arr.data = malloc(kv->value.arr.n * sizeof(struct gguf_str)); | ||||
|                                     for (uint32_t j = 0; j < kv->value.arr.n; ++j) { | ||||
|                                         ok = ok && gguf_fread_str(&((struct gguf_str *) kv->value.arr.data)[j], file, &offset); | ||||
|                                     } | ||||
|                                 } break; | ||||
|                             case GGUF_TYPE_ARRAY: | ||||
|                             case GGUF_TYPE_COUNT: GGML_ASSERT(false && "invalid type"); | ||||
|                         }; | ||||
|                     } break; | ||||
|                 case GGUF_TYPE_COUNT: GGML_ASSERT(false && "invalid type"); | ||||
|             }; | ||||
|  | ||||
|             if (!ok) { | ||||
| @@ -18629,6 +18667,8 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p | ||||
|         ggml_set_no_alloc(ctx_data, params.no_alloc); | ||||
|     } | ||||
|  | ||||
|     fclose(file); | ||||
|  | ||||
|     return ctx; | ||||
| } | ||||
|  | ||||
| @@ -18651,6 +18691,20 @@ void gguf_free(struct gguf_context * ctx) { | ||||
|                     free(kv->value.str.data); | ||||
|                 } | ||||
|             } | ||||
|  | ||||
|             if (kv->type == GGUF_TYPE_ARRAY) { | ||||
|                 if (kv->value.arr.data) { | ||||
|                     if (kv->value.arr.type == GGUF_TYPE_STRING) { | ||||
|                         for (uint32_t j = 0; j < kv->value.arr.n; ++j) { | ||||
|                             struct gguf_str * str = &((struct gguf_str *) kv->value.arr.data)[j]; | ||||
|                             if (str->data) { | ||||
|                                 free(str->data); | ||||
|                             } | ||||
|                         } | ||||
|                     } | ||||
|                     free(kv->value.arr.data); | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         GGML_ALIGNED_FREE(ctx->header.kv); | ||||
|   | ||||
							
								
								
									
										4
									
								
								ggml.h
									
									
									
									
									
								
							
							
						
						
									
										4
									
								
								ggml.h
									
									
									
									
									
								
							| @@ -1631,6 +1631,7 @@ extern "C" { | ||||
|         GGUF_TYPE_BOOL    = 7, | ||||
|         GGUF_TYPE_STRING  = 8, | ||||
|         GGUF_TYPE_ARRAY   = 9, | ||||
|         GGUF_TYPE_COUNT,       // marks the end of the enum | ||||
|     }; | ||||
|  | ||||
|     struct gguf_context; | ||||
| @@ -1664,7 +1665,8 @@ extern "C" { | ||||
|     GGML_API float        gguf_get_val_f32 (struct gguf_context * ctx, int i); | ||||
|     GGML_API bool         gguf_get_val_bool(struct gguf_context * ctx, int i); | ||||
|     GGML_API const char * gguf_get_val_str (struct gguf_context * ctx, int i); | ||||
|     // TODO: arr | ||||
|     GGML_API int          gguf_get_arr_n   (struct gguf_context * ctx, int i); | ||||
|     GGML_API void         gguf_get_arr_data(struct gguf_context * ctx, int i, void * data); | ||||
|  | ||||
|     GGML_API int    gguf_get_n_tensors    (struct gguf_context * ctx); | ||||
|     GGML_API size_t gguf_get_tensor_offset(struct gguf_context * ctx, int i); | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Georgi Gerganov
					Georgi Gerganov