llama : refactor model loading code (#2620)

* llama : style formatting + remove helper methods * llama : fix quantization using gguf tool * llama : simplify gguf_file_saver * llama : fix method names * llama : simplify write_header() * llama : no need to pass full file loader to the file saver just gguf_ctx * llama : gguf_file_saver write I32 * llama : refactor tensor names (#2622) * gguf : update tensor names searched in quantization * gguf : define tensor names as constants * gguf : initial write API (not tested yet) * gguf : write to file API (not tested) * gguf : initial write API ready + example * gguf : fix header write * gguf : fixes + simplify example + add ggml_nbytes_pad() * gguf : minor * llama : replace gguf_file_saver with new gguf write API * gguf : streaming support when writing files * gguf : remove obsolete write methods * gguf : remove obsolete gguf_get_arr_xxx API * llama : simplify gguf_file_loader * llama : move hparams and vocab from gguf_file_loader to llama_model_loader * llama : merge gguf-util.h in llama.cpp * llama : reorder definitions in .cpp to match .h * llama : minor simplifications * llama : refactor llama_model_loader (WIP) wip : remove ggml_ctx from llama_model_loader wip : merge gguf_file_loader in llama_model_loader * llama : fix shape prints * llama : fix Windows build + fix norm_rms_eps key * llama : throw error on missing KV pairs in model metadata * llama : improve printing + log metadata * llama : switch print order of metadata --------- Co-authored-by: M. Yusuf Sarıgöz <yusufsarigoz@gmail.com>
2025-11-12 10:47:01 +00:00 · 2023-08-16 14:34:03 +03:00
parent ea5615a03a
commit 758ff1bbb5
9 changed files with 1944 additions and 1889 deletions
--- a/gguf-llama.h
+++ b/gguf-llama.h
@@ -111,6 +111,7 @@ extern "C" {
        bool use_mlock;  // force system to keep model in RAM
        bool embedding;  // embedding mode only
    };
+
    // model file types
    enum llama_ftype {
        LLAMA_FTYPE_ALL_F32              = 0,
@@ -190,17 +191,12 @@ extern "C" {
        int32_t n_eval;
    };

-    // Set callback for all future logging events.
-    // If this is not called, or NULL is supplied, everything is output on stderr.
-    LLAMA_API void llama_log_set(llama_log_callback log_callback, void * user_data);
+    LLAMA_API struct llama_context_params llama_context_default_params(void);
+    LLAMA_API struct llama_model_quantize_params llama_model_quantize_default_params(void);

-    LLAMA_API int llama_max_devices();
-
-    LLAMA_API struct llama_context_params llama_context_default_params();
-    LLAMA_API struct llama_model_quantize_params llama_model_quantize_default_params();
-
-    LLAMA_API bool llama_mmap_supported();
-    LLAMA_API bool llama_mlock_supported();
+    LLAMA_API int  llama_max_devices(void);
+    LLAMA_API bool llama_mmap_supported(void);
+    LLAMA_API bool llama_mlock_supported(void);

    // TODO: not great API - very likely to change
    // Initialize the llama + ggml backend
@@ -208,9 +204,9 @@ extern "C" {
    // Call once at the start of the program
    LLAMA_API void llama_backend_init(bool numa);
    // Call once at the end of the program - currently only used for MPI
-    LLAMA_API void llama_backend_free();
+    LLAMA_API void llama_backend_free(void);

-    LLAMA_API int64_t llama_time_us();
+    LLAMA_API int64_t llama_time_us(void);

    LLAMA_API struct llama_model * llama_load_model_from_file(
                             const char * path_model,
@@ -377,9 +373,9 @@ extern "C" {
                                  char * str,
                                  int    length);
    // Special tokens
-    LLAMA_API llama_token llama_token_bos();  // beginning-of-sentence
-    LLAMA_API llama_token llama_token_eos();  // end-of-sentence
-    LLAMA_API llama_token llama_token_nl();   // next-line
+    LLAMA_API llama_token llama_token_bos(void);  // beginning-of-sentence
+    LLAMA_API llama_token llama_token_eos(void);  // end-of-sentence
+    LLAMA_API llama_token llama_token_nl(void);   // next-line

    // Grammar
    //
@@ -459,6 +455,10 @@ extern "C" {
    // Print system information
    LLAMA_API const char * llama_print_system_info(void);

+    // Set callback for all future logging events.
+    // If this is not called, or NULL is supplied, everything is output on stderr.
+    LLAMA_API void llama_log_set(llama_log_callback log_callback, void * user_data);
+
 #ifdef __cplusplus
 }
 #endif