gguf : quantization is working

2025-10-31 08:51:55 +00:00 · 2023-08-12 16:39:05 +03:00
parent 1fc3d30b71
commit 202eab04d3
3 changed files with 7 additions and 6 deletions
--- a/examples/gguf/gguf.cpp
+++ b/examples/gguf/gguf.cpp
@@ -421,7 +421,7 @@ int main(int argc, char ** argv) {
        GGML_ASSERT(gguf_ex_write(fname) && "failed to write gguf file");
    } else if (mode == "r") {
        GGML_ASSERT(gguf_ex_read_0(fname) && "failed to read gguf file");
-        //GGML_ASSERT(gguf_ex_read_1(fname) && "failed to read gguf file");
+        GGML_ASSERT(gguf_ex_read_1(fname) && "failed to read gguf file");
        GGML_ASSERT(gguf_ex_read_2(fname) && "failed to read gguf file");
    } else if (mode == "q") {
        llama_model_quantize_params params = llama_model_quantize_default_params();
--- a/gguf-llama.cpp
+++ b/gguf-llama.cpp
@@ -752,7 +752,6 @@ struct gguf_file_saver {
        file.seek(info_offset, SEEK_SET);
        GGML_ASSERT(info_offset == file.tell());
        total_written += file.write_str(tensor.name);
 printf("total_written = %zu, name = %s\n", total_written, tensor.name.c_str());
        int32_t n_dims = tensor.ne.size();
        total_written += file.write_i32(n_dims);
@@ -765,7 +764,6 @@ printf("total_written = %zu, name = %s\n", total_written, tensor.name.c_str());
        info_offset   += total_written;
        file.seek(0, SEEK_END);
        printf("total_written = %zu\n", total_written);
        return total_written;
    }
@@ -936,8 +934,7 @@ struct llama_model_loader {
        } else {
            gguf_file & file = file_loader->file;
            file.seek(lt.file_off, SEEK_SET);
-            // TODO
+            file.read_raw(lt.data, lt.size);
            //file.read_raw(lt.data, lt.size);
        }
        if (0) {
--- a/gguf-util.h
+++ b/gguf-util.h
@@ -131,6 +131,10 @@ struct gguf_file {
        fwrite(data, size, 1, fp);
    }
    // Attempts to read `size` bytes from `fp` into `data`, issued as a single
    // fread() item of `size` bytes.
    // NOTE(review): the fread() result is discarded, so a short read or an
    // I/O error is not reported to the caller — confirm this is intended.
    void read_raw(void * data, size_t size) {
        const size_t n_items = 1;
        (void) fread(data, size, n_items, fp);
    }
    template<typename T>
    void write_val(const std::string & key, enum gguf_type type, const T & val) {
        write_str(key);