mirror of https://github.com/ggml-org/llama.cpp.git
	gguf : start implementing quantization (WIP)
Author: M. Yusuf Sarıgöz

Makefile
@@ -393,7 +393,7 @@ $(LIB_PRE)embdinput$(DSO_EXT): examples/embd-input/embd-input.h examples/embd-in
 embd-input-test: $(LIB_PRE)embdinput$(DSO_EXT) examples/embd-input/embd-input-test.cpp build-info.h ggml.o llama.o common.o $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %$(DSO_EXT),$(filter-out %.h,$(filter-out %.hpp,$^))) -o $@ $(LDFLAGS) -L. -lembdinput
 
-gguf: examples/gguf/gguf.cpp                                  build-info.h ggml.o $(OBJS)
+gguf: examples/gguf/gguf.cpp                                  build-info.h ggml.o gguf-llama.o $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
 
 gguf-llama-simple: examples/gguf/gguf-llama-simple.cpp                            build-info.h ggml.o gguf-llama.o common.o $(OBJS)
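Note: the only change in this hunk is the added gguf-llama.o object on the gguf link line. It matches the source change below, where examples/gguf/gguf.cpp starts including gguf-llama.h and calling the quantization API, so the example binary now needs the gguf-llama implementation at link time.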

examples/gguf/gguf.cpp
@@ -1,5 +1,6 @@
 #include "ggml.h"
 #include "gguf-util.h"
+#include "gguf-llama.h"
 
 #include <cstdio>
 #include <cinttypes>
@@ -7,14 +8,14 @@
 #include <sstream>
 #include <fstream>
 #include <vector>
 
+/*
 template<typename T>
 static std::string to_string(const T & val) {
     std::stringstream ss;
     ss << val;
     return ss.str();
 }
 
+*/
 void gguf_ex_write_str(std::ofstream & fout, const std::string & val) {
     const int32_t n = val.size();
     fout.write((const char *) &n, sizeof(n));
@@ -414,7 +415,7 @@ int main(int argc, char ** argv) {
     const std::string fname(argv[1]);
     const std::string mode (argv[2]);
 
-    GGML_ASSERT((mode == "r" || mode == "w") && "mode must be r or w");
+    GGML_ASSERT((mode == "r" || mode == "w" || mode == "q") && "mode must be r, w or q");
 
     if (mode == "w") {
         GGML_ASSERT(gguf_ex_write(fname) && "failed to write gguf file");
@@ -422,6 +423,9 @@ int main(int argc, char ** argv) {
         GGML_ASSERT(gguf_ex_read_0(fname) && "failed to read gguf file");
         GGML_ASSERT(gguf_ex_read_1(fname) && "failed to read gguf file");
         GGML_ASSERT(gguf_ex_read_2(fname) && "failed to read gguf file");
+    } else if (mode == "q") {
+        llama_model_quantize_params params = llama_model_quantize_default_params();
+        llama_model_quantize(fname.c_str(), "quant.gguf", &params);
     }
 
     return 0;
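With the new "q" mode, running the example as `gguf <fname> q` quantizes the input model into quant.gguf using the default parameters. For context, here is a minimal sketch of driving the same entry point with explicit settings; the params fields used (nthread, ftype) are assumed from the llama.h of this period and may differ on the gguf branch:

#include "gguf-llama.h"

// Hypothetical helper: quantize fname_inp into fname_out with explicit settings.
// llama_model_quantize_default_params() fills in defaults; we override a couple.
static int quantize_file(const char * fname_inp, const char * fname_out) {
    llama_model_quantize_params params = llama_model_quantize_default_params();
    params.nthread = 4;                       // assumed field: <= 0 falls back to hardware concurrency
    params.ftype   = LLAMA_FTYPE_MOSTLY_Q4_0; // assumed enum: target quantization format
    return llama_model_quantize(fname_inp, fname_out, &params); // 0 on success
}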

gguf-llama.cpp
@@ -738,15 +738,19 @@ struct gguf_file_saver {
         info_offset  = file.tell();
         size_t count = gguf_get_data_offset(fl->gguf_ctx) - info_offset;
         file.write_zeros(count);
+        printf("info_offset = %zu\n", info_offset);
+        file.seek(info_offset, SEEK_SET);
+        GGML_ASSERT(info_offset == file.tell());
     }
 
     size_t write_tensor_info(llama_load_tensor & tensor) {
         size_t total_written = 0;
-        file.seek(0, info_offset);
+        file.seek(info_offset, SEEK_SET);
+        GGML_ASSERT(info_offset == file.tell());
         total_written += file.write_str(tensor.name);
 
         int32_t n_dims = tensor.ne.size();
-        file.write_i32(n_dims);
+        total_written += file.write_i32(n_dims);
         for (int32_t i = 0; i < n_dims; ++i) {
             total_written += file.write_i32(i);
         }
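The gguf_file_saver fix above is about argument order: file.seek(0, info_offset) passed info_offset as the whence argument, so the writer never returned to the reserved tensor-info region. file.seek(info_offset, SEEK_SET) plus the GGML_ASSERT on file.tell() makes the round trip explicit, and write_tensor_info now also counts the n_dims bytes toward total_written. Below is a self-contained sketch of the same reserve-then-backpatch pattern in plain stdio, assuming file.seek/file.tell are thin wrappers over fseek/ftell:

#include <cstdio>
#include <cstdint>
#include <cassert>

int main() {
    FILE * f = fopen("demo.bin", "wb");
    assert(f != NULL);

    // Remember where the not-yet-known header field will live,
    // then reserve its space with zeros (like file.write_zeros(count)).
    long info_offset = ftell(f);
    uint64_t placeholder = 0;
    fwrite(&placeholder, sizeof(placeholder), 1, f);

    // ... write payload whose size is only known afterwards ...
    const char payload[] = "tensor data";
    fwrite(payload, sizeof(payload), 1, f);

    // Seek back and patch. Argument order matters: fseek(f, offset, whence).
    // The bug fixed above had the two swapped: seek(0, info_offset).
    fseek(f, info_offset, SEEK_SET);
    assert(ftell(f) == info_offset);

    uint64_t count = sizeof(payload);
    fwrite(&count, sizeof(count), 1, f);

    fclose(f);
    return 0;
}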