Mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-10-31 08:51:55 +00:00)
			
		
		
		
	gguf : quantization is working
This commit is contained in:
		| @@ -421,7 +421,7 @@ int main(int argc, char ** argv) { | |||||||
|         GGML_ASSERT(gguf_ex_write(fname) && "failed to write gguf file"); |         GGML_ASSERT(gguf_ex_write(fname) && "failed to write gguf file"); | ||||||
|     } else if (mode == "r") { |     } else if (mode == "r") { | ||||||
|         GGML_ASSERT(gguf_ex_read_0(fname) && "failed to read gguf file"); |         GGML_ASSERT(gguf_ex_read_0(fname) && "failed to read gguf file"); | ||||||
|         //GGML_ASSERT(gguf_ex_read_1(fname) && "failed to read gguf file"); |         GGML_ASSERT(gguf_ex_read_1(fname) && "failed to read gguf file"); | ||||||
|         GGML_ASSERT(gguf_ex_read_2(fname) && "failed to read gguf file"); |         GGML_ASSERT(gguf_ex_read_2(fname) && "failed to read gguf file"); | ||||||
|     } else if (mode == "q") { |     } else if (mode == "q") { | ||||||
|         llama_model_quantize_params params = llama_model_quantize_default_params(); |         llama_model_quantize_params params = llama_model_quantize_default_params(); | ||||||
|   | |||||||
| @@ -752,7 +752,6 @@ struct gguf_file_saver { | |||||||
|         file.seek(info_offset, SEEK_SET); |         file.seek(info_offset, SEEK_SET); | ||||||
|         GGML_ASSERT(info_offset == file.tell()); |         GGML_ASSERT(info_offset == file.tell()); | ||||||
|         total_written += file.write_str(tensor.name); |         total_written += file.write_str(tensor.name); | ||||||
| printf("total_written = %zu, name = %s\n", total_written, tensor.name.c_str()); |  | ||||||
|  |  | ||||||
|         int32_t n_dims = tensor.ne.size(); |         int32_t n_dims = tensor.ne.size(); | ||||||
|         total_written += file.write_i32(n_dims); |         total_written += file.write_i32(n_dims); | ||||||
| @@ -765,7 +764,6 @@ printf("total_written = %zu, name = %s\n", total_written, tensor.name.c_str()); | |||||||
|         info_offset   += total_written; |         info_offset   += total_written; | ||||||
|  |  | ||||||
|         file.seek(0, SEEK_END); |         file.seek(0, SEEK_END); | ||||||
|         printf("total_written = %zu\n", total_written); |  | ||||||
|          |          | ||||||
|         return total_written; |         return total_written; | ||||||
|     } |     } | ||||||
| @@ -936,8 +934,7 @@ struct llama_model_loader { | |||||||
|         } else { |         } else { | ||||||
|             gguf_file & file = file_loader->file; |             gguf_file & file = file_loader->file; | ||||||
|             file.seek(lt.file_off, SEEK_SET); |             file.seek(lt.file_off, SEEK_SET); | ||||||
|             // TODO |             file.read_raw(lt.data, lt.size); | ||||||
|             //file.read_raw(lt.data, lt.size); |  | ||||||
|         } |         } | ||||||
|  |  | ||||||
|         if (0) { |         if (0) { | ||||||
|   | |||||||
| @@ -131,6 +131,10 @@ struct gguf_file { | |||||||
|         fwrite(data, size, 1, fp); |         fwrite(data, size, 1, fp); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     void read_raw(void * data, size_t size) { | ||||||
|  |         fread(data, size, 1, fp); | ||||||
|  |     } | ||||||
|  |  | ||||||
|     template<typename T> |     template<typename T> | ||||||
|     void write_val(const std::string & key, enum gguf_type type, const T & val) { |     void write_val(const std::string & key, enum gguf_type type, const T & val) { | ||||||
|         write_str(key); |         write_str(key); | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 M. Yusuf Sarıgöz
					M. Yusuf Sarıgöz