mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-10-30 08:42:00 +00:00 
			
		
		
		
	llama : prevent usage of k-quants when tensor size is not a multiple of 256 (#1921)
* Fix examples/metal * k-quants: prevent usage when tensor size is not divisible by 256 --------- Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
This commit is contained in:
		
							
								
								
									
										16
									
								
								llama.cpp
									
									
									
									
									
								
							
							
						
						
									
										16
									
								
								llama.cpp
									
									
									
									
									
								
							| @@ -19,6 +19,11 @@ | ||||
| #ifdef GGML_USE_METAL | ||||
| #include "ggml-metal.h" | ||||
| #endif | ||||
| #ifdef GGML_USE_K_QUANTS | ||||
| #ifndef QK_K | ||||
| #define QK_K 256 /* fallback value, used only when QK_K has not been defined already */ | ||||
| #endif /* QK_K */ | ||||
| #endif /* GGML_USE_K_QUANTS */ | ||||
|  | ||||
| #include <array> | ||||
| #include <ctime> | ||||
| @@ -2491,6 +2496,17 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s | ||||
|         } else { | ||||
|             new_type = quantized_type; | ||||
| #ifdef GGML_USE_K_QUANTS | ||||
|             /* Guard for the k-quant target types: refuse to quantize a tensor unless both of its | ||||
|                first two dimensions are multiples of QK_K; report the sizes and throw otherwise. | ||||
|                NOTE(review): at(1) presumably throws if the tensor has fewer than two dimensions - | ||||
|                confirm that only 2-D tensors reach this branch. */ | ||||
|             if (quantized_type == GGML_TYPE_Q2_K || quantized_type == GGML_TYPE_Q3_K || quantized_type == GGML_TYPE_Q4_K || | ||||
|                 quantized_type == GGML_TYPE_Q5_K || quantized_type == GGML_TYPE_Q6_K) { | ||||
|                 int nx = tensor.ne.at(0); /* first dimension */ | ||||
|                 int ny = tensor.ne.at(1); /* second dimension (index 1, not a repeat of index 0) */ | ||||
|                 if (nx % QK_K != 0 || ny % QK_K != 0) { | ||||
|                     fprintf(stderr, "\n\n========================= Tensor sizes %d x %d are not divisible by %d\n",nx,ny,QK_K); | ||||
|                     fprintf(stderr, "This is required to be able to use k-quants for now!\n"); | ||||
|                     fprintf(stderr, "========================================================================================\n\n"); | ||||
|                     throw std::runtime_error("Unsupported tensor size encountered\n"); | ||||
|                 } | ||||
|             } | ||||
|             if (tensor.name == "output.weight") { | ||||
|                new_type = GGML_TYPE_Q6_K; | ||||
|             } else if (tensor.name.find("attention.wv.weight") != std::string::npos) { | ||||
|   | ||||
		Reference in New Issue
	
	Block a user