mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-10-30 08:42:00 +00:00 
			
		
		
		
	ggml : fix q2_k bpw in comments (ggml/680)
This commit is contained in:
@@ -70,7 +70,7 @@ static_assert(sizeof(block_q8_1) == 2*sizeof(float) + QK8_1, "wrong q8_1 block s
 // 2-bit quantization
 // weight is represented as x = a * q + b
 // 16 blocks of 16 elements each
-// Effectively 2.5625 bits per weight
+// Effectively 2.625 bits per weight
 typedef struct {
     uint8_t scales[QK_K/16]; // scales and mins, quantized with 4 bits
     uint8_t qs[QK_K/4];      // quants
		Reference in New Issue
	
	Block a user
	 Georgi Gerganov
					Georgi Gerganov