Mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-10-30 08:42:00 +00:00)
			
		
		
		
	k-quants : fix comments about block sizing (#3499)
This commit is contained in:
		
							
								
								
									
										10
									
								
								k_quants.h
									
									
									
									
									
								
							
							
						
						
									
										10
									
								
								k_quants.h
									
									
									
									
									
								
							| @@ -29,7 +29,7 @@ | |||||||
|  |  | ||||||
| // 2-bit quantization | // 2-bit quantization | ||||||
| // weight is represented as x = a * q + b | // weight is represented as x = a * q + b | ||||||
| // 16 blocks of 16 elemenets each | // 16 blocks of 16 elements each | ||||||
| // Effectively 2.5625 bits per weight | // Effectively 2.5625 bits per weight | ||||||
| typedef struct { | typedef struct { | ||||||
|     uint8_t scales[QK_K/16]; // scales and mins, quantized with 4 bits |     uint8_t scales[QK_K/16]; // scales and mins, quantized with 4 bits | ||||||
| @@ -41,7 +41,7 @@ static_assert(sizeof(block_q2_K) == 2*sizeof(ggml_fp16_t) + QK_K/16 + QK_K/4, "w | |||||||
|  |  | ||||||
| // 3-bit quantization | // 3-bit quantization | ||||||
| // weight is represented as x = a * q | // weight is represented as x = a * q | ||||||
| // 16 blocks of 16 elemenets each | // 16 blocks of 16 elements each | ||||||
| // Effectively 3.4375 bits per weight | // Effectively 3.4375 bits per weight | ||||||
| #ifdef GGML_QKK_64 | #ifdef GGML_QKK_64 | ||||||
| typedef struct { | typedef struct { | ||||||
| @@ -62,7 +62,7 @@ static_assert(sizeof(block_q3_K) == sizeof(ggml_fp16_t) + QK_K / 4 + QK_K / 8 + | |||||||
| #endif | #endif | ||||||
|  |  | ||||||
| // 4-bit quantization | // 4-bit quantization | ||||||
| // 16 blocks of 32 elements each | // 8 blocks of 32 elements each | ||||||
| // weight is represented as x = a * q + b | // weight is represented as x = a * q + b | ||||||
| // Effectively 4.5 bits per weight | // Effectively 4.5 bits per weight | ||||||
| #ifdef GGML_QKK_64 | #ifdef GGML_QKK_64 | ||||||
| @@ -83,7 +83,7 @@ static_assert(sizeof(block_q4_K) == 2*sizeof(ggml_fp16_t) + K_SCALE_SIZE + QK_K/ | |||||||
| #endif | #endif | ||||||
|  |  | ||||||
| // 5-bit quantization | // 5-bit quantization | ||||||
| // 16 blocks of 32 elements each | // 8 blocks of 32 elements each | ||||||
| // weight is represented as x = a * q + b | // weight is represented as x = a * q + b | ||||||
| // Effectively 5.5 bits per weight | // Effectively 5.5 bits per weight | ||||||
| #ifdef GGML_QKK_64 | #ifdef GGML_QKK_64 | ||||||
| @@ -107,7 +107,7 @@ static_assert(sizeof(block_q5_K) == 2*sizeof(ggml_fp16_t) + K_SCALE_SIZE + QK_K/ | |||||||
|  |  | ||||||
| // 6-bit quantization | // 6-bit quantization | ||||||
| // weight is represented as x = a * q | // weight is represented as x = a * q | ||||||
| // 16 blocks of 16 elemenets each | // 16 blocks of 16 elements each | ||||||
| // Effectively 6.5625 bits per weight | // Effectively 6.5625 bits per weight | ||||||
| typedef struct { | typedef struct { | ||||||
|     uint8_t ql[QK_K/2];      // quants, lower 4 bits |     uint8_t ql[QK_K/2];      // quants, lower 4 bits | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Johannes Rudolph
					Johannes Rudolph