mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-10-30 08:42:00 +00:00 
			
		
		
		
	Guard against all weights in a super-block being zero (#3010)
* Guard against all weights in a super-block being zero. * Also guard against extremely small weights. Closes #2982. Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
This commit is contained in:
		| @@ -83,7 +83,7 @@ static float make_qx_quants(int n, int nmax, const float * restrict x, int8_t * | |||||||
|         float ax = fabsf(x[i]); |         float ax = fabsf(x[i]); | ||||||
|         if (ax > amax) { amax = ax; max = x[i]; } |         if (ax > amax) { amax = ax; max = x[i]; } | ||||||
|     } |     } | ||||||
|     if (!amax) { // all zero |     if (amax < 1e-30f) { // all zero | ||||||
|         for (int i = 0; i < n; ++i) { |         for (int i = 0; i < n; ++i) { | ||||||
|             L[i] = 0; |             L[i] = 0; | ||||||
|         } |         } | ||||||
| @@ -1086,6 +1086,12 @@ void quantize_row_q6_K_reference(const float * restrict x, block_q6_K * restrict | |||||||
|  |  | ||||||
|         } |         } | ||||||
|  |  | ||||||
|  |         if (!max_abs_scale) { | ||||||
|  |             memset(&y[i], 0, sizeof(block_q6_K)); | ||||||
|  |             y[i].d = ggml_fp32_to_fp16(0.f); | ||||||
|  |             continue; | ||||||
|  |         } | ||||||
|  |  | ||||||
|         float iscale = -128.f/max_scale; |         float iscale = -128.f/max_scale; | ||||||
|         y[i].d = ggml_fp32_to_fp16(1/iscale); |         y[i].d = ggml_fp32_to_fp16(1/iscale); | ||||||
|         for (int ib = 0; ib < QK_K/16; ++ib) { |         for (int ib = 0; ib < QK_K/16; ++ib) { | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Kawrakow
					Kawrakow