mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-10-30 08:42:00 +00:00 
			
		
		
		
	ggml-quants : fix some edge cases in make_qkxh_nl_quants
This commit is contained in:
		| @@ -1149,10 +1149,11 @@ static float make_qkxh_nl_quants(int n, const float * GGML_RESTRICT x, const flo | |||||||
|             amax = ax; |             amax = ax; | ||||||
|             amax_i = i; |             amax_i = i; | ||||||
|         } |         } | ||||||
|         Laux[i] = k_heap->mid_k; |  | ||||||
|         sumlx += w * x[i] * kmin; |         sumlx += w * x[i] * kmin; | ||||||
|         suml2 += w * kmin * kmin; |         suml2 += w * kmin * kmin; | ||||||
|     } |     } | ||||||
|  |     memset(Laux, k_heap->mid_k, n); | ||||||
|  |     memset(L, k_heap->mid_k, n); | ||||||
|  |  | ||||||
|     const bool neg_scale = signed_scale && fast ? (x[amax_i] < 0.0f) != (k_heap->kmax < 0) : false; |     const bool neg_scale = signed_scale && fast ? (x[amax_i] < 0.0f) != (k_heap->kmax < 0) : false; | ||||||
|  |  | ||||||
| @@ -1163,19 +1164,17 @@ static float make_qkxh_nl_quants(int n, const float * GGML_RESTRICT x, const flo | |||||||
|     float best_suml2; |     float best_suml2; | ||||||
|     if (suml2 != 0.0f) { |     if (suml2 != 0.0f) { | ||||||
|         best = sumlx * sumlx; |         best = sumlx * sumlx; | ||||||
|         best_sumlx = neg_scale ? -sumlx : sumlx; |         best_sumlx = sumlx; // can't change the sign of kmin | ||||||
|         best_suml2 = suml2 != 0.0f ? suml2 : 1.0f; |         best_suml2 = suml2; | ||||||
|     } else { |     } else { | ||||||
|         best = 0.0f; |         best = 0.0f; | ||||||
|         best_sumlx = 0.0f; |         best_sumlx = 0.0f; | ||||||
|         best_suml2 = 1.0f; |         best_suml2 = 1.0f; | ||||||
|     } |     } | ||||||
|     { |  | ||||||
|     float sumlx_p = neg_scale ? -sumlx : sumlx; |     float sumlx_p = neg_scale ? -sumlx : sumlx; | ||||||
|     float suml2_p = suml2; |     float suml2_p = suml2; | ||||||
|         int best_p_i = -2; // not consecutive with 0..n_frac |     int best_p_i = -1; // consecutive with 0..n_frac | ||||||
|         int i = 0; |     for (int i = 0; k_heap->n > 0; ++i) { | ||||||
|         while (k_heap->n > 0) { |  | ||||||
|         struct fraction frac = k_heap_pop(k_heap); |         struct fraction frac = k_heap_pop(k_heap); | ||||||
|         const int ii = frac.i; |         const int ii = frac.i; | ||||||
|         const float w = weights ? weights[ii] : x[ii] * x[ii]; |         const float w = weights ? weights[ii] : x[ii] * x[ii]; | ||||||
| @@ -1191,29 +1190,23 @@ static float make_qkxh_nl_quants(int n, const float * GGML_RESTRICT x, const flo | |||||||
|                 // reduce copies for consecutive bests |                 // reduce copies for consecutive bests | ||||||
|                 L[ii] += (x[ii] < 0.0f) != neg_scale ? -1 : 1; |                 L[ii] += (x[ii] < 0.0f) != neg_scale ? -1 : 1; | ||||||
|             } else { |             } else { | ||||||
|                     for (int j = 0; j < n; ++j) { |                 memcpy(L, Laux, n); | ||||||
|                         L[j] = Laux[j]; |  | ||||||
|                     } |  | ||||||
|             } |             } | ||||||
|             best_p_i = i; |             best_p_i = i; | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|     } |  | ||||||
|  |  | ||||||
|     // Non-linear mappings are usually not symmetric, so try negating the scale |     // Non-linear mappings are usually not symmetric, so try negating the scale | ||||||
|     // This is the same as above, but keeping the old best if the new best is not better. |     // This is the same as above, but keeping the old best if the new best is not better. | ||||||
|     if (signed_scale && !fast) { |     if (signed_scale && !fast) { | ||||||
|         for (int i = 0; i < n; ++i) { |         memset(Laux, k_heap->mid_k, n); | ||||||
|             Laux[i] = k_heap->mid_k; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         k_heap_set_x(k_heap, x, n, true); |         k_heap_set_x(k_heap, x, n, true); | ||||||
|  |  | ||||||
|         float sumlx_n = -sumlx; |         float sumlx_n = -sumlx; | ||||||
|         float suml2_n = suml2; |         float suml2_n = suml2; | ||||||
|         int best_n_i = -2; // not consecutive with 0..n_frac |         int best_n_i = -2; // not consecutive with 0..n_frac | ||||||
|         int i = 0; |         for (int i = 0; k_heap->n > 0; ++i) { | ||||||
|         while (k_heap->n > 0) { |  | ||||||
|             struct fraction frac = k_heap_pop(k_heap); |             struct fraction frac = k_heap_pop(k_heap); | ||||||
|             const int ii = frac.i; |             const int ii = frac.i; | ||||||
|             const float w = weights ? weights[ii] : x[ii] * x[ii]; |             const float w = weights ? weights[ii] : x[ii] * x[ii]; | ||||||
| @@ -1229,13 +1222,10 @@ static float make_qkxh_nl_quants(int n, const float * GGML_RESTRICT x, const flo | |||||||
|                     // reduce copies for consecutive bests |                     // reduce copies for consecutive bests | ||||||
|                     L[ii] += x[ii] >= 0.0f ? -1 : 1; |                     L[ii] += x[ii] >= 0.0f ? -1 : 1; | ||||||
|                 } else { |                 } else { | ||||||
|                     for (int j = 0; j < n; ++j) { |                     memcpy(L, Laux, n); | ||||||
|                         L[j] = Laux[j]; |  | ||||||
|                     } |  | ||||||
|                 } |                 } | ||||||
|                 best_n_i = i; |                 best_n_i = i; | ||||||
|             } |             } | ||||||
|             ++i; |  | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Francis Couture-Harpin
					Francis Couture-Harpin