Mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-10-30 08:42:00 +00:00)
			
		
		
		
	ggml: fix loongarch quantize_row_q8_1 error (#14827)
This commit is contained in:
@@ -544,7 +544,7 @@ void quantize_row_q8_1(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, i
         __m128 max4 = __lsx_vfmax_s( lasx_extractf128( max_abs, 1 ), lasx_extractf128( max_abs, 0) );
         max4 = __lsx_vfmax_s( max4, (__m128)__lsx_vpickod_d((__m128i) max4, (__m128i)max4 ) );
         __m128 tmp = max4;
-        max4 = __lsx_vfmax_s( max4, (__m128)__lsx_vextrins_w((__m128i)tmp, (__m128i)max4, 0x10 ));
+        max4 = __lsx_vfmax_s( max4, (__m128)__lsx_vextrins_w((__m128i)tmp, (__m128i)max4, 0x1 ));
         const float max_scalar = ((v4f32)max4)[0];
 
         // Quantize these floats
		Reference in New Issue
	
	Block a user
	 lixing-star
					lixing-star