mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-11-03 09:22:01 +00:00 
			
		
		
		
	ggml-quants : attempt to fix Arm 32-bit support
This commit is contained in:
		@@ -177,7 +177,7 @@ typedef __fp16 ggml_fp16_internal_t;
 | 
			
		||||
 | 
			
		||||
// 32-bit ARM compatibility
 | 
			
		||||
 | 
			
		||||
// vaddvq_s16
 | 
			
		||||
// vaddlvq_s16
 | 
			
		||||
// vpaddq_s16
 | 
			
		||||
// vpaddq_s32
 | 
			
		||||
// vaddvq_s32
 | 
			
		||||
@@ -187,12 +187,9 @@ typedef __fp16 ggml_fp16_internal_t;
 | 
			
		||||
// vzip1_u8
 | 
			
		||||
// vzip2_u8
 | 
			
		||||
 | 
			
		||||
inline static int32_t vaddvq_s16(int16x8_t v) {
 | 
			
		||||
    return
 | 
			
		||||
        (int32_t)vgetq_lane_s16(v, 0) + (int32_t)vgetq_lane_s16(v, 1) +
 | 
			
		||||
        (int32_t)vgetq_lane_s16(v, 2) + (int32_t)vgetq_lane_s16(v, 3) +
 | 
			
		||||
        (int32_t)vgetq_lane_s16(v, 4) + (int32_t)vgetq_lane_s16(v, 5) +
 | 
			
		||||
        (int32_t)vgetq_lane_s16(v, 6) + (int32_t)vgetq_lane_s16(v, 7);
 | 
			
		||||
inline static int32_t vaddlvq_s16(int16x8_t v) {
 | 
			
		||||
    int32x4_t v0 = vreinterpretq_s32_s64(vpaddlq_s32(vpaddlq_s16(v)));
 | 
			
		||||
    return vgetq_lane_s32(v0, 0) + vgetq_lane_s32(v0, 2);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
inline static int16x8_t vpaddq_s16(int16x8_t a, int16x8_t b) {
 | 
			
		||||
 
 | 
			
		||||
@@ -11483,10 +11483,10 @@ void ggml_vec_dot_q1_3_q8_0(int n, float * restrict s, size_t bs, const void * r
 | 
			
		||||
        // WARNING: reading 3 bytes further than necessary
 | 
			
		||||
        const uint8x16_t x13b = vld1q_u8((const uint8_t *) x);
 | 
			
		||||
 | 
			
		||||
        uint8x16_t x0 = vqtbl1q_u8(x13b, mask0);
 | 
			
		||||
        uint8x16_t x1 = vqtbl1q_u8(x13b, mask1);
 | 
			
		||||
        uint8x16_t x2 = vqtbl1q_u8(x13b, mask2);
 | 
			
		||||
        uint8x16_t x3 = vqtbl1q_u8(x13b, mask3);
 | 
			
		||||
        uint8x16_t x0 = ggml_vqtbl1q_u8(x13b, mask0);
 | 
			
		||||
        uint8x16_t x1 = ggml_vqtbl1q_u8(x13b, mask1);
 | 
			
		||||
        uint8x16_t x2 = ggml_vqtbl1q_u8(x13b, mask2);
 | 
			
		||||
        uint8x16_t x3 = ggml_vqtbl1q_u8(x13b, mask3);
 | 
			
		||||
 | 
			
		||||
        x0 = vmulq_u8(x0, shift0);
 | 
			
		||||
        x1 = vmulq_u8(x1, shift0);
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user