mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-10-31 08:51:55 +00:00 
			
		
		
		
	Fix type error in quantize_row_q4_1 for Arm NEON
This commit is contained in:
		 Håkon H. Hitland
					Håkon H. Hitland
				
			
				
					committed by
					
						 Georgi Gerganov
						Georgi Gerganov
					
				
			
			
				
	
			
			
			 Georgi Gerganov
						Georgi Gerganov
					
				
			
						parent
						
							4282f9b0f3
						
					
				
				
					commit
					bd166f7ffc
				
			
							
								
								
									
										2
									
								
								ggml.c
									
									
									
									
									
								
							
							
						
						
									
										2
									
								
								ggml.c
									
									
									
									
									
								
							| @@ -800,7 +800,7 @@ static void quantize_row_q4_0(const float * restrict x, void * restrict vy, int | ||||
|             const float32x4_t v  = vmulq_n_f32(srcv[l], id); | ||||
|             const float32x4_t vf = vaddq_f32(v, vdupq_n_f32(8.5f)); | ||||
|             const int32x4_t   vi = vcvtq_s32_f32(vf); | ||||
| -           const int32x4     vc = vminq_u32(vi, vdupq_n_u32(15)); | ||||
| +           const int32x4_t   vc = vminq_s32(vi, vdupq_n_s32(15)); | ||||
|  | ||||
|             y[i].qs[2*l + 0] = vgetq_lane_s32(vc, 0) | (vgetq_lane_s32(vc, 1) << 4); | ||||
|             y[i].qs[2*l + 1] = vgetq_lane_s32(vc, 2) | (vgetq_lane_s32(vc, 3) << 4); | ||||
|   | ||||
		Reference in New Issue
	
	Block a user