mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-11-04 09:32:00 +00:00 
			
		
		
		
	Add AVX2 path for ggml_vec_dot_q8_0_q8_0, 2x faster than scalar (#1211)
This commit is contained in:
		
							
								
								
									
										18
									
								
								ggml.c
									
									
									
									
									
								
							
							
						
						
									
										18
									
								
								ggml.c
									
									
									
									
									
								
							@@ -3626,6 +3626,24 @@ static void ggml_vec_dot_q8_0_q8_0(const int n, float * restrict s, const void *
 | 
				
			|||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    *s = vaddvq_f32(sumv0) + vaddvq_f32(sumv1);
 | 
					    *s = vaddvq_f32(sumv0) + vaddvq_f32(sumv1);
 | 
				
			||||||
 | 
					#elif defined(__AVX2__)
 | 
				
			||||||
 | 
					    // Initialize accumulator with zeros
 | 
				
			||||||
 | 
					    __m256 acc = _mm256_setzero_ps();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // Main loop
 | 
				
			||||||
 | 
					    for (int i = 0; i < nb; ++i) {
 | 
				
			||||||
 | 
					        // Compute combined scale for the block
 | 
				
			||||||
 | 
					        const __m256 d = _mm256_mul_ps( _mm256_broadcast_ss( &x[i].d ), _mm256_broadcast_ss( &y[i].d ) );
 | 
				
			||||||
 | 
					        __m256i bx = _mm256_loadu_si256((const __m256i *)x[i].qs);
 | 
				
			||||||
 | 
					        __m256i by = _mm256_loadu_si256((const __m256i *)y[i].qs);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        const __m256 q = mul_sum_i8_pairs_float(bx, by);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        // Multiply q with scale and accumulate
 | 
				
			||||||
 | 
					        acc = _mm256_fmadd_ps( d, q, acc );
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    *s = hsum_float_8(acc);
 | 
				
			||||||
#else
 | 
					#else
 | 
				
			||||||
    // scalar
 | 
					    // scalar
 | 
				
			||||||
    float sumf = 0.0;
 | 
					    float sumf = 0.0;
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user