mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-11-04 09:32:00 +00:00 
			
		
		
		
	ggml-cpu: reduce asm calls for hsum (#14037)
Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>
This commit is contained in:
		@@ -944,10 +944,8 @@ static inline void __lsx_f16x4_store(ggml_fp16_t * x, __m128 y) {
 | 
				
			|||||||
    for (int i = 0; i < offset; ++i) {              \
 | 
					    for (int i = 0; i < offset; ++i) {              \
 | 
				
			||||||
        x[i] = vec_add(x[i], x[offset + i]);        \
 | 
					        x[i] = vec_add(x[i], x[offset + i]);        \
 | 
				
			||||||
    }                                               \
 | 
					    }                                               \
 | 
				
			||||||
    res = vec_extract(x[0], 0) +                    \
 | 
					    float32x4_t tmp = x[0] + vec_reve(x[0]);        \
 | 
				
			||||||
          vec_extract(x[0], 1) +                    \
 | 
					    res = tmp[0] + tmp[1];                          \
 | 
				
			||||||
          vec_extract(x[0], 2) +                    \
 | 
					 | 
				
			||||||
          vec_extract(x[0], 3);                     \
 | 
					 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define GGML_F32_VEC        GGML_F32x4
 | 
					#define GGML_F32_VEC        GGML_F32x4
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user