mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-11-03 09:22:01 +00:00 
			
		
		
		
	Enable Fused-Multiply-Add (FMA) and F16C/CVT16 vector extensions on MSVC (#375)
* Enable Fused-Multiply-Add (FMA) instructions on MSVC: the __FMA__ macro does not exist in MSVC.
* Enable F16C/CVT16 vector extensions on MSVC: the __F16C__ macro does not exist in MSVC, but is implied with AVX2/AVX512.
* MSVC cvt intrinsics.
* Add __SSE3__ macro for MSVC too (because why not), even though it's not currently used for anything when AVX is defined.
This commit is contained in:
		
							
								
								
									
										18
									
								
								ggml.c
									
									
									
									
									
								
							
							
						
						
									
										18
									
								
								ggml.c
									
									
									
									
									
								
							@@ -79,6 +79,19 @@ static int sched_yield (void) {
 | 
				
			|||||||
typedef void* thread_ret_t;
 | 
					typedef void* thread_ret_t;
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// __FMA__ and __F16C__ are not defined in MSVC, however they are implied with AVX2/AVX512
 | 
				
			||||||
 | 
					#if defined(_MSC_VER) && (defined(__AVX2__) || defined(__AVX512F__))
 | 
				
			||||||
 | 
					#ifndef __FMA__
 | 
				
			||||||
 | 
					#define __FMA__
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					#ifndef __F16C__
 | 
				
			||||||
 | 
					#define __F16C__
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					#ifndef __SSE3__
 | 
				
			||||||
 | 
					#define __SSE3__
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#ifdef __HAIKU__
 | 
					#ifdef __HAIKU__
 | 
				
			||||||
#define static_assert(cond, msg) _Static_assert(cond, msg)
 | 
					#define static_assert(cond, msg) _Static_assert(cond, msg)
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
@@ -172,8 +185,13 @@ typedef double ggml_float;
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
#ifdef __F16C__
 | 
					#ifdef __F16C__
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifdef _MSC_VER
 | 
				
			||||||
 | 
					#define GGML_COMPUTE_FP16_TO_FP32(x) _mm_cvtss_f32(_mm_cvtph_ps(_mm_cvtsi32_si128(x)))
 | 
				
			||||||
 | 
					#define GGML_COMPUTE_FP32_TO_FP16(x) _mm_extract_epi16(_mm_cvtps_ph(_mm_set_ss(x), 0), 0)
 | 
				
			||||||
 | 
					#else
 | 
				
			||||||
#define GGML_COMPUTE_FP16_TO_FP32(x) _cvtsh_ss(x)
 | 
					#define GGML_COMPUTE_FP16_TO_FP32(x) _cvtsh_ss(x)
 | 
				
			||||||
#define GGML_COMPUTE_FP32_TO_FP16(x) _cvtss_sh(x, 0)
 | 
					#define GGML_COMPUTE_FP32_TO_FP16(x) _cvtss_sh(x, 0)
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#elif defined(__POWER9_VECTOR__)
 | 
					#elif defined(__POWER9_VECTOR__)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user