mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-10-30 08:42:00 +00:00 
			
		
		
		
	ggml : add SSE 4.2 and x64 base variant for CPUs without AVX (#12871)
* ggml : add SSE 4.2 variant for CPUs without AVX
* ggml : add x64 base ABI variant
This commit is contained in:
		| @@ -107,6 +107,7 @@ message(DEBUG "INS_ENB             : ${INS_ENB}") | ||||
| option(GGML_CPU_HBM          "ggml: use memkind for CPU HBM" OFF) | ||||
| option(GGML_CPU_AARCH64      "ggml: use runtime weight conversion of Q4_0 to Q4_X_X" ON) | ||||
| option(GGML_CPU_KLEIDIAI     "ggml: use KleidiAI optimized kernels if applicable" OFF) | ||||
| option(GGML_SSE42            "ggml: enable SSE 4.2"          ${INS_ENB}) | ||||
| option(GGML_AVX              "ggml: enable AVX"              ${INS_ENB}) | ||||
| option(GGML_AVX_VNNI         "ggml: enable AVX-VNNI"         OFF) | ||||
| option(GGML_AVX2             "ggml: enable AVX2"             ${INS_ENB}) | ||||
|   | ||||
| @@ -267,6 +267,7 @@ function(ggml_add_cpu_backend_variant tag_name) | ||||
|     set(GGML_CPU_TAG_NAME ${tag_name}) | ||||
|     # other: OPENMP LLAMAFILE CPU_HBM | ||||
|     foreach (feat NATIVE | ||||
|                   SSE42 | ||||
|                   AVX AVX2 BMI2 AVX_VNNI FMA F16C | ||||
|                   AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16 | ||||
|                   AMX_TILE AMX_INT8 AMX_BF16) | ||||
| @@ -286,14 +287,16 @@ if (GGML_CPU_ALL_VARIANTS) | ||||
|     if (NOT GGML_BACKEND_DL) | ||||
|         message(FATAL_ERROR "GGML_CPU_ALL_VARIANTS requires GGML_BACKEND_DL") | ||||
|     endif() | ||||
|     ggml_add_cpu_backend_variant(sandybridge    AVX) | ||||
|     ggml_add_cpu_backend_variant(haswell        AVX F16C AVX2 BMI2 FMA) | ||||
|     ggml_add_cpu_backend_variant(skylakex       AVX F16C AVX2 BMI2 FMA AVX512) | ||||
|     ggml_add_cpu_backend_variant(icelake        AVX F16C AVX2 BMI2 FMA AVX512 AVX512_VBMI AVX512_VNNI) | ||||
|     ggml_add_cpu_backend_variant(alderlake      AVX F16C AVX2 BMI2 FMA AVX_VNNI) | ||||
|     ggml_add_cpu_backend_variant(x64) | ||||
|     ggml_add_cpu_backend_variant(sse42        SSE42) | ||||
|     ggml_add_cpu_backend_variant(sandybridge  SSE42 AVX) | ||||
|     ggml_add_cpu_backend_variant(haswell      SSE42 AVX F16C AVX2 BMI2 FMA) | ||||
|     ggml_add_cpu_backend_variant(skylakex     SSE42 AVX F16C AVX2 BMI2 FMA AVX512) | ||||
|     ggml_add_cpu_backend_variant(icelake      SSE42 AVX F16C AVX2 BMI2 FMA AVX512 AVX512_VBMI AVX512_VNNI) | ||||
|     ggml_add_cpu_backend_variant(alderlake    SSE42 AVX F16C AVX2 BMI2 FMA AVX_VNNI) | ||||
|     if (NOT MSVC) | ||||
|         # MSVC doesn't support AMX | ||||
|         ggml_add_cpu_backend_variant(sapphirerapids AVX F16C AVX2 BMI2 FMA AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16 AMX_TILE AMX_INT8) | ||||
|         ggml_add_cpu_backend_variant(sapphirerapids SSE42 AVX F16C AVX2 BMI2 FMA AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16 AMX_TILE AMX_INT8) | ||||
|     endif() | ||||
| elseif (GGML_CPU) | ||||
|     ggml_add_cpu_backend_variant_impl("") | ||||
|   | ||||
| @@ -222,7 +222,7 @@ function(ggml_add_cpu_backend_variant_impl tag_name) | ||||
|             elseif (GGML_AVX) | ||||
|                 list(APPEND ARCH_FLAGS /arch:AVX) | ||||
|                 list(APPEND ARCH_DEFINITIONS GGML_AVX) | ||||
|             else () | ||||
|             elseif (GGML_SSE42) | ||||
|                 list(APPEND ARCH_FLAGS /arch:SSE4.2) | ||||
|                 list(APPEND ARCH_DEFINITIONS GGML_SSE42) | ||||
|             endif() | ||||
| @@ -237,8 +237,10 @@ function(ggml_add_cpu_backend_variant_impl tag_name) | ||||
|             if (GGML_NATIVE) | ||||
|                 list(APPEND ARCH_FLAGS -march=native) | ||||
|             else () | ||||
|                 if (GGML_SSE42) | ||||
|                     list(APPEND ARCH_FLAGS -msse4.2) | ||||
|                     list(APPEND ARCH_DEFINITIONS GGML_SSE42) | ||||
|                 endif() | ||||
|                 if (GGML_F16C) | ||||
|                     list(APPEND ARCH_FLAGS -mf16c) | ||||
|                     list(APPEND ARCH_DEFINITIONS GGML_F16C) | ||||
|   | ||||
| @@ -263,7 +263,7 @@ void test_x86_is() { | ||||
| static int ggml_backend_cpu_x86_score() { | ||||
|     // FIXME: this does not check for OS support | ||||
|  | ||||
|     int score = 0; | ||||
|     int score = 1; | ||||
|     cpuid_x86 is; | ||||
|  | ||||
| #ifdef GGML_FMA | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Diego Devesa
					Diego Devesa