mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-10-30 08:42:00 +00:00 
			
		
		
		
	ggml : add SSE 4.2 and x64 base variant for CPUs without AVX (#12871)
* ggml : add SSE 4.2 variant for CPUs without AVX
* ggml : add x64 base ABI variant
This commit is contained in:
		| @@ -107,6 +107,7 @@ message(DEBUG "INS_ENB             : ${INS_ENB}") | |||||||
| option(GGML_CPU_HBM          "ggml: use memkind for CPU HBM" OFF) | option(GGML_CPU_HBM          "ggml: use memkind for CPU HBM" OFF) | ||||||
| option(GGML_CPU_AARCH64      "ggml: use runtime weight conversion of Q4_0 to Q4_X_X" ON) | option(GGML_CPU_AARCH64      "ggml: use runtime weight conversion of Q4_0 to Q4_X_X" ON) | ||||||
| option(GGML_CPU_KLEIDIAI     "ggml: use KleidiAI optimized kernels if applicable" OFF) | option(GGML_CPU_KLEIDIAI     "ggml: use KleidiAI optimized kernels if applicable" OFF) | ||||||
|  | option(GGML_SSE42            "ggml: enable SSE 4.2"          ${INS_ENB}) | ||||||
| option(GGML_AVX              "ggml: enable AVX"              ${INS_ENB}) | option(GGML_AVX              "ggml: enable AVX"              ${INS_ENB}) | ||||||
| option(GGML_AVX_VNNI         "ggml: enable AVX-VNNI"         OFF) | option(GGML_AVX_VNNI         "ggml: enable AVX-VNNI"         OFF) | ||||||
| option(GGML_AVX2             "ggml: enable AVX2"             ${INS_ENB}) | option(GGML_AVX2             "ggml: enable AVX2"             ${INS_ENB}) | ||||||
|   | |||||||
| @@ -267,6 +267,7 @@ function(ggml_add_cpu_backend_variant tag_name) | |||||||
|     set(GGML_CPU_TAG_NAME ${tag_name}) |     set(GGML_CPU_TAG_NAME ${tag_name}) | ||||||
|     # other: OPENMP LLAMAFILE CPU_HBM |     # other: OPENMP LLAMAFILE CPU_HBM | ||||||
|     foreach (feat NATIVE |     foreach (feat NATIVE | ||||||
|  |                   SSE42 | ||||||
|                   AVX AVX2 BMI2 AVX_VNNI FMA F16C |                   AVX AVX2 BMI2 AVX_VNNI FMA F16C | ||||||
|                   AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16 |                   AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16 | ||||||
|                   AMX_TILE AMX_INT8 AMX_BF16) |                   AMX_TILE AMX_INT8 AMX_BF16) | ||||||
| @@ -286,14 +287,16 @@ if (GGML_CPU_ALL_VARIANTS) | |||||||
|     if (NOT GGML_BACKEND_DL) |     if (NOT GGML_BACKEND_DL) | ||||||
|         message(FATAL_ERROR "GGML_CPU_ALL_VARIANTS requires GGML_BACKEND_DL") |         message(FATAL_ERROR "GGML_CPU_ALL_VARIANTS requires GGML_BACKEND_DL") | ||||||
|     endif() |     endif() | ||||||
|     ggml_add_cpu_backend_variant(sandybridge    AVX) |     ggml_add_cpu_backend_variant(x64) | ||||||
|     ggml_add_cpu_backend_variant(haswell        AVX F16C AVX2 BMI2 FMA) |     ggml_add_cpu_backend_variant(sse42        SSE42) | ||||||
|     ggml_add_cpu_backend_variant(skylakex       AVX F16C AVX2 BMI2 FMA AVX512) |     ggml_add_cpu_backend_variant(sandybridge  SSE42 AVX) | ||||||
|     ggml_add_cpu_backend_variant(icelake        AVX F16C AVX2 BMI2 FMA AVX512 AVX512_VBMI AVX512_VNNI) |     ggml_add_cpu_backend_variant(haswell      SSE42 AVX F16C AVX2 BMI2 FMA) | ||||||
|     ggml_add_cpu_backend_variant(alderlake      AVX F16C AVX2 BMI2 FMA AVX_VNNI) |     ggml_add_cpu_backend_variant(skylakex     SSE42 AVX F16C AVX2 BMI2 FMA AVX512) | ||||||
|  |     ggml_add_cpu_backend_variant(icelake      SSE42 AVX F16C AVX2 BMI2 FMA AVX512 AVX512_VBMI AVX512_VNNI) | ||||||
|  |     ggml_add_cpu_backend_variant(alderlake    SSE42 AVX F16C AVX2 BMI2 FMA AVX_VNNI) | ||||||
|     if (NOT MSVC) |     if (NOT MSVC) | ||||||
|         # MSVC doesn't support AMX |         # MSVC doesn't support AMX | ||||||
|         ggml_add_cpu_backend_variant(sapphirerapids AVX F16C AVX2 BMI2 FMA AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16 AMX_TILE AMX_INT8) |         ggml_add_cpu_backend_variant(sapphirerapids SSE42 AVX F16C AVX2 BMI2 FMA AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16 AMX_TILE AMX_INT8) | ||||||
|     endif() |     endif() | ||||||
| elseif (GGML_CPU) | elseif (GGML_CPU) | ||||||
|     ggml_add_cpu_backend_variant_impl("") |     ggml_add_cpu_backend_variant_impl("") | ||||||
|   | |||||||
| @@ -222,7 +222,7 @@ function(ggml_add_cpu_backend_variant_impl tag_name) | |||||||
|             elseif (GGML_AVX) |             elseif (GGML_AVX) | ||||||
|                 list(APPEND ARCH_FLAGS /arch:AVX) |                 list(APPEND ARCH_FLAGS /arch:AVX) | ||||||
|                 list(APPEND ARCH_DEFINITIONS GGML_AVX) |                 list(APPEND ARCH_DEFINITIONS GGML_AVX) | ||||||
|             else () |             elseif (GGML_SSE42) | ||||||
|                 list(APPEND ARCH_FLAGS /arch:SSE4.2) |                 list(APPEND ARCH_FLAGS /arch:SSE4.2) | ||||||
|                 list(APPEND ARCH_DEFINITIONS GGML_SSE42) |                 list(APPEND ARCH_DEFINITIONS GGML_SSE42) | ||||||
|             endif() |             endif() | ||||||
| @@ -237,8 +237,10 @@ function(ggml_add_cpu_backend_variant_impl tag_name) | |||||||
|             if (GGML_NATIVE) |             if (GGML_NATIVE) | ||||||
|                 list(APPEND ARCH_FLAGS -march=native) |                 list(APPEND ARCH_FLAGS -march=native) | ||||||
|             else () |             else () | ||||||
|                 list(APPEND ARCH_FLAGS -msse4.2) |                 if (GGML_SSE42) | ||||||
|                 list(APPEND ARCH_DEFINITIONS GGML_SSE42) |                     list(APPEND ARCH_FLAGS -msse4.2) | ||||||
|  |                     list(APPEND ARCH_DEFINITIONS GGML_SSE42) | ||||||
|  |                 endif() | ||||||
|                 if (GGML_F16C) |                 if (GGML_F16C) | ||||||
|                     list(APPEND ARCH_FLAGS -mf16c) |                     list(APPEND ARCH_FLAGS -mf16c) | ||||||
|                     list(APPEND ARCH_DEFINITIONS GGML_F16C) |                     list(APPEND ARCH_DEFINITIONS GGML_F16C) | ||||||
|   | |||||||
| @@ -263,7 +263,7 @@ void test_x86_is() { | |||||||
| static int ggml_backend_cpu_x86_score() { | static int ggml_backend_cpu_x86_score() { | ||||||
|     // FIXME: this does not check for OS support |     // FIXME: this does not check for OS support | ||||||
|  |  | ||||||
|     int score = 0; |     int score = 1; | ||||||
|     cpuid_x86 is; |     cpuid_x86 is; | ||||||
|  |  | ||||||
| #ifdef GGML_FMA | #ifdef GGML_FMA | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Diego Devesa
					Diego Devesa