mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-10-31 08:51:55 +00:00 
			
		
		
		
	Implement GGML_CPU_ALL_VARIANTS for ARM (#14080)
* ggml-cpu: Factor out feature detection build from x86 * ggml-cpu: Add ARM feature detection and scoring This is analogous to cpu-feats-x86.cpp. However, to detect compile-time activation of features, we rely on GGML_USE_<FEAT>, which needs to be set in CMake, instead of GGML_<FEAT> that users would set for x86. This is because on ARM, users specify features with GGML_CPU_ARM_ARCH, rather than with individual flags. * ggml-cpu: Implement GGML_CPU_ALL_VARIANTS for ARM Like x86, however to pass around arch flags within CMake, we use GGML_INTERNAL_<FEAT> as we don't have GGML_<FEAT>. Some features are optional, so we may need to build multiple backends per arch version (armv8.2_1, armv8.2_2, ...), and let the scoring function sort out which one can be used. * ggml-cpu: Limit ARM GGML_CPU_ALL_VARIANTS to Linux for now The other platforms will need their own specific variants. This also fixes the bug where the variant-building branch was always being executed as the else-branch of GGML_NATIVE=OFF. The branch is moved to an elseif-branch, which restores the previous behavior.
This commit is contained in:
		 Christian Kastner
					Christian Kastner
				
			
				
					committed by
					
						 GitHub
						GitHub
					
				
			
			
				
	
			
			
			 GitHub
						GitHub
					
				
			
						parent
						
							d4e0d95cf5
						
					
				
				
					commit
					532802f938
				
			| @@ -270,6 +270,7 @@ endfunction() | |||||||
| function(ggml_add_cpu_backend_variant tag_name) | function(ggml_add_cpu_backend_variant tag_name) | ||||||
|     set(GGML_CPU_TAG_NAME ${tag_name}) |     set(GGML_CPU_TAG_NAME ${tag_name}) | ||||||
|     # other: OPENMP LLAMAFILE CPU_HBM |     # other: OPENMP LLAMAFILE CPU_HBM | ||||||
|  |     if (GGML_SYSTEM_ARCH STREQUAL "x86") | ||||||
|         foreach (feat NATIVE |         foreach (feat NATIVE | ||||||
|                       SSE42 |                       SSE42 | ||||||
|                       AVX AVX2 BMI2 AVX_VNNI FMA F16C |                       AVX AVX2 BMI2 AVX_VNNI FMA F16C | ||||||
| @@ -281,6 +282,11 @@ function(ggml_add_cpu_backend_variant tag_name) | |||||||
|         foreach (feat ${ARGN}) |         foreach (feat ${ARGN}) | ||||||
|             set(GGML_${feat} ON) |             set(GGML_${feat} ON) | ||||||
|         endforeach() |         endforeach() | ||||||
|  |     elseif (GGML_SYSTEM_ARCH STREQUAL "ARM") | ||||||
|  |         foreach (feat ${ARGN}) | ||||||
|  |             set(GGML_INTERNAL_${feat} ON) | ||||||
|  |         endforeach() | ||||||
|  |     endif() | ||||||
|  |  | ||||||
|     ggml_add_cpu_backend_variant_impl(${tag_name}) |     ggml_add_cpu_backend_variant_impl(${tag_name}) | ||||||
| endfunction() | endfunction() | ||||||
| @@ -290,6 +296,8 @@ ggml_add_backend(CPU) | |||||||
| if (GGML_CPU_ALL_VARIANTS) | if (GGML_CPU_ALL_VARIANTS) | ||||||
|     if (NOT GGML_BACKEND_DL) |     if (NOT GGML_BACKEND_DL) | ||||||
|         message(FATAL_ERROR "GGML_CPU_ALL_VARIANTS requires GGML_BACKEND_DL") |         message(FATAL_ERROR "GGML_CPU_ALL_VARIANTS requires GGML_BACKEND_DL") | ||||||
|  |     elseif (GGML_CPU_ARM_ARCH) | ||||||
|  |         message(FATAL_ERROR "Cannot use both GGML_CPU_ARM_ARCH and GGML_CPU_ALL_VARIANTS") | ||||||
|     endif() |     endif() | ||||||
|     if (GGML_SYSTEM_ARCH STREQUAL "x86") |     if (GGML_SYSTEM_ARCH STREQUAL "x86") | ||||||
|         ggml_add_cpu_backend_variant(x64) |         ggml_add_cpu_backend_variant(x64) | ||||||
| @@ -303,8 +311,20 @@ if (GGML_CPU_ALL_VARIANTS) | |||||||
|             # MSVC doesn't support AMX |             # MSVC doesn't support AMX | ||||||
|             ggml_add_cpu_backend_variant(sapphirerapids SSE42 AVX F16C AVX2 BMI2 FMA AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16 AMX_TILE AMX_INT8) |             ggml_add_cpu_backend_variant(sapphirerapids SSE42 AVX F16C AVX2 BMI2 FMA AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16 AMX_TILE AMX_INT8) | ||||||
|         endif() |         endif() | ||||||
|  |     elseif(GGML_SYSTEM_ARCH STREQUAL "ARM" AND CMAKE_SYSTEM_NAME MATCHES "Linux") | ||||||
|  |         # Many of these features are optional so we build versions with popular | ||||||
|  |         # combinations and name the backends based on the version they were | ||||||
|  |         # first released with | ||||||
|  |         ggml_add_cpu_backend_variant(armv8.0_1) | ||||||
|  |         ggml_add_cpu_backend_variant(armv8.2_1    DOTPROD) | ||||||
|  |         ggml_add_cpu_backend_variant(armv8.2_2    DOTPROD FP16_VECTOR_ARITHMETIC) | ||||||
|  |         ggml_add_cpu_backend_variant(armv8.2_3    DOTPROD FP16_VECTOR_ARITHMETIC SVE) | ||||||
|  |         ggml_add_cpu_backend_variant(armv8.6_1    DOTPROD FP16_VECTOR_ARITHMETIC SVE MATMUL_INT8) | ||||||
|  |         ggml_add_cpu_backend_variant(armv8.6_2    DOTPROD FP16_VECTOR_ARITHMETIC SVE MATMUL_INT8 SVE2) | ||||||
|  |         ggml_add_cpu_backend_variant(armv9.2_1    DOTPROD FP16_VECTOR_ARITHMETIC SVE MATMUL_INT8 SME) | ||||||
|  |         ggml_add_cpu_backend_variant(armv9.2_2    DOTPROD FP16_VECTOR_ARITHMETIC SVE MATMUL_INT8 SVE2 SME) | ||||||
|     else() |     else() | ||||||
|         message(FATAL_ERROR "GGML_CPU_ALL_VARIANTS not yet supported on ${GGML_SYSTEM_ARCH}") |         message(FATAL_ERROR "GGML_CPU_ALL_VARIANTS not yet supported with ${GGML_SYSTEM_ARCH} on ${CMAKE_SYSTEM_NAME}") | ||||||
|     endif() |     endif() | ||||||
| elseif (GGML_CPU) | elseif (GGML_CPU) | ||||||
|     ggml_add_cpu_backend_variant_impl("") |     ggml_add_cpu_backend_variant_impl("") | ||||||
|   | |||||||
| @@ -1,3 +1,17 @@ | |||||||
|  | function(ggml_add_cpu_backend_features cpu_name arch) | ||||||
|  |     # The feature detection code is compiled as a separate target so that | ||||||
|  |     # it can be built without the architecture flags | ||||||
|  |     # Since multiple variants of the CPU backend may be included in the same | ||||||
|  |     # build, using set_source_files_properties() to set the arch flags is not possible | ||||||
|  |     set(GGML_CPU_FEATS_NAME ${cpu_name}-feats) | ||||||
|  |     add_library(${GGML_CPU_FEATS_NAME} OBJECT ggml-cpu/arch/${arch}/cpu-feats.cpp) | ||||||
|  |     target_include_directories(${GGML_CPU_FEATS_NAME} PRIVATE . .. ../include) | ||||||
|  |     target_compile_definitions(${GGML_CPU_FEATS_NAME} PRIVATE ${ARGN}) | ||||||
|  |     target_compile_definitions(${GGML_CPU_FEATS_NAME} PRIVATE GGML_BACKEND_DL GGML_BACKEND_BUILD GGML_BACKEND_SHARED) | ||||||
|  |     set_target_properties(${GGML_CPU_FEATS_NAME} PROPERTIES POSITION_INDEPENDENT_CODE ON) | ||||||
|  |     target_link_libraries(${cpu_name} PRIVATE ${GGML_CPU_FEATS_NAME}) | ||||||
|  | endfunction() | ||||||
|  |  | ||||||
| function(ggml_add_cpu_backend_variant_impl tag_name) | function(ggml_add_cpu_backend_variant_impl tag_name) | ||||||
|     if (tag_name) |     if (tag_name) | ||||||
|         set(GGML_CPU_NAME ggml-cpu-${tag_name}) |         set(GGML_CPU_NAME ggml-cpu-${tag_name}) | ||||||
| @@ -143,6 +157,49 @@ function(ggml_add_cpu_backend_variant_impl tag_name) | |||||||
|             else() |             else() | ||||||
|                 if (GGML_CPU_ARM_ARCH) |                 if (GGML_CPU_ARM_ARCH) | ||||||
|                     list(APPEND ARCH_FLAGS -march=${GGML_CPU_ARM_ARCH}) |                     list(APPEND ARCH_FLAGS -march=${GGML_CPU_ARM_ARCH}) | ||||||
|  |                 elseif(GGML_CPU_ALL_VARIANTS) | ||||||
|  |                     if (CMAKE_SYSTEM_NAME MATCHES "Linux") | ||||||
|  |                         # Begin with the lowest baseline | ||||||
|  |                         set(ARM_MCPU "armv8-a") | ||||||
|  |                         set(ARCH_TAGS "") | ||||||
|  |                         set(ARCH_DEFINITIONS "") | ||||||
|  |  | ||||||
|  |                         # When a feature is selected, bump the MCPU to the first | ||||||
|  |                         # version that supported it | ||||||
|  |                         if (GGML_INTERNAL_DOTPROD) | ||||||
|  |                             set(ARM_MCPU "armv8.2-a") | ||||||
|  |                             set(ARCH_TAGS "${ARCH_TAGS}+dotprod") | ||||||
|  |                             list(APPEND ARCH_DEFINITIONS GGML_USE_DOTPROD) | ||||||
|  |                         endif() | ||||||
|  |                         if (GGML_INTERNAL_FP16_VECTOR_ARITHMETIC) | ||||||
|  |                             set(ARM_MCPU "armv8.2-a") | ||||||
|  |                             set(ARCH_TAGS "${ARCH_TAGS}+fp16") | ||||||
|  |                             list(APPEND ARCH_DEFINITIONS GGML_USE_FP16_VECTOR_ARITHMETIC) | ||||||
|  |                         endif() | ||||||
|  |                         if (GGML_INTERNAL_SVE) | ||||||
|  |                             set(ARM_MCPU "armv8.2-a") | ||||||
|  |                             set(ARCH_TAGS "${ARCH_TAGS}+sve") | ||||||
|  |                             list(APPEND ARCH_DEFINITIONS GGML_USE_SVE) | ||||||
|  |                         endif() | ||||||
|  |                         if (GGML_INTERNAL_MATMUL_INT8) | ||||||
|  |                             set(ARM_MCPU "armv8.6-a") | ||||||
|  |                             set(ARCH_TAGS "${ARCH_TAGS}+i8mm") | ||||||
|  |                             list(APPEND ARCH_DEFINITIONS GGML_USE_MATMUL_INT8) | ||||||
|  |                         endif() | ||||||
|  |                         if (GGML_INTERNAL_SVE2) | ||||||
|  |                             set(ARM_MCPU "armv8.6-a") | ||||||
|  |                             set(ARCH_TAGS "${ARCH_TAGS}+sve2") | ||||||
|  |                             list(APPEND ARCH_DEFINITIONS GGML_USE_SVE2) | ||||||
|  |                         endif() | ||||||
|  |                         if (GGML_INTERNAL_SME) | ||||||
|  |                             set(ARM_MCPU "armv9.2-a") | ||||||
|  |                             set(ARCH_TAGS "${ARCH_TAGS}+sme") | ||||||
|  |                             list(APPEND ARCH_DEFINITIONS GGML_USE_SME) | ||||||
|  |                         endif() | ||||||
|  |  | ||||||
|  |                         list(APPEND ARCH_FLAGS "-march=${ARM_MCPU}${ARCH_TAGS}") | ||||||
|  |                         ggml_add_cpu_backend_features(${GGML_CPU_NAME} arm ${ARCH_DEFINITIONS}) | ||||||
|  |                     endif() | ||||||
|                 endif() |                 endif() | ||||||
|             endif() |             endif() | ||||||
|  |  | ||||||
| @@ -306,18 +363,7 @@ function(ggml_add_cpu_backend_variant_impl tag_name) | |||||||
|                 # the feature check relies on ARCH_DEFINITIONS, but it is not set with GGML_NATIVE |                 # the feature check relies on ARCH_DEFINITIONS, but it is not set with GGML_NATIVE | ||||||
|                 message(FATAL_ERROR "GGML_NATIVE is not compatible with GGML_BACKEND_DL, consider using GGML_CPU_ALL_VARIANTS") |                 message(FATAL_ERROR "GGML_NATIVE is not compatible with GGML_BACKEND_DL, consider using GGML_CPU_ALL_VARIANTS") | ||||||
|             endif() |             endif() | ||||||
|  |             ggml_add_cpu_backend_features(${GGML_CPU_NAME} x86 ${ARCH_DEFINITIONS}) | ||||||
|             # The feature detection code is compiled as a separate target so that |  | ||||||
|             # it can be built without the architecture flags |  | ||||||
|             # Since multiple variants of the CPU backend may be included in the same |  | ||||||
|             # build, using set_source_files_properties() to set the arch flags is not possible |  | ||||||
|             set(GGML_CPU_FEATS_NAME ${GGML_CPU_NAME}-feats) |  | ||||||
|             add_library(${GGML_CPU_FEATS_NAME} OBJECT ggml-cpu/arch/x86/cpu-feats.cpp) |  | ||||||
|             target_include_directories(${GGML_CPU_FEATS_NAME} PRIVATE . .. ../include) |  | ||||||
|             target_compile_definitions(${GGML_CPU_FEATS_NAME} PRIVATE ${ARCH_DEFINITIONS}) |  | ||||||
|             target_compile_definitions(${GGML_CPU_FEATS_NAME} PRIVATE GGML_BACKEND_DL GGML_BACKEND_BUILD GGML_BACKEND_SHARED) |  | ||||||
|             set_target_properties(${GGML_CPU_FEATS_NAME} PROPERTIES POSITION_INDEPENDENT_CODE ON) |  | ||||||
|             target_link_libraries(${GGML_CPU_NAME} PRIVATE ${GGML_CPU_FEATS_NAME}) |  | ||||||
|         endif() |         endif() | ||||||
|     elseif (GGML_SYSTEM_ARCH STREQUAL "PowerPC") |     elseif (GGML_SYSTEM_ARCH STREQUAL "PowerPC") | ||||||
|         message(STATUS "PowerPC detected") |         message(STATUS "PowerPC detected") | ||||||
|   | |||||||
							
								
								
									
										94
									
								
								ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										94
									
								
								ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,94 @@ | |||||||
|  | #include "ggml-backend-impl.h" | ||||||
|  |  | ||||||
|  | #if defined(__aarch64__) | ||||||
|  |  | ||||||
|  | #if defined(__linux__) | ||||||
|  | #include <sys/auxv.h> | ||||||
|  | #elif defined(__APPLE__) | ||||||
|  | #include <sys/sysctl.h> | ||||||
|  | #endif | ||||||
|  |  | ||||||
|  | #if !defined(HWCAP2_I8MM) | ||||||
|  | #define HWCAP2_I8MM (1 << 13) | ||||||
|  | #endif | ||||||
|  |  | ||||||
|  | #if !defined(HWCAP2_SME) | ||||||
|  | #define HWCAP2_SME (1 << 23) | ||||||
|  | #endif | ||||||
|  |  | ||||||
|  | struct aarch64_features { | ||||||
|  |     // has_neon not needed, aarch64 has NEON guaranteed | ||||||
|  |     bool has_dotprod     = false; | ||||||
|  |     bool has_fp16_va     = false; | ||||||
|  |     bool has_sve         = false; | ||||||
|  |     bool has_sve2        = false; | ||||||
|  |     bool has_i8mm        = false; | ||||||
|  |     bool has_sme         = false; | ||||||
|  |  | ||||||
|  |     aarch64_features() { | ||||||
|  | #if defined(__linux__) | ||||||
|  |         uint32_t hwcap = getauxval(AT_HWCAP); | ||||||
|  |         uint32_t hwcap2 = getauxval(AT_HWCAP2); | ||||||
|  |  | ||||||
|  |         has_dotprod = !!(hwcap & HWCAP_ASIMDDP); | ||||||
|  |         has_fp16_va = !!(hwcap & HWCAP_FPHP); | ||||||
|  |         has_sve     = !!(hwcap & HWCAP_SVE); | ||||||
|  |         has_sve2    = !!(hwcap2 & HWCAP2_SVE2); | ||||||
|  |         has_i8mm    = !!(hwcap2 & HWCAP2_I8MM); | ||||||
|  |         has_sme     = !!(hwcap2 & HWCAP2_SME); | ||||||
|  | #elif defined(__APPLE__) | ||||||
|  |         int oldp = 0; | ||||||
|  |         size_t size = sizeof(oldp); | ||||||
|  |  | ||||||
|  |         if (sysctlbyname("hw.optional.arm.FEAT_DotProd", &oldp, &size, NULL, 0) == 0) { | ||||||
|  |             has_dotprod = static_cast<bool>(oldp); | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         if (sysctlbyname("hw.optional.arm.FEAT_I8MM", &oldp, &size, NULL, 0) == 0) { | ||||||
|  |             has_i8mm = static_cast<bool>(oldp); | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         if (sysctlbyname("hw.optional.arm.FEAT_SME", &oldp, &size, NULL, 0) == 0) { | ||||||
|  |             has_sme = static_cast<bool>(oldp); | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         // Apple apparently does not implement SVE yet | ||||||
|  | #endif | ||||||
|  |     } | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | static int ggml_backend_cpu_aarch64_score() { | ||||||
|  |     int score = 1; | ||||||
|  |     aarch64_features af; | ||||||
|  |  | ||||||
|  | #ifdef GGML_USE_DOTPROD | ||||||
|  |     if (!af.has_dotprod) { return 0; } | ||||||
|  |     score += 1<<1; | ||||||
|  | #endif | ||||||
|  | #ifdef GGML_USE_FP16_VECTOR_ARITHMETIC | ||||||
|  |     if (!af.has_fp16_va) { return 0; } | ||||||
|  |     score += 1<<2; | ||||||
|  | #endif | ||||||
|  | #ifdef GGML_USE_SVE | ||||||
|  |     if (!af.has_sve) { return 0; } | ||||||
|  |     score += 1<<3; | ||||||
|  | #endif | ||||||
|  | #ifdef GGML_USE_MATMUL_INT8 | ||||||
|  |     if (!af.has_i8mm) { return 0; } | ||||||
|  |     score += 1<<4; | ||||||
|  | #endif | ||||||
|  | #ifdef GGML_USE_SVE2 | ||||||
|  |     if (!af.has_sve2) { return 0; } | ||||||
|  |     score += 1<<5; | ||||||
|  | #endif | ||||||
|  | #ifdef GGML_USE_SME | ||||||
|  |     if (!af.has_sme) { return 0; } | ||||||
|  |     score += 1<<6; | ||||||
|  | #endif | ||||||
|  |  | ||||||
|  |     return score; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | GGML_BACKEND_DL_SCORE_IMPL(ggml_backend_cpu_aarch64_score) | ||||||
|  |  | ||||||
|  | # endif // defined(__aarch64__) | ||||||
		Reference in New Issue
	
	Block a user