mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-10-31 08:51:55 +00:00 
			
		
		
		
	ggml : fix arm build (#10890)
* ggml: GGML_NATIVE uses -mcpu=native on ARM

Signed-off-by: Adrien Gallouët <angt@huggingface.co>

* ggml: Show detected features with GGML_NATIVE

Signed-off-by: Adrien Gallouët <angt@huggingface.co>

* remove msvc support, add GGML_CPU_ARM_ARCH option

* disable llamafile in android example

* march -> mcpu, skip adding feature macros

ggml-ci

---------

Signed-off-by: Adrien Gallouët <angt@huggingface.co>
Co-authored-by: Adrien Gallouët <angt@huggingface.co>
This commit is contained in:
		| @@ -19,6 +19,7 @@ android { | ||||
|         externalNativeBuild { | ||||
|             cmake { | ||||
|                 arguments += "-DLLAMA_BUILD_COMMON=ON" | ||||
|                 arguments += "-DGGML_LLAMAFILE=OFF" | ||||
|                 arguments += "-DCMAKE_BUILD_TYPE=Release" | ||||
|                 cppFlags += listOf() | ||||
|                 arguments += listOf() | ||||
|   | ||||
| @@ -74,10 +74,10 @@ if (NOT GGML_CUDA_GRAPHS_DEFAULT) | ||||
| endif() | ||||
|  | ||||
| # general | ||||
| option(GGML_STATIC "ggml: static link libraries"         OFF) | ||||
| option(GGML_NATIVE "ggml: enable -march=native flag"     ${GGML_NATIVE_DEFAULT}) | ||||
| option(GGML_LTO    "ggml: enable link time optimization" OFF) | ||||
| option(GGML_CCACHE "ggml: use ccache if available"       ON) | ||||
| option(GGML_STATIC "ggml: static link libraries"                     OFF) | ||||
| option(GGML_NATIVE "ggml: optimize the build for the current system" ${GGML_NATIVE_DEFAULT}) | ||||
| option(GGML_LTO    "ggml: enable link time optimization"             OFF) | ||||
| option(GGML_CCACHE "ggml: use ccache if available"                   ON) | ||||
|  | ||||
| # debug | ||||
| option(GGML_ALL_WARNINGS           "ggml: enable all compiler warnings"                   ON) | ||||
| @@ -120,8 +120,9 @@ endif() | ||||
| option(GGML_LASX             "ggml: enable lasx"             ON) | ||||
| option(GGML_LSX              "ggml: enable lsx"              ON) | ||||
| option(GGML_RVV              "ggml: enable rvv"              ON) | ||||
| option(GGML_SVE              "ggml: enable SVE"              OFF) | ||||
|  | ||||
| option(GGML_CPU_ALL_VARIANTS "ggml: build all variants of the CPU backend (requires GGML_BACKEND_DL)" OFF) | ||||
| set(GGML_CPU_ARM_ARCH "" CACHE STRING "ggml: CPU architecture for ARM") | ||||
|  | ||||
|  | ||||
| if (WIN32) | ||||
|   | ||||
| @@ -74,112 +74,77 @@ function(ggml_add_cpu_backend_variant_impl tag_name) | ||||
|  | ||||
|     if (CMAKE_OSX_ARCHITECTURES      STREQUAL "arm64" OR | ||||
|         CMAKE_GENERATOR_PLATFORM_LWR STREQUAL "arm64" OR | ||||
|         (NOT CMAKE_OSX_ARCHITECTURES      AND | ||||
|         NOT CMAKE_GENERATOR_PLATFORM_LWR AND | ||||
|         (NOT CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_GENERATOR_PLATFORM_LWR AND | ||||
|             CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm.*|ARM64)$")) | ||||
|  | ||||
|         message(STATUS "ARM detected") | ||||
|  | ||||
|         if (MSVC) | ||||
|             list(APPEND ARCH_DEFINITIONS __aarch64__) # MSVC defines _M_ARM64 instead | ||||
|             list(APPEND ARCH_DEFINITIONS __ARM_NEON) | ||||
|             list(APPEND ARCH_DEFINITIONS __ARM_FEATURE_FMA) | ||||
|  | ||||
|             set(CMAKE_REQUIRED_FLAGS_PREV ${CMAKE_REQUIRED_FLAGS}) | ||||
|             string(JOIN " " CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS} "/arch:armv8.2") | ||||
|  | ||||
|             check_cxx_source_compiles("#include <arm_neon.h>\nint main() { int8x16_t _a, _b; int32x4_t _s = vdotq_s32(_s, _a, _b); return 0; }" GGML_COMPILER_SUPPORT_DOTPROD) | ||||
|             if (GGML_COMPILER_SUPPORT_DOTPROD) | ||||
|                 list(APPEND ARCH_DEFINITIONS __ARM_FEATURE_DOTPROD) | ||||
|  | ||||
|                 message(STATUS "ARM feature DOTPROD enabled") | ||||
|             endif () | ||||
|  | ||||
|             check_cxx_source_compiles("#include <arm_neon.h>\nint main() { int8x16_t _a, _b; int32x4_t _s = vmmlaq_f32(_s, _a, _b); return 0; }" GGML_COMPILER_SUPPORT_MATMUL_INT8) | ||||
|  | ||||
|             if (GGML_COMPILER_SUPPORT_MATMUL_INT8) | ||||
|                 list(APPEND ARCH_DEFINITIONS __ARM_FEATURE_MATMUL_INT8) | ||||
|  | ||||
|                 message(STATUS "ARM feature MATMUL_INT8 enabled") | ||||
|             endif () | ||||
|  | ||||
|             check_cxx_source_compiles("#include <arm_neon.h>\nint main() { float16_t _a; float16x8_t _s = vdupq_n_f16(_a); return 0; }" GGML_COMPILER_SUPPORT_FP16_VECTOR_ARITHMETIC) | ||||
|             if (GGML_COMPILER_SUPPORT_FP16_VECTOR_ARITHMETIC) | ||||
|                 list(APPEND ARCH_DEFINITIONS __ARM_FEATURE_FP16_VECTOR_ARITHMETIC) | ||||
|  | ||||
|                 message(STATUS "ARM feature FP16_VECTOR_ARITHMETIC enabled") | ||||
|             endif () | ||||
|  | ||||
|             set(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_PREV}) | ||||
|         elseif (APPLE) | ||||
|             if (GGML_NATIVE) | ||||
|                 set(USER_PROVIDED_MARCH FALSE) | ||||
|                 foreach(flag_var IN ITEMS CMAKE_C_FLAGS CMAKE_CXX_FLAGS CMAKE_REQUIRED_FLAGS) | ||||
|                     if ("${${flag_var}}" MATCHES "-march=[a-zA-Z0-9+._-]+") | ||||
|                         set(USER_PROVIDED_MARCH TRUE) | ||||
|                         break() | ||||
|                     endif() | ||||
|                 endforeach() | ||||
|  | ||||
|                 if (NOT USER_PROVIDED_MARCH) | ||||
|                     set(MARCH_FLAGS "-march=armv8.2a") | ||||
|  | ||||
|                     check_cxx_source_compiles("#include <arm_neon.h>\nint main() { int8x16_t _a, _b; int32x4_t _s = vdotq_s32(_s, _a, _b); return 0; }" GGML_COMPILER_SUPPORT_DOTPROD) | ||||
|                     if (GGML_COMPILER_SUPPORT_DOTPROD) | ||||
|                         set(MARCH_FLAGS "${MARCH_FLAGS}+dotprod") | ||||
|                         list(APPEND ARCH_DEFINITIONS __ARM_FEATURE_DOTPROD) | ||||
|  | ||||
|                         message(STATUS "ARM feature DOTPROD enabled") | ||||
|                     endif () | ||||
|  | ||||
|                     set(TEST_I8MM_FLAGS "-march=armv8.2a+i8mm") | ||||
|  | ||||
|                     set(CMAKE_REQUIRED_FLAGS_SAVE ${CMAKE_REQUIRED_FLAGS}) | ||||
|                     set(CMAKE_REQUIRED_FLAGS     "${CMAKE_REQUIRED_FLAGS} ${TEST_I8MM_FLAGS}") | ||||
|  | ||||
|                     check_cxx_source_compiles("#include <arm_neon.h>\nint main() { int8x16_t _a, _b; int32x4_t _s = vmmlaq_s32(_s, _a, _b); return 0; }" GGML_COMPILER_SUPPORT_MATMUL_INT8) | ||||
|                     if (GGML_COMPILER_SUPPORT_MATMUL_INT8) | ||||
|                         set(MARCH_FLAGS "${MARCH_FLAGS}+i8mm") | ||||
|                         list(APPEND ARCH_DEFINITIONS __ARM_FEATURE_MATMUL_INT8) | ||||
|  | ||||
|                         message(STATUS "ARM feature MATMUL_INT8 enabled") | ||||
|                     endif () | ||||
|  | ||||
|                     set(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_SAVE}) | ||||
|  | ||||
|                     list(APPEND ARCH_FLAGS "${MARCH_FLAGS}") | ||||
|                 endif () | ||||
|             endif () | ||||
|         if (MSVC AND NOT CMAKE_C_COMPILER_ID STREQUAL "Clang") | ||||
|             message(FATAL_ERROR "MSVC is not supported for ARM, use clang") | ||||
|         else() | ||||
|             check_cxx_compiler_flag(-mfp16-format=ieee COMPILER_SUPPORTS_FP16_FORMAT_I3E) | ||||
|             if (NOT "${COMPILER_SUPPORTS_FP16_FORMAT_I3E}" STREQUAL "") | ||||
|                 list(APPEND ARCH_FLAGS -mfp16-format=ieee) | ||||
|             endif() | ||||
|             if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv6") | ||||
|                 # Raspberry Pi 1, Zero | ||||
|                 list(APPEND ARCH_FLAGS -mfpu=neon-fp-armv8 -mno-unaligned-access) | ||||
|             endif() | ||||
|             if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv7") | ||||
|                 if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Android") | ||||
|                     # Android armeabi-v7a | ||||
|                     list(APPEND ARCH_FLAGS -mfpu=neon-vfpv4 -mno-unaligned-access -funsafe-math-optimizations) | ||||
|                 else() | ||||
|                     # Raspberry Pi 2 | ||||
|                     list(APPEND ARCH_FLAGS -mfpu=neon-fp-armv8 -mno-unaligned-access -funsafe-math-optimizations) | ||||
|  | ||||
|             if (GGML_NATIVE) | ||||
|                 list(APPEND ARCH_FLAGS -mcpu=native) | ||||
|  | ||||
|                 set(CMAKE_REQUIRED_FLAGS_SAVE ${CMAKE_REQUIRED_FLAGS}) | ||||
|  | ||||
|                 # -mcpu=native does not always enable all the features in some compilers, | ||||
|                 # so we check for them manually and enable them if available | ||||
|  | ||||
|                 include(CheckCXXSourceRuns) | ||||
|  | ||||
|                 set(CMAKE_REQUIRED_FLAGS "${ARCH_FLAGS}+dotprod") | ||||
|                 check_cxx_source_runs( | ||||
|                     "#include <arm_neon.h>\nint main() { int8x16_t _a, _b; int32x4_t _s = vdotq_s32(_s, _a, _b); return 0; }" | ||||
|                     GGML_COMPILER_SUPPORT_DOTPROD) | ||||
|                 if (GGML_COMPILER_SUPPORT_DOTPROD) | ||||
|                     set(ARCH_FLAGS "${ARCH_FLAGS}+dotprod") | ||||
|                 endif() | ||||
|  | ||||
|                 set(CMAKE_REQUIRED_FLAGS "${ARCH_FLAGS}+i8mm") | ||||
|                 check_cxx_source_runs( | ||||
|                     "#include <arm_neon.h>\nint main() { int8x16_t _a, _b; int32x4_t _s = vmmlaq_s32(_s, _a, _b); return 0; }" | ||||
|                     GGML_COMPILER_SUPPORT_I8MM) | ||||
|                 if (GGML_COMPILER_SUPPORT_I8MM) | ||||
|                     set(ARCH_FLAGS "${ARCH_FLAGS}+i8mm") | ||||
|                 endif() | ||||
|  | ||||
|                 set(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_SAVE}) | ||||
|  | ||||
|             else() | ||||
|                 if (GGML_CPU_ARM_ARCH) | ||||
|                     list(APPEND ARCH_FLAGS -march=${GGML_CPU_ARM_ARCH}) | ||||
|                 endif() | ||||
|             endif() | ||||
|             if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv8") | ||||
|                 # Android arm64-v8a | ||||
|                 # Raspberry Pi 3, 4, Zero 2 (32-bit) | ||||
|                 list(APPEND ARCH_FLAGS -mno-unaligned-access) | ||||
|             endif() | ||||
|             if (GGML_SVE) | ||||
|                 list(APPEND ARCH_FLAGS -march=armv8.6-a+sve) | ||||
|  | ||||
|             # show enabled features | ||||
|             execute_process( | ||||
|                 COMMAND ${CMAKE_C_COMPILER} ${ARCH_FLAGS} -dM -E - | ||||
|                 INPUT_FILE "/dev/null" | ||||
|                 OUTPUT_VARIABLE ARM_FEATURE | ||||
|                 RESULT_VARIABLE ARM_FEATURE_RESULT | ||||
|             ) | ||||
|             if (ARM_FEATURE_RESULT) | ||||
|                 message(FATAL_ERROR "Failed to get ARM features") | ||||
|             else() | ||||
|                 foreach(feature DOTPROD SVE MATMUL_INT8 FMA FP16_VECTOR_ARITHMETIC) | ||||
|                     string(FIND "${ARM_FEATURE}" "__ARM_FEATURE_${feature} 1" feature_pos) | ||||
|                     if (NOT ${feature_pos} EQUAL -1) | ||||
|                         message(STATUS "ARM feature ${feature} enabled") | ||||
|                     endif() | ||||
|                 endforeach() | ||||
|             endif() | ||||
|         endif() | ||||
|     elseif (CMAKE_OSX_ARCHITECTURES STREQUAL "x86_64" OR CMAKE_GENERATOR_PLATFORM_LWR MATCHES "^(x86_64|i686|amd64|x64|win32)$" OR | ||||
|             (NOT CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_GENERATOR_PLATFORM_LWR AND | ||||
|             CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|i686|AMD64|amd64)$")) | ||||
|  | ||||
|         message(STATUS "x86 detected") | ||||
|  | ||||
|         if (MSVC) | ||||
|             # instruction set detection for MSVC only | ||||
|             if (GGML_NATIVE) | ||||
|   | ||||
| @@ -522,6 +522,12 @@ static ggml_backend_feature * ggml_backend_cpu_get_features(ggml_backend_reg_t r | ||||
|         if (ggml_cpu_has_sve()) { | ||||
|             features.push_back({ "SVE", "1" }); | ||||
|         } | ||||
|         if (ggml_cpu_has_dotprod()) { | ||||
|             features.push_back({ "DOTPROD", "1" }); | ||||
|         } | ||||
|         if (ggml_cpu_has_matmul_int8()) { | ||||
|             features.push_back({ "MATMUL_INT8", "1" }); | ||||
|         } | ||||
|         if (ggml_cpu_get_sve_cnt() > 0) { | ||||
|             static std::string sve_cnt = std::to_string(ggml_cpu_get_sve_cnt()); | ||||
|             features.push_back({ "SVE_CNT", sve_cnt.c_str() }); | ||||
|   | ||||
| @@ -204,6 +204,7 @@ template <> inline float32x4_t load(const float *p) { | ||||
|     return vld1q_f32(p); | ||||
| } | ||||
| #if !defined(_MSC_VER) | ||||
| // FIXME: this should check for __ARM_FEATURE_FP16_VECTOR_ARITHMETIC | ||||
| template <> inline float16x8_t load(const ggml_fp16_t *p) { | ||||
|     return vld1q_f16((const float16_t *)p); | ||||
| } | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Diego Devesa
					Diego Devesa