mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-10-30 08:42:00 +00:00 
			
		
		
		
	Implement GGML_CPU_ALL_VARIANTS for PowerPC (#14286)
* Add PowerPC feature detection and scoring * ggml-cpu: Implement GGML_CPU_ALL_VARIANTS for PowerPC * ggml-cpu: Delay some initializations until function is called When using GGML_BACKEND_DL=ON, these initializations might use instructions that are not supported by the current CPU. --------- Co-authored-by: Diego Devesa <slarengh@gmail.com>
This commit is contained in:
		 Christian Kastner
					Christian Kastner
				
			
				
					committed by
					
						 GitHub
						GitHub
					
				
			
			
				
	
			
			
			 GitHub
						GitHub
					
				
			
						parent
						
							88fc854b4b
						
					
				
				
					commit
					6369be0735
				
			| @@ -286,6 +286,10 @@ function(ggml_add_cpu_backend_variant tag_name) | ||||
|         foreach (feat ${ARGN}) | ||||
|             set(GGML_INTERNAL_${feat} ON) | ||||
|         endforeach() | ||||
|     elseif (GGML_SYSTEM_ARCH STREQUAL "PowerPC") | ||||
|         foreach (feat ${ARGN}) | ||||
|             set(GGML_INTERNAL_${feat} ON) | ||||
|         endforeach() | ||||
|     endif() | ||||
|  | ||||
|     ggml_add_cpu_backend_variant_impl(${tag_name}) | ||||
| @@ -337,6 +341,19 @@ if (GGML_CPU_ALL_VARIANTS) | ||||
|         else() | ||||
|             message(FATAL_ERROR "Unsupported ARM target OS: ${CMAKE_SYSTEM_NAME}") | ||||
|         endif() | ||||
|     elseif (GGML_SYSTEM_ARCH STREQUAL "PowerPC") | ||||
|         if (CMAKE_SYSTEM_NAME MATCHES "Linux") | ||||
|             ggml_add_cpu_backend_variant(power0) | ||||
|             ggml_add_cpu_backend_variant(power7_1       POWER7) | ||||
|             ggml_add_cpu_backend_variant(power7_2       POWER7  VSX) | ||||
|             ggml_add_cpu_backend_variant(power8_1       POWER8) | ||||
|             ggml_add_cpu_backend_variant(power8_2       POWER8  VSX) | ||||
|             ggml_add_cpu_backend_variant(power9         POWER9  VSX) | ||||
|             ggml_add_cpu_backend_variant(power10        POWER10 VSX) | ||||
|             ggml_add_cpu_backend_variant(power11        POWER11 VSX) | ||||
|         else() | ||||
|             message(FATAL_ERROR "Unsupported PowerPC target OS: ${CMAKE_SYSTEM_NAME}") | ||||
|         endif() | ||||
|     else() | ||||
|         message(FATAL_ERROR "GGML_CPU_ALL_VARIANTS not yet supported with ${GGML_SYSTEM_ARCH} on ${CMAKE_SYSTEM_NAME}") | ||||
|     endif() | ||||
|   | ||||
| @@ -388,6 +388,27 @@ function(ggml_add_cpu_backend_variant_impl tag_name) | ||||
|             else() | ||||
|                 list(APPEND ARCH_FLAGS -mcpu=native -mtune=native -mpowerpc64) | ||||
|             endif() | ||||
|         elseif(GGML_CPU_ALL_VARIANTS) | ||||
|             # Begin with the lowest baseline | ||||
|             set(ARCH_DEFINITIONS "") | ||||
|  | ||||
|             # When a feature is selected, bump the MCPU to the first | ||||
|             # version that supported it | ||||
|             foreach(PVER RANGE 7 11) | ||||
|                 if(DEFINED GGML_INTERNAL_POWER${PVER}) | ||||
|                     set(POWERPC_MCPU "power${PVER}") | ||||
|                     list(APPEND ARCH_DEFINITIONS GGML_USE_POWER${PVER}) | ||||
|                 endif() | ||||
|             endforeach() | ||||
|             if (GGML_INTERNAL_VSX) | ||||
|                 list(APPEND ARCH_DEFINITIONS GGML_USE_VSX) | ||||
|                 list(APPEND ARCH_FLAGS -mvsx) | ||||
|             endif() | ||||
|  | ||||
|             if (DEFINED POWERPC_MCPU) | ||||
|                 list(APPEND ARCH_FLAGS -mcpu=${POWERPC_MCPU}) | ||||
|             endif() | ||||
|             ggml_add_cpu_backend_features(${GGML_CPU_NAME} powerpc ${ARCH_DEFINITIONS}) | ||||
|         else() | ||||
|             if (GGML_CPU_POWERPC_CPUTYPE) | ||||
|                 list(APPEND ARCH_FLAGS -mcpu=${GGML_CPU_POWERPC_CPUTYPE}) | ||||
|   | ||||
							
								
								
									
										82
									
								
								ggml/src/ggml-cpu/arch/powerpc/cpu-feats.cpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										82
									
								
								ggml/src/ggml-cpu/arch/powerpc/cpu-feats.cpp
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,82 @@ | ||||
| # include "ggml-backend-impl.h" | ||||
|  | ||||
| #if defined(__powerpc64__) || defined(__ppc64__) || defined(__PPC64__) | ||||
|  | ||||
| #if defined(__linux__) | ||||
| #include <sys/auxv.h> | ||||
| #endif | ||||
|  | ||||
| #include <string> | ||||
|  | ||||
| struct powerpc_features { | ||||
|     std::string platform = ""; | ||||
|     int power_version    = -1; | ||||
|  | ||||
|     bool has_vsx         = false; | ||||
|  | ||||
|     powerpc_features() { | ||||
| #if defined(__linux__) | ||||
|         unsigned long auxval = getauxval(AT_PLATFORM); | ||||
|         if (auxval) { | ||||
|             platform = std::string(reinterpret_cast<const char*>(auxval)); | ||||
|             // TBD: Do systems exist that return this in uppercase? | ||||
|             if (platform.substr(0, 5) == "power") { | ||||
|                 // Extract a numeric suffix, if one exists | ||||
|                 int vpos = -1; | ||||
|                 for (int i = platform.length() - 1; i >= 0; i--) { | ||||
|                     if (std::isdigit(platform[i])) { | ||||
|                         vpos = i; | ||||
|                     } else { | ||||
|                         break; | ||||
|                     } | ||||
|                 } | ||||
|                 if (vpos > -1) { | ||||
|                     power_version = std::stoi(platform.substr(vpos)); | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
| #endif | ||||
|         if (power_version >= 9) { | ||||
|             has_vsx = true; | ||||
|         } | ||||
|     } | ||||
| }; | ||||
|  | ||||
| static int ggml_backend_cpu_powerpc_score() { | ||||
|     int score = 1; | ||||
|     powerpc_features pf; | ||||
|  | ||||
| // Platform scores | ||||
| #if defined(GGML_USE_POWER7) | ||||
|     if (pf.power_version < 7) { return 0; } | ||||
|     score += 1<<1; | ||||
| #endif | ||||
| #if defined(GGML_USE_POWER8) | ||||
|     if (pf.power_version < 8) { return 0; } | ||||
|     score += 1<<2; | ||||
| #endif | ||||
| #if defined(GGML_USE_POWER9) | ||||
|     if (pf.power_version < 9) { return 0; } | ||||
|     score += 1<<3; | ||||
| #endif | ||||
| #if defined(GGML_USE_POWER10) | ||||
|     if (pf.power_version < 10) { return 0; } | ||||
|     score += 1<<4; | ||||
| #endif | ||||
| #if defined(GGML_USE_POWER11) | ||||
|     if (pf.power_version < 11) { return 0; } | ||||
|     score += 1<<5; | ||||
| #endif | ||||
|  | ||||
| // Feature scores | ||||
| #if defined(GGML_USE_VSX) | ||||
|     if (!pf.has_vsx) { return 0; } | ||||
|     score += 1<<6; | ||||
| #endif | ||||
|  | ||||
|     return score; | ||||
| } | ||||
|  | ||||
| GGML_BACKEND_DL_SCORE_IMPL(ggml_backend_cpu_powerpc_score) | ||||
|  | ||||
| #endif // defined(__powerpc64__) || defined(__ppc64__) || defined(__PPC64__) | ||||
| @@ -1411,44 +1411,45 @@ template <typename BLOC_TYPE, int64_t INTER_SIZE, int64_t NB_COLS, ggml_type PAR | ||||
|     } | ||||
| }; | ||||
|  | ||||
| // instance for Q4 | ||||
| static const tensor_traits<block_q4_0, 4, 4, GGML_TYPE_Q8_0> q4_0_4x4_q8_0; | ||||
| static const tensor_traits<block_q4_0, 8, 4, GGML_TYPE_Q8_0> q4_0_4x8_q8_0; | ||||
| static const tensor_traits<block_q4_0, 8, 8, GGML_TYPE_Q8_0> q4_0_8x8_q8_0; | ||||
| static const tensor_traits<block_q4_K, 8, 8, GGML_TYPE_Q8_K> q4_K_8x8_q8_K; | ||||
|  | ||||
| // instance for IQ4 | ||||
| static const tensor_traits<block_iq4_nl, 4, 4, GGML_TYPE_Q8_0> iq4_nl_4x4_q8_0; | ||||
|  | ||||
| }  // namespace ggml::cpu::repack | ||||
|  | ||||
| static const ggml::cpu::tensor_traits * ggml_repack_get_optimal_repack_type(const struct ggml_tensor * cur) { | ||||
|  | ||||
|     // instance for Q4 | ||||
|     static const ggml::cpu::repack::tensor_traits<block_q4_0, 4, 4, GGML_TYPE_Q8_0> q4_0_4x4_q8_0; | ||||
|     static const ggml::cpu::repack::tensor_traits<block_q4_0, 8, 4, GGML_TYPE_Q8_0> q4_0_4x8_q8_0; | ||||
|     static const ggml::cpu::repack::tensor_traits<block_q4_0, 8, 8, GGML_TYPE_Q8_0> q4_0_8x8_q8_0; | ||||
|     static const ggml::cpu::repack::tensor_traits<block_q4_K, 8, 8, GGML_TYPE_Q8_K> q4_K_8x8_q8_K; | ||||
|  | ||||
|     // instance for IQ4 | ||||
|     static const ggml::cpu::repack::tensor_traits<block_iq4_nl, 4, 4, GGML_TYPE_Q8_0> iq4_nl_4x4_q8_0; | ||||
|  | ||||
|     if (cur->type == GGML_TYPE_Q4_0) { | ||||
|         if (ggml_cpu_has_avx2() || (ggml_cpu_has_sve() && ggml_cpu_has_matmul_int8() && ggml_cpu_get_sve_cnt() == QK8_0)) { | ||||
|             if (cur->ne[1] % 8 == 0) { | ||||
|                 return &ggml::cpu::repack::q4_0_8x8_q8_0; | ||||
|                 return &q4_0_8x8_q8_0; | ||||
|             } | ||||
|         } | ||||
|         if (ggml_cpu_has_neon() && ggml_cpu_has_matmul_int8()) { | ||||
|             if (cur->ne[1] % 4 == 0) { | ||||
|                 return &ggml::cpu::repack::q4_0_4x8_q8_0; | ||||
|                 return &q4_0_4x8_q8_0; | ||||
|             } | ||||
|         } | ||||
|         if (ggml_cpu_has_neon() && ggml_cpu_has_dotprod()) { | ||||
|             if (cur->ne[1] % 4 == 0) { | ||||
|                 return &ggml::cpu::repack::q4_0_4x4_q8_0; | ||||
|                 return &q4_0_4x4_q8_0; | ||||
|             } | ||||
|         } | ||||
|     } else if (cur->type == GGML_TYPE_Q4_K) { | ||||
|         if (ggml_cpu_has_avx2()) { | ||||
|             if (cur->ne[1] % 8 == 0) { | ||||
|                 return &ggml::cpu::repack::q4_K_8x8_q8_K; | ||||
|                 return &q4_K_8x8_q8_K; | ||||
|             } | ||||
|         } | ||||
|     } else if (cur->type == GGML_TYPE_IQ4_NL) { | ||||
|         if (ggml_cpu_has_neon() && ggml_cpu_has_dotprod()) { | ||||
|             if (cur->ne[1] % 4 == 0) { | ||||
|                 return &ggml::cpu::repack::iq4_nl_4x4_q8_0; | ||||
|                 return &iq4_nl_4x4_q8_0; | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|   | ||||
		Reference in New Issue
	
	Block a user