diff --git a/ggml/src/CMakeLists.txt b/ggml/src/CMakeLists.txt
index 9cb2c228dcfb2..632112d7e5cb4 100644
--- a/ggml/src/CMakeLists.txt
+++ b/ggml/src/CMakeLists.txt
@@ -317,9 +317,9 @@ if (GGML_CPU_ALL_VARIANTS)
         endif()
     elseif(GGML_SYSTEM_ARCH STREQUAL "ARM")
         if (CMAKE_SYSTEM_NAME MATCHES "Linux")
-            # Many of these features are optional so we build versions with popular
-            # combinations and name the backends based on the version they were
-            # first released with
+            # Generic ARM builds: many of these features are optional, so we build
+            # popular combinations and name each backend after the architecture
+            # version that first shipped them
             ggml_add_cpu_backend_variant(armv8.0_1)
             ggml_add_cpu_backend_variant(armv8.2_1 DOTPROD)
             ggml_add_cpu_backend_variant(armv8.2_2 DOTPROD FP16_VECTOR_ARITHMETIC)
@@ -328,6 +328,9 @@ if (GGML_CPU_ALL_VARIANTS)
             ggml_add_cpu_backend_variant(armv8.6_2 DOTPROD FP16_VECTOR_ARITHMETIC SVE MATMUL_INT8 SVE2)
             ggml_add_cpu_backend_variant(armv9.2_1 DOTPROD FP16_VECTOR_ARITHMETIC SVE MATMUL_INT8 SME)
             ggml_add_cpu_backend_variant(armv9.2_2 DOTPROD FP16_VECTOR_ARITHMETIC SVE MATMUL_INT8 SVE2 SME)
+            # Builds optimized for specific cores
+            # neoverse-v2: AWS Graviton4, NVIDIA Grace
+            ggml_add_cpu_backend_variant(neoverse-v2 DOTPROD FP16_VECTOR_ARITHMETIC SVE MATMUL_INT8 SVE2)
         elseif (CMAKE_SYSTEM_NAME MATCHES "Android")
             # Android-specific backends with SoC-compatible feature sets
             ggml_add_cpu_backend_variant(android_armv8.0_1)
diff --git a/ggml/src/ggml-cpu/CMakeLists.txt b/ggml/src/ggml-cpu/CMakeLists.txt
index 71b1d67b8d0a6..a7f6a064155ba 100644
--- a/ggml/src/ggml-cpu/CMakeLists.txt
+++ b/ggml/src/ggml-cpu/CMakeLists.txt
@@ -154,53 +154,59 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
             check_arm_feature(sme "#include <arm_sme.h>\n__arm_locally_streaming int main() { __asm__ volatile(\"smstart; smstop;\"); return 0; }")
 
             list(APPEND ARCH_FLAGS "${ARM_MCPU_FLAG}${ARM_MCPU_FLAG_FIX}")
-        else()
-            if (GGML_CPU_ARM_ARCH)
-                list(APPEND ARCH_FLAGS -march=${GGML_CPU_ARM_ARCH})
-            elseif(GGML_CPU_ALL_VARIANTS)
-                # Begin with the lowest baseline
-                set(ARM_MCPU "armv8-a")
-                set(ARCH_TAGS "")
-                set(ARCH_DEFINITIONS "")
-
-                # When a feature is selected, bump the MCPU to the first
-                # version that supported it
-                if (GGML_INTERNAL_DOTPROD)
-                    set(ARM_MCPU "armv8.2-a")
-                    set(ARCH_TAGS "${ARCH_TAGS}+dotprod")
-                    list(APPEND ARCH_DEFINITIONS GGML_USE_DOTPROD)
-                endif()
-                if (GGML_INTERNAL_FP16_VECTOR_ARITHMETIC)
-                    set(ARM_MCPU "armv8.2-a")
-                    set(ARCH_TAGS "${ARCH_TAGS}+fp16")
-                    list(APPEND ARCH_DEFINITIONS GGML_USE_FP16_VECTOR_ARITHMETIC)
-                endif()
-                if (GGML_INTERNAL_SVE)
-                    set(ARM_MCPU "armv8.2-a")
-                    set(ARCH_TAGS "${ARCH_TAGS}+sve")
-                    list(APPEND ARCH_DEFINITIONS GGML_USE_SVE)
-                endif()
-                if (GGML_INTERNAL_MATMUL_INT8)
-                    set(ARM_MCPU "armv8.6-a")
-                    set(ARCH_TAGS "${ARCH_TAGS}+i8mm")
-                    list(APPEND ARCH_DEFINITIONS GGML_USE_MATMUL_INT8)
-                endif()
-                if (GGML_INTERNAL_SVE2)
-                    set(ARM_MCPU "armv8.6-a")
-                    set(ARCH_TAGS "${ARCH_TAGS}+sve2")
-                    list(APPEND ARCH_DEFINITIONS GGML_USE_SVE2)
-                endif()
-                if (GGML_INTERNAL_NOSVE)
-                    set(ARCH_TAGS "${ARCH_TAGS}+nosve")
-                endif()
-                if (GGML_INTERNAL_SME)
-                    set(ARM_MCPU "armv9.2-a")
-                    set(ARCH_TAGS "${ARCH_TAGS}+sme")
-                    list(APPEND ARCH_DEFINITIONS GGML_USE_SME)
-                endif()
-                list(APPEND ARCH_FLAGS "-march=${ARM_MCPU}${ARCH_TAGS}")
-                ggml_add_cpu_backend_features(${GGML_CPU_NAME} arm ${ARCH_DEFINITIONS})
+        elseif (GGML_CPU_ARM_ARCH)
+            list(APPEND ARCH_FLAGS -march=${GGML_CPU_ARM_ARCH})
+        elseif(GGML_CPU_ALL_VARIANTS)
+            # For the generic builds, begin with the lowest supported baseline
+            set(ARM_GENERIC_ARCH "armv8-a")
+            set(ARCH_TAGS "")
+            set(ARCH_DEFINITIONS "")
+
+            # When a feature is selected, bump ARM_GENERIC_ARCH to the earliest
+            # version which supported that feature
+            if (GGML_INTERNAL_DOTPROD)
+                set(ARM_GENERIC_ARCH "armv8.2-a")
+                set(ARCH_TAGS "${ARCH_TAGS}+dotprod")
+                list(APPEND ARCH_DEFINITIONS GGML_USE_DOTPROD)
+            endif()
+            if (GGML_INTERNAL_FP16_VECTOR_ARITHMETIC)
+                set(ARM_GENERIC_ARCH "armv8.2-a")
+                set(ARCH_TAGS "${ARCH_TAGS}+fp16")
+                list(APPEND ARCH_DEFINITIONS GGML_USE_FP16_VECTOR_ARITHMETIC)
+            endif()
+            if (GGML_INTERNAL_SVE)
+                set(ARM_GENERIC_ARCH "armv8.2-a")
+                set(ARCH_TAGS "${ARCH_TAGS}+sve")
+                list(APPEND ARCH_DEFINITIONS GGML_USE_SVE)
+            endif()
+            if (GGML_INTERNAL_MATMUL_INT8)
+                set(ARM_GENERIC_ARCH "armv8.6-a")
+                set(ARCH_TAGS "${ARCH_TAGS}+i8mm")
+                list(APPEND ARCH_DEFINITIONS GGML_USE_MATMUL_INT8)
+            endif()
+            if (GGML_INTERNAL_SVE2)
+                set(ARM_GENERIC_ARCH "armv8.6-a")
+                set(ARCH_TAGS "${ARCH_TAGS}+sve2")
+                list(APPEND ARCH_DEFINITIONS GGML_USE_SVE2)
+            endif()
+            if (GGML_INTERNAL_NOSVE)
+                set(ARCH_TAGS "${ARCH_TAGS}+nosve")
+            endif()
+            if (GGML_INTERNAL_SME)
+                set(ARM_GENERIC_ARCH "armv9.2-a")
+                set(ARCH_TAGS "${ARCH_TAGS}+sme")
+                list(APPEND ARCH_DEFINITIONS GGML_USE_SME)
+            endif()
+
+            # CPU targeted builds first, else do the generic build
+            if (tag_name STREQUAL "neoverse-v2")
+                list(APPEND ARCH_FLAGS "-mcpu=neoverse-v2${ARCH_TAGS}")
+                # Read by cpu-feats.cpp to rank this backend higher on matching cores
+                list(APPEND ARCH_DEFINITIONS GGML_ARM_MCPU=NEOVERSE_V2)
+            else()
+                list(APPEND ARCH_FLAGS "-march=${ARM_GENERIC_ARCH}${ARCH_TAGS}")
             endif()
+            ggml_add_cpu_backend_features(${GGML_CPU_NAME} arm ${ARCH_DEFINITIONS})
         endif()
 
         # show enabled features
diff --git a/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp b/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp
index 67369147ce851..fd47621ff113f 100644
--- a/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp
+++ b/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp
@@ -4,6 +4,10 @@
 
 #if defined(__linux__)
 #include <sys/auxv.h>
+
+#include <cstdlib>
+#include <fstream>
+#include <string>
 #elif defined(__APPLE__)
 #include <sys/sysctl.h>
 #endif
@@ -17,6 +21,7 @@
 #endif
 
 struct aarch64_features {
+    int cpu_part = -1; // "CPU part" value from /proc/cpuinfo, -1 when unknown
     // has_neon not needed, aarch64 has NEON guaranteed
     bool has_dotprod = false;
     bool has_fp16_va = false;
@@ -36,6 +41,27 @@ struct aarch64_features {
         has_sve2 = !!(hwcap2 & HWCAP2_SVE2);
         has_i8mm = !!(hwcap2 & HWCAP2_I8MM);
         has_sme = !!(hwcap2 & HWCAP2_SME);
+
+        // Identify the core from the first "CPU part" line of /proc/cpuinfo.
+        // strtol is used instead of stoi so that a malformed line leaves
+        // cpu_part at -1 rather than throwing while the backend is scored.
+        std::ifstream cpuinfo("/proc/cpuinfo");
+        std::string line;
+        while (std::getline(cpuinfo, line)) {
+            if (line.rfind("CPU part", 0) != 0) {
+                continue;
+            }
+            const size_t colon = line.find(':');
+            if (colon != std::string::npos) {
+                const char * digits = line.c_str() + colon + 1;
+                char * end = nullptr;
+                const long part = std::strtol(digits, &end, 16);
+                if (end != digits && part >= 0 && part <= 0xfff) { // part number is a 12-bit field
+                    cpu_part = static_cast<int>(part);
+                }
+            }
+            break;
+        }
 #elif defined(__APPLE__)
         int oldp = 0;
         size_t size = sizeof(oldp);
@@ -61,29 +87,39 @@ static int ggml_backend_cpu_aarch64_score() {
     int score = 1;
     aarch64_features af;
 
+    // Bits 2-8 are used to rank backends by architecture or core when they
+    // otherwise have identical features.
+#if defined(GGML_ARM_MCPU) && GGML_ARM_MCPU == NEOVERSE_V2
+    // 0xd4f is the "CPU part" value reported by Neoverse V2 cores
+    if (af.cpu_part == 0xd4f) {
+        score += 1<<5;
+    }
+#endif
+
+    // Bits 9+: Features always trump architecture or core.
 #ifdef GGML_USE_DOTPROD
     if (!af.has_dotprod) { return 0; }
-    score += 1<<1;
+    score += 1<<8;
 #endif
 #ifdef GGML_USE_FP16_VECTOR_ARITHMETIC
     if (!af.has_fp16_va) { return 0; }
-    score += 1<<2;
+    score += 1<<9;
 #endif
 #ifdef GGML_USE_SVE
     if (!af.has_sve) { return 0; }
-    score += 1<<3;
+    score += 1<<10;
 #endif
 #ifdef GGML_USE_MATMUL_INT8
     if (!af.has_i8mm) { return 0; }
-    score += 1<<4;
+    score += 1<<11;
 #endif
 #ifdef GGML_USE_SVE2
     if (!af.has_sve2) { return 0; }
-    score += 1<<5;
+    score += 1<<12;
 #endif
 #ifdef GGML_USE_SME
     if (!af.has_sme) { return 0; }
-    score += 1<<6;
+    score += 1<<13;
 #endif
 
     return score;