CMake: add option for CUDA compile threads and update flags

ggml-org · Feb 3, 2025 · 9172f86 · 9172f86
1 parent 5a9dc3e
commit 9172f86
Show file tree

Hide file tree

Showing 2 changed files with 2 additions and 1 deletion.
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -152,6 +152,7 @@ option(GGML_CUDA_NO_PEER_COPY               "ggml: do not use peer to peer copie
 option(GGML_CUDA_NO_VMM                     "ggml: do not try to use CUDA VMM"                OFF)
 option(GGML_CUDA_FA_ALL_QUANTS              "ggml: compile all quants for FlashAttention"     OFF)
 option(GGML_CUDA_GRAPHS                     "ggml: use CUDA graphs (llama.cpp only)"          ${GGML_CUDA_GRAPHS_DEFAULT})
+option(GGML_CUDA_COMPILE_THREADS            "ggml: CUDA compile threads (0 - auto)"           0)
 
 option(GGML_HIP                             "ggml: use HIP"                                   OFF)
 option(GGML_HIP_GRAPHS                      "ggml: use HIP graph, experimental, slow"         OFF)

diff --git a/src/ggml-cuda/CMakeLists.txt b/src/ggml-cuda/CMakeLists.txt
@@ -96,7 +96,7 @@ if (CUDAToolkit_FOUND)
 
     set(CUDA_CXX_FLAGS "")
 
-    set(CUDA_FLAGS -use_fast_math --threads=0 --split-compile=0)
+    set(CUDA_FLAGS -use_fast_math --threads=${GGML_CUDA_COMPILE_THREADS} --split-compile=${GGML_CUDA_COMPILE_THREADS})
 
     if (GGML_FATAL_WARNINGS)
         list(APPEND CUDA_FLAGS -Werror all-warnings)