diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 000000000..b84dc1de8
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,126 @@
+#----------------------------------------------------------------------------------------
+# INFO:
+# - How to use: change the flags in the 'Set one of these ON and all others OFF' section
+# - CUDA: the installed CUDA SDK version is used automatically
+#
+#----------------------------------------------------------------------------------------
+cmake_minimum_required(VERSION 3.8)
+project(LLamaSharpCpp VERSION 0.10.0 LANGUAGES CXX CSharp)
+if(NOT MSVC)
+    message(FATAL_ERROR "This CMake file only works with MSVC.")
+endif(NOT MSVC)
+
+#--------- Set one of these ON and all others OFF ------------------->
+option(LLAMA_CUDA_AVX2 "CUDA + AVX2" ON)
+option(LLAMA_AVX2 "AVX2 (no CUDA)" OFF)
+option(LLAMA_CUDA "CUDA (no AVX)" OFF)
+#etc... add other setups
+#<--------- Set one of these ON and all others OFF -------------------
+
+# --------------- Don't change below this line -----------------------
+
+# Variable Settings
+if(LLAMA_CUDA_AVX2)
+    option(LLAMA_AVX "llama: enable AVX" ON)
+    option(LLAMA_AVX2 "llama: enable AVX2" ON)
+    option(LLAMA_CUBLAS "llama: use CUDA" ON)
+elseif(LLAMA_AVX2)
+    option(LLAMA_AVX "llama: enable AVX" ON)
+    option(LLAMA_AVX2 "llama: enable AVX2" ON)
+    option(LLAMA_CUBLAS "llama: use CUDA" OFF)
+elseif(LLAMA_CUDA)
+    option(LLAMA_AVX "llama: enable AVX" OFF)
+    option(LLAMA_AVX2 "llama: enable AVX2" OFF)
+    option(LLAMA_CUBLAS "llama: use CUDA" ON)
+elseif(OTHER_SETUPS)
+    #etc...
+endif()
+
+# Fixed Settings
+# general
+option(BUILD_SHARED_LIBS "build shared libraries" ON)
+option(LLAMA_STATIC "llama: static link libraries" OFF)
+option(LLAMA_NATIVE "llama: enable -march=native flag" OFF)
+option(LLAMA_LTO "llama: enable link time optimization" OFF)
+option(LLAMA_CCACHE "llama: use ccache if available" ON)
+
+# debug
+option(LLAMA_ALL_WARNINGS "llama: enable all compiler warnings" ON)
+option(LLAMA_ALL_WARNINGS_3RD_PARTY "llama: enable all compiler warnings in 3rd party libs" OFF)
+option(LLAMA_GPROF "llama: enable gprof" OFF)
+
+# build
+option(LLAMA_FATAL_WARNINGS "llama: enable -Werror flag" OFF)
+
+# sanitizers
+option(LLAMA_SANITIZE_THREAD "llama: enable thread sanitizer" OFF)
+option(LLAMA_SANITIZE_ADDRESS "llama: enable address sanitizer" OFF)
+option(LLAMA_SANITIZE_UNDEFINED "llama: enable undefined sanitizer" OFF)
+
+option(LLAMA_AVX512 "llama: enable AVX512" OFF)
+option(LLAMA_AVX512_VBMI "llama: enable AVX512-VBMI" OFF)
+option(LLAMA_AVX512_VNNI "llama: enable AVX512-VNNI" OFF)
+option(LLAMA_FMA "llama: enable FMA" OFF)
+# in MSVC F16C is implied with AVX2/AVX512
+if (NOT MSVC)
+    option(LLAMA_F16C "llama: enable F16C" OFF)
+endif()
+
+if (WIN32)
+    set(LLAMA_WIN_VER "0x602" CACHE STRING "llama: Windows Version")
+endif()
+
+# 3rd party libs
+option(LLAMA_ACCELERATE "llama: enable Accelerate framework" ON)
+option(LLAMA_BLAS "llama: use BLAS" OFF)
+set(LLAMA_BLAS_VENDOR "Generic" CACHE STRING "llama: BLAS library vendor")
+#option(LLAMA_CUDA_CUBLAS "llama: use cuBLAS for prompt processing" OFF)
+option(LLAMA_CUDA_FORCE_DMMV "llama: use dmmv instead of mmvq CUDA kernels" OFF)
+option(LLAMA_CUDA_FORCE_MMQ "llama: use mmq kernels instead of cuBLAS" OFF)
+set(LLAMA_CUDA_DMMV_X "32" CACHE STRING "llama: x stride for dmmv CUDA kernels")
+set(LLAMA_CUDA_MMV_Y "1" CACHE STRING "llama: y block size for mmv CUDA kernels")
+option(LLAMA_CUDA_F16 "llama: use 16 bit floats for some calculations" OFF)
+set(LLAMA_CUDA_KQUANTS_ITER "2" CACHE STRING "llama: iters./thread per block for Q2_K/Q6_K")
+set(LLAMA_CUDA_PEER_MAX_BATCH_SIZE "128" CACHE STRING
+    "llama: max. batch size for using peer access")
+option(LLAMA_HIPBLAS "llama: use hipBLAS" OFF)
+option(LLAMA_HIP_UMA "llama: use HIP unified memory architecture" OFF)
+option(LLAMA_CLBLAST "llama: use CLBlast" OFF)
+option(LLAMA_VULKAN "llama: use Vulkan" OFF)
+option(LLAMA_VULKAN_CHECK_RESULTS "llama: run Vulkan op checks" OFF)
+option(LLAMA_VULKAN_DEBUG "llama: enable Vulkan debug output" OFF)
+option(LLAMA_VULKAN_VALIDATE "llama: enable Vulkan validation" OFF)
+option(LLAMA_VULKAN_RUN_TESTS "llama: run Vulkan tests" OFF)
+option(LLAMA_METAL "llama: use Metal" ${LLAMA_METAL_DEFAULT})
+option(LLAMA_METAL_NDEBUG "llama: disable Metal debugging" OFF)
+option(LLAMA_METAL_SHADER_DEBUG "llama: compile Metal with -fno-fast-math" OFF)
+option(LLAMA_METAL_EMBED_LIBRARY "llama: embed Metal library" OFF)
+option(LLAMA_KOMPUTE "llama: use Kompute" OFF)
+option(LLAMA_MPI "llama: use MPI" OFF)
+option(LLAMA_QKK_64 "llama: use super-block size of 64 for k-quants" OFF)
+option(LLAMA_SYCL "llama: use SYCL" OFF)
+option(LLAMA_SYCL_F16 "llama: use 16 bit floats for sycl calculations" OFF)
+option(LLAMA_CPU_HBM "llama: use memkind for CPU HBM" OFF)
+
+option(LLAMA_BUILD_TESTS "llama: build tests" OFF)
+option(LLAMA_BUILD_EXAMPLES "llama: build examples" ON)
+option(LLAMA_BUILD_SERVER "llama: build server example" OFF)
+
+# add perf arguments
+option(LLAMA_PERF "llama: enable perf" OFF)
+
+include_external_msproject(LLama.Unittest ./LLama.Unittest/LLama.Unittest.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE BAC1CFA9-E6AC-4BD0-A548-A8066D3C467E)
+
+include_external_msproject(LLama.Examples ./LLama.Examples/LLama.Examples.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE BD1909AD-E1F8-476E-BC49-E394FF0470CE)
+
+include_external_msproject(LLamaSharp ./LLama/LLamaSharp.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE 01A12D68-DE95-425E-AEEE-2D099305036D)
+
+include_external_msproject(LLama.WebAPI ./LLama.WebAPI/LLama.WebAPI.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE D3CEC57A-9027-4DA4-AAAC-612A1EB50ADF)
+
+include_external_msproject(LLama.Web ./LLama.Web/LLama.Web.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE C3531DB2-1B2B-433C-8DE6-3541E3620DB1)
+
+include_external_msproject(LLamaSharp.SemanticKernel ./LLama.SemanticKernel/LLamaSharp.SemanticKernel.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE D98F93E3-B344-4F9D-86BB-FDBF6768B587)
+
+include_external_msproject(LLamaSharp.KernelMemory ./LLama.KernelMemory/LLamaSharp.KernelMemory.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE E5589AE7-B86F-4343-A1CC-8E5D34596E52)
+
+add_subdirectory(./llama.cpp)
\ No newline at end of file
diff --git a/LLama.GenerateSolution/CMakeLists.txt.in b/LLama.GenerateSolution/CMakeLists.txt.in
new file mode 100644
index 000000000..b84dc1de8
--- /dev/null
+++ b/LLama.GenerateSolution/CMakeLists.txt.in
@@ -0,0 +1,126 @@
+#----------------------------------------------------------------------------------------
+# INFO:
+# - How to use: change the flags in the 'Set one of these ON and all others OFF' section
+# - CUDA: the installed CUDA SDK version is used automatically
+#
+#----------------------------------------------------------------------------------------
+cmake_minimum_required(VERSION 3.8)
+project(LLamaSharpCpp VERSION 0.10.0 LANGUAGES CXX CSharp)
+if(NOT MSVC)
+    message(FATAL_ERROR "This CMake file only works with MSVC.")
+endif(NOT MSVC)
+
+#--------- Set one of these ON and all others OFF ------------------->
+option(LLAMA_CUDA_AVX2 "CUDA + AVX2" ON)
+option(LLAMA_AVX2 "AVX2 (no CUDA)" OFF)
+option(LLAMA_CUDA "CUDA (no AVX)" OFF)
+#etc... add other setups
+#<--------- Set one of these ON and all others OFF -------------------
+
+# --------------- Don't change below this line -----------------------
+
+# Variable Settings
+if(LLAMA_CUDA_AVX2)
+    option(LLAMA_AVX "llama: enable AVX" ON)
+    option(LLAMA_AVX2 "llama: enable AVX2" ON)
+    option(LLAMA_CUBLAS "llama: use CUDA" ON)
+elseif(LLAMA_AVX2)
+    option(LLAMA_AVX "llama: enable AVX" ON)
+    option(LLAMA_AVX2 "llama: enable AVX2" ON)
+    option(LLAMA_CUBLAS "llama: use CUDA" OFF)
+elseif(LLAMA_CUDA)
+    option(LLAMA_AVX "llama: enable AVX" OFF)
+    option(LLAMA_AVX2 "llama: enable AVX2" OFF)
+    option(LLAMA_CUBLAS "llama: use CUDA" ON)
+elseif(OTHER_SETUPS)
+    #etc...
+endif()
+
+# Fixed Settings
+# general
+option(BUILD_SHARED_LIBS "build shared libraries" ON)
+option(LLAMA_STATIC "llama: static link libraries" OFF)
+option(LLAMA_NATIVE "llama: enable -march=native flag" OFF)
+option(LLAMA_LTO "llama: enable link time optimization" OFF)
+option(LLAMA_CCACHE "llama: use ccache if available" ON)
+
+# debug
+option(LLAMA_ALL_WARNINGS "llama: enable all compiler warnings" ON)
+option(LLAMA_ALL_WARNINGS_3RD_PARTY "llama: enable all compiler warnings in 3rd party libs" OFF)
+option(LLAMA_GPROF "llama: enable gprof" OFF)
+
+# build
+option(LLAMA_FATAL_WARNINGS "llama: enable -Werror flag" OFF)
+
+# sanitizers
+option(LLAMA_SANITIZE_THREAD "llama: enable thread sanitizer" OFF)
+option(LLAMA_SANITIZE_ADDRESS "llama: enable address sanitizer" OFF)
+option(LLAMA_SANITIZE_UNDEFINED "llama: enable undefined sanitizer" OFF)
+
+option(LLAMA_AVX512 "llama: enable AVX512" OFF)
+option(LLAMA_AVX512_VBMI "llama: enable AVX512-VBMI" OFF)
+option(LLAMA_AVX512_VNNI "llama: enable AVX512-VNNI" OFF)
+option(LLAMA_FMA "llama: enable FMA" OFF)
+# in MSVC F16C is implied with AVX2/AVX512
+if (NOT MSVC)
+    option(LLAMA_F16C "llama: enable F16C" OFF)
+endif()
+
+if (WIN32)
+    set(LLAMA_WIN_VER "0x602" CACHE STRING "llama: Windows Version")
+endif()
+
+# 3rd party libs
+option(LLAMA_ACCELERATE "llama: enable Accelerate framework" ON)
+option(LLAMA_BLAS "llama: use BLAS" OFF)
+set(LLAMA_BLAS_VENDOR "Generic" CACHE STRING "llama: BLAS library vendor")
+#option(LLAMA_CUDA_CUBLAS "llama: use cuBLAS for prompt processing" OFF)
+option(LLAMA_CUDA_FORCE_DMMV "llama: use dmmv instead of mmvq CUDA kernels" OFF)
+option(LLAMA_CUDA_FORCE_MMQ "llama: use mmq kernels instead of cuBLAS" OFF)
+set(LLAMA_CUDA_DMMV_X "32" CACHE STRING "llama: x stride for dmmv CUDA kernels")
+set(LLAMA_CUDA_MMV_Y "1" CACHE STRING "llama: y block size for mmv CUDA kernels")
+option(LLAMA_CUDA_F16 "llama: use 16 bit floats for some calculations" OFF)
+set(LLAMA_CUDA_KQUANTS_ITER "2" CACHE STRING "llama: iters./thread per block for Q2_K/Q6_K")
+set(LLAMA_CUDA_PEER_MAX_BATCH_SIZE "128" CACHE STRING
+    "llama: max. batch size for using peer access")
+option(LLAMA_HIPBLAS "llama: use hipBLAS" OFF)
+option(LLAMA_HIP_UMA "llama: use HIP unified memory architecture" OFF)
+option(LLAMA_CLBLAST "llama: use CLBlast" OFF)
+option(LLAMA_VULKAN "llama: use Vulkan" OFF)
+option(LLAMA_VULKAN_CHECK_RESULTS "llama: run Vulkan op checks" OFF)
+option(LLAMA_VULKAN_DEBUG "llama: enable Vulkan debug output" OFF)
+option(LLAMA_VULKAN_VALIDATE "llama: enable Vulkan validation" OFF)
+option(LLAMA_VULKAN_RUN_TESTS "llama: run Vulkan tests" OFF)
+option(LLAMA_METAL "llama: use Metal" ${LLAMA_METAL_DEFAULT})
+option(LLAMA_METAL_NDEBUG "llama: disable Metal debugging" OFF)
+option(LLAMA_METAL_SHADER_DEBUG "llama: compile Metal with -fno-fast-math" OFF)
+option(LLAMA_METAL_EMBED_LIBRARY "llama: embed Metal library" OFF)
+option(LLAMA_KOMPUTE "llama: use Kompute" OFF)
+option(LLAMA_MPI "llama: use MPI" OFF)
+option(LLAMA_QKK_64 "llama: use super-block size of 64 for k-quants" OFF)
+option(LLAMA_SYCL "llama: use SYCL" OFF)
+option(LLAMA_SYCL_F16 "llama: use 16 bit floats for sycl calculations" OFF)
+option(LLAMA_CPU_HBM "llama: use memkind for CPU HBM" OFF)
+
+option(LLAMA_BUILD_TESTS "llama: build tests" OFF)
+option(LLAMA_BUILD_EXAMPLES "llama: build examples" ON)
+option(LLAMA_BUILD_SERVER "llama: build server example" OFF)
+
+# add perf arguments
+option(LLAMA_PERF "llama: enable perf" OFF)
+
+include_external_msproject(LLama.Unittest ./LLama.Unittest/LLama.Unittest.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE BAC1CFA9-E6AC-4BD0-A548-A8066D3C467E)
+
+include_external_msproject(LLama.Examples ./LLama.Examples/LLama.Examples.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE BD1909AD-E1F8-476E-BC49-E394FF0470CE)
+
+include_external_msproject(LLamaSharp ./LLama/LLamaSharp.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE 01A12D68-DE95-425E-AEEE-2D099305036D)
+
+include_external_msproject(LLama.WebAPI ./LLama.WebAPI/LLama.WebAPI.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE D3CEC57A-9027-4DA4-AAAC-612A1EB50ADF)
+
+include_external_msproject(LLama.Web ./LLama.Web/LLama.Web.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE C3531DB2-1B2B-433C-8DE6-3541E3620DB1)
+
+include_external_msproject(LLamaSharp.SemanticKernel ./LLama.SemanticKernel/LLamaSharp.SemanticKernel.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE D98F93E3-B344-4F9D-86BB-FDBF6768B587)
+
+include_external_msproject(LLamaSharp.KernelMemory ./LLama.KernelMemory/LLamaSharp.KernelMemory.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE E5589AE7-B86F-4343-A1CC-8E5D34596E52)
+
+add_subdirectory(./llama.cpp)
\ No newline at end of file
diff --git a/LLama.GenerateSolution/GenerateSolution.csproj b/LLama.GenerateSolution/GenerateSolution.csproj
new file mode 100644
index 000000000..f28f91ba6
--- /dev/null
+++ b/LLama.GenerateSolution/GenerateSolution.csproj
@@ -0,0 +1,14 @@
+<Project Sdk="Microsoft.NET.Sdk">
+
+  <PropertyGroup>
+    <OutputType>Exe</OutputType>
+    <TargetFramework>net7.0</TargetFramework>
+    <ImplicitUsings>enable</ImplicitUsings>
+    <Nullable>enable</Nullable>
+  </PropertyGroup>
+
+  <ItemGroup>
+    <PackageReference Include="Spectre.Console" />
+  </ItemGroup>
+
+</Project>
diff --git a/LLama.GenerateSolution/GenerateSolution.sln b/LLama.GenerateSolution/GenerateSolution.sln
new file mode 100644
index 000000000..74c9e8e10
--- /dev/null
+++ b/LLama.GenerateSolution/GenerateSolution.sln
@@ -0,0 +1,25 @@
+
+Microsoft Visual Studio Solution File, Format Version 12.00
+# Visual Studio Version 17
+VisualStudioVersion = 17.8.34525.116
+MinimumVisualStudioVersion = 10.0.40219.1
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "GenerateSolution", "GenerateSolution.csproj", "{89306FE9-4428-4C70-AF58-0AF871BED56B}"
+EndProject
+Global
+    GlobalSection(SolutionConfigurationPlatforms) = preSolution
+        Debug|Any CPU = Debug|Any CPU
+        Release|Any CPU = Release|Any CPU
+    EndGlobalSection
+    GlobalSection(ProjectConfigurationPlatforms) = postSolution
+        {89306FE9-4428-4C70-AF58-0AF871BED56B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+        {89306FE9-4428-4C70-AF58-0AF871BED56B}.Debug|Any CPU.Build.0 = Debug|Any CPU
+        {89306FE9-4428-4C70-AF58-0AF871BED56B}.Release|Any CPU.ActiveCfg = Release|Any CPU
+        {89306FE9-4428-4C70-AF58-0AF871BED56B}.Release|Any CPU.Build.0 = Release|Any CPU
+    EndGlobalSection
+    GlobalSection(SolutionProperties) = preSolution
+        HideSolutionNode = FALSE
+    EndGlobalSection
+    GlobalSection(ExtensibilityGlobals) = postSolution
+        SolutionGuid = {32128714-30D0-4015-9184-24F609AAE564}
+    EndGlobalSection
+EndGlobal
diff --git a/LLama.GenerateSolution/Program.cs b/LLama.GenerateSolution/Program.cs
new file mode 100644
index 000000000..ebf082b14
--- /dev/null
+++ b/LLama.GenerateSolution/Program.cs
@@ -0,0 +1,137 @@
+using Spectre.Console;
+using System;
+using System.Diagnostics;
+using System.Text;
+using static System.Runtime.InteropServices.JavaScript.JSType;
+
+namespace GenerateSolution
+{
+    internal class Program
+    {
+        static void Main(string[] args)
+        {
+            System.Console.InputEncoding = Encoding.Unicode;
+            System.Console.OutputEncoding = Encoding.Unicode;
+
+            // Check if we can accept key strokes
+            if (!AnsiConsole.Profile.Capabilities.Interactive)
+            {
+                AnsiConsole.MarkupLine("[red]Environment does not support interaction.[/]");
+                return;
+            }
+
+            var options = AskOptions();
+            var cmakePath = AskCMakePath();
+            if(string.IsNullOrEmpty(cmakePath) == true)
+            {
+                cmakePath = "C:\\Program Files\\CMake\\bin\\cmake.exe";
+            }
+            AnsiConsole.MarkupLine("You have selected: [yellow]{0}[/]", cmakePath);
+
+            string cmakeListsPath = @"..\..\..\..\CMakeLists.txt";
+
+            //cmake [<options>] -B <path-to-build> [-S <path-to-source>]
+            //TODO: get the chosen arguments from above (hardcoded values below)
+            //TODO: edit the CMakeList.txt.in template and create the CMakeLists.txt with the chosen options
+            cmakeListsPath += " -G \"Visual Studio 17 2022\" -A x64 -B ..\\..\\..\\..\\ -S ..\\..\\..\\..\\";
+
+            ProcessStartInfo startInfo = new ProcessStartInfo
+            {
+                FileName = cmakePath,
+                Arguments = cmakeListsPath,
+                RedirectStandardOutput = true,
+                RedirectStandardError = true,
+                UseShellExecute = false,
+                CreateNoWindow = true,
+            };
+
+            try
+            {
+                bool bSuccess = false;
+                string lastError = "";
+                AnsiConsole.Progress()
+                    .AutoClear(false)
+                    .Columns(new ProgressColumn[]
+                    {
+                        new TaskDescriptionColumn(),
+                        new SpinnerColumn(Spinner.Known.Ascii),
+                    })
+                    .Start(ctx =>
+                    {
+                        var cmakeTask = ctx.AddTask("Generating VS Solution", autoStart: false).IsIndeterminate();
+                        cmakeTask.StartTask();
+                        using (Process process = new Process())
+                        {
+                            process.StartInfo = startInfo;
+                            process.Start();
+                            string output = process.StandardOutput.ReadToEnd();
+                            lastError = process.StandardError.ReadToEnd();
+                            process.WaitForExit();
+                            cmakeTask.StopTask();
+                            if (process.ExitCode == 0)
+                            {
+                                bSuccess = true;
+                            }
+                        }
+                    });
+
+                if (bSuccess == true)
+                {
+                    AnsiConsole.WriteLine("VS solution generated successfully.");
+                }
+                else
+                {
+                    AnsiConsole.WriteLine($"Error running CMake configuration: {lastError}");
+                }
+            }
+            catch (Exception ex)
+            {
+                AnsiConsole.WriteLine("[red]ERROR[/] " + ex.Message);
+            }
+
+            Console.ReadLine();
+        }
+
+        public static string AskCMakePath()
+        {
+            return AnsiConsole.Prompt(
+                new TextPrompt<string>("What's your [green]CMake path[/] (default: C:\\Program Files\\CMake\\bin\\cmake.exe)?")
+                    .AllowEmpty());
+        }
+
+        public static List<string> AskOptions()
+        {
+            var options = AnsiConsole.Prompt(
+                new MultiSelectionPrompt<string>()
+                    .PageSize(10)
+                    .Title("Select the preferred [green]options[/]?")
+                    .MoreChoicesText("[grey](Move up and down to reveal more options)[/]")
+                    .InstructionsText("[grey](Press [blue]<space>[/] to toggle an option, [green]<enter>[/] to accept)[/]")
+                    .AddChoiceGroup("Avx", new[]
+                    {
+                        "Avx2", "Avx512"
+                    })
+                    .AddChoiceGroup("Cuda", new[]
+                    {
+                        "Cuda"
+                    })
+                    .AddChoices(new[]
+                    {
+                        "x64",
+                    })
+                    .AddChoiceGroup("Visual Studio", new[]
+                    {
+                        "Visual Studio 16 2019",
+                        "Visual Studio 17 2022"
+                    })
+                );
+
+            if (options.Count > 0)
+            {
+                AnsiConsole.MarkupLine("You have selected: [yellow]{0}[/]", string.Join(",",options));
+            }
+
+            return options;
+        }
+    }
+}
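
A note on the two TODOs in Program.cs: the options chosen in AskOptions() are not yet written back into the generated CMakeLists.txt. Below is a minimal sketch of one way to close that gap, assuming the option() lines keep the exact shape used in CMakeLists.txt.in; the class, method, and parameter names (CMakeTemplateWriter, Write, enabledOptions) are illustrative only and not part of this PR, and the caller would still need to map UI selections such as "Cuda" or "Avx2" onto the corresponding LLAMA_* flag names.

using System.Collections.Generic;
using System.IO;
using System.Text.RegularExpressions;

namespace GenerateSolution
{
    // Hypothetical helper (not part of this PR): rewrites the option() defaults from
    // the CMakeLists.txt.in template and writes the result to CMakeLists.txt.
    internal static class CMakeTemplateWriter
    {
        public static void Write(string templatePath, string outputPath, ISet<string> enabledOptions)
        {
            string text = File.ReadAllText(templatePath);

            // The mutually exclusive setup flags exposed at the top of the template.
            foreach (string name in new[] { "LLAMA_CUDA_AVX2", "LLAMA_AVX2", "LLAMA_CUDA" })
            {
                string state = enabledOptions.Contains(name) ? "ON" : "OFF";

                // Matches lines of the form: option(NAME "description" ON|OFF)
                // and replaces the trailing default with the requested state.
                text = Regex.Replace(
                    text,
                    $@"option\({name}\s+(""[^""]*"")\s+(ON|OFF)\)",
                    m => $"option({name} {m.Groups[1].Value} {state})");
            }

            File.WriteAllText(outputPath, text);
        }
    }
}

Main could then call something like CMakeTemplateWriter.Write(templatePath, @"..\..\..\..\CMakeLists.txt", selectedFlags) before launching cmake, with templatePath adjusted to wherever CMakeLists.txt.in actually sits relative to the working directory.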