Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

sync : llama.cpp #1089

Merged
merged 38 commits into from
Jan 29, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
28ac740
SYCL: Add gated linear attention kernel (llama/11175)
qnixsynapse Jan 15, 2025
904a095
RoPE: fix back, CUDA support for back + noncont. (llama/11240)
JohannesGaessler Jan 15, 2025
ba41673
fix: ggml: fix vulkan-shaders-gen build (llama/10448)
sparkleholic Jan 15, 2025
0cab8e0
vulkan: scale caching for k quants + misc fixes (llama/11081)
netrunnereve Jan 15, 2025
e8ed706
ggml: aarch64: implement SVE kernels for q4_K_q8_K vector dot (llama/…
fj-y-saito Jan 16, 2025
9b5d224
CUDA: backwards pass for misc. ops, add tests (llama/11257)
JohannesGaessler Jan 16, 2025
934f7ec
vulkan: optimize coopmat2 q2_k dequant function (llama/11130)
jeffbolznv Jan 16, 2025
be82ddf
vulkan: optimize coopmat2 q4_k/q5_k dequant functions. (llama/11206)
jeffbolznv Jan 16, 2025
ba0e978
vulkan: support copy from f32 to q4_0/q4_1/q5_0/q5_1/q8_0/iq4_nl (lla…
jeffbolznv Jan 16, 2025
72a5ae0
rpc : early register backend devices (llama/11262)
rgerganov Jan 17, 2025
782151b
vulkan: fix coopmat2 flash attention for non-contiguous inputs (llama…
jeffbolznv Jan 18, 2025
dc90506
cmake : add sanitizer flags for llama.cpp (llama/11279)
ggerganov Jan 18, 2025
fbaffa1
SYCL: Introducing memory host pool (llama/11251)
s-Nick Jan 19, 2025
3cf35e1
vulkan: fix coopmat2 validation failures (llama/11284)
jeffbolznv Jan 20, 2025
4ec5cf7
metal : fix out-of-bounds write (llama/11314)
ggerganov Jan 21, 2025
54c1e2f
rpc : better caching of the base buffer pointer (llama/11331)
rgerganov Jan 21, 2025
d578c62
vulkan: fix diag_mask_inf (llama/11323)
jeffbolznv Jan 23, 2025
4bec73f
vulkan: sort shaders for more deterministic binary (llama/11315)
jeffbolznv Jan 23, 2025
05a0c1d
Vulkan-run-test: fix mmq_wg_denoms (llama/11343)
AMD-dwang Jan 23, 2025
027cb54
tests: fix some mul_mat test gaps (llama/11375)
jeffbolznv Jan 23, 2025
5eb4acd
cmake : avoid -march=native when reproducible build is wanted (llama/…
bmwiedemann Jan 24, 2025
cd3261d
CPU/CUDA: fix (GQA) mul mat back, add CUDA support (llama/11380)
JohannesGaessler Jan 24, 2025
8c59363
rocBLAS: Avoid fp32->fp16->fp32 conversion on cdna (llama/11356)
IMbackK Jan 24, 2025
7957144
CUDA: fix FP16 cuBLAS GEMM (llama/11396)
JohannesGaessler Jan 24, 2025
c8e6c31
hip : Add hipGraph and VMM support to ROCM (llama/11362)
IMbackK Jan 24, 2025
2a0f12c
Hip: disable VMM on hip as it seems that it doesn't work in some confi…
IMbackK Jan 25, 2025
d886b3b
vulkan: compile shaders on-demand (llama/11406)
jeffbolznv Jan 25, 2025
0e85d87
cmake: add ggml find package (llama/11369)
bandoti Jan 26, 2025
a756ee5
metal : use residency sets (llama/11427)
ggerganov Jan 26, 2025
bc64584
metal: Handle null returned from MTLCreateSystemDefaultDevice() (llam…
booxter Jan 27, 2025
ec4d3e7
AMD: parse the architecture as supplied by gcnArchName (llama/11244)
Haus1 Jan 27, 2025
2aa6088
SYCL : SOFTMAX F16 mask support and other fixes (llama/11261)
qnixsynapse Jan 28, 2025
8ac9155
cmake : don't fail on `GGML_CPU=OFF` (llama/11457)
someone13574 Jan 28, 2025
807b5f2
HIP: Only call rocblas_initialize on rocblas versions with the multip…
sARY77 Jan 28, 2025
df283f7
HIP: Suppress transformation warning in softmax.cu
IMbackK Jan 28, 2025
5bcbe65
sync : llama.cpp
ggerganov Jan 29, 2025
5ce4ce2
scripts : sync cmake
ggerganov Jan 29, 2025
41ae935
cmake : sync new file
ggerganov Jan 29, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 78 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,8 @@ else()
set(GGML_BLAS_VENDOR_DEFAULT "Generic")
endif()

if (CMAKE_CROSSCOMPILING)
if (CMAKE_CROSSCOMPILING OR DEFINED ENV{SOURCE_DATE_EPOCH})
message(STATUS "Setting GGML_NATIVE_DEFAULT to OFF")
set(GGML_NATIVE_DEFAULT OFF)
else()
set(GGML_NATIVE_DEFAULT ON)
Expand Down Expand Up @@ -153,6 +154,8 @@ option(GGML_CUDA_FA_ALL_QUANTS "ggml: compile all quants for FlashA
option(GGML_CUDA_GRAPHS "ggml: use CUDA graphs (llama.cpp only)" ${GGML_CUDA_GRAPHS_DEFAULT})

option(GGML_HIP "ggml: use HIP" OFF)
option(GGML_HIP_GRAPHS "ggml: use HIP graph, experimental, slow" OFF)
option(GGML_HIP_NO_VMM "ggml: do not try to use HIP VMM" ON)
option(GGML_HIP_UMA "ggml: use HIP unified memory architecture" OFF)
option(GGML_VULKAN "ggml: use Vulkan" OFF)
option(GGML_VULKAN_CHECK_RESULTS "ggml: run Vulkan op checks" OFF)
Expand Down Expand Up @@ -185,6 +188,9 @@ option(GGML_OPENCL_PROFILING "ggml: use OpenCL profiling (increas
option(GGML_OPENCL_EMBED_KERNELS "ggml: embed kernels" ON)
option(GGML_OPENCL_USE_ADRENO_KERNELS "ggml: use optimized kernels for Adreno" ON)

# toolchain for vulkan-shaders-gen
set (GGML_VULKAN_SHADERS_GEN_TOOLCHAIN "" CACHE FILEPATH "ggml: toolchain file for vulkan-shaders-gen")

# extra artifacts
option(GGML_BUILD_TESTS "ggml: build tests" ${GGML_STANDALONE})
option(GGML_BUILD_EXAMPLES "ggml: build examples" ${GGML_STANDALONE})
Expand Down Expand Up @@ -261,3 +267,74 @@ if (GGML_STANDALONE)
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/ggml.pc
DESTINATION share/pkgconfig)
endif()

#
# Create CMake package
#

# NOTE(review): this section uses configure_package_config_file() and
# write_basic_package_version_file() from the CMakePackageConfigHelpers
# module, and REQUIRED on find_program() needs CMake >= 3.18 -- confirm
# both are satisfied in the part of the file not shown here.

# Generate version info based on git commit.

find_program(GIT_EXE NAMES git git.exe REQUIRED NO_CMAKE_FIND_ROOT_PATH)

# The commit count doubles as the package patch version (0.0.<count>).
execute_process(COMMAND ${GIT_EXE} rev-list --count HEAD
    WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
    OUTPUT_VARIABLE GGML_BUILD_NUMBER
    OUTPUT_STRIP_TRAILING_WHITESPACE
)

if(GGML_BUILD_NUMBER EQUAL 1)
    message(WARNING "GGML build version fixed at 1 likely due to a shallow clone.")
endif()

execute_process(COMMAND ${GIT_EXE} rev-parse --short HEAD
    WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
    OUTPUT_VARIABLE GGML_BUILD_COMMIT
    OUTPUT_STRIP_TRAILING_WHITESPACE
)

# Capture variables prefixed with GGML_ so the installed ggml-config.cmake
# reproduces the exact configuration this build was made with.

# Fixed typo in the generated banner: the template placeholder is
# @GGML_VARIABLES_EXPANDED@ (was misspelled "EXPANED").
set(variable_set_statements
"
####### Expanded from @GGML_VARIABLES_EXPANDED@ by configure_package_config_file() #######
####### Any changes to this file will be overwritten by the next CMake run #######

")

# Recorded so the package config knows whether it must re-find static deps.
set(GGML_SHARED_LIB "${BUILD_SHARED_LIBS}")

get_cmake_property(all_variables VARIABLES)
foreach(variable_name IN LISTS all_variables)
    if(variable_name MATCHES "^GGML_")
        # Escape list separators so multi-value variables survive the
        # round-trip through the generated set() statement.
        string(REPLACE ";" "\\;" variable_value "${${variable_name}}")

        string(APPEND variable_set_statements
            "set(${variable_name} \"${variable_value}\")\n")
    endif()
endforeach()

# Quoted so embedded semicolons/newlines stay a single string value.
set(GGML_VARIABLES_EXPANDED "${variable_set_statements}")

# Create the CMake package and set install location.

set(GGML_INSTALL_VERSION 0.0.${GGML_BUILD_NUMBER})
set(GGML_INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_INCLUDEDIR} CACHE PATH "Location of header files")
set(GGML_LIB_INSTALL_DIR     ${CMAKE_INSTALL_LIBDIR}     CACHE PATH "Location of library files")
set(GGML_BIN_INSTALL_DIR     ${CMAKE_INSTALL_BINDIR}     CACHE PATH "Location of binary files")

configure_package_config_file(
    ${CMAKE_CURRENT_SOURCE_DIR}/cmake/ggml-config.cmake.in
    ${CMAKE_CURRENT_BINARY_DIR}/ggml-config.cmake
    INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/ggml
    PATH_VARS GGML_INCLUDE_INSTALL_DIR
              GGML_LIB_INSTALL_DIR
              GGML_BIN_INSTALL_DIR)

write_basic_package_version_file(
    ${CMAKE_CURRENT_BINARY_DIR}/ggml-version.cmake
    VERSION ${GGML_INSTALL_VERSION}
    COMPATIBILITY SameMajorVersion)

install(FILES ${CMAKE_CURRENT_BINARY_DIR}/ggml-config.cmake
              ${CMAKE_CURRENT_BINARY_DIR}/ggml-version.cmake
        DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/ggml)
147 changes: 147 additions & 0 deletions cmake/ggml-config.cmake.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@

@GGML_VARIABLES_EXPANDED@

@PACKAGE_INIT@

# Resolve the install-relative locations that were recorded at configure
# time; set_and_check() fails if a path does not exist on the consumer side.
set_and_check(GGML_INCLUDE_DIR "@PACKAGE_GGML_INCLUDE_INSTALL_DIR@")
set_and_check(GGML_LIB_DIR "@PACKAGE_GGML_LIB_INSTALL_DIR@")
set_and_check(GGML_BIN_DIR "@PACKAGE_GGML_BIN_INSTALL_DIR@")

find_package(Threads REQUIRED)

# Core library: wrap the installed artifact as an imported target.
find_library(GGML_LIBRARY ggml
    REQUIRED
    HINTS ${GGML_LIB_DIR}
    NO_CMAKE_FIND_ROOT_PATH)

add_library(ggml::ggml UNKNOWN IMPORTED)
set_property(TARGET ggml::ggml PROPERTY IMPORTED_LOCATION "${GGML_LIBRARY}")

# Base library shared by all backends.
find_library(GGML_BASE_LIBRARY ggml-base
    REQUIRED
    HINTS ${GGML_LIB_DIR}
    NO_CMAKE_FIND_ROOT_PATH)

add_library(ggml::ggml-base UNKNOWN IMPORTED)
set_property(TARGET ggml::ggml-base PROPERTY IMPORTED_LOCATION "${GGML_BASE_LIBRARY}")

# When ggml was built as static libraries, the archives do not carry their
# private link dependencies, so this package must re-find them on the
# consumer's machine. Each dependency is appended to a
# <BACKEND>_INTERFACE_LINK_LIBRARIES list that the backend loop later in
# this file attaches to the matching imported target.
if (NOT GGML_SHARED_LIB)
# macOS Accelerate framework used by the CPU backend.
if (APPLE AND GGML_ACCELERATE)
find_library(ACCELERATE_FRAMEWORK Accelerate REQUIRED)
list(APPEND GGML_CPU_INTERFACE_LINK_LIBRARIES ${ACCELERATE_FRAMEWORK})
endif()

if (GGML_OPENMP)
find_package(OpenMP REQUIRED)
list(APPEND GGML_CPU_INTERFACE_LINK_LIBRARIES OpenMP::OpenMP_C OpenMP::OpenMP_CXX)
endif()

# memkind backs the CPU backend's high-bandwidth-memory allocation option.
if (GGML_CPU_HBM)
find_library(memkind memkind REQUIRED)
list(APPEND GGML_CPU_INTERFACE_LINK_LIBRARIES memkind)
endif()

if (GGML_BLAS)
find_package(BLAS REQUIRED)
list(APPEND GGML_CPU_INTERFACE_LINK_LIBRARIES ${BLAS_LIBRARIES})
list(APPEND GGML_CPU_INTERFACE_LINK_OPTIONS ${BLAS_LINKER_FLAGS})
endif()

# NOTE(review): CUDAToolkit is found but nothing is appended to a
# GGML_CUDA_INTERFACE_LINK_LIBRARIES list here -- presumably the CUDA
# imported targets are picked up elsewhere; confirm static CUDA linking.
if (GGML_CUDA)
find_package(CUDAToolkit REQUIRED)
endif()

if (GGML_METAL)
find_library(FOUNDATION_LIBRARY Foundation REQUIRED)
find_library(METAL_FRAMEWORK Metal REQUIRED)
find_library(METALKIT_FRAMEWORK MetalKit REQUIRED)

list(APPEND GGML_METAL_INTERFACE_LINK_LIBRARIES
${FOUNDATION_LIBRARY} ${METAL_FRAMEWORK} ${METALKIT_FRAMEWORK})
endif()

if (GGML_VULKAN)
find_package(Vulkan REQUIRED)
list(APPEND GGML_VULKAN_INTERFACE_LINK_LIBRARIES Vulkan::Vulkan)
endif()

if (GGML_HIP)
find_package(hip REQUIRED)
find_package(hipblas REQUIRED)
find_package(rocblas REQUIRED)
list(APPEND GGML_HIP_INTERFACE_LINK_LIBRARIES hip::host roc::rocblas roc::hipblas)
endif()

if (GGML_SYCL)
# oneDNN is optional and only linked when targeting Intel hardware.
find_package(DNNL)
if (${DNNL_FOUND} AND GGML_SYCL_TARGET STREQUAL "INTEL")
list(APPEND GGML_SYCL_INTERFACE_LINK_LIBRARIES DNNL::dnnl)
endif()
if (WIN32)
find_package(IntelSYCL REQUIRED)
find_package(MKL REQUIRED)
list(APPEND GGML_SYCL_INTERFACE_LINK_LIBRARIES IntelSYCL::SYCL_CXX MKL::MKL MKL::MKL_SYCL)
endif()
endif()
endif()

# Wrap each backend library recorded at build time in GGML_AVAILABLE_BACKENDS
# as an imported target ggml::<backend>, attaching include dirs and the
# dependency lists assembled above.
set(_ggml_all_targets "")
foreach(_ggml_backend ${GGML_AVAILABLE_BACKENDS})
# Derive the per-backend variable prefix, e.g. "ggml-cpu" -> "GGML_CPU".
string(REPLACE "-" "_" _ggml_backend_pfx "${_ggml_backend}")
string(TOUPPER "${_ggml_backend_pfx}" _ggml_backend_pfx)

find_library(${_ggml_backend_pfx}_LIBRARY ${_ggml_backend}
REQUIRED
HINTS ${GGML_LIB_DIR}
NO_CMAKE_FIND_ROOT_PATH)

message(STATUS "Found ${${_ggml_backend_pfx}_LIBRARY}")

# NOTE(review): INTERFACE_COMPILE_FEATURES c_std_90 combined with
# IMPORTED_LINK_INTERFACE_LANGUAGES "CXX" looks inconsistent -- confirm
# this is intentional.
add_library(ggml::${_ggml_backend} UNKNOWN IMPORTED)
set_target_properties(ggml::${_ggml_backend}
PROPERTIES
INTERFACE_INCLUDE_DIRECTORIES "${GGML_INCLUDE_DIR}"
IMPORTED_LINK_INTERFACE_LANGUAGES "CXX"
IMPORTED_LOCATION "${${_ggml_backend_pfx}_LIBRARY}"
INTERFACE_COMPILE_FEATURES c_std_90
POSITION_INDEPENDENT_CODE ON)

# CPU variants (any backend named ggml-cpu*) share the common
# GGML_CPU_INTERFACE_LINK_LIBRARIES list built earlier in this file.
string(REGEX MATCH "^ggml-cpu" is_cpu_variant "${_ggml_backend}")
if(is_cpu_variant)
list(APPEND GGML_CPU_INTERFACE_LINK_LIBRARIES "ggml::ggml" "ggml::ggml-base")
set_target_properties(ggml::${_ggml_backend}
PROPERTIES
INTERFACE_LINK_LIBRARIES "${GGML_CPU_INTERFACE_LINK_LIBRARIES}")

if(GGML_CPU_INTERFACE_LINK_OPTIONS)
set_target_properties(ggml::${_ggml_backend}
PROPERTIES
INTERFACE_LINK_OPTIONS "${GGML_CPU_INTERFACE_LINK_OPTIONS}")
endif()

else()
# Non-CPU backends use their own per-backend dependency list
# (double expansion resolves e.g. GGML_VULKAN_INTERFACE_LINK_LIBRARIES).
list(APPEND ${_ggml_backend_pfx}_INTERFACE_LINK_LIBRARIES "ggml::ggml" "ggml::ggml-base")
set_target_properties(ggml::${_ggml_backend}
PROPERTIES
INTERFACE_LINK_LIBRARIES "${${_ggml_backend_pfx}_INTERFACE_LINK_LIBRARIES}")

if(${_ggml_backend_pfx}_INTERFACE_LINK_OPTIONS)
set_target_properties(ggml::${_ggml_backend}
PROPERTIES
INTERFACE_LINK_OPTIONS "${${_ggml_backend_pfx}_INTERFACE_LINK_OPTIONS}")
endif()
endif()

# Collected for the ggml::all umbrella target defined below.
list(APPEND _ggml_all_targets ggml::${_ggml_backend})
endforeach()

# Convenience umbrella target: linking ggml::all pulls in every backend
# target discovered above.
add_library(ggml::all INTERFACE IMPORTED)
set_property(TARGET ggml::all
    PROPERTY INTERFACE_LINK_LIBRARIES "${_ggml_all_targets}")

check_required_components(ggml)
2 changes: 2 additions & 0 deletions include/ggml-backend.h
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,8 @@ extern "C" {
// Backend registry
//

GGML_API void ggml_backend_device_register(ggml_backend_dev_t device);

// Backend (reg) enumeration
GGML_API size_t ggml_backend_reg_count(void);
GGML_API ggml_backend_reg_t ggml_backend_reg_get(size_t index);
Expand Down
31 changes: 26 additions & 5 deletions include/ggml.h
Original file line number Diff line number Diff line change
Expand Up @@ -1384,16 +1384,20 @@ extern "C" {
float scale,
float max_bias);

GGML_API struct ggml_tensor * ggml_soft_max_back(
GGML_API struct ggml_tensor * ggml_soft_max_ext_back(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b);
struct ggml_tensor * b,
float scale,
float max_bias);

// in-place, returns view(a)
GGML_API struct ggml_tensor * ggml_soft_max_back_inplace(
GGML_API struct ggml_tensor * ggml_soft_max_ext_back_inplace(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b);
struct ggml_tensor * b,
float scale,
float max_bias);

// rotary position embedding
// if (mode & 1) - skip n_past elements (NOT SUPPORTED)
Expand Down Expand Up @@ -1500,7 +1504,7 @@ extern "C" {

// rotary position embedding backward, i.e compute dx from dy
// a - dy
GGML_API struct ggml_tensor * ggml_rope_back(
GGML_API struct ggml_tensor * ggml_rope_ext_back(
struct ggml_context * ctx,
struct ggml_tensor * a, // gradients of ggml_rope result
struct ggml_tensor * b, // positions
Expand All @@ -1515,6 +1519,23 @@ extern "C" {
float beta_fast,
float beta_slow);

GGML_API struct ggml_tensor * ggml_rope_multi_back(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
struct ggml_tensor * c,
int n_dims,
int sections[4],
int mode,
int n_ctx_orig,
float freq_base,
float freq_scale,
float ext_factor,
float attn_factor,
float beta_fast,
float beta_slow);


// clamp
// in-place, returns view(a)
GGML_API struct ggml_tensor * ggml_clamp(
Expand Down
2 changes: 1 addition & 1 deletion scripts/sync-llama.last
Original file line number Diff line number Diff line change
@@ -1 +1 @@
504af20ee4eae72080a56d59d744f6774f7901ce
815857791d3639a4d544d0a8cf25a49b0325c08c
2 changes: 1 addition & 1 deletion scripts/sync-llama.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

cp -rpv ../llama.cpp/ggml/CMakeLists.txt CMakeLists.txt
cp -rpv ../llama.cpp/ggml/src/CMakeLists.txt src/CMakeLists.txt
cp -rpv ../llama.cpp/ggml/cmake/FindSIMD.cmake cmake/FindSIMD.cmake
cp -rpv ../llama.cpp/ggml/cmake/* cmake/

cp -rpv ../llama.cpp/ggml/src/ggml*.c src/
cp -rpv ../llama.cpp/ggml/src/ggml*.cpp src/
Expand Down
13 changes: 12 additions & 1 deletion src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,17 @@ function(ggml_add_backend_library backend)
target_compile_definitions(${backend} PRIVATE GGML_BACKEND_BUILD)
target_compile_definitions(${backend} PUBLIC GGML_BACKEND_SHARED)
endif()

if(NOT GGML_AVAILABLE_BACKENDS)
set(GGML_AVAILABLE_BACKENDS "${backend}"
CACHE INTERNAL "List of backends for cmake package")
else()
list(FIND GGML_AVAILABLE_BACKENDS "${backend}" has_backend)
if(has_backend EQUAL -1)
set(GGML_AVAILABLE_BACKENDS "${GGML_AVAILABLE_BACKENDS};${backend}"
CACHE INTERNAL "List of backends for cmake package")
endif()
endif()
endfunction()

function(ggml_add_backend backend)
Expand Down Expand Up @@ -297,7 +308,7 @@ if (GGML_CPU_ALL_VARIANTS)
# MSVC doesn't support AMX
ggml_add_cpu_backend_variant(sapphirerapids AVX F16C AVX2 FMA AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16 AMX_TILE AMX_INT8)
endif()
else ()
elseif (GGML_CPU)
ggml_add_cpu_backend_variant_impl("")
endif()

Expand Down
5 changes: 5 additions & 0 deletions src/ggml-alloc.c
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ static bool ggml_are_same_layout(const struct ggml_tensor * a, const struct ggml
return true;
}

// ops that return true for this function must not use restrict pointers for their backend implementations
static bool ggml_op_can_inplace(enum ggml_op op) {
switch (op) {
case GGML_OP_SCALE:
Expand All @@ -52,8 +53,12 @@ static bool ggml_op_can_inplace(enum ggml_op op) {
case GGML_OP_LOG:
case GGML_OP_UNARY:
case GGML_OP_ROPE:
case GGML_OP_ROPE_BACK:
case GGML_OP_SILU_BACK:
case GGML_OP_RMS_NORM:
case GGML_OP_RMS_NORM_BACK:
case GGML_OP_SOFT_MAX:
case GGML_OP_SOFT_MAX_BACK:
return true;

default:
Expand Down
1 change: 0 additions & 1 deletion src/ggml-backend-impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,6 @@ extern "C" {

// Internal backend registry API
GGML_API void ggml_backend_register(ggml_backend_reg_t reg);
GGML_API void ggml_backend_device_register(ggml_backend_dev_t device);

// Add backend dynamic loading support to the backend

Expand Down
Loading