Skip to content

Commit a6d6cc4

Browse files
committed
synced to llama.cpp tag:b4102 short:db4cfd5
1 parent f2d14b8 commit a6d6cc4

18 files changed

+175
-160
lines changed

scripts/setup.sh

+4
Original file line number | Diff line number | Diff line change
@@ -25,6 +25,10 @@ get_llamacpp() {
2525
cmake .. -DBUILD_SHARED_LIBS=OFF -DCMAKE_POSITION_INDEPENDENT_CODE=ON && \
2626
cmake --build . --config Release && \
2727
cmake --install . --prefix ${PREFIX} && \
28+
cp ggml/src/libggml-base.a ${LIB} && \
29+
cp ggml/src/ggml-blas/libggml-blas.a ${LIB} && \
30+
cp ggml/src/ggml-cpu/libggml-cpu.a ${LIB} && \
31+
cp ggml/src/ggml-metal/libggml-metal.a ${LIB} && \
2832
cp common/libcommon.a ${LIB} && \
2933
cp examples/llava/libllava_static.a ${LIB}/libllava.a && \
3034
mv ${PREFIX}/bin ${CWD}/bin && \

setup.py

+4
Original file line number | Diff line number | Diff line change
@@ -47,6 +47,10 @@
4747
f'{LLAMACPP_LIBS_DIR}/libcommon.a',
4848
f'{LLAMACPP_LIBS_DIR}/libllama.a',
4949
f'{LLAMACPP_LIBS_DIR}/libggml.a',
50+
f'{LLAMACPP_LIBS_DIR}/libggml-base.a',
51+
f'{LLAMACPP_LIBS_DIR}/libggml-blas.a',
52+
f'{LLAMACPP_LIBS_DIR}/libggml-cpu.a',
53+
f'{LLAMACPP_LIBS_DIR}/libggml-metal.a',
5054
])
5155

5256
INCLUDE_DIRS.append(os.path.join(CWD, 'include'))

src/cyllama/cyllama.pyx

+5
Original file line number | Diff line number | Diff line change
@@ -4281,3 +4281,8 @@ def common_batch_clear(LlamaBatch batch):
42814281
def llama_backend_free():
42824282
"""Call once at the end of the program - currently only used for MPI"""
42834283
llama_cpp.llama_backend_free()
4284+
4285+
# cdef void ggml_abort(const char * file, int line, const char * fmt):
4286+
# llama_cpp.ggml_abort(file, line, fmt)
4287+
4288+

src/cyllama/llama_cpp.pxd

+2
Original file line number | Diff line number | Diff line change
@@ -252,6 +252,8 @@ cdef extern from "ggml.h":
252252
cdef int64_t ggml_cycles()
253253
cdef int64_t ggml_cycles_per_ms()
254254

255+
# cdef void ggml_abort(const char * file, int line, const char * fmt, ...)
256+
255257

256258
#------------------------------------------------------------------------------
257259
# ggml-backend.h

tests/test_context.py

+2 -6
Original file line number | Diff line number | Diff line change
@@ -26,12 +26,8 @@ def test_context(model_path):
2626
assert ctx.model.n_head == 32
2727
assert ctx.model.rope_freq_scale_train == 1.0
2828
assert ctx.model.desc == "llama 1B Q8_0"
29-
if PLATFORM == "Darwin":
30-
assert ctx.model.size == 1592336512
31-
assert ctx.model.n_params == 1498482720
32-
elif PLATFORM == "Linux":
33-
assert ctx.model.size == 1313251456
34-
assert ctx.model.n_params == 1235814432
29+
assert ctx.model.size == 1313251456
30+
assert ctx.model.n_params == 1235814432
3531
assert ctx.model.has_decoder() == True
3632
assert ctx.model.has_encoder() == False
3733
assert ctx.model.is_recurrent() == False

tests/test_model.py

+2 -6
Original file line number | Diff line number | Diff line change
@@ -33,12 +33,8 @@ def test_autorelease(model_path):
3333
assert model.n_head == 32
3434
assert model.rope_freq_scale_train == 1.0
3535
assert model.desc == "llama 1B Q8_0"
36-
if PLATFORM == "Darwin":
37-
assert model.size == 1592336512
38-
assert model.n_params == 1498482720
39-
elif PLATFORM == "Linux":
40-
assert model.size == 1313251456
41-
assert model.n_params == 1235814432
36+
assert model.size == 1313251456
37+
assert model.n_params == 1235814432
4238
assert model.has_decoder() == True
4339
assert model.has_encoder() == False
4440
assert model.is_recurrent() == False

thirdparty/llama.cpp/include/ggml-amx.h

+5 -5
Original file line number | Diff line number | Diff line change
@@ -9,16 +9,16 @@ extern "C" {
99
#endif
1010

1111
// buffer_type API
12-
GGML_API ggml_backend_buffer_type_t ggml_backend_amx_buffer_type(void);
12+
GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_amx_buffer_type(void);
1313

14-
GGML_API bool ggml_backend_is_amx(ggml_backend_t backend);
14+
GGML_BACKEND_API bool ggml_backend_is_amx(ggml_backend_t backend);
1515

1616
// backend API
17-
GGML_API ggml_backend_t ggml_backend_amx_init(void);
17+
GGML_BACKEND_API ggml_backend_t ggml_backend_amx_init(void);
1818

19-
GGML_API void ggml_backend_amx_set_n_threads(ggml_backend_t backend_amx, int n_threads);
19+
GGML_BACKEND_API void ggml_backend_amx_set_n_threads(ggml_backend_t backend_amx, int n_threads);
2020

21-
GGML_API ggml_backend_reg_t ggml_backend_amx_reg(void);
21+
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_amx_reg(void);
2222

2323
#ifdef __cplusplus
2424
}

thirdparty/llama.cpp/include/ggml-backend.h

+14
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,20 @@
33
#include "ggml.h"
44
#include "ggml-alloc.h"
55

6+
#ifdef GGML_BACKEND_SHARED
7+
# if defined(_WIN32) && !defined(__MINGW32__)
8+
# ifdef GGML_BACKEND_BUILD
9+
# define GGML_BACKEND_API __declspec(dllexport) extern
10+
# else
11+
# define GGML_BACKEND_API __declspec(dllimport) extern
12+
# endif
13+
# else
14+
# define GGML_BACKEND_API __attribute__ ((visibility ("default"))) extern
15+
# endif
16+
#else
17+
# define GGML_BACKEND_API extern
18+
#endif
19+
620
#ifdef __cplusplus
721
extern "C" {
822
#endif

thirdparty/llama.cpp/include/ggml-blas.h

+4 -4
Original file line number | Diff line number | Diff line change
@@ -9,15 +9,15 @@ extern "C" {
99
#endif
1010

1111
// backend API
12-
GGML_API ggml_backend_t ggml_backend_blas_init(void);
12+
GGML_BACKEND_API ggml_backend_t ggml_backend_blas_init(void);
1313

14-
GGML_API bool ggml_backend_is_blas(ggml_backend_t backend);
14+
GGML_BACKEND_API bool ggml_backend_is_blas(ggml_backend_t backend);
1515

1616
// number of threads used for conversion to float
1717
// for openblas and blis, this will also set the number of threads used for blas operations
18-
GGML_API void ggml_backend_blas_set_n_threads(ggml_backend_t backend_blas, int n_threads);
18+
GGML_BACKEND_API void ggml_backend_blas_set_n_threads(ggml_backend_t backend_blas, int n_threads);
1919

20-
GGML_API ggml_backend_reg_t ggml_backend_blas_reg(void);
20+
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_blas_reg(void);
2121

2222

2323
#ifdef __cplusplus

thirdparty/llama.cpp/include/ggml-cann.h

+8 -8
Original file line number | Diff line number | Diff line change
@@ -34,7 +34,7 @@ extern "C" {
3434
*/
3535
#define GGML_CANN_MAX_DEVICES 16
3636

37-
GGML_API ggml_backend_reg_t ggml_backend_cann_reg(void);
37+
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_cann_reg(void);
3838

3939
/**
4040
* @brief Initializes the CANN backend for a specified device.
@@ -46,7 +46,7 @@ GGML_API ggml_backend_reg_t ggml_backend_cann_reg(void);
4646
* @param device The index of the device to initialize.
4747
* @return A pointer to the initialized backend instance, or nullptr on failure.
4848
*/
49-
GGML_API ggml_backend_t ggml_backend_cann_init(int32_t device);
49+
GGML_BACKEND_API ggml_backend_t ggml_backend_cann_init(int32_t device);
5050

5151
/**
5252
* @brief Checks if a given backend is a CANN backend.
@@ -57,7 +57,7 @@ GGML_API ggml_backend_t ggml_backend_cann_init(int32_t device);
5757
* @param backend The backend instance to check.
5858
* @return True if the backend is a CANN backend, false otherwise.
5959
*/
60-
GGML_API bool ggml_backend_is_cann(ggml_backend_t backend);
60+
GGML_BACKEND_API bool ggml_backend_is_cann(ggml_backend_t backend);
6161

6262
/**
6363
* @brief Retrieves the CANN buffer type for a specified device.
@@ -69,7 +69,7 @@ GGML_API bool ggml_backend_is_cann(ggml_backend_t backend);
6969
* @return A pointer to the buffer type interface for the specified device, or
7070
* nullptr if the device index is out of range.
7171
*/
72-
GGML_API ggml_backend_buffer_type_t
72+
GGML_BACKEND_API ggml_backend_buffer_type_t
7373
ggml_backend_cann_buffer_type(int32_t device);
7474

7575
/**
@@ -80,14 +80,14 @@ ggml_backend_cann_buffer_type(int32_t device);
8080
*
8181
* @return The number of CANN devices available.
8282
*/
83-
GGML_API int32_t ggml_backend_cann_get_device_count(void);
83+
GGML_BACKEND_API int32_t ggml_backend_cann_get_device_count(void);
8484

8585
/**
8686
* @brief pinned host buffer for use with the CPU backend for faster copies between CPU and NPU.
8787
*
8888
* @return A pointer to the host buffer type interface.
8989
*/
90-
GGML_API ggml_backend_buffer_type_t ggml_backend_cann_host_buffer_type(void);
90+
GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_cann_host_buffer_type(void);
9191

9292
/**
9393
* @brief Retrieves the description of a specific CANN device.
@@ -99,7 +99,7 @@ GGML_API ggml_backend_buffer_type_t ggml_backend_cann_host_buffer_type(void);
9999
* @param description Pointer to a buffer where the description will be written.
100100
* @param description_size Size of the description buffer.
101101
*/
102-
GGML_API void ggml_backend_cann_get_device_description(
102+
GGML_BACKEND_API void ggml_backend_cann_get_device_description(
103103
int32_t device, char* description, size_t description_size);
104104

105105
/**
@@ -114,7 +114,7 @@ GGML_API void ggml_backend_cann_get_device_description(
114114
* @param total Pointer to a variable where the total memory size will be
115115
* stored.
116116
*/
117-
GGML_API void ggml_backend_cann_get_device_memory(int32_t device,
117+
GGML_BACKEND_API void ggml_backend_cann_get_device_memory(int32_t device,
118118
size_t* free,
119119
size_t* total);
120120

thirdparty/llama.cpp/include/ggml-cpu.h

+67 -40
Original file line number | Diff line number | Diff line change
@@ -54,54 +54,77 @@ extern "C" {
5454
GGML_NUMA_STRATEGY_COUNT
5555
};
5656

57-
GGML_API void ggml_numa_init(enum ggml_numa_strategy numa); // call once for better performance on NUMA systems
58-
GGML_API bool ggml_is_numa(void); // true if init detected that system has >1 NUMA node
57+
GGML_BACKEND_API void ggml_numa_init(enum ggml_numa_strategy numa); // call once for better performance on NUMA systems
58+
GGML_BACKEND_API bool ggml_is_numa(void); // true if init detected that system has >1 NUMA node
5959

60-
GGML_API struct ggml_tensor * ggml_new_i32(struct ggml_context * ctx, int32_t value);
61-
GGML_API struct ggml_tensor * ggml_new_f32(struct ggml_context * ctx, float value);
60+
GGML_BACKEND_API struct ggml_tensor * ggml_new_i32(struct ggml_context * ctx, int32_t value);
61+
GGML_BACKEND_API struct ggml_tensor * ggml_new_f32(struct ggml_context * ctx, float value);
6262

63-
GGML_API struct ggml_tensor * ggml_set_i32 (struct ggml_tensor * tensor, int32_t value);
64-
GGML_API struct ggml_tensor * ggml_set_f32 (struct ggml_tensor * tensor, float value);
63+
GGML_BACKEND_API struct ggml_tensor * ggml_set_i32 (struct ggml_tensor * tensor, int32_t value);
64+
GGML_BACKEND_API struct ggml_tensor * ggml_set_f32 (struct ggml_tensor * tensor, float value);
6565

66-
GGML_API int32_t ggml_get_i32_1d(const struct ggml_tensor * tensor, int i);
67-
GGML_API void ggml_set_i32_1d(const struct ggml_tensor * tensor, int i, int32_t value);
66+
GGML_BACKEND_API int32_t ggml_get_i32_1d(const struct ggml_tensor * tensor, int i);
67+
GGML_BACKEND_API void ggml_set_i32_1d(const struct ggml_tensor * tensor, int i, int32_t value);
6868

69-
GGML_API int32_t ggml_get_i32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3);
70-
GGML_API void ggml_set_i32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3, int32_t value);
69+
GGML_BACKEND_API int32_t ggml_get_i32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3);
70+
GGML_BACKEND_API void ggml_set_i32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3, int32_t value);
7171

72-
GGML_API float ggml_get_f32_1d(const struct ggml_tensor * tensor, int i);
73-
GGML_API void ggml_set_f32_1d(const struct ggml_tensor * tensor, int i, float value);
72+
GGML_BACKEND_API float ggml_get_f32_1d(const struct ggml_tensor * tensor, int i);
73+
GGML_BACKEND_API void ggml_set_f32_1d(const struct ggml_tensor * tensor, int i, float value);
7474

75-
GGML_API float ggml_get_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3);
76-
GGML_API void ggml_set_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3, float value);
75+
GGML_BACKEND_API float ggml_get_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3);
76+
GGML_BACKEND_API void ggml_set_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3, float value);
7777

78-
GGML_API struct ggml_threadpool_params ggml_threadpool_params_default(int n_threads);
79-
GGML_API void ggml_threadpool_params_init (struct ggml_threadpool_params * p, int n_threads);
80-
GGML_API bool ggml_threadpool_params_match (const struct ggml_threadpool_params * p0, const struct ggml_threadpool_params * p1);
81-
GGML_API struct ggml_threadpool * ggml_threadpool_new (struct ggml_threadpool_params * params);
82-
GGML_API void ggml_threadpool_free (struct ggml_threadpool * threadpool);
83-
GGML_API int ggml_threadpool_get_n_threads(struct ggml_threadpool * threadpool);
84-
GGML_API void ggml_threadpool_pause (struct ggml_threadpool * threadpool);
85-
GGML_API void ggml_threadpool_resume (struct ggml_threadpool * threadpool);
78+
GGML_BACKEND_API struct ggml_threadpool_params ggml_threadpool_params_default(int n_threads);
79+
GGML_BACKEND_API void ggml_threadpool_params_init (struct ggml_threadpool_params * p, int n_threads);
80+
GGML_BACKEND_API bool ggml_threadpool_params_match (const struct ggml_threadpool_params * p0, const struct ggml_threadpool_params * p1);
81+
GGML_BACKEND_API struct ggml_threadpool * ggml_threadpool_new (struct ggml_threadpool_params * params);
82+
GGML_BACKEND_API void ggml_threadpool_free (struct ggml_threadpool * threadpool);
83+
GGML_BACKEND_API int ggml_threadpool_get_n_threads(struct ggml_threadpool * threadpool);
84+
GGML_BACKEND_API void ggml_threadpool_pause (struct ggml_threadpool * threadpool);
85+
GGML_BACKEND_API void ggml_threadpool_resume (struct ggml_threadpool * threadpool);
8686

8787
// ggml_graph_plan() has to be called before ggml_graph_compute()
8888
// when plan.work_size > 0, caller must allocate memory for plan.work_data
89-
GGML_API struct ggml_cplan ggml_graph_plan(
89+
GGML_BACKEND_API struct ggml_cplan ggml_graph_plan(
9090
const struct ggml_cgraph * cgraph,
9191
int n_threads, /* = GGML_DEFAULT_N_THREADS */
9292
struct ggml_threadpool * threadpool /* = NULL */ );
93-
GGML_API enum ggml_status ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan);
93+
GGML_BACKEND_API enum ggml_status ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan);
9494

9595
// same as ggml_graph_compute() but the work data is allocated as a part of the context
9696
// note: the drawback of this API is that you must have ensured that the context has enough memory for the work data
97-
GGML_API enum ggml_status ggml_graph_compute_with_ctx(struct ggml_context * ctx, struct ggml_cgraph * cgraph, int n_threads);
97+
GGML_BACKEND_API enum ggml_status ggml_graph_compute_with_ctx(struct ggml_context * ctx, struct ggml_cgraph * cgraph, int n_threads);
9898

99-
// TODO: move to backend interface
100-
GGML_API int ggml_cpu_has_neon (void);
101-
GGML_API int ggml_cpu_has_sve (void);
102-
GGML_API int ggml_cpu_has_matmul_int8(void);
103-
// get the sve vector length in bytes
104-
GGML_API int ggml_cpu_get_sve_cnt(void);
99+
//
100+
// system info
101+
//
102+
103+
// x86
104+
GGML_BACKEND_API int ggml_cpu_has_sse3 (void);
105+
GGML_BACKEND_API int ggml_cpu_has_ssse3 (void);
106+
GGML_BACKEND_API int ggml_cpu_has_avx (void);
107+
GGML_BACKEND_API int ggml_cpu_has_avx2 (void);
108+
GGML_BACKEND_API int ggml_cpu_has_f16c (void);
109+
GGML_BACKEND_API int ggml_cpu_has_fma (void);
110+
GGML_BACKEND_API int ggml_cpu_has_avx_vnni (void);
111+
GGML_BACKEND_API int ggml_cpu_has_avx512 (void);
112+
GGML_BACKEND_API int ggml_cpu_has_avx512_vbmi(void);
113+
GGML_BACKEND_API int ggml_cpu_has_avx512_vnni(void);
114+
GGML_BACKEND_API int ggml_cpu_has_avx512_bf16(void);
115+
GGML_BACKEND_API int ggml_cpu_has_amx_int8 (void);
116+
// ARM
117+
GGML_BACKEND_API int ggml_cpu_has_neon (void);
118+
GGML_BACKEND_API int ggml_cpu_has_arm_fma (void);
119+
GGML_BACKEND_API int ggml_cpu_has_fp16_va (void);
120+
GGML_BACKEND_API int ggml_cpu_has_matmul_int8(void);
121+
GGML_BACKEND_API int ggml_cpu_has_sve (void);
122+
GGML_BACKEND_API int ggml_cpu_get_sve_cnt (void); // sve vector length in bytes
123+
// other
124+
GGML_BACKEND_API int ggml_cpu_has_riscv_v (void);
125+
GGML_BACKEND_API int ggml_cpu_has_vsx (void);
126+
GGML_BACKEND_API int ggml_cpu_has_wasm_simd (void);
127+
GGML_BACKEND_API int ggml_cpu_has_llamafile (void);
105128

106129
// Internal types and functions exposed for tests and benchmarks
107130

@@ -115,6 +138,7 @@ extern "C" {
115138
const void * GGML_RESTRICT y, int nr, int nc);
116139

117140
struct ggml_type_traits_cpu {
141+
ggml_from_float_t from_float;
118142
ggml_from_float_to_mat_t from_float_to_mat;
119143
ggml_vec_dot_t vec_dot;
120144
enum ggml_type vec_dot_type;
@@ -124,27 +148,30 @@ extern "C" {
124148
ggml_gemm_t gemm;
125149
};
126150

127-
GGML_API const struct ggml_type_traits_cpu * ggml_get_type_traits_cpu(enum ggml_type type);
151+
GGML_BACKEND_API const struct ggml_type_traits_cpu * ggml_get_type_traits_cpu(enum ggml_type type);
128152

129-
GGML_API void ggml_cpu_init(void);
153+
GGML_BACKEND_API void ggml_cpu_init(void);
130154

131155
//
132156
// CPU backend
133157
//
134158

135-
GGML_API ggml_backend_t ggml_backend_cpu_init(void);
159+
GGML_BACKEND_API ggml_backend_t ggml_backend_cpu_init(void);
136160

137-
GGML_API bool ggml_backend_is_cpu (ggml_backend_t backend);
138-
GGML_API void ggml_backend_cpu_set_n_threads (ggml_backend_t backend_cpu, int n_threads);
139-
GGML_API void ggml_backend_cpu_set_threadpool (ggml_backend_t backend_cpu, ggml_threadpool_t threadpool);
140-
GGML_API void ggml_backend_cpu_set_abort_callback(ggml_backend_t backend_cpu, ggml_abort_callback abort_callback, void * abort_callback_data);
161+
GGML_BACKEND_API bool ggml_backend_is_cpu (ggml_backend_t backend);
162+
GGML_BACKEND_API void ggml_backend_cpu_set_n_threads (ggml_backend_t backend_cpu, int n_threads);
163+
GGML_BACKEND_API void ggml_backend_cpu_set_threadpool (ggml_backend_t backend_cpu, ggml_threadpool_t threadpool);
164+
GGML_BACKEND_API void ggml_backend_cpu_set_abort_callback(ggml_backend_t backend_cpu, ggml_abort_callback abort_callback, void * abort_callback_data);
141165

142-
GGML_API ggml_backend_reg_t ggml_backend_cpu_reg(void);
166+
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_cpu_reg(void);
143167

144168
#ifdef GGML_USE_CPU_HBM
145-
GGML_API ggml_backend_buffer_type_t ggml_backend_cpu_hbm_buffer_type(void);
169+
GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_cpu_hbm_buffer_type(void);
146170
#endif
147171

172+
GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_cpu_aarch64_buffer_type(void);
173+
GGML_BACKEND_API bool ggml_backend_cpu_buft_is_aarch64(ggml_backend_buffer_type_t buft);
174+
148175
#ifdef __cplusplus
149176
}
150177
#endif

0 commit comments

Comments
 (0)