@@ -54,54 +54,77 @@ extern "C" {
         GGML_NUMA_STRATEGY_COUNT
     };
 
-    GGML_API void ggml_numa_init(enum ggml_numa_strategy numa); // call once for better performance on NUMA systems
-    GGML_API bool ggml_is_numa(void); // true if init detected that system has >1 NUMA node
+    GGML_BACKEND_API void ggml_numa_init(enum ggml_numa_strategy numa); // call once for better performance on NUMA systems
+    GGML_BACKEND_API bool ggml_is_numa(void); // true if init detected that system has >1 NUMA node
 
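The NUMA pair is typically called once at startup, before any graph work. A minimal sketch, assuming one of the other `ggml_numa_strategy` values from the enum above (e.g. `GGML_NUMA_STRATEGY_DISTRIBUTE`); the `main` wrapper is illustrative:

```c
#include "ggml-cpu.h"
#include <stdio.h>

int main(void) {
    // assumed strategy value from the ggml_numa_strategy enum; pick the
    // one matching your topology (distribute, isolate, numactl, ...)
    ggml_numa_init(GGML_NUMA_STRATEGY_DISTRIBUTE);

    if (ggml_is_numa()) {
        printf("running on a system with >1 NUMA node\n");
    }
    return 0;
}
```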
-    GGML_API struct ggml_tensor * ggml_new_i32(struct ggml_context * ctx, int32_t value);
-    GGML_API struct ggml_tensor * ggml_new_f32(struct ggml_context * ctx, float value);
+    GGML_BACKEND_API struct ggml_tensor * ggml_new_i32(struct ggml_context * ctx, int32_t value);
+    GGML_BACKEND_API struct ggml_tensor * ggml_new_f32(struct ggml_context * ctx, float value);
 
-    GGML_API struct ggml_tensor * ggml_set_i32(struct ggml_tensor * tensor, int32_t value);
-    GGML_API struct ggml_tensor * ggml_set_f32(struct ggml_tensor * tensor, float value);
+    GGML_BACKEND_API struct ggml_tensor * ggml_set_i32(struct ggml_tensor * tensor, int32_t value);
+    GGML_BACKEND_API struct ggml_tensor * ggml_set_f32(struct ggml_tensor * tensor, float value);
 
-    GGML_API int32_t ggml_get_i32_1d(const struct ggml_tensor * tensor, int i);
-    GGML_API void    ggml_set_i32_1d(const struct ggml_tensor * tensor, int i, int32_t value);
+    GGML_BACKEND_API int32_t ggml_get_i32_1d(const struct ggml_tensor * tensor, int i);
+    GGML_BACKEND_API void    ggml_set_i32_1d(const struct ggml_tensor * tensor, int i, int32_t value);
 
-    GGML_API int32_t ggml_get_i32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3);
-    GGML_API void    ggml_set_i32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3, int32_t value);
+    GGML_BACKEND_API int32_t ggml_get_i32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3);
+    GGML_BACKEND_API void    ggml_set_i32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3, int32_t value);
 
-    GGML_API float   ggml_get_f32_1d(const struct ggml_tensor * tensor, int i);
-    GGML_API void    ggml_set_f32_1d(const struct ggml_tensor * tensor, int i, float value);
+    GGML_BACKEND_API float   ggml_get_f32_1d(const struct ggml_tensor * tensor, int i);
+    GGML_BACKEND_API void    ggml_set_f32_1d(const struct ggml_tensor * tensor, int i, float value);
 
-    GGML_API float   ggml_get_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3);
-    GGML_API void    ggml_set_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3, float value);
+    GGML_BACKEND_API float   ggml_get_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3);
+    GGML_BACKEND_API void    ggml_set_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3, float value);
 
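These accessors operate element-wise on CPU-resident tensors, by flat index (`_1d`) or per-dimension index (`_nd`). A minimal sketch of filling and reading a small f32 tensor (`ggml_init` and `ggml_new_tensor_1d` come from ggml.h; the buffer size here is an arbitrary choice):

```c
#include "ggml.h"
#include "ggml-cpu.h"
#include <stdio.h>

int main(void) {
    struct ggml_init_params params = {
        /*.mem_size   =*/ 16*1024*1024, // 16 MB scratch, plenty for this sketch
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ false,
    };
    struct ggml_context * ctx = ggml_init(params);

    struct ggml_tensor * t = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4);
    ggml_set_f32(t, 1.0f);       // broadcast-fill every element
    ggml_set_f32_1d(t, 2, 3.5f); // overwrite one element by flat index

    printf("t[2] = %f\n", ggml_get_f32_1d(t, 2));

    ggml_free(ctx);
    return 0;
}
```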
-    GGML_API struct ggml_threadpool_params ggml_threadpool_params_default(int n_threads);
-    GGML_API void                          ggml_threadpool_params_init   (struct ggml_threadpool_params * p, int n_threads);
-    GGML_API bool                          ggml_threadpool_params_match  (const struct ggml_threadpool_params * p0, const struct ggml_threadpool_params * p1);
-    GGML_API struct ggml_threadpool *      ggml_threadpool_new           (struct ggml_threadpool_params * params);
-    GGML_API void                          ggml_threadpool_free          (struct ggml_threadpool * threadpool);
-    GGML_API int                           ggml_threadpool_get_n_threads (struct ggml_threadpool * threadpool);
-    GGML_API void                          ggml_threadpool_pause         (struct ggml_threadpool * threadpool);
-    GGML_API void                          ggml_threadpool_resume        (struct ggml_threadpool * threadpool);
+    GGML_BACKEND_API struct ggml_threadpool_params ggml_threadpool_params_default(int n_threads);
+    GGML_BACKEND_API void                          ggml_threadpool_params_init   (struct ggml_threadpool_params * p, int n_threads);
+    GGML_BACKEND_API bool                          ggml_threadpool_params_match  (const struct ggml_threadpool_params * p0, const struct ggml_threadpool_params * p1);
+    GGML_BACKEND_API struct ggml_threadpool *      ggml_threadpool_new           (struct ggml_threadpool_params * params);
+    GGML_BACKEND_API void                          ggml_threadpool_free          (struct ggml_threadpool * threadpool);
+    GGML_BACKEND_API int                           ggml_threadpool_get_n_threads (struct ggml_threadpool * threadpool);
+    GGML_BACKEND_API void                          ggml_threadpool_pause         (struct ggml_threadpool * threadpool);
+    GGML_BACKEND_API void                          ggml_threadpool_resume        (struct ggml_threadpool * threadpool);
 
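A threadpool can be created once and reused across graph computations. A minimal sketch (the helper name is illustrative):

```c
#include "ggml-cpu.h"

// a minimal sketch: build a pool with default parameters for 8 threads,
// pause it while idle, and free it when done; the same pool can also be
// handed to ggml_graph_plan() below
void threadpool_demo(void) {
    struct ggml_threadpool_params tpp = ggml_threadpool_params_default(8);
    struct ggml_threadpool * tp = ggml_threadpool_new(&tpp);

    ggml_threadpool_pause(tp);  // workers go idle between computations
    ggml_threadpool_resume(tp); // wake them before the next compute

    ggml_threadpool_free(tp);
}
```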
     // ggml_graph_plan() has to be called before ggml_graph_compute()
     // when plan.work_size > 0, caller must allocate memory for plan.work_data
-    GGML_API struct ggml_cplan ggml_graph_plan(
+    GGML_BACKEND_API struct ggml_cplan ggml_graph_plan(
                   const struct ggml_cgraph * cgraph,
                                        int   n_threads, /* = GGML_DEFAULT_N_THREADS */
                    struct ggml_threadpool * threadpool /* = NULL */ );
-    GGML_API enum ggml_status ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan);
+    GGML_BACKEND_API enum ggml_status ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan);
 
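The plan/compute split lets the caller own the work buffer. A minimal sketch, assuming `graph` was built earlier with the usual ggml graph-construction calls (the helper name is illustrative):

```c
#include "ggml-cpu.h"
#include <stdlib.h>

enum ggml_status run_graph(struct ggml_cgraph * graph) {
    struct ggml_cplan plan = ggml_graph_plan(graph, GGML_DEFAULT_N_THREADS, NULL);

    // when the plan needs scratch space, the caller must provide it
    if (plan.work_size > 0) {
        plan.work_data = malloc(plan.work_size);
    }

    enum ggml_status status = ggml_graph_compute(graph, &plan);

    free(plan.work_data); // NULL when work_size == 0, so this is safe
    return status;
}
```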
     // same as ggml_graph_compute() but the work data is allocated as a part of the context
     // note: the drawback of this API is that you must have ensured that the context has enough memory for the work data
-    GGML_API enum ggml_status ggml_graph_compute_with_ctx(struct ggml_context * ctx, struct ggml_cgraph * cgraph, int n_threads);
+    GGML_BACKEND_API enum ggml_status ggml_graph_compute_with_ctx(struct ggml_context * ctx, struct ggml_cgraph * cgraph, int n_threads);
 
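The convenience variant skips the explicit plan at the cost of carving the work buffer out of the context, so the context must be sized with headroom beyond the tensors themselves. A sketch (helper name and thread count are illustrative):

```c
// a minimal sketch: the context must have spare memory, since the work
// buffer is allocated from the same pool as the tensors
enum ggml_status run_graph_with_ctx(struct ggml_context * ctx,
                                    struct ggml_cgraph  * graph) {
    return ggml_graph_compute_with_ctx(ctx, graph, 4 /* n_threads */);
}
```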
-    // TODO: move to backend interface
-    GGML_API int ggml_cpu_has_neon       (void);
-    GGML_API int ggml_cpu_has_sve        (void);
-    GGML_API int ggml_cpu_has_matmul_int8(void);
-    // get the sve vector length in bytes
-    GGML_API int ggml_cpu_get_sve_cnt    (void);
+    //
+    // system info
+    //
+
+    // x86
+    GGML_BACKEND_API int ggml_cpu_has_sse3       (void);
+    GGML_BACKEND_API int ggml_cpu_has_ssse3      (void);
+    GGML_BACKEND_API int ggml_cpu_has_avx        (void);
+    GGML_BACKEND_API int ggml_cpu_has_avx2       (void);
+    GGML_BACKEND_API int ggml_cpu_has_f16c       (void);
+    GGML_BACKEND_API int ggml_cpu_has_fma        (void);
+    GGML_BACKEND_API int ggml_cpu_has_avx_vnni   (void);
+    GGML_BACKEND_API int ggml_cpu_has_avx512     (void);
+    GGML_BACKEND_API int ggml_cpu_has_avx512_vbmi(void);
+    GGML_BACKEND_API int ggml_cpu_has_avx512_vnni(void);
+    GGML_BACKEND_API int ggml_cpu_has_avx512_bf16(void);
+    GGML_BACKEND_API int ggml_cpu_has_amx_int8   (void);
+    // ARM
+    GGML_BACKEND_API int ggml_cpu_has_neon       (void);
+    GGML_BACKEND_API int ggml_cpu_has_arm_fma    (void);
+    GGML_BACKEND_API int ggml_cpu_has_fp16_va    (void);
+    GGML_BACKEND_API int ggml_cpu_has_matmul_int8(void);
+    GGML_BACKEND_API int ggml_cpu_has_sve        (void);
+    GGML_BACKEND_API int ggml_cpu_get_sve_cnt    (void); // sve vector length in bytes
+    // other
+    GGML_BACKEND_API int ggml_cpu_has_riscv_v    (void);
+    GGML_BACKEND_API int ggml_cpu_has_vsx        (void);
+    GGML_BACKEND_API int ggml_cpu_has_wasm_simd  (void);
+    GGML_BACKEND_API int ggml_cpu_has_llamafile  (void);
 
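Each predicate returns a boolean-style `int` for the corresponding CPU feature. A quick capability dump might look like the following (helper name is illustrative):

```c
#include "ggml-cpu.h"
#include <stdio.h>

// print a few of the detected CPU capabilities
void print_cpu_caps(void) {
    printf("AVX2:   %d\n", ggml_cpu_has_avx2());
    printf("AVX512: %d\n", ggml_cpu_has_avx512());
    printf("NEON:   %d\n", ggml_cpu_has_neon());
    printf("SVE:    %d (vector length: %d bytes)\n",
           ggml_cpu_has_sve(), ggml_cpu_get_sve_cnt());
}
```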
     // Internal types and functions exposed for tests and benchmarks
 
@@ -115,6 +138,7 @@ extern "C" {
                        const void * GGML_RESTRICT y, int nr, int nc);
 
     struct ggml_type_traits_cpu {
+        ggml_from_float_t        from_float;
         ggml_from_float_to_mat_t from_float_to_mat;
         ggml_vec_dot_t           vec_dot;
         enum ggml_type           vec_dot_type;
@@ -124,27 +148,30 @@ extern "C" {
124
148
ggml_gemm_t gemm ;
125
149
};
126
150
127
- GGML_API const struct ggml_type_traits_cpu * ggml_get_type_traits_cpu (enum ggml_type type );
151
+ GGML_BACKEND_API const struct ggml_type_traits_cpu * ggml_get_type_traits_cpu (enum ggml_type type );
128
152
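The new `from_float` entry exposes the type's row-quantization routine through the traits table. A sketch, assuming the `ggml_from_float_t` signature of `(const float * x, void * y, int64_t k)` (helper name is illustrative):

```c
#include "ggml.h"
#include "ggml-cpu.h"

// a minimal sketch: quantize 32 floats (one Q8_0 block) via the per-type
// traits table; dst must hold at least ggml_row_size(GGML_TYPE_Q8_0, 32) bytes
void quantize_row_demo(const float * src, void * dst) {
    ggml_cpu_init(); // typically called once at startup (see below)

    const struct ggml_type_traits_cpu * traits = ggml_get_type_traits_cpu(GGML_TYPE_Q8_0);
    traits->from_float(src, dst, 32);
}
```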
-    GGML_API void ggml_cpu_init(void);
+    GGML_BACKEND_API void ggml_cpu_init(void);
 
     //
     // CPU backend
     //
 
-    GGML_API ggml_backend_t ggml_backend_cpu_init(void);
+    GGML_BACKEND_API ggml_backend_t ggml_backend_cpu_init(void);
 
-    GGML_API bool ggml_backend_is_cpu                (ggml_backend_t backend);
-    GGML_API void ggml_backend_cpu_set_n_threads     (ggml_backend_t backend_cpu, int n_threads);
-    GGML_API void ggml_backend_cpu_set_threadpool    (ggml_backend_t backend_cpu, ggml_threadpool_t threadpool);
-    GGML_API void ggml_backend_cpu_set_abort_callback(ggml_backend_t backend_cpu, ggml_abort_callback abort_callback, void * abort_callback_data);
+    GGML_BACKEND_API bool ggml_backend_is_cpu                (ggml_backend_t backend);
+    GGML_BACKEND_API void ggml_backend_cpu_set_n_threads     (ggml_backend_t backend_cpu, int n_threads);
+    GGML_BACKEND_API void ggml_backend_cpu_set_threadpool    (ggml_backend_t backend_cpu, ggml_threadpool_t threadpool);
+    GGML_BACKEND_API void ggml_backend_cpu_set_abort_callback(ggml_backend_t backend_cpu, ggml_abort_callback abort_callback, void * abort_callback_data);
 
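Typical backend setup pairs `ggml_backend_cpu_init` with the thread setters; execution then goes through the generic ggml-backend API (`ggml_backend_graph_compute` and `ggml_backend_free` live in ggml-backend.h). A sketch with an illustrative helper name:

```c
#include "ggml-backend.h"
#include "ggml-cpu.h"

// a minimal sketch: create the CPU backend, size its thread count, and
// run a previously built graph through the generic backend API
enum ggml_status run_on_cpu(struct ggml_cgraph * graph) {
    ggml_backend_t backend = ggml_backend_cpu_init();
    ggml_backend_cpu_set_n_threads(backend, 8);

    enum ggml_status status = ggml_backend_graph_compute(backend, graph);

    ggml_backend_free(backend);
    return status;
}
```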
-    GGML_API ggml_backend_reg_t ggml_backend_cpu_reg(void);
+    GGML_BACKEND_API ggml_backend_reg_t ggml_backend_cpu_reg(void);
 
 #ifdef GGML_USE_CPU_HBM
-    GGML_API ggml_backend_buffer_type_t ggml_backend_cpu_hbm_buffer_type(void);
+    GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_cpu_hbm_buffer_type(void);
 #endif
 
+    GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_cpu_aarch64_buffer_type(void);
+    GGML_BACKEND_API bool ggml_backend_cpu_buft_is_aarch64(ggml_backend_buffer_type_t buft);
+
 
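The aarch64 buffer type appears to back the repacked weight layouts used on ARM, and the predicate lets callers recognize such buffers. A short sketch (helper name is illustrative):

```c
#include "ggml-cpu.h"

// a minimal sketch: query the aarch64 buffer type and confirm the
// predicate recognizes it
bool aarch64_buft_demo(void) {
    ggml_backend_buffer_type_t buft = ggml_backend_cpu_aarch64_buffer_type();
    return ggml_backend_cpu_buft_is_aarch64(buft);
}
```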
 #ifdef __cplusplus
 }
 #endif