Skip to content

Commit 7048605

Browse files
committed
ggml-hexagon: sync with branch self-build
1 parent fafca74 commit 7048605

File tree

8 files changed

+236
-162
lines changed

8 files changed

+236
-162
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,3 +146,5 @@ poetry.toml
146146
# Local scripts
147147
/run-vim.sh
148148
/run-chat.sh
149+
150+
/prebuilts

ggml/include/ggml-hexagon.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,9 @@ GGML_BACKEND_API ggml_backend_reg_t ggml_backend_hexagon_reg(void);
4040

4141
GGML_BACKEND_API const char * ggml_backend_hexagon_get_devname(size_t dev_num);
4242

43-
GGML_BACKEND_API void ggml_backend_set_hexagon_cfg(int new_hexagon_backend, int new_hwaccel_approach);
43+
GGML_BACKEND_API void ggml_backend_hexagon_set_cfg(int new_hexagon_backend, int new_hwaccel_approach);
44+
45+
GGML_BACKEND_API int ggml_backend_hexagon_get_mulmat_algotype(void);
4446

4547
#ifdef __cplusplus
4648
}

ggml/src/ggml-hexagon/ggml-hexagon.cpp

Lines changed: 27 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -342,6 +342,7 @@ struct hexagon_appcfg_t {
342342
int profiler_duration; // threshold of duration in profiler, per seconds
343343
int profiler_counts; // threshold of counts in profiler
344344
int thread_counts; // thread_counts on cDSP side
345+
int mulmat_algotype; // algorithm type of mulmat on cDSP side
345346
const char * cfgfilename;
346347
const char * runtime_libpath;
347348
char ggml_hexagon_version[GGMLHEXAGON_TMPBUF_LEN];
@@ -367,6 +368,7 @@ static struct hexagon_appcfg_t g_hexagon_appcfg = {
367368
.profiler_duration = 5, //seconds
368369
.profiler_counts = 100,
369370
.thread_counts = 4,
371+
.mulmat_algotype = 0,
370372
.cfgfilename = "ggml-hexagon.cfg",
371373
#if defined(__ANDROID__)
372374
#if defined(STANDARD_ANDROID_APP)
@@ -379,7 +381,7 @@ static struct hexagon_appcfg_t g_hexagon_appcfg = {
379381
#elif defined(_WIN32)
380382
.qnn_runtimelib_path = "C:\\",
381383
#endif
382-
.ggml_hexagon_version = {"1.11"},
384+
.ggml_hexagon_version = {"1.12"},
383385
.ggml_dsp_version = {"0.63"},
384386
};
385387

@@ -643,6 +645,7 @@ static constexpr const qnn_op_caps ggmlqnn_k_op_caps[] = {
643645
{false, GGML_OP_UPSCALE, 0, nullptr},
644646
{false, GGML_OP_PAD, 0, nullptr},
645647
{false, GGML_OP_PAD_REFLECT_1D, 0, nullptr},
648+
{false, GGML_OP_ROLL, 0, nullptr},
646649
{false, GGML_OP_ARANGE, 0, nullptr},
647650
{false, GGML_OP_TIMESTEP_EMBEDDING, 0, nullptr},
648651
{false, GGML_OP_ARGSORT, 0, nullptr},
@@ -751,6 +754,7 @@ static constexpr const hexagon_op_caps ggmlhexagon_k_op_caps[] = {
751754
{false, GGML_OP_UPSCALE, 0, nullptr, nullptr},
752755
{false, GGML_OP_PAD, 0, nullptr, nullptr},
753756
{false, GGML_OP_PAD_REFLECT_1D, 0, nullptr, nullptr},
757+
{false, GGML_OP_ROLL, 0, nullptr, nullptr},
754758
{false, GGML_OP_ARANGE, 0, nullptr, nullptr},
755759
{false, GGML_OP_TIMESTEP_EMBEDDING, 0, nullptr, nullptr},
756760
{false, GGML_OP_ARGSORT, 0, nullptr, nullptr},
@@ -1322,11 +1326,9 @@ class hexagon_perf {
13221326
// had to expose two public function in hexagon_profiler class
13231327
if (g_hexagon_profiler.profiler_get_frame_index() <= g_hexagon_profiler.profiler_get_threshold_count()) {
13241328
const char * devname = ggml_backend_hexagon_get_devname(g_hexagon_appcfg.hexagon_backend);
1329+
//the logic here is safe because the backend index was already checked in ggml_backend_hexagon_device_init_backend
13251330
if (g_hexagon_appcfg.hexagon_backend != HEXAGON_BACKEND_GGML) {
1326-
//add this check for a special scenario: an invalid value passed from user's program
1327-
if (0 != memcmp(devname, "unknown", strlen("unknown"))) {
1328-
devname += 16;
1329-
}
1331+
devname += 16;
13301332
}
13311333
GGMLHEXAGON_LOG_VERBOSE("inference duration of %s through %s: %lld microseconds",
13321334
_perf_name.c_str(), devname, _duration);
@@ -2006,6 +2008,7 @@ static void ggmlhexagon_load_cfg() {
20062008
hexagoncfg_instance.get_intvalue("cdsp", "enable_rpc_ion_mempool", g_hexagon_appcfg.enable_rpc_ion_mempool, 0);
20072009
hexagoncfg_instance.get_intvalue("cdsp", "enable_all_q_mulmat", g_hexagon_appcfg.enable_all_q_mulmat, 0);
20082010
hexagoncfg_instance.get_intvalue("cdsp", "thread_counts", g_hexagon_appcfg.thread_counts, 4);
2011+
hexagoncfg_instance.get_intvalue("cdsp", "mulmat_algotype", g_hexagon_appcfg.mulmat_algotype, 0);
20092012

20102013
memcpy(g_hexagon_appcfg.ggml_dsp_version, ggmldsp_version.c_str(), strlen(ggmldsp_version.c_str()));
20112014

@@ -2053,7 +2056,7 @@ static void ggmlhexagon_load_cfg() {
20532056
initialized = true;
20542057
}
20552058

2056-
void ggml_backend_set_hexagon_cfg(int new_hexagon_backend, int new_hwaccel_approach) {
2059+
void ggml_backend_hexagon_set_cfg(int new_hexagon_backend, int new_hwaccel_approach) {
20572060
std::string cfg_filename = std::string(g_hexagon_appcfg.runtime_libpath) + std::string(g_hexagon_appcfg.cfgfilename);
20582061
GGMLHEXAGON_LOG_VERBOSE("load hexagon appcfg from %s", cfg_filename.c_str());
20592062
hexagon_appcfg hexagoncfg_instance;
@@ -2063,14 +2066,25 @@ void ggml_backend_set_hexagon_cfg(int new_hexagon_backend, int new_hwaccel_appro
20632066
hexagoncfg_instance.dump([](const std::string & section, const std::string & key, const std::string value) {
20642067
std::ostringstream tmposs;
20652068
tmposs << "section[" << std::setw(10) << std::left << section << "],[" << std::setw(25) << std::left << key << "] = [" << value << "]";
2069+
#if 0
20662070
if (ggmlhexagon_is_llamabench_running()) {
20672071
GGMLHEXAGON_LOG_VERBOSE("%s", tmposs.str().c_str());
20682072
} else {
20692073
GGMLHEXAGON_LOG_INFO("%s", tmposs.str().c_str());
20702074
}
2075+
#endif
2076+
GGMLHEXAGON_LOG_VERBOSE("%s", tmposs.str().c_str());
20712077
});
20722078
}
20732079

2080+
// Returns the mulmat algorithm type configured for the cDSP side.
// Loads the config file at runtime_libpath + cfgfilename, reads the "mulmat_algotype"
// key of the "cdsp" section into g_hexagon_appcfg.mulmat_algotype, and returns that value.
// NOTE(review): the config file is loaded on every call and the global field is
// overwritten each time; the result of load() is not checked here.
int ggml_backend_hexagon_get_mulmat_algotype() {
2081+
std::string cfg_filename = std::string(g_hexagon_appcfg.runtime_libpath) + std::string(g_hexagon_appcfg.cfgfilename);
2082+
hexagon_appcfg hexagoncfg_instance;
2083+
hexagoncfg_instance.load(cfg_filename);
2084+
// NOTE(review): the trailing 0 is presumably the default used when the key is absent -- confirm against hexagon_appcfg::get_intvalue
hexagoncfg_instance.get_intvalue("cdsp", "mulmat_algotype", g_hexagon_appcfg.mulmat_algotype, 0);
2085+
return g_hexagon_appcfg.mulmat_algotype;
2086+
}
2087+
20742088
static bool ggmlhexagon_check_valid_appcfg() {
20752089
bool is_valid_appcfg = true;
20762090

@@ -5641,7 +5655,9 @@ static int ggmlhexagon_init_dsp(ggml_backend_hexagon_context * ctx) {
56415655
}
56425656
ggmlhexagon_probe_dspinfo(ctx);
56435657
//FIXME: re-use this function to pass thread_counts info to code on cDSP side before fully understand qidl mechanism
5644-
ggmlop_dsp_setclocks(ctx->ggmlop_handle, HAP_DCVS_VCORNER_TURBO_PLUS, 40, 1, g_hexagon_appcfg.thread_counts);
5658+
//ggmlop_dsp_setclocks(ctx->ggmlop_handle, HAP_DCVS_VCORNER_TURBO_PLUS, 40, 1, g_hexagon_appcfg.thread_counts);
5659+
//backward compatible with the previous code on the cDSP side
5660+
ggmlop_dsp_setclocks(ctx->ggmlop_handle, HAP_DCVS_VCORNER_TURBO_PLUS, 40, g_hexagon_appcfg.mulmat_algotype, g_hexagon_appcfg.thread_counts);
56455661
ggmlhexagon_set_rpc_latency(ctx->ggmlop_handle, RPC_POLL_QOS, 100);
56465662
int result = ggmlhexagon_init_rpcmempool(ctx);
56475663
if (0 != result) {
@@ -6427,7 +6443,10 @@ static ggml_backend_t ggml_backend_hexagon_device_init_backend(ggml_backend_dev_
64276443
if (dev_index < 0) {
64286444
GGMLHEXAGON_LOG_VERBOSE("it shouldn't happend\n");
64296445
//test-thread-safety might-be running at the moment or an invalid value passed from user's program
6430-
dev_index = 0;
6446+
dev_index = HEXAGON_BACKEND_QNNCPU; //0
6447+
}
6448+
if (dev_index > GGML_HEXAGON_MAX_DEVICES) {
6449+
dev_index = HEXAGON_BACKEND_GGML; //4
64316450
}
64326451
g_hexagon_appcfg.hexagon_backend = dev_index;
64336452
GGMLHEXAGON_LOG_VERBOSE("program specified dev_index %d\n", dev_index);

ggml/src/ggml-hexagon/kernels/entry.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ int ggmlop_dsp_close(remote_handle64 handle) {
3434
return 0;
3535
}
3636

37-
AEEResult ggmlop_dsp_setclocks(remote_handle64 handle, int32 power_level, int32 latency, int32 dcvs_enabled, int32 thread_counts) {
37+
AEEResult ggmlop_dsp_setclocks(remote_handle64 handle, int32 power_level, int32 latency, int32 mulmat_algo, int32 thread_counts) {
3838
GGMLHEXAGON_LOG_DEBUG("enter %s", __func__);
3939
HAP_power_request_t request;
4040
memset(&request, 0, sizeof(HAP_power_request_t));
@@ -60,7 +60,7 @@ AEEResult ggmlop_dsp_setclocks(remote_handle64 handle, int32 power_level, int32
6060
request.type = HAP_power_set_DCVS_v2;
6161
request.dcvs_v2.dcvs_enable = TRUE;
6262
request.dcvs_v2.dcvs_params.target_corner = (HAP_dcvs_voltage_corner_t)power_level;
63-
if (dcvs_enabled) {
63+
if (mulmat_algo) {
6464
request.dcvs_v2.dcvs_params.min_corner = HAP_DCVS_VCORNER_DISABLE;
6565
request.dcvs_v2.dcvs_params.max_corner = HAP_DCVS_VCORNER_DISABLE;
6666
} else {

ggml/src/ggml-hexagon/kernels/skel.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -272,7 +272,7 @@ __QAIC_HEADER_EXPORT int __QAIC_HEADER(ggmlop_dsp_open)(const char* uri, remote_
272272
* @retval, 0 on success, should always succeed
273273
*/
274274
__QAIC_HEADER_EXPORT int __QAIC_HEADER(ggmlop_dsp_close)(remote_handle64 h) __QAIC_HEADER_ATTRIBUTE;
275-
__QAIC_HEADER_EXPORT AEEResult __QAIC_HEADER(ggmlop_dsp_setclocks)(remote_handle64 _h, int32 power_level, int32 latency, int32 dcvs_enable, int32 threads) __QAIC_HEADER_ATTRIBUTE;
275+
__QAIC_HEADER_EXPORT AEEResult __QAIC_HEADER(ggmlop_dsp_setclocks)(remote_handle64 _h, int32 power_level, int32 latency, int32 mulmat_algotype, int32 thread_counts) __QAIC_HEADER_ATTRIBUTE;
276276
__QAIC_HEADER_EXPORT int __QAIC_HEADER(ggmlop_dsp_add)(remote_handle64 _h, const dsptensor* src0, const dsptensor* src1, dsptensor* dst) __QAIC_HEADER_ATTRIBUTE;
277277
__QAIC_HEADER_EXPORT int __QAIC_HEADER(ggmlop_dsp_mulmat)(remote_handle64 _h, const dsptensor* src0, const dsptensor* src1, dsptensor* dst) __QAIC_HEADER_ATTRIBUTE;
278278
__QAIC_HEADER_EXPORT int __QAIC_HEADER(ggmlop_dsp_softmax)(remote_handle64 _h, const dsptensor* src0, const dsptensor* src1, dsptensor* dst) __QAIC_HEADER_ATTRIBUTE;

ggml/src/ggml-hexagon/kernels/stub.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -312,9 +312,9 @@ static __inline int _stub_method(remote_handle64 _handle, uint32_t _mid, uint32_
312312
}
313313
return _nErr;
314314
}
315-
__QAIC_STUB_EXPORT AEEResult __QAIC_STUB(ggmlop_dsp_setclocks)(remote_handle64 _handle, int32 power_level, int32 latency, int32 dcvs_enable, int32 threads) __QAIC_STUB_ATTRIBUTE {
315+
__QAIC_STUB_EXPORT AEEResult __QAIC_STUB(ggmlop_dsp_setclocks)(remote_handle64 _handle, int32 power_level, int32 latency, int32 mulmat_algotype, int32 threads) __QAIC_STUB_ATTRIBUTE {
316316
uint32_t _mid = 2;
317-
return _stub_method(_handle, _mid, (uint32_t*)&power_level, (uint32_t*)&latency, (uint32_t*)&dcvs_enable, (uint32_t*)&threads);
317+
return _stub_method(_handle, _mid, (uint32_t*)&power_level, (uint32_t*)&latency, (uint32_t*)&mulmat_algotype, (uint32_t*)&threads);
318318
}
319319
static __inline int _stub_unpack(_ATTRIBUTE_UNUSED remote_arg* _praROutPost, _ATTRIBUTE_UNUSED remote_arg* _ppraROutPost[1], _ATTRIBUTE_UNUSED void* _primROut, _ATTRIBUTE_UNUSED uint32_t _rout0[1], _ATTRIBUTE_UNUSED uint32_t _rout1[4], _ATTRIBUTE_UNUSED uint32_t _rout2[4], _ATTRIBUTE_UNUSED uint32_t _rout3[1], _ATTRIBUTE_UNUSED uint32_t _rout4[16], _ATTRIBUTE_UNUSED uint32_t _rout5[1], _ATTRIBUTE_UNUSED char* _rout6[1], _ATTRIBUTE_UNUSED uint32_t _rout6Len[1]) {
320320
int _nErr = 0;

0 commit comments

Comments
 (0)