@@ -342,6 +342,7 @@ struct hexagon_appcfg_t {
342
342
int profiler_duration; // threshold of duration in profiler, per seconds
343
343
int profiler_counts; // threshold of counts in profiler
344
344
int thread_counts; // thread_counts on cDSP side
345
+ int mulmat_algotype; // algorithm type of mulmat on cDSP side
345
346
const char * cfgfilename;
346
347
const char * runtime_libpath;
347
348
char ggml_hexagon_version[GGMLHEXAGON_TMPBUF_LEN];
@@ -367,6 +368,7 @@ static struct hexagon_appcfg_t g_hexagon_appcfg = {
367
368
.profiler_duration = 5 , // seconds
368
369
.profiler_counts = 100 ,
369
370
.thread_counts = 4 ,
371
+ .mulmat_algotype = 0 ,
370
372
.cfgfilename = " ggml-hexagon.cfg" ,
371
373
#if defined(__ANDROID__)
372
374
#if defined(STANDARD_ANDROID_APP)
@@ -379,7 +381,7 @@ static struct hexagon_appcfg_t g_hexagon_appcfg = {
379
381
#elif defined(_WIN32)
380
382
.qnn_runtimelib_path = " C:\\ " ,
381
383
#endif
382
- .ggml_hexagon_version = {" 1.11 " },
384
+ .ggml_hexagon_version = {" 1.12 " },
383
385
.ggml_dsp_version = {" 0.63" },
384
386
};
385
387
@@ -643,6 +645,7 @@ static constexpr const qnn_op_caps ggmlqnn_k_op_caps[] = {
643
645
{false , GGML_OP_UPSCALE, 0 , nullptr },
644
646
{false , GGML_OP_PAD, 0 , nullptr },
645
647
{false , GGML_OP_PAD_REFLECT_1D, 0 , nullptr },
648
+ {false , GGML_OP_ROLL, 0 , nullptr },
646
649
{false , GGML_OP_ARANGE, 0 , nullptr },
647
650
{false , GGML_OP_TIMESTEP_EMBEDDING, 0 , nullptr },
648
651
{false , GGML_OP_ARGSORT, 0 , nullptr },
@@ -751,6 +754,7 @@ static constexpr const hexagon_op_caps ggmlhexagon_k_op_caps[] = {
751
754
{false , GGML_OP_UPSCALE, 0 , nullptr , nullptr },
752
755
{false , GGML_OP_PAD, 0 , nullptr , nullptr },
753
756
{false , GGML_OP_PAD_REFLECT_1D, 0 , nullptr , nullptr },
757
+ {false , GGML_OP_ROLL, 0 , nullptr , nullptr },
754
758
{false , GGML_OP_ARANGE, 0 , nullptr , nullptr },
755
759
{false , GGML_OP_TIMESTEP_EMBEDDING, 0 , nullptr , nullptr },
756
760
{false , GGML_OP_ARGSORT, 0 , nullptr , nullptr },
@@ -1322,11 +1326,9 @@ class hexagon_perf {
1322
1326
// had to expose two public functions in the hexagon_profiler class
1323
1327
if (g_hexagon_profiler.profiler_get_frame_index () <= g_hexagon_profiler.profiler_get_threshold_count ()) {
1324
1328
const char * devname = ggml_backend_hexagon_get_devname (g_hexagon_appcfg.hexagon_backend );
1329
+ // the logic here makes sense because hexagon_backend was already checked in ggml_backend_hexagon_device_init_backend
1325
1330
if (g_hexagon_appcfg.hexagon_backend != HEXAGON_BACKEND_GGML) {
1326
- // add this check for a special scenario: an invalid value passed from user's program
1327
- if (0 != memcmp (devname, " unknown" , strlen (" unknown" ))) {
1328
- devname += 16 ;
1329
- }
1331
+ devname += 16 ;
1330
1332
}
1331
1333
GGMLHEXAGON_LOG_VERBOSE (" inference duration of %s through %s: %lld microseconds" ,
1332
1334
_perf_name.c_str (), devname, _duration);
@@ -2006,6 +2008,7 @@ static void ggmlhexagon_load_cfg() {
2006
2008
hexagoncfg_instance.get_intvalue (" cdsp" , " enable_rpc_ion_mempool" , g_hexagon_appcfg.enable_rpc_ion_mempool , 0 );
2007
2009
hexagoncfg_instance.get_intvalue (" cdsp" , " enable_all_q_mulmat" , g_hexagon_appcfg.enable_all_q_mulmat , 0 );
2008
2010
hexagoncfg_instance.get_intvalue (" cdsp" , " thread_counts" , g_hexagon_appcfg.thread_counts , 4 );
2011
+ hexagoncfg_instance.get_intvalue (" cdsp" , " mulmat_algotype" , g_hexagon_appcfg.mulmat_algotype , 0 );
2009
2012
2010
2013
memcpy (g_hexagon_appcfg.ggml_dsp_version , ggmldsp_version.c_str (), strlen (ggmldsp_version.c_str ()));
2011
2014
@@ -2053,7 +2056,7 @@ static void ggmlhexagon_load_cfg() {
2053
2056
initialized = true ;
2054
2057
}
2055
2058
2056
- void ggml_backend_set_hexagon_cfg (int new_hexagon_backend, int new_hwaccel_approach) {
2059
+ void ggml_backend_hexagon_set_cfg (int new_hexagon_backend, int new_hwaccel_approach) {
2057
2060
std::string cfg_filename = std::string (g_hexagon_appcfg.runtime_libpath ) + std::string (g_hexagon_appcfg.cfgfilename );
2058
2061
GGMLHEXAGON_LOG_VERBOSE (" load hexagon appcfg from %s" , cfg_filename.c_str ());
2059
2062
hexagon_appcfg hexagoncfg_instance;
@@ -2063,14 +2066,25 @@ void ggml_backend_set_hexagon_cfg(int new_hexagon_backend, int new_hwaccel_appro
2063
2066
hexagoncfg_instance.dump ([](const std::string & section, const std::string & key, const std::string value) {
2064
2067
std::ostringstream tmposs;
2065
2068
tmposs << " section[" << std::setw (10 ) << std::left << section << " ],[" << std::setw (25 ) << std::left << key << " ] = [" << value << " ]" ;
2069
+ #if 0
2066
2070
if (ggmlhexagon_is_llamabench_running()) {
2067
2071
GGMLHEXAGON_LOG_VERBOSE("%s", tmposs.str().c_str());
2068
2072
} else {
2069
2073
GGMLHEXAGON_LOG_INFO("%s", tmposs.str().c_str());
2070
2074
}
2075
+ #endif
2076
+ GGMLHEXAGON_LOG_VERBOSE (" %s" , tmposs.str ().c_str ());
2071
2077
});
2072
2078
}
2073
2079
2080
// Re-reads the "mulmat_algotype" setting from the hexagon config file and returns it.
//
// The config file path is built by concatenating g_hexagon_appcfg.runtime_libpath and
// g_hexagon_appcfg.cfgfilename. The file is loaded into a temporary hexagon_appcfg
// instance on every call, so this is not a plain accessor: the value stored in the
// global g_hexagon_appcfg.mulmat_algotype is handed to get_intvalue() and then returned.
//
// NOTE(review): any result of load() is not inspected here, so a missing or unreadable
// config file is not reported by this function — confirm that is intended.
+ int ggml_backend_hexagon_get_mulmat_algotype () {
2081
+ std::string cfg_filename = std::string (g_hexagon_appcfg.runtime_libpath ) + std::string (g_hexagon_appcfg.cfgfilename );
2082
+ hexagon_appcfg hexagoncfg_instance;
2083
+ hexagoncfg_instance.load (cfg_filename);
2084
+ hexagoncfg_instance.get_intvalue (" cdsp" , " mulmat_algotype" , g_hexagon_appcfg.mulmat_algotype , 0 ); // trailing 0 is presumably the fallback value — confirm against hexagon_appcfg::get_intvalue
2085
+ return g_hexagon_appcfg.mulmat_algotype ;
2086
+ }
2087
+
2074
2088
static bool ggmlhexagon_check_valid_appcfg () {
2075
2089
bool is_valid_appcfg = true ;
2076
2090
@@ -5641,7 +5655,9 @@ static int ggmlhexagon_init_dsp(ggml_backend_hexagon_context * ctx) {
5641
5655
}
5642
5656
ggmlhexagon_probe_dspinfo (ctx);
5643
5657
// FIXME: re-use this function to pass thread_counts info to the code on the cDSP side until the qidl mechanism is fully understood
5644
- ggmlop_dsp_setclocks (ctx->ggmlop_handle , HAP_DCVS_VCORNER_TURBO_PLUS, 40 , 1 , g_hexagon_appcfg.thread_counts );
5658
+ // ggmlop_dsp_setclocks(ctx->ggmlop_handle, HAP_DCVS_VCORNER_TURBO_PLUS, 40, 1, g_hexagon_appcfg.thread_counts);
5659
+ // backward compatible with the previous code on the cDSP side
5660
+ ggmlop_dsp_setclocks (ctx->ggmlop_handle , HAP_DCVS_VCORNER_TURBO_PLUS, 40 , g_hexagon_appcfg.mulmat_algotype , g_hexagon_appcfg.thread_counts );
5645
5661
ggmlhexagon_set_rpc_latency (ctx->ggmlop_handle , RPC_POLL_QOS, 100 );
5646
5662
int result = ggmlhexagon_init_rpcmempool (ctx);
5647
5663
if (0 != result) {
@@ -6427,7 +6443,10 @@ static ggml_backend_t ggml_backend_hexagon_device_init_backend(ggml_backend_dev_
6427
6443
if (dev_index < 0 ) {
6428
6444
GGMLHEXAGON_LOG_VERBOSE (" it shouldn't happend\n " );
6429
6445
// test-thread-safety might be running at the moment, or an invalid value was passed from the user's program
6430
- dev_index = 0 ;
6446
+ dev_index = HEXAGON_BACKEND_QNNCPU; // 0
6447
+ }
6448
+ if (dev_index > GGML_HEXAGON_MAX_DEVICES) {
6449
+ dev_index = HEXAGON_BACKEND_GGML; // 4
6431
6450
}
6432
6451
g_hexagon_appcfg.hexagon_backend = dev_index;
6433
6452
GGMLHEXAGON_LOG_VERBOSE (" program specified dev_index %d\n " , dev_index);
0 commit comments