From b0f2e55216bc241a174783252bd2301b0089f283 Mon Sep 17 00:00:00 2001 From: slaren Date: Mon, 4 Dec 2023 16:30:42 +0100 Subject: [PATCH] mul_mat_id : fix missing init task --- src/ggml-cuda.cu | 4 ++-- src/ggml.c | 8 ++++++-- tests/test-backend-ops.cpp | 6 +++--- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/src/ggml-cuda.cu b/src/ggml-cuda.cu index 2bf100ff0b..19245ed3c4 100644 --- a/src/ggml-cuda.cu +++ b/src/ggml-cuda.cu @@ -8756,8 +8756,8 @@ static ggml_backend_t ggml_backend_reg_cuda_init(const char * params, void * use } static int ggml_backend_cuda_reg_devices() { - //int device_count = ggml_cuda_get_device_count(); - int device_count = 1; // DEBUG: some tools require delaying CUDA initialization + int device_count = ggml_cuda_get_device_count(); + //int device_count = 1; // DEBUG: some tools require delaying CUDA initialization for (int i = 0; i < device_count; i++) { char name[128]; snprintf(name, sizeof(name), "%s%d", GGML_CUDA_NAME, i); diff --git a/src/ggml.c b/src/ggml.c index 5480bbdaba..86b0102efa 100644 --- a/src/ggml.c +++ b/src/ggml.c @@ -1775,6 +1775,7 @@ static void ggml_setup_op_has_task_pass(void) { p[GGML_OP_ACC ] = true; p[GGML_OP_MUL_MAT ] = true; + p[GGML_OP_MUL_MAT_ID ] = true; p[GGML_OP_OUT_PROD ] = true; p[GGML_OP_SET ] = true; p[GGML_OP_GET_ROWS_BACK ] = true; @@ -4111,7 +4112,6 @@ struct ggml_tensor * ggml_mul_mat_id( } return result; - } // ggml_out_prod @@ -9605,6 +9605,8 @@ static void ggml_compute_forward_mul_mat( char * wdata = params->wdata; const size_t row_size = ne10*ggml_type_size(vec_dot_type)/ggml_blck_size(vec_dot_type); + assert(params->wsize >= ne11*ne12*ne13*row_size); + for (int64_t i13 = 0; i13 < ne13; ++i13) { for (int64_t i12 = 0; i12 < ne12; ++i12) { for (int64_t i11 = 0; i11 < ne11; ++i11) { @@ -9719,6 +9721,8 @@ static void ggml_compute_forward_mul_mat_id( const int a_id = ((int32_t *)ids->data)[id]; + GGML_ASSERT(a_id >= 0 && a_id < ids->ne[0]); + const struct ggml_tensor * src0 = dst->src[a_id + 2]; ggml_compute_forward_mul_mat(params, src0, src1, dst); @@ -16056,7 +16060,7 @@ struct ggml_cplan ggml_graph_plan(struct ggml_cgraph * cgraph, int n_threads) { const enum ggml_type vec_dot_type = type_traits[a->type].vec_dot_type; #if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) if (ggml_compute_forward_mul_mat_use_blas(a, b, node)) { - if (node->src[0]->type != GGML_TYPE_F32) { + if (a->type != GGML_TYPE_F32) { // here we need memory just for single 2D matrix from src0 cur = ggml_type_size(GGML_TYPE_F32)*(a->ne[0]*a->ne[1]); } diff --git a/tests/test-backend-ops.cpp b/tests/test-backend-ops.cpp index 0cbb29b422..201fcf16d7 100644 --- a/tests/test-backend-ops.cpp +++ b/tests/test-backend-ops.cpp @@ -261,7 +261,7 @@ struct test_case { for (size_t i = 0; i < f1.size(); i++) { // check for nans if (std::isnan(f1[i]) || std::isnan(f2[i])) { - printf(" Error: %s: NaN\n", ggml_op_desc(t1)); + printf(" Error: %s: NaN at index %zu\n", ggml_op_desc(t1), i); *ok = false; return true; } @@ -989,8 +989,8 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op for (ggml_type type_a : {GGML_TYPE_F32, GGML_TYPE_F16}) { for (ggml_type type_b : {GGML_TYPE_F32 /*, GGML_TYPE_F16 */}) { - for (int n_mats : {2, 4}) { - for (int id = 0; id < 2; id++) { + for (int n_mats : {1, 2, 4}) { + for (int id = 0; id < n_mats; id++) { test_cases.emplace_back(new test_mul_mat_id(type_a, type_b, n_mats, id, 32, 32, 32, {1, 1}, {1, 1})); } }