Skip to content

Commit

Permalink
mul_mat_id : fix missing init task
Browse the repository at this point in the history
  • Loading branch information
slaren committed Dec 4, 2023
1 parent b5572d9 commit b0f2e55
Show file tree
Showing 3 changed files with 11 additions and 7 deletions.
4 changes: 2 additions & 2 deletions src/ggml-cuda.cu
Original file line number Diff line number Diff line change
Expand Up @@ -8756,8 +8756,8 @@ static ggml_backend_t ggml_backend_reg_cuda_init(const char * params, void * use
}

static int ggml_backend_cuda_reg_devices() {
//int device_count = ggml_cuda_get_device_count();
int device_count = 1; // DEBUG: some tools require delaying CUDA initialization
int device_count = ggml_cuda_get_device_count();
//int device_count = 1; // DEBUG: some tools require delaying CUDA initialization
for (int i = 0; i < device_count; i++) {
char name[128];
snprintf(name, sizeof(name), "%s%d", GGML_CUDA_NAME, i);
Expand Down
8 changes: 6 additions & 2 deletions src/ggml.c
Original file line number Diff line number Diff line change
Expand Up @@ -1775,6 +1775,7 @@ static void ggml_setup_op_has_task_pass(void) {

p[GGML_OP_ACC ] = true;
p[GGML_OP_MUL_MAT ] = true;
p[GGML_OP_MUL_MAT_ID ] = true;
p[GGML_OP_OUT_PROD ] = true;
p[GGML_OP_SET ] = true;
p[GGML_OP_GET_ROWS_BACK ] = true;
Expand Down Expand Up @@ -4111,7 +4112,6 @@ struct ggml_tensor * ggml_mul_mat_id(
}

return result;

}

// ggml_out_prod
Expand Down Expand Up @@ -9605,6 +9605,8 @@ static void ggml_compute_forward_mul_mat(
char * wdata = params->wdata;
const size_t row_size = ne10*ggml_type_size(vec_dot_type)/ggml_blck_size(vec_dot_type);

assert(params->wsize >= ne11*ne12*ne13*row_size);

for (int64_t i13 = 0; i13 < ne13; ++i13) {
for (int64_t i12 = 0; i12 < ne12; ++i12) {
for (int64_t i11 = 0; i11 < ne11; ++i11) {
Expand Down Expand Up @@ -9719,6 +9721,8 @@ static void ggml_compute_forward_mul_mat_id(

const int a_id = ((int32_t *)ids->data)[id];

GGML_ASSERT(a_id >= 0 && a_id < ids->ne[0]);

const struct ggml_tensor * src0 = dst->src[a_id + 2];

ggml_compute_forward_mul_mat(params, src0, src1, dst);
Expand Down Expand Up @@ -16056,7 +16060,7 @@ struct ggml_cplan ggml_graph_plan(struct ggml_cgraph * cgraph, int n_threads) {
const enum ggml_type vec_dot_type = type_traits[a->type].vec_dot_type;
#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
if (ggml_compute_forward_mul_mat_use_blas(a, b, node)) {
if (node->src[0]->type != GGML_TYPE_F32) {
if (a->type != GGML_TYPE_F32) {
// here we need memory just for single 2D matrix from src0
cur = ggml_type_size(GGML_TYPE_F32)*(a->ne[0]*a->ne[1]);
}
Expand Down
6 changes: 3 additions & 3 deletions tests/test-backend-ops.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,7 @@ struct test_case {
for (size_t i = 0; i < f1.size(); i++) {
// check for nans
if (std::isnan(f1[i]) || std::isnan(f2[i])) {
printf(" Error: %s: NaN\n", ggml_op_desc(t1));
printf(" Error: %s: NaN at index %zu\n", ggml_op_desc(t1), i);
*ok = false;
return true;
}
Expand Down Expand Up @@ -989,8 +989,8 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op

for (ggml_type type_a : {GGML_TYPE_F32, GGML_TYPE_F16}) {
for (ggml_type type_b : {GGML_TYPE_F32 /*, GGML_TYPE_F16 */}) {
for (int n_mats : {2, 4}) {
for (int id = 0; id < 2; id++) {
for (int n_mats : {1, 2, 4}) {
for (int id = 0; id < n_mats; id++) {
test_cases.emplace_back(new test_mul_mat_id(type_a, type_b, n_mats, id, 32, 32, 32, {1, 1}, {1, 1}));
}
}
Expand Down

0 comments on commit b0f2e55

Please sign in to comment.