diff --git a/AUTOTEST/cmake.sh b/AUTOTEST/cmake.sh index 132d63c997..724bb30449 100755 --- a/AUTOTEST/cmake.sh +++ b/AUTOTEST/cmake.sh @@ -6,7 +6,7 @@ testname=`basename $0 .sh` -drivers="ij sstruct struct ams_driver maxwell_unscaled sstruct_fac ij_mv struct_migrate" +drivers="ij sstruct struct ams_driver maxwell_unscaled sstruct_fac ij_mv struct_migrate ij_assembly" # Echo usage information case $1 in diff --git a/src/IJ_mv/IJMatrix_parcsr_device.c b/src/IJ_mv/IJMatrix_parcsr_device.c index 09c14a7e6c..199c6fd191 100644 --- a/src/IJ_mv/IJMatrix_parcsr_device.c +++ b/src/IJ_mv/IJMatrix_parcsr_device.c @@ -579,6 +579,7 @@ hypre_IJMatrixAssembleParCSRDevice(hypre_IJMatrix *matrix) HYPRE_BigInt row_end = row_partitioning[1]; HYPRE_BigInt col_start = col_partitioning[0]; HYPRE_BigInt col_end = col_partitioning[1]; + HYPRE_BigInt col_first = hypre_IJMatrixGlobalFirstCol(matrix); HYPRE_Int nrows = row_end - row_start; HYPRE_Int ncols = col_end - col_start; @@ -743,6 +744,24 @@ hypre_IJMatrixAssembleParCSRDevice(hypre_IJMatrix *matrix) _1 - row_start ); #endif + /* adjust col indices wrt the global first index */ + if (col_first) + { +#if defined(HYPRE_USING_SYCL) + HYPRE_ONEDPL_CALL( std::transform, + new_j, + new_j + new_nnz, + new_j, + [col_first = col_first] (const auto & x) {return x - col_first;} ); +#else + HYPRE_THRUST_CALL( transform, + new_j, + new_j + new_nnz, + new_j, + _1 - col_first ); +#endif + } + hypre_TFree(new_i, HYPRE_MEMORY_DEVICE); HYPRE_Int num_cols_offd_new; @@ -769,9 +788,9 @@ hypre_IJMatrixAssembleParCSRDevice(hypre_IJMatrix *matrix) new_j, NULL, NULL, - col_start, - col_end - 1, - hypre_CSRMatrixNumCols(hypre_ParCSRMatrixOffd(par_matrix)), + col_start - col_first, + col_end - col_first - 1, + -1, NULL, NULL, NULL, @@ -817,8 +836,8 @@ hypre_IJMatrixAssembleParCSRDevice(hypre_IJMatrix *matrix) new_j, new_data, diag_nnz_existed || offd_nnz_existed ? 
new_sora : NULL, - col_start, - col_end - 1, + col_start - col_first, + col_end - col_first - 1, hypre_CSRMatrixNumCols(hypre_ParCSRMatrixOffd(par_matrix)), hypre_ParCSRMatrixDeviceColMapOffd(par_matrix), &col_map_offd_map, diff --git a/src/test/CMakeLists.txt b/src/test/CMakeLists.txt index 3dc3a2449f..d93d34de4e 100644 --- a/src/test/CMakeLists.txt +++ b/src/test/CMakeLists.txt @@ -18,6 +18,7 @@ set(TEST_SRCS struct_migrate.c sstruct_fac.c ij_mv.c + ij_assembly.c ) add_hypre_executables(TEST_SRCS) diff --git a/src/test/TEST_ij/air.jobs b/src/test/TEST_ij/air.jobs old mode 100644 new mode 100755 diff --git a/src/test/TEST_ij/assembly.jobs b/src/test/TEST_ij/assembly.jobs new file mode 100755 index 0000000000..0f2d901794 --- /dev/null +++ b/src/test/TEST_ij/assembly.jobs @@ -0,0 +1,18 @@ +#!/bin/bash +# Copyright (c) 1998 Lawrence Livermore National Security, LLC and other +# HYPRE Project Developers. See the top-level COPYRIGHT file for details. +# +# SPDX-License-Identifier: (Apache-2.0 OR MIT) + +#============================================================================= +# ij_assembly: Run default case +#============================================================================= + +mpirun -np 1 ./ij_assembly > assembly.out.0 + +mpirun -np 2 ./ij_assembly > assembly.out.1 + +mpirun -np 3 ./ij_assembly > assembly.out.2 + +mpirun -np 7 ./ij_assembly > assembly.out.3 + diff --git a/src/test/TEST_ij/assembly.sh b/src/test/TEST_ij/assembly.sh new file mode 100755 index 0000000000..61e8875c6b --- /dev/null +++ b/src/test/TEST_ij/assembly.sh @@ -0,0 +1,48 @@ +#!/bin/bash +# Copyright (c) 1998 Lawrence Livermore National Security, LLC and other +# HYPRE Project Developers. See the top-level COPYRIGHT file for details. 
+# +# SPDX-License-Identifier: (Apache-2.0 OR MIT) + +TNAME=`basename $0 .sh` +RTOL=$1 +ATOL=$2 + +#============================================================================= +# compare with baseline case +#============================================================================= + +FILES="\ + ${TNAME}.out.0\ + ${TNAME}.out.1\ + ${TNAME}.out.2\ + ${TNAME}.out.3\ +" + +for i in $FILES +do + echo "# Output file: $i" + tail -1 $i +done > ${TNAME}.out + +# Make sure that the output file is reasonable +RUNCOUNT=`echo $FILES | wc -w` +OUTCOUNT=`grep "error code" ${TNAME}.out | wc -l` +if [ "$OUTCOUNT" != "$RUNCOUNT" ]; then + echo "Incorrect number of runs in ${TNAME}.out" >&2 +fi + +# Make sure that the output file has no errors +for i in $FILES +do + echo "# Output file: $i" + echo "Test error code = 0" +done > ${TNAME}.testdata + +diff ${TNAME}.testdata ${TNAME}.out >&2 + +#============================================================================= +# remove temporary files +#============================================================================= + +rm -f ${TNAME}.testdata diff --git a/src/test/TEST_ij/fsai.jobs b/src/test/TEST_ij/fsai.jobs old mode 100644 new mode 100755 diff --git a/src/test/TEST_ij/ilu.jobs b/src/test/TEST_ij/ilu.jobs old mode 100644 new mode 100755 diff --git a/src/test/TEST_ij/lazy.jobs b/src/test/TEST_ij/lazy.jobs old mode 100644 new mode 100755 diff --git a/src/test/TEST_ij/posneg.jobs b/src/test/TEST_ij/posneg.jobs old mode 100644 new mode 100755 diff --git a/src/test/TEST_ij/state.jobs b/src/test/TEST_ij/state.jobs old mode 100644 new mode 100755 diff --git a/src/test/TEST_ij/vector.jobs b/src/test/TEST_ij/vector.jobs old mode 100644 new mode 100755 diff --git a/src/test/ij_assembly.c b/src/test/ij_assembly.c index f790b36d7f..1be56aa306 100644 --- a/src/test/ij_assembly.c +++ b/src/test/ij_assembly.c @@ -21,53 +21,51 @@ HYPRE_Int buildMatrixEntries(MPI_Comm comm, HYPRE_Int nx, HYPRE_Int ny, HYPRE_Int nz, HYPRE_Int Px, 
HYPRE_Int Py, HYPRE_Int Pz, - HYPRE_Real cx, HYPRE_Real cy, HYPRE_Real cz, + HYPRE_Real cx, HYPRE_Real cy, HYPRE_Real cz, HYPRE_Int base, HYPRE_BigInt *ilower, HYPRE_BigInt *iupper, HYPRE_BigInt *jlower, HYPRE_BigInt *jupper, HYPRE_Int *nrows, HYPRE_BigInt *num_nonzeros, HYPRE_Int **nnzrow_ptr, HYPRE_BigInt **rows_ptr, - HYPRE_BigInt **rows2_ptr, HYPRE_BigInt **cols_ptr, + HYPRE_BigInt **rows_coo_ptr, HYPRE_BigInt **cols_ptr, HYPRE_Real **coefs_ptr, HYPRE_Int stencil, HYPRE_ParCSRMatrix *parcsr_ptr); -HYPRE_Int getParCSRMatrixData(HYPRE_ParCSRMatrix A, HYPRE_Int *nrows_ptr, +HYPRE_Int getParCSRMatrixData(HYPRE_ParCSRMatrix A, HYPRE_Int base, HYPRE_Int *nrows_ptr, HYPRE_BigInt *num_nonzeros_ptr, - HYPRE_Int **nnzrow_ptr, HYPRE_BigInt **rows_ptr, HYPRE_BigInt **rows2_ptr, + HYPRE_Int **nnzrow_ptr, HYPRE_BigInt **rows_ptr, HYPRE_BigInt **rows_coo_ptr, HYPRE_BigInt **cols_ptr, HYPRE_Real **coefs_ptr); -HYPRE_Int checkMatrix(HYPRE_ParCSRMatrix parcsr_ref, HYPRE_IJMatrix ij_A); - -HYPRE_Int test_Set(MPI_Comm comm, HYPRE_MemoryLocation memory_location, HYPRE_Int option, - HYPRE_BigInt ilower, HYPRE_BigInt iupper, - HYPRE_Int nrows, HYPRE_BigInt num_nonzeros, - HYPRE_Int nchunks, HYPRE_Int *h_nnzrow, HYPRE_Int *nnzrow, - HYPRE_BigInt *rows, HYPRE_BigInt *cols, - HYPRE_Real *coefs, HYPRE_IJMatrix *ij_A_ptr); - -HYPRE_Int test_SetOffProc(HYPRE_ParCSRMatrix parcsr_A, HYPRE_MemoryLocation memory_location, - HYPRE_Int nchunks, HYPRE_Int option, HYPRE_IJMatrix *ij_AT_ptr); - -HYPRE_Int test_SetSet(MPI_Comm comm, HYPRE_MemoryLocation memory_location, HYPRE_Int option, - HYPRE_BigInt ilower, HYPRE_BigInt iupper, - HYPRE_Int nrows, HYPRE_BigInt num_nonzeros, - HYPRE_Int nchunks, HYPRE_Int *h_nnzrow, HYPRE_Int *nnzrow, - HYPRE_BigInt *rows, HYPRE_BigInt *cols, - HYPRE_Real *coefs, HYPRE_IJMatrix *ij_A_ptr); - -HYPRE_Int test_AddSet(MPI_Comm comm, HYPRE_MemoryLocation memory_location, HYPRE_Int option, - HYPRE_BigInt ilower, HYPRE_BigInt iupper, - HYPRE_Int nrows, HYPRE_BigInt 
num_nonzeros, - HYPRE_Int nchunks, HYPRE_Int *h_nnzrow, HYPRE_Int *nnzrow, - HYPRE_BigInt *rows, HYPRE_BigInt *cols, - HYPRE_Real *coefs, HYPRE_IJMatrix *ij_A_ptr); - -HYPRE_Int test_SetAddSet(MPI_Comm comm, HYPRE_MemoryLocation memory_location, HYPRE_Int option, - HYPRE_BigInt ilower, HYPRE_BigInt iupper, - HYPRE_Int nrows, HYPRE_BigInt num_nonzeros, - HYPRE_Int nchunks, HYPRE_Int *h_nnzrow, HYPRE_Int *nnzrow, - HYPRE_BigInt *rows, HYPRE_BigInt *cols, - HYPRE_Real *coefs, HYPRE_IJMatrix *ij_A_ptr); - -//#define CUDA_PROFILER +HYPRE_Real checkMatrix(HYPRE_ParCSRMatrix parcsr_ref, HYPRE_IJMatrix ij_A); + +HYPRE_Int test_Set(MPI_Comm comm, HYPRE_MemoryLocation memory_location, HYPRE_Int option, HYPRE_BigInt ilower, + HYPRE_BigInt iupper, HYPRE_BigInt jlower, HYPRE_BigInt jupper, HYPRE_Int nrows, + HYPRE_BigInt num_nonzeros, HYPRE_Int nchunks, HYPRE_Int *h_nnzrow, HYPRE_Int *nnzrow, + HYPRE_BigInt *rows, HYPRE_BigInt *cols, HYPRE_Real *coefs, HYPRE_IJMatrix *ij_A_ptr); + +HYPRE_Int test_AddTranspose(MPI_Comm comm, HYPRE_MemoryLocation memory_location, HYPRE_Int option, HYPRE_BigInt ilower, + HYPRE_BigInt iupper, HYPRE_BigInt jlower, HYPRE_BigInt jupper, HYPRE_Int nrows, + HYPRE_BigInt num_nonzeros, HYPRE_Int nchunks, HYPRE_Int *h_nnzrow, HYPRE_Int *nnzrow, + HYPRE_BigInt *rows, HYPRE_BigInt *cols, HYPRE_Real *coefs, HYPRE_IJMatrix *ij_AT_ptr); + +HYPRE_Int test_SetSet(MPI_Comm comm, HYPRE_MemoryLocation memory_location, HYPRE_Int option, HYPRE_BigInt ilower, + HYPRE_BigInt iupper, HYPRE_BigInt jlower, HYPRE_BigInt jupper, HYPRE_Int nrows, + HYPRE_BigInt num_nonzeros, HYPRE_Int nchunks, HYPRE_Int *h_nnzrow, HYPRE_Int *nnzrow, + HYPRE_BigInt *rows, HYPRE_BigInt *cols, HYPRE_Real *coefs, HYPRE_IJMatrix *ij_AT_ptr); + +HYPRE_Int test_AddSet(MPI_Comm comm, HYPRE_MemoryLocation memory_location, HYPRE_Int option, HYPRE_BigInt ilower, + HYPRE_BigInt iupper, HYPRE_BigInt jlower, HYPRE_BigInt jupper, HYPRE_Int nrows, + HYPRE_BigInt num_nonzeros, HYPRE_Int nchunks, HYPRE_Int 
*h_nnzrow, HYPRE_Int *nnzrow, + HYPRE_BigInt *rows, HYPRE_BigInt *cols, HYPRE_Real *coefs, HYPRE_IJMatrix *ij_AT_ptr); + +HYPRE_Int test_SetAdd(MPI_Comm comm, HYPRE_MemoryLocation memory_location, HYPRE_Int option, HYPRE_BigInt ilower, + HYPRE_BigInt iupper, HYPRE_BigInt jlower, HYPRE_BigInt jupper, HYPRE_Int nrows, + HYPRE_BigInt num_nonzeros, HYPRE_Int nchunks, HYPRE_Int *h_nnzrow, HYPRE_Int *nnzrow, + HYPRE_BigInt *rows, HYPRE_BigInt *cols, HYPRE_Real *coefs, HYPRE_IJMatrix *ij_AT_ptr); + +HYPRE_Int test_SetAddSet(MPI_Comm comm, HYPRE_MemoryLocation memory_location, HYPRE_Int option, HYPRE_BigInt ilower, + HYPRE_BigInt iupper, HYPRE_BigInt jlower, HYPRE_BigInt jupper, HYPRE_Int nrows, + HYPRE_BigInt num_nonzeros, HYPRE_Int nchunks, HYPRE_Int *h_nnzrow, HYPRE_Int *nnzrow, + HYPRE_BigInt *rows, HYPRE_BigInt *cols, HYPRE_Real *coefs, HYPRE_IJMatrix *ij_AT_ptr); + hypre_int main( hypre_int argc, @@ -84,27 +82,25 @@ main( hypre_int argc, HYPRE_ExecutionPolicy default_exec_policy; #endif char memory_location_name[8]; - HYPRE_Int nrows; HYPRE_BigInt num_nonzeros; HYPRE_BigInt ilower, iupper; HYPRE_BigInt jlower, jupper; HYPRE_Int *nnzrow = NULL, *h_nnzrow, *d_nnzrow = NULL; HYPRE_BigInt *rows = NULL, *h_rows, *d_rows = NULL; - HYPRE_BigInt *rows2 = NULL, *h_rows2, *d_rows2 = NULL; + HYPRE_BigInt *rows_coo = NULL, *h_rows_coo, *d_rows_coo = NULL; HYPRE_BigInt *cols = NULL, *h_cols, *d_cols = NULL; HYPRE_Real *coefs = NULL, *h_coefs, *d_coefs = NULL; - HYPRE_IJMatrix ij_A; - HYPRE_IJMatrix ij_AT; - HYPRE_ParCSRMatrix parcsr_ref; - + HYPRE_IJMatrix ij_A, ij_AT; + HYPRE_ParCSRMatrix parcsr_ref = NULL, parcsr_trans = NULL; // Driver input parameters HYPRE_Int Px, Py, Pz; HYPRE_Int nx, ny, nz; HYPRE_Real cx, cy, cz; HYPRE_Int nchunks; - HYPRE_Int mode; - HYPRE_Int option; + HYPRE_Int mode, ierr = 0; + HYPRE_Real tol = 0.; + HYPRE_Int option, base; HYPRE_Int stencil; HYPRE_Int print_matrix; @@ -149,9 +145,10 @@ main( hypre_int argc, default_exec_policy = HYPRE_EXEC_DEVICE; 
#endif memory_location = HYPRE_MEMORY_DEVICE; - mode = 1; + mode = (1 << 6) - 1; option = 1; - nchunks = 1; + nchunks = 3; + base = 0; print_matrix = 0; stencil = 7; @@ -213,6 +210,11 @@ main( hypre_int argc, arg_index++; nchunks = atoi(argv[arg_index++]); } + else if ( strcmp(argv[arg_index], "-base") == 0 ) + { + arg_index++; + base = atoi(argv[arg_index++]); + } else if ( strcmp(argv[arg_index], "-print") == 0 ) { arg_index++; @@ -250,6 +252,7 @@ main( hypre_int argc, hypre_printf(" 0 = HOST\n"); hypre_printf(" 1 = DEVICE (default)\n"); hypre_printf(" -nchunks : number of chunks passed to Set/AddValues\n"); + hypre_printf(" -base : matrix index base\n"); hypre_printf(" -mode : tests to be performed\n"); hypre_printf(" 1 = Set (default)\n"); hypre_printf(" 2 = SetOffProc\n"); @@ -297,16 +300,16 @@ main( hypre_int argc, /*----------------------------------------------------------- * Build matrix entries *-----------------------------------------------------------*/ - buildMatrixEntries(comm, nx, ny, nz, Px, Py, Pz, cx, cy, cz, + buildMatrixEntries(comm, nx, ny, nz, Px, Py, Pz, cx, cy, cz, base, &ilower, &iupper, &jlower, &jupper, &nrows, &num_nonzeros, - &h_nnzrow, &h_rows, &h_rows2, &h_cols, &h_coefs, stencil, &parcsr_ref); + &h_nnzrow, &h_rows, &h_rows_coo, &h_cols, &h_coefs, stencil, &parcsr_ref); switch (memory_location) { case HYPRE_MEMORY_DEVICE: d_nnzrow = hypre_TAlloc(HYPRE_Int, nrows, HYPRE_MEMORY_DEVICE); d_rows = hypre_TAlloc(HYPRE_BigInt, nrows, HYPRE_MEMORY_DEVICE); - d_rows2 = hypre_TAlloc(HYPRE_BigInt, num_nonzeros, HYPRE_MEMORY_DEVICE); + d_rows_coo = hypre_TAlloc(HYPRE_BigInt, num_nonzeros, HYPRE_MEMORY_DEVICE); d_cols = hypre_TAlloc(HYPRE_BigInt, num_nonzeros, HYPRE_MEMORY_DEVICE); d_coefs = hypre_TAlloc(HYPRE_Real, num_nonzeros, HYPRE_MEMORY_DEVICE); @@ -314,7 +317,7 @@ main( hypre_int argc, HYPRE_MEMORY_HOST); hypre_TMemcpy(d_rows, h_rows, HYPRE_BigInt, nrows, HYPRE_MEMORY_DEVICE, HYPRE_MEMORY_HOST); - hypre_TMemcpy(d_rows2, h_rows2, 
HYPRE_BigInt, num_nonzeros, HYPRE_MEMORY_DEVICE, + hypre_TMemcpy(d_rows_coo, h_rows_coo, HYPRE_BigInt, num_nonzeros, HYPRE_MEMORY_DEVICE, HYPRE_MEMORY_HOST); hypre_TMemcpy(d_cols, h_cols, HYPRE_BigInt, num_nonzeros, HYPRE_MEMORY_DEVICE, HYPRE_MEMORY_HOST); @@ -323,7 +326,7 @@ main( hypre_int argc, nnzrow = d_nnzrow; rows = d_rows; - rows2 = d_rows2; + rows_coo = d_rows_coo; cols = d_cols; coefs = d_coefs; break; @@ -331,7 +334,7 @@ main( hypre_int argc, case HYPRE_MEMORY_HOST: nnzrow = h_nnzrow; rows = h_rows; - rows2 = h_rows2; + rows_coo = h_rows_coo; cols = h_cols; coefs = h_coefs; break; @@ -346,10 +349,10 @@ main( hypre_int argc, /* Test Set */ if (mode & 1) { - test_Set(comm, memory_location, option, ilower, iupper, nrows, num_nonzeros, - nchunks, h_nnzrow, nnzrow, option == 1 ? rows : rows2, cols, coefs, &ij_A); + test_Set(comm, memory_location, option, ilower, iupper, jlower, jupper, nrows, num_nonzeros, + nchunks, h_nnzrow, nnzrow, option == 1 ? rows : rows_coo, cols, coefs, &ij_A); - checkMatrix(parcsr_ref, ij_A); + ierr += checkMatrix(parcsr_ref, ij_A) > tol; if (print_matrix) { HYPRE_IJMatrixPrint(ij_A, "ij_Set"); @@ -357,25 +360,30 @@ main( hypre_int argc, HYPRE_IJMatrixDestroy(ij_A); } - /* Test SetOffProc */ + /* Test AddTranspose */ if (mode & 2) { - test_SetOffProc(parcsr_ref, memory_location, nchunks, option, &ij_AT); - checkMatrix(parcsr_ref, ij_AT); + test_AddTranspose(comm, memory_location, 2, ilower, iupper, jlower, jupper, nrows, num_nonzeros, + nchunks, h_nnzrow, nnzrow, rows_coo, cols, coefs, &ij_AT); + + hypre_ParCSRMatrixTranspose(parcsr_ref, &parcsr_trans, 1); + + ierr += checkMatrix(parcsr_trans, ij_AT) > tol; if (print_matrix) { - HYPRE_IJMatrixPrint(ij_A, "ij_SetOffProc"); + HYPRE_IJMatrixPrint(ij_AT, "ij_AddTrans"); } HYPRE_IJMatrixDestroy(ij_AT); + HYPRE_ParCSRMatrixDestroy(parcsr_trans); } /* Test Set/Set */ if (mode & 4) { - test_SetSet(comm, memory_location, option, ilower, iupper, nrows, num_nonzeros, - nchunks, h_nnzrow, 
nnzrow, option == 1 ? rows : rows2, cols, coefs, &ij_A); + test_SetSet(comm, memory_location, option, ilower, iupper, jlower, jupper, nrows, num_nonzeros, + nchunks, h_nnzrow, nnzrow, option == 1 ? rows : rows_coo, cols, coefs, &ij_A); - checkMatrix(parcsr_ref, ij_A); + ierr += checkMatrix(parcsr_ref, ij_A) > tol; if (print_matrix) { HYPRE_IJMatrixPrint(ij_A, "ij_SetSet"); @@ -386,10 +394,10 @@ main( hypre_int argc, /* Test Add/Set */ if (mode & 8) { - test_AddSet(comm, memory_location, option, ilower, iupper, nrows, num_nonzeros, - nchunks, h_nnzrow, nnzrow, option == 1 ? rows : rows2, cols, coefs, &ij_A); + test_AddSet(comm, memory_location, option, ilower, iupper, jlower, jupper, nrows, num_nonzeros, + nchunks, h_nnzrow, nnzrow, option == 1 ? rows : rows_coo, cols, coefs, &ij_A); - checkMatrix(parcsr_ref, ij_A); + ierr += checkMatrix(parcsr_ref, ij_A) > tol; if (print_matrix) { HYPRE_IJMatrixPrint(ij_A, "ij_AddSet"); @@ -397,13 +405,31 @@ main( hypre_int argc, HYPRE_IJMatrixDestroy(ij_A); } - /* Test Set/Add/Set */ + /* Test Set/Add */ if (mode & 16) { - test_SetAddSet(comm, memory_location, option, ilower, iupper, nrows, num_nonzeros, - nchunks, h_nnzrow, nnzrow, option == 1 ? rows : rows2, cols, coefs, &ij_A); + test_SetAdd(comm, memory_location, option, ilower, iupper, jlower, jupper, nrows, num_nonzeros, + nchunks, h_nnzrow, nnzrow, option == 1 ? rows : rows_coo, cols, coefs, &ij_A); + + hypre_ParCSRMatrix *parcsr_ref2 = hypre_ParCSRMatrixClone(parcsr_ref, 1); + hypre_ParCSRMatrixScale(parcsr_ref2, 2.0); - checkMatrix(parcsr_ref, ij_A); + ierr += checkMatrix(parcsr_ref2, ij_A) > tol; + if (print_matrix) + { + HYPRE_IJMatrixPrint(ij_A, "ij_SetAdd"); + } + HYPRE_IJMatrixDestroy(ij_A); + HYPRE_ParCSRMatrixDestroy(parcsr_ref2); + } + + /* Test Set/Add/Set */ + if (mode & 32) + { + test_SetAddSet(comm, memory_location, option, ilower, iupper, jlower, jupper, nrows, num_nonzeros, + nchunks, h_nnzrow, nnzrow, option == 1 ? 
rows : rows_coo, cols, coefs, &ij_A); + + ierr += checkMatrix(parcsr_ref, ij_A) > tol; if (print_matrix) { HYPRE_IJMatrixPrint(ij_A, "ij_SetAddSet"); @@ -411,6 +437,9 @@ main( hypre_int argc, HYPRE_IJMatrixDestroy(ij_A); } + /* Print the error code */ + hypre_ParPrintf(comm, "Test error code = %d\n", ierr); + /*----------------------------------------------------------- * Free memory *-----------------------------------------------------------*/ @@ -418,13 +447,13 @@ main( hypre_int argc, { hypre_TFree(d_nnzrow, HYPRE_MEMORY_DEVICE); hypre_TFree(d_rows, HYPRE_MEMORY_DEVICE); - hypre_TFree(d_rows2, HYPRE_MEMORY_DEVICE); + hypre_TFree(d_rows_coo, HYPRE_MEMORY_DEVICE); hypre_TFree(d_cols, HYPRE_MEMORY_DEVICE); hypre_TFree(d_coefs, HYPRE_MEMORY_DEVICE); } hypre_TFree(h_nnzrow, HYPRE_MEMORY_HOST); hypre_TFree(h_rows, HYPRE_MEMORY_HOST); - hypre_TFree(h_rows2, HYPRE_MEMORY_HOST); + hypre_TFree(h_rows_coo, HYPRE_MEMORY_HOST); hypre_TFree(h_cols, HYPRE_MEMORY_HOST); hypre_TFree(h_coefs, HYPRE_MEMORY_HOST); @@ -438,7 +467,7 @@ main( hypre_int argc, /* when using cuda-memcheck --leak-check full, uncomment this */ #if defined(HYPRE_USING_GPU) - hypre_ResetCudaDevice(hypre_handle()); + hypre_ResetCudaDevice(NULL); #endif return (0); @@ -452,9 +481,10 @@ buildMatrixEntries(MPI_Comm comm, HYPRE_Int Px, HYPRE_Int Py, HYPRE_Int Pz, - HYPRE_Real cx, - HYPRE_Real cy, - HYPRE_Real cz, + HYPRE_Real cx, + HYPRE_Real cy, + HYPRE_Real cz, + HYPRE_Int base, HYPRE_BigInt *ilower_ptr, HYPRE_BigInt *iupper_ptr, HYPRE_BigInt *jlower_ptr, @@ -463,7 +493,7 @@ buildMatrixEntries(MPI_Comm comm, HYPRE_BigInt *num_nonzeros_ptr, HYPRE_Int **nnzrow_ptr, HYPRE_BigInt **rows_ptr, /* row indices of length nrows */ - HYPRE_BigInt **rows2_ptr, /* row indices of length nnz */ + HYPRE_BigInt **rows_coo_ptr, /* row indices of length nnz */ HYPRE_BigInt **cols_ptr, /* col indices of length nnz */ HYPRE_Real **coefs_ptr, /* values of length nnz */ HYPRE_Int stencil, @@ -504,14 +534,51 @@ 
buildMatrixEntries(MPI_Comm comm, } hypre_ParCSRMatrixMigrate(A, HYPRE_MEMORY_HOST); - getParCSRMatrixData(A, nrows_ptr, num_nonzeros_ptr, nnzrow_ptr, rows_ptr, rows2_ptr, cols_ptr, + + /* perturb the strictly upper triangular entries (row < col) to make A nonsymmetric */ + HYPRE_Int i, j; + HYPRE_Int nrows = hypre_ParCSRMatrixNumRows(A); + hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A); + hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A); + HYPRE_Int *A_diag_i = hypre_CSRMatrixI(A_diag); + HYPRE_Int *A_diag_j = hypre_CSRMatrixJ(A_diag); + HYPRE_Int *A_offd_i = hypre_CSRMatrixI(A_offd); + HYPRE_Int *A_offd_j = hypre_CSRMatrixJ(A_offd); + HYPRE_BigInt *col_map_offd_A = hypre_ParCSRMatrixColMapOffd(A); + HYPRE_BigInt ilower = hypre_ParCSRMatrixFirstRowIndex(A); + HYPRE_BigInt jlower = hypre_ParCSRMatrixFirstColDiag(A); + + for (i = 0; i < nrows; i++) + { + for (j = A_diag_i[i]; j < A_diag_i[i + 1]; j++) + { + HYPRE_BigInt row = ilower + (HYPRE_BigInt) i; + HYPRE_BigInt col = jlower + (HYPRE_BigInt) A_diag_j[j]; + if (row < col) + { + hypre_CSRMatrixData(A_diag)[j] += myid + .89; + } + } + for (j = A_offd_i[i]; j < A_offd_i[i + 1]; j++) + { + HYPRE_BigInt row = ilower + (HYPRE_BigInt) i; + HYPRE_BigInt col = col_map_offd_A[A_offd_j[j]]; + if (row < col) + { + hypre_CSRMatrixData(A_offd)[j] += myid + .64; + } + } + } + + /* get I, J, data from A */ + getParCSRMatrixData(A, base, nrows_ptr, num_nonzeros_ptr, nnzrow_ptr, rows_ptr, rows_coo_ptr, cols_ptr, coefs_ptr); // Set pointers - *ilower_ptr = hypre_ParCSRMatrixFirstRowIndex(A); - *iupper_ptr = hypre_ParCSRMatrixLastRowIndex(A); - *jlower_ptr = hypre_ParCSRMatrixFirstColDiag(A); - *jupper_ptr = hypre_ParCSRMatrixLastColDiag(A); + *ilower_ptr = hypre_ParCSRMatrixFirstRowIndex(A) + base; + *iupper_ptr = hypre_ParCSRMatrixLastRowIndex(A) + base; + *jlower_ptr = hypre_ParCSRMatrixFirstColDiag(A) + base; + *jupper_ptr = hypre_ParCSRMatrixLastColDiag(A) + base; *parcsr_ptr = A; return hypre_error_flag; @@ -519,11 +586,12 @@
buildMatrixEntries(MPI_Comm comm, HYPRE_Int getParCSRMatrixData(HYPRE_ParCSRMatrix A, + HYPRE_Int base, HYPRE_Int *nrows_ptr, HYPRE_BigInt *num_nonzeros_ptr, HYPRE_Int **nnzrow_ptr, HYPRE_BigInt **rows_ptr, - HYPRE_BigInt **rows2_ptr, + HYPRE_BigInt **rows_coo_ptr, HYPRE_BigInt **cols_ptr, HYPRE_Real **coefs_ptr) { @@ -542,7 +610,7 @@ getParCSRMatrixData(HYPRE_ParCSRMatrix A, HYPRE_BigInt num_nonzeros; HYPRE_Int *nnzrow; HYPRE_BigInt *rows; - HYPRE_BigInt *rows2; + HYPRE_BigInt *rows_coo; HYPRE_BigInt *cols; HYPRE_Real *coefs; HYPRE_Int i, j, k; @@ -551,7 +619,7 @@ getParCSRMatrixData(HYPRE_ParCSRMatrix A, num_nonzeros = hypre_CSRMatrixNumNonzeros(A_diag) + hypre_CSRMatrixNumNonzeros(A_offd); nnzrow = hypre_CTAlloc(HYPRE_Int, nrows, HYPRE_MEMORY_HOST); rows = hypre_CTAlloc(HYPRE_BigInt, nrows, HYPRE_MEMORY_HOST); - rows2 = hypre_CTAlloc(HYPRE_BigInt, num_nonzeros, HYPRE_MEMORY_HOST); + rows_coo = hypre_CTAlloc(HYPRE_BigInt, num_nonzeros, HYPRE_MEMORY_HOST); cols = hypre_CTAlloc(HYPRE_BigInt, num_nonzeros, HYPRE_MEMORY_HOST); coefs = hypre_CTAlloc(HYPRE_Real, num_nonzeros, HYPRE_MEMORY_HOST); @@ -565,13 +633,13 @@ getParCSRMatrixData(HYPRE_ParCSRMatrix A, for (j = A_diag_i[i]; j < A_diag_i[i + 1]; j++) { - rows2[k] = ilower + (HYPRE_BigInt) i; + rows_coo[k] = ilower + (HYPRE_BigInt) i; cols[k] = jlower + (HYPRE_BigInt) A_diag_j[j]; coefs[k++] = hypre_CSRMatrixData(A_diag)[j]; } for (j = A_offd_i[i]; j < A_offd_i[i + 1]; j++) { - rows2[k] = ilower + (HYPRE_BigInt) i; + rows_coo[k] = ilower + (HYPRE_BigInt) i; cols[k] = hypre_ParCSRMatrixColMapOffd(A)[A_offd_j[j]]; coefs[k++] = hypre_CSRMatrixData(A_offd)[j]; } @@ -581,18 +649,18 @@ getParCSRMatrixData(HYPRE_ParCSRMatrix A, { nnzrow[nrows - 1 - i] = A_diag_i[i + 1] - A_diag_i[i] + A_offd_i[i + 1] - A_offd_i[i]; - rows[nrows - 1 - i] = ilower + i; + rows[nrows - 1 - i] = ilower + i + base; for (j = A_diag_i[i]; j < A_diag_i[i + 1]; j++) { - rows2[k] = ilower + (HYPRE_BigInt) i; - cols[k] = jlower + (HYPRE_BigInt) 
A_diag_j[j]; + rows_coo[k] = ilower + (HYPRE_BigInt) i + base; + cols[k] = jlower + (HYPRE_BigInt) A_diag_j[j] + base; coefs[k++] = hypre_CSRMatrixData(A_diag)[j]; } for (j = A_offd_i[i]; j < A_offd_i[i + 1]; j++) { - rows2[k] = ilower + (HYPRE_BigInt) i; - cols[k] = col_map_offd_A[A_offd_j[j]]; + rows_coo[k] = ilower + (HYPRE_BigInt) i + base; + cols[k] = col_map_offd_A[A_offd_j[j]] + base; coefs[k++] = hypre_CSRMatrixData(A_offd)[j]; } } @@ -605,7 +673,7 @@ getParCSRMatrixData(HYPRE_ParCSRMatrix A, *num_nonzeros_ptr = num_nonzeros; *nnzrow_ptr = nnzrow; *rows_ptr = rows; - *rows2_ptr = rows2; + *rows_coo_ptr = rows_coo; *cols_ptr = cols; *coefs_ptr = coefs; @@ -613,48 +681,50 @@ getParCSRMatrixData(HYPRE_ParCSRMatrix A, } -HYPRE_Int +HYPRE_Real checkMatrix(HYPRE_ParCSRMatrix h_parcsr_ref, HYPRE_IJMatrix ij_A) { MPI_Comm comm = hypre_IJMatrixComm(ij_A); HYPRE_ParCSRMatrix parcsr_A = (HYPRE_ParCSRMatrix) hypre_IJMatrixObject(ij_A); HYPRE_ParCSRMatrix h_parcsr_A; HYPRE_ParCSRMatrix parcsr_error; - HYPRE_Int myid; HYPRE_Real fnorm; - hypre_MPI_Comm_rank(comm, &myid); - h_parcsr_A = hypre_ParCSRMatrixClone_v2(parcsr_A, 1, HYPRE_MEMORY_HOST); // Check norm of (parcsr_ref - parcsr_A) hypre_ParCSRMatrixAdd(1.0, h_parcsr_ref, -1.0, h_parcsr_A, &parcsr_error); fnorm = hypre_ParCSRMatrixFnorm(parcsr_error); - if (myid == 0) - { - hypre_printf("Frobenius norm of (A_ref - A): %e\n", fnorm); - } + hypre_ParPrintf(comm, "Frobenius norm of (A_ref - A): %e\n", fnorm); HYPRE_ParCSRMatrixDestroy(h_parcsr_A); HYPRE_ParCSRMatrixDestroy(parcsr_error); - return hypre_error_flag; + return fnorm; } +/* ---------------------------------- * + * All test functions for assembly * + * option = 1: rows has length nrows * + * = 2: rows has length num_nonzeros * + * ---------------------------------- */ + +/* set values */ HYPRE_Int test_Set(MPI_Comm comm, HYPRE_MemoryLocation memory_location, - HYPRE_Int option, /* 1 or 2 */ + HYPRE_Int option, HYPRE_BigInt ilower, HYPRE_BigInt iupper, + HYPRE_BigInt
jlower, + HYPRE_BigInt jupper, HYPRE_Int nrows, HYPRE_BigInt num_nonzeros, HYPRE_Int nchunks, HYPRE_Int *h_nnzrow, HYPRE_Int *nnzrow, - HYPRE_BigInt - *rows, /* option = 1: length of nrows, = 2: length of num_nonzeros */ + HYPRE_BigInt *rows, HYPRE_BigInt *cols, HYPRE_Real *coefs, HYPRE_IJMatrix *ij_A_ptr) @@ -662,20 +732,19 @@ test_Set(MPI_Comm comm, HYPRE_IJMatrix ij_A; HYPRE_Int i, chunk, chunk_size; HYPRE_Int time_index; - HYPRE_Int *h_rowptr; - - HYPRE_IJMatrixCreate(comm, ilower, iupper, ilower, iupper, &ij_A); - HYPRE_IJMatrixSetObjectType(ij_A, HYPRE_PARCSR); - HYPRE_IJMatrixInitialize_v2(ij_A, memory_location); - HYPRE_IJMatrixSetOMPFlag(ij_A, 1); + HYPRE_Int *h_rowptr = hypre_CTAlloc(HYPRE_Int, nrows + 1, HYPRE_MEMORY_HOST); - h_rowptr = hypre_CTAlloc(HYPRE_Int, nrows + 1, HYPRE_MEMORY_HOST); for (i = 1; i < nrows + 1; i++) { h_rowptr[i] = h_rowptr[i - 1] + h_nnzrow[i - 1]; } hypre_assert(h_rowptr[nrows] == num_nonzeros); + HYPRE_IJMatrixCreate(comm, ilower, iupper, jlower, jupper, &ij_A); + HYPRE_IJMatrixSetObjectType(ij_A, HYPRE_PARCSR); + HYPRE_IJMatrixInitialize_v2(ij_A, memory_location); + HYPRE_IJMatrixSetOMPFlag(ij_A, 1); + chunk_size = nrows / nchunks; #if defined(HYPRE_USING_GPU) @@ -729,165 +798,88 @@ test_Set(MPI_Comm comm, return hypre_error_flag; } +/* add values with (row, col) swapped, i.e., assemble the transpose of A; + * in this way, we can test off-proc AddToValues */ HYPRE_Int -test_SetOffProc(HYPRE_ParCSRMatrix parcsr_A, - HYPRE_MemoryLocation memory_location, - HYPRE_Int nchunks, - HYPRE_Int option, /* 1 or 2 */ - HYPRE_IJMatrix *ij_AT_ptr) -{ - MPI_Comm comm = hypre_ParCSRMatrixComm(parcsr_A); - HYPRE_ParCSRMatrix parcsr_AT; - HYPRE_IJMatrix ij_AT; - - HYPRE_Int nrows; - HYPRE_BigInt num_nonzeros; - HYPRE_BigInt ilower, iupper; - - HYPRE_Int *h_nnzrow; - HYPRE_BigInt *h_rows1; - HYPRE_BigInt *h_rows2; - HYPRE_BigInt *h_cols; - HYPRE_Real *h_coefs; - - HYPRE_Int *d_nnzrow = NULL; - HYPRE_BigInt *d_rows = NULL; - HYPRE_BigInt *d_cols = NULL; -
HYPRE_Real *d_coefs = NULL; - - HYPRE_Int *nnzrow = NULL; - HYPRE_BigInt *rows = NULL; - HYPRE_BigInt *cols = NULL; - HYPRE_Real *coefs = NULL; - - HYPRE_Int *h_rowptr; - - HYPRE_Int time_index; - HYPRE_Int chunk_size; - HYPRE_Int chunk; - HYPRE_Int i; - - hypre_ParCSRMatrixTranspose(parcsr_A, &parcsr_AT, 1); - ilower = hypre_ParCSRMatrixFirstRowIndex(parcsr_AT); - iupper = hypre_ParCSRMatrixLastRowIndex(parcsr_AT); - getParCSRMatrixData(parcsr_AT, &nrows, &num_nonzeros, &h_nnzrow, &h_rows1, &h_rows2, &h_cols, - &h_coefs); - HYPRE_ParCSRMatrixDestroy(parcsr_AT); +test_AddTranspose(MPI_Comm comm, + HYPRE_MemoryLocation memory_location, + HYPRE_Int option, + HYPRE_BigInt ilower, + HYPRE_BigInt iupper, + HYPRE_BigInt jlower, + HYPRE_BigInt jupper, + HYPRE_Int nrows, + HYPRE_BigInt num_nonzeros, + HYPRE_Int nchunks, + HYPRE_Int *h_nnzrow, + HYPRE_Int *nnzrow, + HYPRE_BigInt *rows, + HYPRE_BigInt *cols, + HYPRE_Real *coefs, + HYPRE_IJMatrix *ij_AT_ptr) - switch (memory_location) - { - case HYPRE_MEMORY_DEVICE: - d_nnzrow = hypre_TAlloc(HYPRE_Int, nrows, HYPRE_MEMORY_DEVICE); - d_cols = hypre_TAlloc(HYPRE_BigInt, num_nonzeros, HYPRE_MEMORY_DEVICE); - d_coefs = hypre_TAlloc(HYPRE_Real, num_nonzeros, HYPRE_MEMORY_DEVICE); - if (option == 1) - { - d_rows = hypre_TAlloc(HYPRE_BigInt, nrows, HYPRE_MEMORY_DEVICE); - hypre_TMemcpy(d_rows, h_rows1, HYPRE_BigInt, nrows, HYPRE_MEMORY_DEVICE, - HYPRE_MEMORY_HOST); - } - else - { - d_rows = hypre_TAlloc(HYPRE_BigInt, num_nonzeros, HYPRE_MEMORY_DEVICE); - hypre_TMemcpy(d_rows, h_rows2, HYPRE_BigInt, num_nonzeros, HYPRE_MEMORY_DEVICE, - HYPRE_MEMORY_HOST); - } - hypre_TMemcpy(d_nnzrow, h_nnzrow, HYPRE_Int, nrows, HYPRE_MEMORY_DEVICE, - HYPRE_MEMORY_HOST); - hypre_TMemcpy(d_cols, h_cols, HYPRE_BigInt, num_nonzeros, HYPRE_MEMORY_DEVICE, - HYPRE_MEMORY_HOST); - hypre_TMemcpy(d_coefs, h_coefs, HYPRE_Real, num_nonzeros, HYPRE_MEMORY_DEVICE, - HYPRE_MEMORY_HOST); - - nnzrow = d_nnzrow; - rows = d_rows; - cols = d_cols; - coefs = d_coefs; - 
break; - case HYPRE_MEMORY_HOST: - nnzrow = h_nnzrow; - rows = (option == 1) ? h_rows1 : h_rows2; - cols = h_cols; - coefs = h_coefs; - break; - - case HYPRE_MEMORY_UNDEFINED: - return -1; - } +{ + hypre_assert(option == 2); - // Create transpose with SetValues - HYPRE_IJMatrixCreate(comm, ilower, iupper, ilower, iupper, &ij_AT); - HYPRE_IJMatrixSetObjectType(ij_AT, HYPRE_PARCSR); - HYPRE_IJMatrixInitialize_v2(ij_AT, memory_location); - HYPRE_IJMatrixSetOMPFlag(ij_AT, 1); + HYPRE_IJMatrix ij_AT; + HYPRE_Int i, chunk, chunk_size; + HYPRE_Int time_index; + HYPRE_Int *h_rowptr = hypre_CTAlloc(HYPRE_Int, nrows + 1, HYPRE_MEMORY_HOST); - h_rowptr = hypre_CTAlloc(HYPRE_Int, nrows + 1, HYPRE_MEMORY_HOST); for (i = 1; i < nrows + 1; i++) { h_rowptr[i] = h_rowptr[i - 1] + h_nnzrow[i - 1]; } hypre_assert(h_rowptr[nrows] == num_nonzeros); + HYPRE_IJMatrixCreate(comm, jlower, jupper, ilower, iupper, &ij_AT); + HYPRE_IJMatrixSetObjectType(ij_AT, HYPRE_PARCSR); + HYPRE_IJMatrixInitialize_v2(ij_AT, memory_location); + HYPRE_IJMatrixSetOMPFlag(ij_AT, 1); + chunk_size = nrows / nchunks; #if defined(HYPRE_USING_GPU) hypre_SyncCudaDevice(hypre_handle()); +#if defined(CUDA_PROFILER) + cudaProfilerStart(); +#endif #endif - time_index = hypre_InitializeTiming("Test SetValues OffProc"); + time_index = hypre_InitializeTiming("Test SetValuesTranspose"); hypre_BeginTiming(time_index); - //cudaProfilerStart(); - for (chunk = 0; chunk < nrows; chunk += chunk_size) { chunk_size = hypre_min(chunk_size, nrows - chunk); - if (1 == option) - { - HYPRE_IJMatrixSetValues(ij_AT, chunk_size, &nnzrow[chunk], &rows[chunk], - &cols[h_rowptr[chunk]], &coefs[h_rowptr[chunk]]); - } - else - { - HYPRE_IJMatrixSetValues(ij_AT, h_rowptr[chunk + chunk_size] - h_rowptr[chunk], - NULL, &rows[h_rowptr[chunk]], - &cols[h_rowptr[chunk]], &coefs[h_rowptr[chunk]]); - } + HYPRE_IJMatrixAddToValues(ij_AT, h_rowptr[chunk + chunk_size] - h_rowptr[chunk], + NULL, &cols[h_rowptr[chunk]], + &rows[h_rowptr[chunk]], 
&coefs[h_rowptr[chunk]]); } // Assemble matrix HYPRE_IJMatrixAssemble(ij_AT); - //cudaProfilerStop(); - #if defined(HYPRE_USING_GPU) hypre_SyncCudaDevice(hypre_handle()); +#if defined(CUDA_PROFILER) + cudaProfilerStop(); +#endif #endif + // Finalize timer hypre_EndTiming(time_index); - hypre_PrintTiming("Test SetValues OffProc", hypre_MPI_COMM_WORLD); + hypre_PrintTiming("Test SetValuesTranspose", hypre_MPI_COMM_WORLD); hypre_FinalizeTiming(time_index); hypre_ClearTiming(); - // Set pointer to output - *ij_AT_ptr = ij_AT; - // Free memory hypre_TFree(h_rowptr, HYPRE_MEMORY_HOST); - if (memory_location == HYPRE_MEMORY_DEVICE) - { - hypre_TFree(d_nnzrow, HYPRE_MEMORY_DEVICE); - hypre_TFree(d_rows, HYPRE_MEMORY_DEVICE); - hypre_TFree(d_cols, HYPRE_MEMORY_DEVICE); - hypre_TFree(d_coefs, HYPRE_MEMORY_DEVICE); - } - hypre_TFree(h_nnzrow, HYPRE_MEMORY_HOST); - hypre_TFree(h_rows1, HYPRE_MEMORY_HOST); - hypre_TFree(h_rows2, HYPRE_MEMORY_HOST); - hypre_TFree(h_cols, HYPRE_MEMORY_HOST); - hypre_TFree(h_coefs, HYPRE_MEMORY_HOST); + + // Set pointer to output + *ij_AT_ptr = ij_AT; return hypre_error_flag; } @@ -895,16 +887,17 @@ test_SetOffProc(HYPRE_ParCSRMatrix parcsr_A, HYPRE_Int test_SetSet(MPI_Comm comm, HYPRE_MemoryLocation memory_location, - HYPRE_Int option, /* 1 or 2 */ + HYPRE_Int option, HYPRE_BigInt ilower, HYPRE_BigInt iupper, + HYPRE_BigInt jlower, + HYPRE_BigInt jupper, HYPRE_Int nrows, HYPRE_BigInt num_nonzeros, HYPRE_Int nchunks, HYPRE_Int *h_nnzrow, HYPRE_Int *nnzrow, - HYPRE_BigInt - *rows, /* option = 1: length of nrows, = 2: length of num_nonzeros */ + HYPRE_BigInt *rows, HYPRE_BigInt *cols, HYPRE_Real *coefs, HYPRE_IJMatrix *ij_A_ptr) @@ -915,7 +908,7 @@ test_SetSet(MPI_Comm comm, HYPRE_Int *h_rowptr; HYPRE_Real *new_coefs; - HYPRE_IJMatrixCreate(comm, ilower, iupper, ilower, iupper, &ij_A); + HYPRE_IJMatrixCreate(comm, ilower, iupper, jlower, jupper, &ij_A); HYPRE_IJMatrixSetObjectType(ij_A, HYPRE_PARCSR); HYPRE_IJMatrixInitialize_v2(ij_A, 
memory_location); HYPRE_IJMatrixSetOMPFlag(ij_A, 1); @@ -1022,16 +1015,17 @@ test_SetSet(MPI_Comm comm, HYPRE_Int test_AddSet(MPI_Comm comm, HYPRE_MemoryLocation memory_location, - HYPRE_Int option, /* 1 or 2 */ + HYPRE_Int option, HYPRE_BigInt ilower, HYPRE_BigInt iupper, + HYPRE_BigInt jlower, + HYPRE_BigInt jupper, HYPRE_Int nrows, HYPRE_BigInt num_nonzeros, HYPRE_Int nchunks, HYPRE_Int *h_nnzrow, HYPRE_Int *nnzrow, - HYPRE_BigInt - *rows, /* option = 1: length of nrows, = 2: length of num_nonzeros */ + HYPRE_BigInt *rows, HYPRE_BigInt *cols, HYPRE_Real *coefs, HYPRE_IJMatrix *ij_A_ptr) @@ -1042,7 +1036,7 @@ test_AddSet(MPI_Comm comm, HYPRE_Int *h_rowptr; HYPRE_Real *new_coefs; - HYPRE_IJMatrixCreate(comm, ilower, iupper, ilower, iupper, &ij_A); + HYPRE_IJMatrixCreate(comm, ilower, iupper, jlower, jupper, &ij_A); HYPRE_IJMatrixSetObjectType(ij_A, HYPRE_PARCSR); HYPRE_IJMatrixInitialize_v2(ij_A, memory_location); HYPRE_IJMatrixSetOMPFlag(ij_A, 1); @@ -1143,12 +1137,121 @@ test_AddSet(MPI_Comm comm, return hypre_error_flag; } +HYPRE_Int +test_SetAdd(MPI_Comm comm, + HYPRE_MemoryLocation memory_location, + HYPRE_Int option, + HYPRE_BigInt ilower, + HYPRE_BigInt iupper, + HYPRE_BigInt jlower, + HYPRE_BigInt jupper, + HYPRE_Int nrows, + HYPRE_BigInt num_nonzeros, + HYPRE_Int nchunks, + HYPRE_Int *h_nnzrow, + HYPRE_Int *nnzrow, + HYPRE_BigInt *rows, + HYPRE_BigInt *cols, + HYPRE_Real *coefs, + HYPRE_IJMatrix *ij_A_ptr) +{ + HYPRE_IJMatrix ij_A; + HYPRE_Int i, chunk, chunk_size; + HYPRE_Int time_index; + HYPRE_Int *h_rowptr; + + HYPRE_IJMatrixCreate(comm, ilower, iupper, jlower, jupper, &ij_A); + HYPRE_IJMatrixSetObjectType(ij_A, HYPRE_PARCSR); + HYPRE_IJMatrixInitialize_v2(ij_A, memory_location); + HYPRE_IJMatrixSetOMPFlag(ij_A, 1); + + h_rowptr = hypre_CTAlloc(HYPRE_Int, nrows + 1, HYPRE_MEMORY_HOST); + for (i = 1; i < nrows + 1; i++) + { + h_rowptr[i] = h_rowptr[i - 1] + h_nnzrow[i - 1]; + } + hypre_assert(h_rowptr[nrows] == num_nonzeros); + + chunk_size = nrows / 
nchunks; + +#if defined(HYPRE_USING_GPU) + hypre_SyncCudaDevice(hypre_handle()); +#if defined(CUDA_PROFILER) + cudaProfilerStart(); +#endif +#endif + + // First Set + time_index = hypre_InitializeTiming("Test Set/Add"); + hypre_BeginTiming(time_index); + for (chunk = 0; chunk < nrows; chunk += chunk_size) + { + chunk_size = hypre_min(chunk_size, nrows - chunk); + + if (1 == option) + { + HYPRE_IJMatrixSetValues(ij_A, chunk_size, &nnzrow[chunk], &rows[chunk], + &cols[h_rowptr[chunk]], &coefs[h_rowptr[chunk]]); + } + else + { + HYPRE_IJMatrixSetValues(ij_A, h_rowptr[chunk + chunk_size] - h_rowptr[chunk], + NULL, &rows[h_rowptr[chunk]], + &cols[h_rowptr[chunk]], &coefs[h_rowptr[chunk]]); + } + } + + // Second Add + for (chunk = 0; chunk < nrows; chunk += chunk_size) + { + chunk_size = hypre_min(chunk_size, nrows - chunk); + + if (1 == option) + { + HYPRE_IJMatrixAddToValues(ij_A, chunk_size, &nnzrow[chunk], &rows[chunk], + &cols[h_rowptr[chunk]], &coefs[h_rowptr[chunk]]); + } + else + { + HYPRE_IJMatrixAddToValues(ij_A, h_rowptr[chunk + chunk_size] - h_rowptr[chunk], + NULL, &rows[h_rowptr[chunk]], + &cols[h_rowptr[chunk]], &coefs[h_rowptr[chunk]]); + } + } + + // Assemble matrix + HYPRE_IJMatrixAssemble(ij_A); + +#if defined(HYPRE_USING_GPU) + hypre_SyncCudaDevice(hypre_handle()); +#if defined(CUDA_PROFILER) + cudaProfilerStop(); +#endif +#endif + + // Finalize timer + hypre_EndTiming(time_index); + hypre_PrintTiming("Test Set/Add", hypre_MPI_COMM_WORLD); + hypre_FinalizeTiming(time_index); + hypre_ClearTiming(); + + // Free memory + hypre_TFree(h_rowptr, HYPRE_MEMORY_HOST); + + // Set pointer to matrix + *ij_A_ptr = ij_A; + + return hypre_error_flag; +} + HYPRE_Int test_SetAddSet(MPI_Comm comm, HYPRE_MemoryLocation memory_location, HYPRE_Int option, /* 1 or 2 */ HYPRE_BigInt ilower, HYPRE_BigInt iupper, + HYPRE_BigInt jlower, + HYPRE_BigInt jupper, HYPRE_Int nrows, HYPRE_BigInt num_nonzeros, HYPRE_Int nchunks, @@ -1165,7 +1268,7 @@ test_SetAddSet(MPI_Comm comm, 
HYPRE_Int time_index; HYPRE_Int *h_rowptr; - HYPRE_IJMatrixCreate(comm, ilower, iupper, ilower, iupper, &ij_A); + HYPRE_IJMatrixCreate(comm, ilower, iupper, jlower, jupper, &ij_A); HYPRE_IJMatrixSetObjectType(ij_A, HYPRE_PARCSR); HYPRE_IJMatrixInitialize_v2(ij_A, memory_location); HYPRE_IJMatrixSetOMPFlag(ij_A, 1);