diff --git a/clients/benchmarks/client.cpp b/clients/benchmarks/client.cpp index 16a244199..75882fc2a 100644 --- a/clients/benchmarks/client.cpp +++ b/clients/benchmarks/client.cpp @@ -176,6 +176,12 @@ try " This will additionally print the relative error of the computations.\n" " ") + ("hash", + value(&argus.hash_check)->default_value(0), + "Print hash of GPU results? 0 = No, 1 = Yes.\n" + " Meant for checking reproducibility of computations.\n" + " ") + // size options ("k", value(), diff --git a/clients/common/containers/host_strided_batch_vector.hpp b/clients/common/containers/host_strided_batch_vector.hpp index 1698ebef7..cb0cc9137 100644 --- a/clients/common/containers/host_strided_batch_vector.hpp +++ b/clients/common/containers/host_strided_batch_vector.hpp @@ -264,6 +264,15 @@ class host_strided_batch_vector return ((bool)*this) ? hipSuccess : hipErrorOutOfMemory; } + //! + //! @brief Get size of vector + //! @return number of elements + //! + size_t size() const + { + return this->m_nmemb; + } + private: storage m_storage{storage::block}; int64_t m_n{}; diff --git a/clients/common/lapack/testing_getf2_getrf.hpp b/clients/common/lapack/testing_getf2_getrf.hpp index 29096e761..1ad5c548a 100644 --- a/clients/common/lapack/testing_getf2_getrf.hpp +++ b/clients/common/lapack/testing_getf2_getrf.hpp @@ -226,12 +226,18 @@ void getf2_getrf_getError(const rocblas_handle handle, Uh& hInfo, Ih& hInfoRes, double* max_err, - const bool singular) + const bool singular, + size_t& hashA, + size_t& hashARes, + size_t& hashIpivRes) { // input data initialization getf2_getrf_initData(handle, m, n, dA, lda, stA, dIpiv, stP, dInfo, bc, hA, hIpiv, singular); + // compute input hashes + hashA = deterministic_hash(hA, bc); + // execute computations // GPU lapack CHECK_ROCBLAS_ERROR(rocsolver_getf2_getrf(STRIDED, GETRF, handle, m, n, dA.data(), lda, stA, @@ -247,6 +253,10 @@ void getf2_getrf_getError(const rocblas_handle handle, : cpu_getf2(m, n, hA[b], lda, hIpiv[b], hInfo[b]); 
} + // compute output hashes + hashARes = deterministic_hash(hARes, bc); + hashIpivRes = deterministic_hash(hIpivRes); + // expecting original matrix to be non-singular // error is ||hA - hARes|| / ||hA|| (ideally ||LU - Lres Ures|| / ||LU||) // (THIS DOES NOT ACCOUNT FOR NUMERICAL REPRODUCIBILITY ISSUES. @@ -373,8 +383,8 @@ void testing_getf2_getrf(Arguments& argus) I bc = argus.batch_count; int hot_calls = argus.iters; - rocblas_stride stARes = (argus.unit_check || argus.norm_check) ? stA : 0; - rocblas_stride stPRes = (argus.unit_check || argus.norm_check) ? stP : 0; + rocblas_stride stARes = (argus.unit_check || argus.norm_check || argus.hash_check) ? stA : 0; + rocblas_stride stPRes = (argus.unit_check || argus.norm_check || argus.hash_check) ? stP : 0; // check non-supported values // N/A @@ -383,9 +393,10 @@ void testing_getf2_getrf(Arguments& argus) size_t size_A = size_t(lda) * n; size_t size_P = size_t(min(m, n)); double max_error = 0, gpu_time_used = 0, cpu_time_used = 0; + size_t hashA = 0, hashARes = 0, hashIpivRes = 0; - size_t size_ARes = (argus.unit_check || argus.norm_check) ? size_A : 0; - size_t size_PRes = (argus.unit_check || argus.norm_check) ? size_P : 0; + size_t size_ARes = (argus.unit_check || argus.norm_check || argus.hash_check) ? size_A : 0; + size_t size_PRes = (argus.unit_check || argus.norm_check || argus.hash_check) ? 
size_P : 0; // check invalid sizes bool invalid_size = (m < 0 || n < 0 || lda < m || bc < 0); @@ -460,10 +471,10 @@ void testing_getf2_getrf(Arguments& argus) } // check computations - if(argus.unit_check || argus.norm_check) - getf2_getrf_getError(handle, m, n, dA, lda, stA, dIpiv, stP, dInfo, - bc, hA, hARes, hIpiv, hIpivRes, hInfo, hInfoRes, - &max_error, argus.singular); + if(argus.unit_check || argus.norm_check || argus.hash_check) + getf2_getrf_getError( + handle, m, n, dA, lda, stA, dIpiv, stP, dInfo, bc, hA, hARes, hIpiv, hIpivRes, + hInfo, hInfoRes, &max_error, argus.singular, hashA, hashARes, hashIpivRes); // collect performance data if(argus.timing) @@ -504,10 +515,10 @@ void testing_getf2_getrf(Arguments& argus) } // check computations - if(argus.unit_check || argus.norm_check) - getf2_getrf_getError(handle, m, n, dA, lda, stA, dIpiv, stP, dInfo, - bc, hA, hARes, hIpiv, hIpivRes, hInfo, hInfoRes, - &max_error, argus.singular); + if(argus.unit_check || argus.norm_check || argus.hash_check) + getf2_getrf_getError( + handle, m, n, dA, lda, stA, dIpiv, stP, dInfo, bc, hA, hARes, hIpiv, hIpivRes, + hInfo, hInfoRes, &max_error, argus.singular, hashA, hashARes, hashIpivRes); // collect performance data if(argus.timing) @@ -555,6 +566,13 @@ void testing_getf2_getrf(Arguments& argus) rocsolver_bench_output(cpu_time_used, gpu_time_used); } rocsolver_bench_endl(); + if(argus.hash_check) + { + rocsolver_bench_output("hash(A)", "hash(ARes)", "hash(ipivRes)"); + rocsolver_bench_output(ROCSOLVER_FORMAT_HASH(hashA), ROCSOLVER_FORMAT_HASH(hashARes), + ROCSOLVER_FORMAT_HASH(hashIpivRes)); + rocsolver_bench_endl(); + } } else { diff --git a/clients/common/lapack/testing_potf2_potrf.hpp b/clients/common/lapack/testing_potf2_potrf.hpp index 3212638d1..411887de9 100644 --- a/clients/common/lapack/testing_potf2_potrf.hpp +++ b/clients/common/lapack/testing_potf2_potrf.hpp @@ -185,12 +185,17 @@ void potf2_potrf_getError(const rocblas_handle handle, Uh& hInfo, Ih& hInfoRes, 
double* max_err, - const bool singular) + const bool singular, + size_t& hashA, + size_t& hashARes) { // input data initialization potf2_potrf_initData(handle, uplo, n, dA, lda, stA, dInfo, bc, hA, hInfo, singular); + // hash input + hashA = deterministic_hash(hA, bc); + // execute computations // GPU lapack CHECK_ROCBLAS_ERROR(rocsolver_potf2_potrf(STRIDED, POTRF, handle, uplo, n, dA.data(), lda, stA, @@ -198,6 +203,9 @@ void potf2_potrf_getError(const rocblas_handle handle, CHECK_HIP_ERROR(hARes.transfer_from(dA)); CHECK_HIP_ERROR(hInfoRes.transfer_from(dInfo)); + // hash output + hashARes = deterministic_hash(hARes, bc); + // CPU lapack for(I b = 0; b < bc; ++b) { @@ -321,7 +329,7 @@ void testing_potf2_potrf(Arguments& argus) I bc = argus.batch_count; rocblas_int hot_calls = argus.iters; - rocblas_stride stARes = (argus.unit_check || argus.norm_check) ? stA : 0; + rocblas_stride stARes = (argus.unit_check || argus.norm_check || argus.hash_check) ? stA : 0; // check non-supported values if(uplo != rocblas_fill_upper && uplo != rocblas_fill_lower) @@ -344,8 +352,9 @@ void testing_potf2_potrf(Arguments& argus) // determine sizes size_t size_A = size_t(lda) * n; double max_error = 0, gpu_time_used = 0, cpu_time_used = 0; + size_t hashA = 0, hashARes = 0; - size_t size_ARes = (argus.unit_check || argus.norm_check) ? size_A : 0; + size_t size_ARes = (argus.unit_check || argus.norm_check || argus.hash_check) ? 
size_A : 0; // check invalid sizes bool invalid_size = (n < 0 || lda < n || bc < 0); @@ -414,10 +423,10 @@ void testing_potf2_potrf(Arguments& argus) } // check computations - if(argus.unit_check || argus.norm_check) + if(argus.unit_check || argus.norm_check || argus.hash_check) potf2_potrf_getError(handle, uplo, n, dA, lda, stA, dInfo, bc, hA, hARes, hInfo, hInfoRes, &max_error, - argus.singular); + argus.singular, hashA, hashARes); // collect performance data if(argus.timing) @@ -452,10 +461,10 @@ void testing_potf2_potrf(Arguments& argus) } // check computations - if(argus.unit_check || argus.norm_check) + if(argus.unit_check || argus.norm_check || argus.hash_check) potf2_potrf_getError(handle, uplo, n, dA, lda, stA, dInfo, bc, hA, hARes, hInfo, hInfoRes, &max_error, - argus.singular); + argus.singular, hashA, hashARes); // collect performance data if(argus.timing) @@ -502,6 +511,12 @@ void testing_potf2_potrf(Arguments& argus) rocsolver_bench_output(cpu_time_used, gpu_time_used); } rocsolver_bench_endl(); + if(argus.hash_check) + { + rocsolver_bench_output("hash(A)", "hash(ARes)"); + rocsolver_bench_output(ROCSOLVER_FORMAT_HASH(hashA), ROCSOLVER_FORMAT_HASH(hashARes)); + rocsolver_bench_endl(); + } } else { diff --git a/clients/common/lapack/testing_syevx_heevx.hpp b/clients/common/lapack/testing_syevx_heevx.hpp index 621214d87..9480b8f8d 100644 --- a/clients/common/lapack/testing_syevx_heevx.hpp +++ b/clients/common/lapack/testing_syevx_heevx.hpp @@ -313,7 +313,10 @@ void syevx_heevx_getError(const rocblas_handle handle, Ih& hIfailRes, Ih& hinfo, Ih& hinfoRes, - double* max_err) + double* max_err, + size_t& hashA, + size_t& hashW, + size_t& hashZ) { using HMat = HostMatrix; using BDesc = typename HMat::BlockDescriptor; @@ -331,6 +334,9 @@ void syevx_heevx_getError(const rocblas_handle handle, // input data initialization syevx_heevx_initData(handle, evect, n, dA, lda, bc, hA, A); + // hash inputs + hashA = deterministic_hash(hA, bc); + // execute computations 
// GPU lapack CHECK_ROCBLAS_ERROR(rocsolver_syevx_heevx( @@ -346,6 +352,11 @@ void syevx_heevx_getError(const rocblas_handle handle, CHECK_HIP_ERROR(hIfailRes.transfer_from(dIfail)); } + // hash outputs + hashW = deterministic_hash(hWRes, bc); + if(evect == rocblas_evect_original) + hashZ = deterministic_hash(hZRes, bc); + // CPU lapack // abstol = 0 ensures max accuracy in rocsolver; for lapack we should use 2*safemin S atol = (abstol == 0) ? 2 * get_safemin() : abstol; @@ -619,11 +630,13 @@ void testing_syevx_heevx(Arguments& argus) size_t size_W = n; size_t size_Z = size_t(ldz) * n; size_t size_ifail = n; - size_t size_WRes = (argus.unit_check || argus.norm_check) ? size_W : 0; - size_t size_ZRes = (argus.unit_check || argus.norm_check) ? size_Z : 0; - size_t size_ifailRes = (argus.unit_check || argus.norm_check) ? size_ifail : 0; + size_t size_WRes = (argus.unit_check || argus.norm_check || argus.hash_check) ? size_W : 0; + size_t size_ZRes = (argus.unit_check || argus.norm_check || argus.hash_check) ? size_Z : 0; + size_t size_ifailRes + = (argus.unit_check || argus.norm_check || argus.hash_check) ? 
size_ifail : 0; double max_error = 0, gpu_time_used = 0, cpu_time_used = 0; + size_t hashA = 0, hashW = 0, hashZ = 0; // check invalid sizes bool invalid_size = (n < 0 || lda < n || (evect != rocblas_evect_none && ldz < n) || bc < 0 @@ -729,12 +742,12 @@ void testing_syevx_heevx(Arguments& argus) } // check computations - if(argus.unit_check || argus.norm_check) + if(argus.unit_check || argus.norm_check || argus.hash_check) { - syevx_heevx_getError(handle, evect, erange, uplo, n, dA, lda, stA, vl, vu, - il, iu, abstol, dNev, dW, stW, dZ, ldz, stZ, dIfail, - stF, dinfo, bc, hA, hNev, hNevRes, hW, hWres, hZ, - hZRes, hIfail, hIfailRes, hinfo, hinfoRes, &max_error); + syevx_heevx_getError( + handle, evect, erange, uplo, n, dA, lda, stA, vl, vu, il, iu, abstol, dNev, dW, stW, + dZ, ldz, stZ, dIfail, stF, dinfo, bc, hA, hNev, hNevRes, hW, hWres, hZ, hZRes, + hIfail, hIfailRes, hinfo, hinfoRes, &max_error, hashA, hashW, hashZ); } // collect performance data @@ -776,12 +789,12 @@ void testing_syevx_heevx(Arguments& argus) } // check computations - if(argus.unit_check || argus.norm_check) + if(argus.unit_check || argus.norm_check || argus.hash_check) { - syevx_heevx_getError(handle, evect, erange, uplo, n, dA, lda, stA, vl, vu, - il, iu, abstol, dNev, dW, stW, dZ, ldz, stZ, dIfail, - stF, dinfo, bc, hA, hNev, hNevRes, hW, hWres, hZ, - hZRes, hIfail, hIfailRes, hinfo, hinfoRes, &max_error); + syevx_heevx_getError( + handle, evect, erange, uplo, n, dA, lda, stA, vl, vu, il, iu, abstol, dNev, dW, stW, + dZ, ldz, stZ, dIfail, stF, dinfo, bc, hA, hNev, hNevRes, hW, hWres, hZ, hZRes, + hIfail, hIfailRes, hinfo, hinfoRes, &max_error, hashA, hashW, hashZ); } // collect performance data @@ -839,6 +852,13 @@ void testing_syevx_heevx(Arguments& argus) rocsolver_bench_output(cpu_time_used, gpu_time_used); } rocsolver_bench_endl(); + if(argus.hash_check) + { + rocsolver_bench_output("hash(A)", "hash(W)", "hash(Z)"); + rocsolver_bench_output(ROCSOLVER_FORMAT_HASH(hashA), 
ROCSOLVER_FORMAT_HASH(hashW), + ROCSOLVER_FORMAT_HASH(hashZ)); + rocsolver_bench_endl(); + } } else { diff --git a/clients/common/lapack/testing_sygvdx_hegvdx.hpp b/clients/common/lapack/testing_sygvdx_hegvdx.hpp index 40075077e..690e2f9c8 100644 --- a/clients/common/lapack/testing_sygvdx_hegvdx.hpp +++ b/clients/common/lapack/testing_sygvdx_hegvdx.hpp @@ -376,7 +376,11 @@ void sygvdx_hegvdx_getError(const rocblas_handle handle, Vh& hInfo, Vh& hInfoRes, double* max_err, - const bool singular) + const bool singular, + size_t& hashA, + size_t& hashB, + size_t& hashW, + size_t& hashZ) { constexpr bool COMPLEX = rocblas_is_complex; @@ -395,6 +399,10 @@ void sygvdx_hegvdx_getError(const rocblas_handle handle, sygvdx_hegvdx_initData(handle, itype, evect, n, dA, lda, stA, dB, ldb, stB, bc, hA, hB, A, B, true, singular); + // hash inputs + hashA = deterministic_hash(hA, bc); + hashB = deterministic_hash(hB, bc); + // execute computations // GPU lapack CHECK_ROCBLAS_ERROR(rocsolver_sygvdx_hegvdx( @@ -407,6 +415,11 @@ void sygvdx_hegvdx_getError(const rocblas_handle handle, if(evect != rocblas_evect_none) CHECK_HIP_ERROR(hZRes.transfer_from(dZ)); + // hash outputs + hashW = deterministic_hash(hWRes, bc); + if(evect != rocblas_evect_none) + hashZ = deterministic_hash(hZRes, bc); + // CPU lapack // abstol = 0 ensures max accuracy in rocsolver; for lapack we should use 2*safemin S atol = 2 * get_safemin(); @@ -631,8 +644,8 @@ void testing_sygvdx_hegvdx(Arguments& argus) rocblas_int bc = argus.batch_count; rocblas_int hot_calls = argus.iters; - rocblas_stride stWRes = (argus.unit_check || argus.norm_check) ? stW : 0; - rocblas_stride stZRes = (argus.unit_check || argus.norm_check) ? stZ : 0; + rocblas_stride stWRes = (argus.unit_check || argus.norm_check || argus.hash_check) ? stW : 0; + rocblas_stride stZRes = (argus.unit_check || argus.norm_check || argus.hash_check) ? 
stZ : 0; // check non-supported values if(uplo == rocblas_fill_full || evect == rocblas_evect_tridiagonal) @@ -664,9 +677,10 @@ void testing_sygvdx_hegvdx(Arguments& argus) size_t size_W = size_t(n); size_t size_Z = size_t(ldz) * n; double max_error = 0, gpu_time_used = 0, cpu_time_used = 0; + size_t hashA = 0, hashB = 0, hashW = 0, hashZ = 0; - size_t size_WRes = (argus.unit_check || argus.norm_check) ? size_W : 0; - size_t size_ZRes = (argus.unit_check || argus.norm_check) ? size_Z : 0; + size_t size_WRes = (argus.unit_check || argus.norm_check || argus.hash_check) ? size_W : 0; + size_t size_ZRes = (argus.unit_check || argus.norm_check || argus.hash_check) ? size_Z : 0; // check invalid sizes bool invalid_size = (n < 0 || lda < n || ldb < n || (evect != rocblas_evect_none && ldz < n) @@ -771,11 +785,11 @@ void testing_sygvdx_hegvdx(Arguments& argus) } // check computations - if(argus.unit_check || argus.norm_check) - sygvdx_hegvdx_getError(handle, itype, evect, erange, uplo, n, dA, lda, stA, - dB, ldb, stB, vl, vu, il, iu, dNev, dW, stW, dZ, ldz, - stZ, dInfo, bc, hA, hB, hNev, hNevRes, hW, hWRes, hZ, - hZRes, hInfo, hInfoRes, &max_error, argus.singular); + if(argus.unit_check || argus.norm_check || argus.hash_check) + sygvdx_hegvdx_getError( + handle, itype, evect, erange, uplo, n, dA, lda, stA, dB, ldb, stB, vl, vu, il, iu, + dNev, dW, stW, dZ, ldz, stZ, dInfo, bc, hA, hB, hNev, hNevRes, hW, hWRes, hZ, hZRes, + hInfo, hInfoRes, &max_error, argus.singular, hashA, hashB, hashW, hashZ); // collect performance data if(argus.timing) @@ -818,11 +832,11 @@ void testing_sygvdx_hegvdx(Arguments& argus) } // check computations - if(argus.unit_check || argus.norm_check) - sygvdx_hegvdx_getError(handle, itype, evect, erange, uplo, n, dA, lda, stA, - dB, ldb, stB, vl, vu, il, iu, dNev, dW, stW, dZ, ldz, - stZ, dInfo, bc, hA, hB, hNev, hNevRes, hW, hWRes, hZ, - hZRes, hInfo, hInfoRes, &max_error, argus.singular); + if(argus.unit_check || argus.norm_check || 
argus.hash_check) + sygvdx_hegvdx_getError( + handle, itype, evect, erange, uplo, n, dA, lda, stA, dB, ldb, stB, vl, vu, il, iu, + dNev, dW, stW, dZ, ldz, stZ, dInfo, bc, hA, hB, hNev, hNevRes, hW, hWRes, hZ, hZRes, + hInfo, hInfoRes, &max_error, argus.singular, hashA, hashB, hashW, hashZ); // collect performance data if(argus.timing) @@ -878,6 +892,13 @@ void testing_sygvdx_hegvdx(Arguments& argus) rocsolver_bench_output(cpu_time_used, gpu_time_used); } rocsolver_bench_endl(); + if(argus.hash_check) + { + rocsolver_bench_output("hash(A)", "hash(B)", "hash(W)", "hash(Z)"); + rocsolver_bench_output(ROCSOLVER_FORMAT_HASH(hashA), ROCSOLVER_FORMAT_HASH(hashB), + ROCSOLVER_FORMAT_HASH(hashW), ROCSOLVER_FORMAT_HASH(hashZ)); + rocsolver_bench_endl(); + } } else { diff --git a/clients/common/lapack/testing_sygvx_hegvx.hpp b/clients/common/lapack/testing_sygvx_hegvx.hpp index 031fe1f12..877626f6d 100644 --- a/clients/common/lapack/testing_sygvx_hegvx.hpp +++ b/clients/common/lapack/testing_sygvx_hegvx.hpp @@ -406,7 +406,11 @@ void sygvx_hegvx_getError(const rocblas_handle handle, Vh& hInfo, Vh& hInfoRes, double* max_err, - const bool singular) + const bool singular, + size_t& hashA, + size_t& hashB, + size_t& hashW, + size_t& hashZ) { constexpr bool COMPLEX = rocblas_is_complex; @@ -424,6 +428,10 @@ void sygvx_hegvx_getError(const rocblas_handle handle, sygvx_hegvx_initData(handle, itype, evect, n, dA, lda, stA, dB, ldb, stB, bc, hA, hB, A, B, true, singular); + // hash inputs + hashA = deterministic_hash(hA, bc); + hashB = deterministic_hash(hB, bc); + // execute computations // GPU lapack CHECK_ROCBLAS_ERROR(rocsolver_sygvx_hegvx(STRIDED, handle, itype, evect, erange, uplo, n, @@ -440,6 +448,11 @@ void sygvx_hegvx_getError(const rocblas_handle handle, CHECK_HIP_ERROR(hIfailRes.transfer_from(dIfail)); } + // hash outputs + hashW = deterministic_hash(hWRes, bc); + if(evect != rocblas_evect_none) + hashZ = deterministic_hash(hZRes, bc); + // CPU lapack // abstol = 0 ensures 
max accuracy in rocsolver; for lapack we should use 2*safemin S atol = (abstol == 0) ? 2 * get_safemin() : abstol; @@ -715,9 +728,9 @@ void testing_sygvx_hegvx(Arguments& argus) rocblas_int bc = argus.batch_count; rocblas_int hot_calls = argus.iters; - rocblas_stride stWRes = (argus.unit_check || argus.norm_check) ? stW : 0; - rocblas_stride stZRes = (argus.unit_check || argus.norm_check) ? stZ : 0; - rocblas_stride stFRes = (argus.unit_check || argus.norm_check) ? stF : 0; + rocblas_stride stWRes = (argus.unit_check || argus.norm_check || argus.hash_check) ? stW : 0; + rocblas_stride stZRes = (argus.unit_check || argus.norm_check || argus.hash_check) ? stZ : 0; + rocblas_stride stFRes = (argus.unit_check || argus.norm_check || argus.hash_check) ? stF : 0; // check non-supported values if(uplo == rocblas_fill_full || evect == rocblas_evect_tridiagonal) @@ -751,10 +764,12 @@ void testing_sygvx_hegvx(Arguments& argus) size_t size_Z = size_t(ldz) * n; size_t size_ifail = size_W; double max_error = 0, gpu_time_used = 0, cpu_time_used = 0; + size_t hashA = 0, hashB = 0, hashW = 0, hashZ = 0; - size_t size_WRes = (argus.unit_check || argus.norm_check) ? size_W : 0; - size_t size_ZRes = (argus.unit_check || argus.norm_check) ? size_Z : 0; - size_t size_ifailRes = (argus.unit_check || argus.norm_check) ? size_ifail : 0; + size_t size_WRes = (argus.unit_check || argus.norm_check || argus.hash_check) ? size_W : 0; + size_t size_ZRes = (argus.unit_check || argus.norm_check || argus.hash_check) ? size_Z : 0; + size_t size_ifailRes + = (argus.unit_check || argus.norm_check || argus.hash_check) ? 
size_ifail : 0; // check invalid sizes bool invalid_size = (n < 0 || lda < n || ldb < n || (evect != rocblas_evect_none && ldz < n) @@ -867,12 +882,12 @@ void testing_sygvx_hegvx(Arguments& argus) } // check computations - if(argus.unit_check || argus.norm_check) - sygvx_hegvx_getError(handle, itype, evect, erange, uplo, n, dA, lda, stA, - dB, ldb, stB, vl, vu, il, iu, abstol, dNev, dW, stW, - dZ, ldz, stZ, dIfail, stF, dInfo, bc, hA, hB, hNev, - hNevRes, hW, hWRes, hZ, hZRes, hIfail, hIfailRes, - hInfo, hInfoRes, &max_error, argus.singular); + if(argus.unit_check || argus.norm_check || argus.hash_check) + sygvx_hegvx_getError( + handle, itype, evect, erange, uplo, n, dA, lda, stA, dB, ldb, stB, vl, vu, il, iu, + abstol, dNev, dW, stW, dZ, ldz, stZ, dIfail, stF, dInfo, bc, hA, hB, hNev, hNevRes, + hW, hWRes, hZ, hZRes, hIfail, hIfailRes, hInfo, hInfoRes, &max_error, + argus.singular, hashA, hashB, hashW, hashZ); // collect performance data if(argus.timing) @@ -916,12 +931,12 @@ void testing_sygvx_hegvx(Arguments& argus) } // check computations - if(argus.unit_check || argus.norm_check) - sygvx_hegvx_getError(handle, itype, evect, erange, uplo, n, dA, lda, stA, - dB, ldb, stB, vl, vu, il, iu, abstol, dNev, dW, stW, - dZ, ldz, stZ, dIfail, stF, dInfo, bc, hA, hB, hNev, - hNevRes, hW, hWRes, hZ, hZRes, hIfail, hIfailRes, - hInfo, hInfoRes, &max_error, argus.singular); + if(argus.unit_check || argus.norm_check || argus.hash_check) + sygvx_hegvx_getError( + handle, itype, evect, erange, uplo, n, dA, lda, stA, dB, ldb, stB, vl, vu, il, iu, + abstol, dNev, dW, stW, dZ, ldz, stZ, dIfail, stF, dInfo, bc, hA, hB, hNev, hNevRes, + hW, hWRes, hZ, hZRes, hIfail, hIfailRes, hInfo, hInfoRes, &max_error, + argus.singular, hashA, hashB, hashW, hashZ); // collect performance data if(argus.timing) @@ -978,6 +993,13 @@ void testing_sygvx_hegvx(Arguments& argus) rocsolver_bench_output(cpu_time_used, gpu_time_used); } rocsolver_bench_endl(); + if(argus.hash_check) + { + 
rocsolver_bench_output("hash(A)", "hash(B)", "hash(W)", "hash(Z)"); + rocsolver_bench_output(ROCSOLVER_FORMAT_HASH(hashA), ROCSOLVER_FORMAT_HASH(hashB), + ROCSOLVER_FORMAT_HASH(hashW), ROCSOLVER_FORMAT_HASH(hashZ)); + rocsolver_bench_endl(); + } } else { diff --git a/clients/common/misc/rocsolver_arguments.hpp b/clients/common/misc/rocsolver_arguments.hpp index a8ffd9e73..b895efdd5 100644 --- a/clients/common/misc/rocsolver_arguments.hpp +++ b/clients/common/misc/rocsolver_arguments.hpp @@ -50,6 +50,7 @@ class Arguments : private std::map // test options rocblas_int norm_check = 0; rocblas_int unit_check = 1; + rocblas_int hash_check = 0; rocblas_int timing = 0; rocblas_int perf = 0; rocblas_int singular = 0; @@ -120,6 +121,7 @@ class Arguments : private std::map to_consume.erase("perf"); to_consume.erase("singular"); to_consume.erase("device"); + to_consume.erase("hash"); } void clear() diff --git a/clients/common/misc/rocsolver_test.hpp b/clients/common/misc/rocsolver_test.hpp index e97ff794d..75713926a 100644 --- a/clients/common/misc/rocsolver_test.hpp +++ b/clients/common/misc/rocsolver_test.hpp @@ -1,5 +1,5 @@ /* ************************************************************************** - * Copyright (C) 2018-2023 Advanced Micro Devices, Inc. All rights reserved. + * Copyright (C) 2018-2024 Advanced Micro Devices, Inc. All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -56,6 +56,8 @@ namespace fs = std::experimental::filesystem; #define ROCSOLVER_TEST_CHECK(T, max_error, tol) #endif +#define ROCSOLVER_FORMAT_HASH(h) fmt::format("0x{:x}", h) + typedef enum rocsolver_inform_type_ { inform_quick_return, @@ -158,3 +160,70 @@ inline std::ostream& operator<<(std::ostream& os, printable_char x) // location of the sparse data directory for the re-factorization tests fs::path get_sparse_data_dir(); + +/// Combines `seed` with the hash of `value`, following the spirit of +/// `boost::hash_combine`. +/// +/// Extends `std::hash` to combine the hashes of multiple values (e.g., +/// from an array). Complex values mix in the real part first, then the imaginary part. +/// +/// Attention: hash_combine(0, T(0)) != 0 +template +std::size_t hash_combine(std::size_t seed, T value) +{ + using S = decltype(std::real(T{})); + auto hasher = std::hash(); + + if constexpr(rocblas_is_complex) + { + seed ^= hasher(std::real(value)) + 0x9e3779b9 + (seed << 6) + (seed >> 2); + seed ^= hasher(std::imag(value)) + 0x9e3779b9 + (seed << 6) + (seed >> 2); + } + else + { + seed ^= hasher(value) + 0x9e3779b9 + (seed << 6) + (seed >> 2); + } + + return seed; +} + +/// Hash contents of the given array. 
+/// +/// Returns 0 (for any seed) if array == nullptr or array_size == 0, i.e. hash_combine(seed, array, 0) == 0 +template +std::size_t hash_combine(std::size_t seed, T const* array, std::size_t array_size) +{ + if(array == nullptr) + return (std::size_t)0; + std::size_t hash = 0; + if(array_size > 0) + { + hash = hash_combine(seed, array_size); + for(std::size_t i = 0; i < array_size; ++i) + { + hash = hash_combine(hash, array[i]); + } + } + + return hash; +} + +#define ROCSOLVER_DETERMINISTIC_HASH_SEED ((std::size_t)1) + +/// Wrapper for hash_combine: hashes `vector.size()` elements starting at `vector.data()` +template +std::size_t deterministic_hash(const T& vector) +{ + return hash_combine(ROCSOLVER_DETERMINISTIC_HASH_SEED, vector.data(), vector.size()); +} + +template +std::size_t deterministic_hash(const T& vector, std::size_t bc) +{ + std::size_t hash = ROCSOLVER_DETERMINISTIC_HASH_SEED; + for(std::size_t b = 0; b < bc; b++) + { + hash = hash_combine(hash, vector[b], vector.n() * std::abs(vector.inc())); + } + return hash; +}