Skip to content

Commit

Permalink
Tpetra: revert #13491
Browse files Browse the repository at this point in the history
Until we can diagnose segfaults reported wrt PR #13598.

Signed-off-by: Jonathan Hu <[email protected]>
  • Loading branch information
jhux2 committed Nov 20, 2024
1 parent 0dd826f commit 4b7b350
Show file tree
Hide file tree
Showing 3 changed files with 74 additions and 757 deletions.
90 changes: 67 additions & 23 deletions packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,6 @@
#include "KokkosBlas1_scal.hpp"
#include "KokkosSparse_getDiagCopy.hpp"
#include "KokkosSparse_spmv.hpp"
#include "Kokkos_StdAlgorithms.hpp"

#include <memory>
#include <sstream>
Expand Down Expand Up @@ -8302,43 +8301,59 @@ CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
<< std::endl;
std::cerr << os.str ();
}
destMat->numExportPacketsPerLID_.sync_device();
auto numExportPacketsPerLID = destMat->numExportPacketsPerLID_.view_device();
auto numImportPacketsPerLID = destMat->numImportPacketsPerLID_.view_device();
// Make sure that host has the latest version, since we're
// using the version on host. If host has the latest
// version, syncing to host does nothing.
destMat->numExportPacketsPerLID_.sync_host ();
Teuchos::ArrayView<const size_t> numExportPacketsPerLID =
getArrayViewFromDualView (destMat->numExportPacketsPerLID_);
destMat->numImportPacketsPerLID_.sync_host ();
Teuchos::ArrayView<size_t> numImportPacketsPerLID =
getArrayViewFromDualView (destMat->numImportPacketsPerLID_);

if (verbose) {
std::ostringstream os;
os << *verbosePrefix << "Calling 3-arg doReversePostsAndWaits"
<< std::endl;
std::cerr << os.str ();
}
Distor.doReversePostsAndWaits(numExportPacketsPerLID, 1, numImportPacketsPerLID);
Distor.doReversePostsAndWaits(destMat->numExportPacketsPerLID_.view_host(), 1,
destMat->numImportPacketsPerLID_.view_host());
if (verbose) {
std::ostringstream os;
os << *verbosePrefix << "Finished 3-arg doReversePostsAndWaits"
<< std::endl;
std::cerr << os.str ();
}

size_t totalImportPackets = Kokkos::Experimental::reduce(typename Node::execution_space(), numImportPacketsPerLID);
size_t totalImportPackets = 0;
for (Array_size_type i = 0; i < numImportPacketsPerLID.size (); ++i) {
totalImportPackets += numImportPacketsPerLID[i];
}

// Reallocation MUST go before setting the modified flag,
// because it may clear out the flags.
destMat->reallocImportsIfNeeded (totalImportPackets, verbose,
verbosePrefix.get ());
destMat->imports_.modify_host ();
auto deviceImports = destMat->imports_.view_device();
auto deviceExports = destMat->exports_.view_device();
auto hostImports = destMat->imports_.view_host();
// This is a legacy host pack/unpack path, so use the host
// version of exports_.
destMat->exports_.sync_host ();
auto hostExports = destMat->exports_.view_host();
if (verbose) {
std::ostringstream os;
os << *verbosePrefix << "Calling 4-arg doReversePostsAndWaitsKokkos"
os << *verbosePrefix << "Calling 4-arg doReversePostsAndWaits"
<< std::endl;
std::cerr << os.str ();
}
destMat->imports_.sync_device();
Distor.doReversePostsAndWaitsKokkos (deviceExports, numExportPacketsPerLID, deviceImports, numImportPacketsPerLID);
Distor.doReversePostsAndWaits (hostExports,
numExportPacketsPerLID,
hostImports,
numImportPacketsPerLID);
if (verbose) {
std::ostringstream os;
os << *verbosePrefix << "Finished 4-arg doReversePostsAndWaitsKokkos"
os << *verbosePrefix << "Finished 4-arg doReversePostsAndWaits"
<< std::endl;
std::cerr << os.str ();
}
Expand Down Expand Up @@ -8381,43 +8396,58 @@ CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
<< std::endl;
std::cerr << os.str ();
}
destMat->numExportPacketsPerLID_.sync_device ();
auto numExportPacketsPerLID = destMat->numExportPacketsPerLID_.view_device();
auto numImportPacketsPerLID = destMat->numImportPacketsPerLID_.view_device();
// Make sure that host has the latest version, since we're
// using the version on host. If host has the latest
// version, syncing to host does nothing.
destMat->numExportPacketsPerLID_.sync_host ();
Teuchos::ArrayView<const size_t> numExportPacketsPerLID =
getArrayViewFromDualView (destMat->numExportPacketsPerLID_);
destMat->numImportPacketsPerLID_.sync_host ();
Teuchos::ArrayView<size_t> numImportPacketsPerLID =
getArrayViewFromDualView (destMat->numImportPacketsPerLID_);
if (verbose) {
std::ostringstream os;
os << *verbosePrefix << "Calling 3-arg doPostsAndWaits"
<< std::endl;
std::cerr << os.str ();
}
Distor.doPostsAndWaits(numExportPacketsPerLID, 1, numImportPacketsPerLID);
Distor.doPostsAndWaits(destMat->numExportPacketsPerLID_.view_host(), 1,
destMat->numImportPacketsPerLID_.view_host());
if (verbose) {
std::ostringstream os;
os << *verbosePrefix << "Finished 3-arg doPostsAndWaits"
<< std::endl;
std::cerr << os.str ();
}

size_t totalImportPackets = Kokkos::Experimental::reduce(typename Node::execution_space(), numImportPacketsPerLID);
size_t totalImportPackets = 0;
for (Array_size_type i = 0; i < numImportPacketsPerLID.size (); ++i) {
totalImportPackets += numImportPacketsPerLID[i];
}

// Reallocation MUST go before setting the modified flag,
// because it may clear out the flags.
destMat->reallocImportsIfNeeded (totalImportPackets, verbose,
verbosePrefix.get ());
destMat->imports_.modify_host ();
auto deviceImports = destMat->imports_.view_device();
auto deviceExports = destMat->exports_.view_device();
auto hostImports = destMat->imports_.view_host();
// This is a legacy host pack/unpack path, so use the host
// version of exports_.
destMat->exports_.sync_host ();
auto hostExports = destMat->exports_.view_host();
if (verbose) {
std::ostringstream os;
os << *verbosePrefix << "Calling 4-arg doPostsAndWaitsKokkos"
os << *verbosePrefix << "Calling 4-arg doPostsAndWaits"
<< std::endl;
std::cerr << os.str ();
}
destMat->imports_.sync_device ();
Distor.doPostsAndWaitsKokkos (deviceExports, numExportPacketsPerLID, deviceImports, numImportPacketsPerLID);
Distor.doPostsAndWaits (hostExports,
numExportPacketsPerLID,
hostImports,
numImportPacketsPerLID);
if (verbose) {
std::ostringstream os;
os << *verbosePrefix << "Finished 4-arg doPostsAndWaitsKokkos"
os << *verbosePrefix << "Finished 4-arg doPostsAndWaits"
<< std::endl;
std::cerr << os.str ();
}
Expand Down Expand Up @@ -8464,6 +8494,12 @@ CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
Teuchos::Array<int> RemotePids;
if (runOnHost) {
Teuchos::Array<int> TargetPids;
// Backwards compatibility measure. We'll use this again below.

// TODO JHU Need to track down why numImportPacketsPerLID_ has not been correctly marked as modified on host (even though it has been modified there)
// TODO JHU somewhere above, e.g., call to Distor.doPostsAndWaits().
// TODO JHU This only becomes apparent as we begin to convert TAFC to run on device.
destMat->numImportPacketsPerLID_.modify_host(); //FIXME

# ifdef HAVE_TPETRA_MMM_TIMINGS
RCP<TimeMonitor> tmCopySPRdata = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("TAFC unpack-count-resize + copy same-perm-remote data"))));
Expand Down Expand Up @@ -8655,6 +8691,14 @@ CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
} else {
// run on device


// Backwards compatibility measure. We'll use this again below.

// TODO JHU Need to track down why numImportPacketsPerLID_ has not been correctly marked as modified on host (even though it has been modified there)
// TODO JHU somewhere above, e.g., call to Distor.doPostsAndWaits().
// TODO JHU This only becomes apparent as we begin to convert TAFC to run on device.
destMat->numImportPacketsPerLID_.modify_host(); //FIXME

# ifdef HAVE_TPETRA_MMM_TIMINGS
RCP<TimeMonitor> tmCopySPRdata = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("TAFC unpack-count-resize + copy same-perm-remote data"))));
# endif
Expand Down
Loading

0 comments on commit 4b7b350

Please sign in to comment.