Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Tpetra: CrsMatrix getLocalDiagCopy hang, see issue #13498 #13575

Open
wants to merge 2 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 26 additions & 32 deletions packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3271,22 +3271,22 @@ CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
"a view with global column indices by calling getGlobalRowCopy().");

const RowInfo rowInfo = staticGraph_->getRowInfo (localRow);
if (rowInfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid () &&
rowInfo.numEntries > 0) {
indices = staticGraph_->lclIndsUnpacked_wdv.getHostSubview(
rowInfo.offset1D,
rowInfo.numEntries,
Access::ReadOnly);
values = valuesUnpacked_wdv.getHostSubview(rowInfo.offset1D,
rowInfo.numEntries,
Access::ReadOnly);
}
else {
// This does the right thing (reports an empty row) if the input
// row is invalid.
indices = local_inds_host_view_type();
values = values_host_view_type();
}
// if (rowInfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid () &&
// rowInfo.numEntries > 0) {
// indices = staticGraph_->lclIndsUnpacked_wdv.getHostSubview(
// rowInfo.offset1D,
// rowInfo.numEntries,
// Access::ReadOnly);
// values = valuesUnpacked_wdv.getHostSubview(rowInfo.offset1D,
// rowInfo.numEntries,
// Access::ReadOnly);
// }
// else {
// // This does the right thing (reports an empty row) if the input
// // row is invalid.
// indices = local_inds_host_view_type();
// values = values_host_view_type();
// }

lucbv marked this conversation as resolved.
Show resolved Hide resolved
#ifdef HAVE_TPETRA_DEBUG
const char suffix[] = ". This should never happen. Please report this "
Expand Down Expand Up @@ -3602,23 +3602,17 @@ CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
"diag.getMap ()->isCompatible (A.getRowMap ());");
#endif // HAVE_TPETRA_DEBUG

if (this->isFillComplete ()) {
const auto D_lcl = diag.getLocalViewDevice(Access::OverwriteAll);
// 1-D subview of the first (and only) column of D_lcl.
const auto D_lcl_1d =
Kokkos::subview (D_lcl, Kokkos::make_pair (LO (0), myNumRows), 0);
const auto D_lcl = diag.getLocalViewDevice(Access::OverwriteAll);
// 1-D subview of the first (and only) column of D_lcl.
const auto D_lcl_1d =
Kokkos::subview (D_lcl, Kokkos::make_pair (LO (0), myNumRows), 0);

const auto lclRowMap = rowMap.getLocalMap ();
const auto lclColMap = colMap.getLocalMap ();
using ::Tpetra::Details::getDiagCopyWithoutOffsets;
(void) getDiagCopyWithoutOffsets (D_lcl_1d, lclRowMap,
lclColMap,
getLocalMatrixDevice ());
}
else {
using ::Tpetra::Details::getLocalDiagCopyWithoutOffsetsNotFillComplete;
(void) getLocalDiagCopyWithoutOffsetsNotFillComplete (diag, *this);
}
const auto lclRowMap = rowMap.getLocalMap ();
const auto lclColMap = colMap.getLocalMap ();
using ::Tpetra::Details::getDiagCopyWithoutOffsets;
(void) getDiagCopyWithoutOffsets (D_lcl_1d, lclRowMap,
lclColMap,
getLocalMatrixDevice ());
}

template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,23 +38,23 @@ namespace Details {
template<class SC, class LO, class GO, class NT>
class GetLocalDiagCopyWithoutOffsetsNotFillCompleteFunctor {
public:
typedef ::Tpetra::RowMatrix<SC, LO, GO, NT> row_matrix_type;
typedef ::Tpetra::Vector<SC, LO, GO, NT> vec_type;
using row_matrix_type = ::Tpetra::RowMatrix<SC, LO, GO, NT>;
using vec_type = ::Tpetra::Vector<SC, LO, GO, NT>;

typedef typename vec_type::impl_scalar_type IST;
using IST = typename vec_type::impl_scalar_type;
// The output Vector determines the execution space.

using host_execution_space = typename vec_type::dual_view_type::t_host::execution_space;
private:
typedef typename vec_type::dual_view_type::t_host::execution_space host_execution_space;
typedef typename vec_type::map_type map_type;
using map_type = typename vec_type::map_type;

static bool
graphIsSorted (const row_matrix_type& A)
{
using Teuchos::RCP;
using Teuchos::rcp_dynamic_cast;
typedef Tpetra::CrsGraph<LO, GO, NT> crs_graph_type;
typedef Tpetra::RowGraph<LO, GO, NT> row_graph_type;
using crs_graph_type = Tpetra::CrsGraph<LO, GO, NT>;
using row_graph_type = Tpetra::RowGraph<LO, GO, NT>;

// We conservatively assume not sorted. RowGraph lacks an
// "isSorted" predicate, so we can't know for sure unless the cast
Expand Down Expand Up @@ -108,6 +108,7 @@ class GetLocalDiagCopyWithoutOffsetsNotFillCompleteFunctor {

void operator () (const LO& lclRowInd, LO& errCount) const {
using KokkosSparse::findRelOffset;
Kokkos::printf("Not hanging yet [0]\n");
lucbv marked this conversation as resolved.
Show resolved Hide resolved

lucbv marked this conversation as resolved.
Show resolved Hide resolved
D_lcl_1d_(lclRowInd) = Kokkos::ArithTraits<IST>::zero ();
const GO gblInd = lclRowMap_.getGlobalElement (lclRowInd);
Expand All @@ -119,19 +120,23 @@ class GetLocalDiagCopyWithoutOffsetsNotFillCompleteFunctor {
else { // row index is also in the column Map on this process
typename row_matrix_type::local_inds_host_view_type lclColInds;
typename row_matrix_type::values_host_view_type curVals;
Kokkos::printf("Not hanging yet [1]\n");
A_.getLocalRowView(lclRowInd, lclColInds, curVals);
Kokkos::printf("Not hanging yet [2]\n");
lucbv marked this conversation as resolved.
Show resolved Hide resolved
LO numEnt = lclColInds.extent(0);
// The search hint is always zero, since we only call this
// once per row of the matrix.
const LO hint = 0;
const LO offset =
findRelOffset (lclColInds, numEnt, lclColInd, hint, sorted_);
Kokkos::printf("Not hanging yet [3]\n");
if (offset == numEnt) { // didn't find the diagonal column index
errCount++;
}
else {
D_lcl_1d_(lclRowInd) = curVals[offset];
}
Kokkos::printf("Not hanging yet [4]\n");
}
}

Expand All @@ -155,8 +160,7 @@ getLocalDiagCopyWithoutOffsetsNotFillComplete ( ::Tpetra::Vector<SC, LO, GO, NT>
using Teuchos::outArg;
using Teuchos::REDUCE_MIN;
using Teuchos::reduceAll;
typedef GetLocalDiagCopyWithoutOffsetsNotFillCompleteFunctor<SC,
LO, GO, NT> functor_type;
using functor_type = GetLocalDiagCopyWithoutOffsetsNotFillCompleteFunctor<SC, LO, GO, NT>;

// The functor's constructor does error checking and executes the
// thread-parallel kernel.
Expand Down Expand Up @@ -212,6 +216,8 @@ getLocalDiagCopyWithoutOffsetsNotFillComplete ( ::Tpetra::Vector<SC, LO, GO, NT>
}
}
else { // ! debug
Kokkos::printf("\n\n\nRunning on Kokkos::Serial: %s\n", std::is_same_v<typename functor_type::host_execution_space, Kokkos::Serial> ? "true" : "false");

lucbv marked this conversation as resolved.
Show resolved Hide resolved
functor_type functor (lclNumErrs, diag, A);
}

Expand Down
9 changes: 9 additions & 0 deletions packages/tpetra/core/test/CrsMatrix/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -524,6 +524,15 @@ if (
)
endif()

TRIBITS_ADD_EXECUTABLE_AND_TEST(
CrsMatrix_getLocalDiagCopy
SOURCES
CrsMatrix_getLocalDiagCopy.cpp
${TEUCHOS_STD_UNIT_TEST_MAIN}
COMM mpi
STANDARD_PASS_OUTPUT
)

SET(TIMING_INSTALLS "")

INSTALL(TARGETS ${TIMING_INSTALLS}
Expand Down
112 changes: 112 additions & 0 deletions packages/tpetra/core/test/CrsMatrix/CrsMatrix_getLocalDiagCopy.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
// @HEADER
// *****************************************************************************
// Tpetra: Templated Linear Algebra Services Package
//
// Copyright 2008 NTESS and the Tpetra contributors.
// SPDX-License-Identifier: BSD-3-Clause
// *****************************************************************************
// @HEADER

#include <Teuchos_UnitTestHarness.hpp>
#include <TpetraCore_ETIHelperMacros.h>

#include <Tpetra_Core.hpp>
#include <Tpetra_Map.hpp>
#include <Tpetra_Vector.hpp>
#include <Tpetra_CrsMatrix.hpp>

namespace { // (anonymous)

template <typename Scalar, typename LO, typename GO, typename Node, int Tag>
void getLocalDiagCopyTest(Teuchos::FancyOStream& out, bool& success) {
using Teuchos::RCP;

using map_type = Tpetra::Map<LO, GO, Node>;
using vec_type = Tpetra::Vector<Scalar, LO, GO, Node>;
using crs_matrix_type = Tpetra::CrsMatrix<Scalar, LO, GO, Node>;
using row_matrix_type = Tpetra::RowMatrix<Scalar, LO, GO, Node>;

using STS = Teuchos::ScalarTraits<Scalar>;
using MT = typename STS::magnitudeType;
const Scalar SC_ONE = STS::one();

using LOT = Teuchos::OrdinalTraits<LO>;
const LO LO_INVALID = LOT::invalid();
const LO LO_ONE = LOT::one();
const GO GO_ONE = Teuchos::OrdinalTraits<GO>::one();

int lclSuccess = 0;
int gblSuccess = 0;

RCP<const Teuchos::Comm<int> > comm = Tpetra::getDefaultComm ();
const size_t numProc = comm->getSize();
const size_t myProc = comm->getRank();

// create a Map
RCP<const map_type> map = Tpetra::createContigMapWithNode<LO,GO,Node> (LO_INVALID,
LO_ONE + LO_ONE,
comm);

// Create a matrix with at most 3 entries per row
RCP<crs_matrix_type> matrix = Teuchos::rcp (new crs_matrix_type (map, 3));
const Scalar rankAsScalar = static_cast<Scalar>(static_cast<MT>(comm->getRank()));

Teuchos::Array<Scalar> vals = {{SC_ONE, rankAsScalar + SC_ONE, SC_ONE}};
for(size_t lclRowIdx = 0; lclRowIdx < 2; ++lclRowIdx) {
const GO gblRowIdx = Teuchos::as<GO>(2*myProc + lclRowIdx);
Teuchos::Array<GO> cols = {{gblRowIdx - GO_ONE, gblRowIdx, gblRowIdx + GO_ONE}};

if((myProc == 0) && (lclRowIdx == 0)) { // First row of the matrix
matrix->insertGlobalValues(gblRowIdx, cols(1, 2), vals(1, 2));
} else if((myProc == numProc - 1) && (lclRowIdx == 1)) { // Last row of the matrix
matrix->insertGlobalValues(gblRowIdx, cols(0, 2), vals(0, 2));
} else {
matrix->insertGlobalValues(gblRowIdx, cols(), vals());
}
}

matrix->fillComplete();

// Make sure that all processes got this far.
{
lclSuccess = success ? 1 : 0;
gblSuccess = 0;
Teuchos::reduceAll<int, int> (*comm, Teuchos::REDUCE_MIN, lclSuccess, Teuchos::outArg (gblSuccess));
success = success && (gblSuccess == 1);
TEST_EQUALITY_CONST( gblSuccess, 1 );
}

RCP<vec_type> diag = Teuchos::rcp(new vec_type(map));
diag->putScalar(-1);

if constexpr (Tag == 0) {
matrix->resumeFill();
}
matrix->getLocalDiagCopy(*diag);
}

// Unit test of getLocalDiagCopy
TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL( CrsMatrix, getLocalDiagCopy, Scalar, LO, GO, Node )
{
getLocalDiagCopyTest<Scalar, LO, GO, Node, 1>(out, success);
}

// Unit test of getLocalDiagCopy
TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL( CrsMatrix, getLocalDiagCopyFillActive, Scalar, LO, GO, Node )
{
getLocalDiagCopyTest<Scalar, LO, GO, Node, 0>(out, success);
}

//
// INSTANTIATIONS
//

#define UNIT_TEST_GROUP( SCALAR, LO, GO, NODE ) \
TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT( CrsMatrix, getLocalDiagCopy, SCALAR, LO, GO, NODE ) \
TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT( CrsMatrix, getLocalDiagCopyFillActive, SCALAR, LO, GO, NODE )

TPETRA_ETI_MANGLING_TYPEDEFS()

TPETRA_INSTANTIATE_SLGN( UNIT_TEST_GROUP )

} // namespace (anonymous)
Loading