Skip to content

Commit

Permalink
MueLu: Fixing Issue trilinos#13377 and trilinos#13378
Browse files Browse the repository at this point in the history
Issues listed above have been addressed.
Threshold has been redefined to 1/threshold.
Unit tests have been modified to be more thorough.

Signed-off-by: Ian Halim <[email protected]>
  • Loading branch information
Ian Halim committed Oct 1, 2024
1 parent 4dff65a commit ea245d1
Show file tree
Hide file tree
Showing 2 changed files with 180 additions and 44 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -591,13 +591,27 @@ void CoalesceDropFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(Level

//move from host to device
auto ghostedDiagValsView = Kokkos::subview(ghostedDiag->getDeviceLocalView(Xpetra::Access::ReadOnly), Kokkos::ALL(), 0);
auto boundaryNodesDevice = Kokkos::create_mirror_view_and_copy(ExecSpace(), boundaryNodes);
auto thresholdKokkos = static_cast<impl_scalar_type>(threshold);
auto realThresholdKokkos = implATS::magnitude(thresholdKokkos);
auto columnsDevice = Kokkos::create_mirror_view(ExecSpace(), columns);

auto At = Utilities::Op2TpetraCrs(A);
auto A_device = At->getLocalMatrixDevice();
auto A_device = A->getLocalMatrixDevice();
RCP<LWGraph> graph = rcp(new LWGraph(A->getCrsGraph(), "graph of A"));
RCP<const Import> importer = A->getCrsGraph()->getImporter();
RCP<LocalOrdinalVector> boundaryNodesVector = Xpetra::VectorFactory<LO, LO, GO, NO>::Build(graph->GetDomainMap());
RCP<LocalOrdinalVector> boundaryColumnVector;
for(size_t i = 0; i < graph->GetNodeNumVertices(); i++) {
boundaryNodesVector->getDataNonConst(0)[i] = boundaryNodes[i];
}
if(!importer.is_null()) {
boundaryColumnVector = Xpetra::VectorFactory<LO, LO, GO, NO>::Build(graph->GetImportMap());
boundaryColumnVector->doImport(*boundaryNodesVector, *importer, Xpetra::INSERT);
}
else {
boundaryColumnVector = boundaryNodesVector;
}
auto boundaryColumn = boundaryColumnVector->getDeviceLocalView(Xpetra::Access::ReadOnly);
auto boundary = Kokkos::subview(boundaryColumn, Kokkos::ALL(), 0);

Kokkos::View<LO*, ExecSpace>rownnzView("rownnzView", A_device.numRows());
auto drop_views = Kokkos::View<bool*, ExecSpace>("drop_views", A_device.nnz());
Expand All @@ -608,30 +622,24 @@ void CoalesceDropFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(Level
auto rowView = A_device.rowConst(row);
size_t nnz = rowView.length;

size_t dropSize = 0;
auto drop_view = Kokkos::subview(drop_views, Kokkos::make_pair(A_device.graph.row_map(row), A_device.graph.row_map(row+1)));
auto index_view = Kokkos::subview(index_views, Kokkos::make_pair(A_device.graph.row_map(row), A_device.graph.row_map(row+1)));

//find magnitudes
Kokkos::parallel_reduce(Kokkos::TeamThreadRange(teamMember, (LO)nnz), [&](const LO colID, size_t &count) {
Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember, (LO)nnz), [&](const LO colID) {
index_view(colID) = colID;
LO col = rowView.colidx(colID);
//ignore diagonals for now, they are checked again later
if(row == col) {
drop_view(colID) = true;
count++;
}
//Don't aggregate boundaries
else if(boundaryNodesDevice(colID)) {
if(row == col || boundary(col)) {
drop_view(colID) = true;
}
else {
drop_view(colID) = false;
count++;
}
}, dropSize);
});

size_t dropStart = dropSize;
size_t dropStart = nnz;
if (classicalAlgo == unscaled_cut) {
//push diagonals and boundaries to the right, sort everything else by aij on the left
Kokkos::Experimental::sort_team(teamMember, index_view, [=](size_t& x, size_t& y) -> bool {
Expand All @@ -646,7 +654,7 @@ void CoalesceDropFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(Level
});

//find index where dropping starts
Kokkos::parallel_reduce(Kokkos::TeamThreadRange(teamMember, 1, dropSize), [=](size_t i, size_t& min) {
Kokkos::parallel_reduce(Kokkos::TeamThreadRange(teamMember, 1, nnz), [=](size_t i, size_t& min) {
auto const& x = index_view(i - 1);
auto const& y = index_view(i);
typename implATS::magnitudeType x_aij = 0;
Expand All @@ -658,7 +666,7 @@ void CoalesceDropFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(Level
y_aij = implATS::magnitude(rowView.value(y) * rowView.value(y));
}

if(x_aij > realThresholdKokkos * y_aij) {
if(realThreshold * realThreshold * x_aij > y_aij) {
if(i < min) {
min = i;
}
Expand All @@ -673,30 +681,30 @@ void CoalesceDropFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(Level
else {
auto x_aij = implATS::magnitude(rowView.value(x) * rowView.value(x));
auto y_aij = implATS::magnitude(rowView.value(y) * rowView.value(y));
auto x_aiiajj = implATS::magnitude(thresholdKokkos * thresholdKokkos * ghostedDiagValsView(rowView.colidx(x)) * ghostedDiagValsView(row));
auto y_aiiajj = implATS::magnitude(thresholdKokkos * thresholdKokkos * ghostedDiagValsView(rowView.colidx(y)) * ghostedDiagValsView(row));
auto x_aiiajj = implATS::magnitude(ghostedDiagValsView(rowView.colidx(x)) * ghostedDiagValsView(row));
auto y_aiiajj = implATS::magnitude(ghostedDiagValsView(rowView.colidx(y)) * ghostedDiagValsView(row));
return (x_aij / x_aiiajj) > (y_aij / y_aiiajj);
}
});

//find index where dropping starts
Kokkos::parallel_reduce(Kokkos::TeamThreadRange(teamMember, 1, dropSize), [=](size_t i, size_t& min) {
Kokkos::parallel_reduce(Kokkos::TeamThreadRange(teamMember, 1, nnz), [=](size_t i, size_t& min) {
auto const& x = index_view(i - 1);
auto const& y = index_view(i);
typename implATS::magnitudeType x_val = 0;
typename implATS::magnitudeType y_val = 0;
if(!drop_view(x)) {
typename implATS::magnitudeType x_aij = implATS::magnitude(rowView.value(x) * rowView.value(x));
typename implATS::magnitudeType x_aiiajj = implATS::magnitude(thresholdKokkos * thresholdKokkos * ghostedDiagValsView(rowView.colidx(x)) * ghostedDiagValsView(row));
typename implATS::magnitudeType x_aiiajj = implATS::magnitude(ghostedDiagValsView(rowView.colidx(x)) * ghostedDiagValsView(row));
x_val = x_aij / x_aiiajj;
}
if(!drop_view(y)) {
typename implATS::magnitudeType y_aij = implATS::magnitude(rowView.value(y) * rowView.value(y));
typename implATS::magnitudeType y_aiiajj = implATS::magnitude(thresholdKokkos * thresholdKokkos * ghostedDiagValsView(rowView.colidx(y)) * ghostedDiagValsView(row));
typename implATS::magnitudeType y_aiiajj = implATS::magnitude(ghostedDiagValsView(rowView.colidx(y)) * ghostedDiagValsView(row));
y_val = y_aij / y_aiiajj;
}

if(x_val > realThresholdKokkos * y_val) {
if(realThreshold * realThreshold * x_val > y_val) {
if(i < min) {
min = i;
}
Expand All @@ -705,15 +713,15 @@ void CoalesceDropFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(Level
}

//drop everything to the right of where values stop passing threshold
if(dropStart < dropSize) {
Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember, dropStart, dropSize), [=](size_t i) {
if(dropStart < nnz) {
Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember, dropStart, nnz), [=](size_t i) {
drop_view(index_view(i)) = true;
});
}

LO rownnz = 0;
GO rowDropped = 0;
Kokkos::parallel_reduce(Kokkos::TeamThreadRange(teamMember, dropSize), [=](const size_t idxID, LO& keep, GO& drop) {
Kokkos::parallel_reduce(Kokkos::TeamThreadRange(teamMember, nnz), [=](const size_t idxID, LO& keep, GO& drop) {
LO col = rowView.colidx(idxID);
//don't drop diagonal
if(row == col || !drop_view(idxID)) {
Expand Down
168 changes: 148 additions & 20 deletions packages/muelu/test/unit_tests/CoalesceDropFactory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1389,6 +1389,8 @@ TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(CoalesceDropFactory, ClassicalScaledCut, Scala
typedef Teuchos::ScalarTraits<SC> STS;
typedef typename STS::magnitudeType real_type;
typedef Xpetra::MultiVector<real_type, LO, GO, NO> RealValuedMultiVector;
typedef Tpetra::Map<LO, GO, NO> map_type;
typedef Tpetra::CrsMatrix<SC, LO, GO, NO> crs_matrix_type;

MUELU_TESTING_SET_OSTREAM;
MUELU_TESTING_LIMIT_SCOPE(Scalar, GlobalOrdinal, Node);
Expand All @@ -1399,11 +1401,41 @@ TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(CoalesceDropFactory, ClassicalScaledCut, Scala
Level fineLevel;
TestHelpers::TestFactory<SC, LO, GO, NO>::createSingleLevelHierarchy(fineLevel);

RCP<Matrix> A = TestHelpers::TestFactory<SC, LO, GO, NO>::Build1DPoisson(36);
const global_size_t globalIndices = 12;
const GO indexBase = 0;
RCP<const map_type> map = rcp(new map_type(globalIndices, indexBase, comm));
RCP<crs_matrix_type> A_t(new crs_matrix_type(map, 5));
const SC two = static_cast<SC>(2.0);
const SC one = static_cast<SC>(1.0);
const SC negOne = static_cast<SC>(-1.0);
for(LO lclRow = 0; lclRow < static_cast<LO> (map->getLocalNumElements()); lclRow++) {
const GO gblRow = map->getGlobalElement(lclRow);
if(gblRow == 0) {
A_t->insertGlobalValues(gblRow, Teuchos::tuple<GO>(gblRow, gblRow + 1), Teuchos::tuple<SC>(two, negOne));
}
else if(static_cast<Tpetra::global_size_t>(gblRow) == globalIndices - 1) {
A_t->insertGlobalValues(gblRow, Teuchos::tuple<GO>(gblRow - 1, gblRow), Teuchos::tuple<SC>(negOne, two));
}
else if(gblRow == 2 || gblRow == 9) {
A_t->insertGlobalValues(gblRow, Teuchos::tuple<GO>(gblRow), Teuchos::tuple<SC>(one));
}
else if(gblRow == 5) {
A_t->insertGlobalValues(gblRow, Teuchos::tuple<GO>(gblRow-2, gblRow-1, gblRow, gblRow+1, gblRow+2), Teuchos::tuple<SC>(negOne, negOne, two, negOne, negOne));
}
else if(gblRow == 6) {
A_t->insertGlobalValues(gblRow, Teuchos::tuple<GO>(gblRow-2, gblRow-1, gblRow, gblRow+1, gblRow+2), Teuchos::tuple<SC>(negOne, two, two, two, negOne));
}
else {
A_t->insertGlobalValues(gblRow, Teuchos::tuple<GO>(gblRow - 1, gblRow, gblRow + 1), Teuchos::tuple<SC>(negOne, two, negOne));
}
}
A_t->fillComplete();
RCP<CrsMatrix> A_x = rcp(new TpetraCrsMatrix(A_t));
RCP<Matrix> A = rcp(new CrsMatrixWrap(A_x));
fineLevel.Set("A", A);

Teuchos::ParameterList galeriList;
galeriList.set("nx", Teuchos::as<GlobalOrdinal>(36));
galeriList.set("nx", Teuchos::as<GlobalOrdinal>(globalIndices));
RCP<RealValuedMultiVector> coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates<SC, LO, GO, Map, RealValuedMultiVector>("1D", A->getRowMap(), galeriList);
fineLevel.Set("Coordinates", coordinates);

Expand All @@ -1429,25 +1461,59 @@ TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(CoalesceDropFactory, ClassicalScaledCut, Scala
const RCP<const Map> myImportMap = graph->GetImportMap(); // < note that the ImportMap is built from the column map of the matrix A WITHOUT dropping!
const RCP<const Map> myDomainMap = graph->GetDomainMap();

TEST_EQUALITY(myImportMap->getMaxAllGlobalIndex(), 35);
TEST_EQUALITY(myImportMap->getMaxAllGlobalIndex(), globalIndices-1);
TEST_EQUALITY(myImportMap->getMinAllGlobalIndex(), 0);
TEST_EQUALITY(myImportMap->getMinLocalIndex(), 0);
TEST_EQUALITY(myImportMap->getGlobalNumElements(), Teuchos::as<size_t>(36 + (comm->getSize() - 1) * 2));
TEST_EQUALITY(myImportMap->getGlobalNumElements(), Teuchos::as<size_t>(globalIndices + (comm->getSize() - 1) * 2));

TEST_EQUALITY(myDomainMap->getMaxAllGlobalIndex(), 35);
TEST_EQUALITY(myDomainMap->getMaxAllGlobalIndex(), globalIndices-1);
TEST_EQUALITY(myDomainMap->getMinAllGlobalIndex(), 0);
TEST_EQUALITY(myDomainMap->getMinLocalIndex(), 0);
TEST_EQUALITY(myDomainMap->getGlobalNumElements(), 36);

TEST_EQUALITY(graph->GetGlobalNumEdges(), 72);

} // SignaledClassical
TEST_EQUALITY(myDomainMap->getGlobalNumElements(), globalIndices);

TEST_EQUALITY(graph->GetGlobalNumEdges(), 28);

int rows[13] = {0, 2, 4, 5, 7, 10, 15, 18, 21, 23, 24, 26, 28};
int columns[28] = {0, 1,
0, 1,
2,
3, 4,
3, 4, 5,
3, 4, 5, 6, 7,
5, 6, 7,
6, 7, 8,
7, 8,
9,
10, 11,
10, 11};
auto rowPtrs = graph->getRowPtrs();
auto entries = graph->getEntries();
size_t rowID = 0;
TEST_EQUALITY(rowPtrs(0), rowID);
for(size_t i = 0; i < rowPtrs.size()-1; i++) {
auto gblID = myDomainMap->getGlobalElement(i);
int rownnz = rows[gblID+1]-rows[gblID];
rowID += rownnz;
TEST_EQUALITY(rowPtrs(i+1), rowID);

std::vector<int> colID;
for(int j = 0; j < rownnz; j++) {
colID.push_back(myImportMap->getGlobalElement(entries(rowPtrs(i)+j)));
}
std::sort(std::begin(colID), std::end(colID));
for(int j = 0; j < rownnz; j++) {
TEST_EQUALITY(colID[j], columns[rows[gblID]+j]);
}
}
} // ClassicalScaledCut

TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(CoalesceDropFactory, ClassicalUnScaledCut, Scalar, LocalOrdinal, GlobalOrdinal, Node) {
#include <MueLu_UseShortNames.hpp>
typedef Teuchos::ScalarTraits<SC> STS;
typedef typename STS::magnitudeType real_type;
typedef Xpetra::MultiVector<real_type, LO, GO, NO> RealValuedMultiVector;
typedef Tpetra::Map<LO, GO, NO> map_type;
typedef Tpetra::CrsMatrix<SC, LO, GO, NO> crs_matrix_type;

MUELU_TESTING_SET_OSTREAM;
MUELU_TESTING_LIMIT_SCOPE(Scalar, GlobalOrdinal, Node);
Expand All @@ -1458,11 +1524,41 @@ TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(CoalesceDropFactory, ClassicalUnScaledCut, Sca
Level fineLevel;
TestHelpers::TestFactory<SC, LO, GO, NO>::createSingleLevelHierarchy(fineLevel);

RCP<Matrix> A = TestHelpers::TestFactory<SC, LO, GO, NO>::Build1DPoisson(36);
const global_size_t globalIndices = 12;
const GO indexBase = 0;
RCP<const map_type> map = rcp(new map_type(globalIndices, indexBase, comm));
RCP<crs_matrix_type> A_t(new crs_matrix_type(map, 5));
const SC two = static_cast<SC>(2.0);
const SC one = static_cast<SC>(1.0);
const SC negOne = static_cast<SC>(-1.0);
for(LO lclRow = 0; lclRow < static_cast<LO> (map->getLocalNumElements()); lclRow++) {
const GO gblRow = map->getGlobalElement(lclRow);
if(gblRow == 0) {
A_t->insertGlobalValues(gblRow, Teuchos::tuple<GO>(gblRow, gblRow + 1), Teuchos::tuple<SC>(two, negOne));
}
else if(static_cast<Tpetra::global_size_t>(gblRow) == globalIndices - 1) {
A_t->insertGlobalValues(gblRow, Teuchos::tuple<GO>(gblRow - 1, gblRow), Teuchos::tuple<SC>(negOne, two));
}
else if(gblRow == 2 || gblRow == 9) {
A_t->insertGlobalValues(gblRow, Teuchos::tuple<GO>(gblRow), Teuchos::tuple<SC>(one));
}
else if(gblRow == 5) {
A_t->insertGlobalValues(gblRow, Teuchos::tuple<GO>(gblRow-2, gblRow-1, gblRow, gblRow+1, gblRow+2), Teuchos::tuple<SC>(negOne, negOne, two, negOne, negOne));
}
else if(gblRow == 6) {
A_t->insertGlobalValues(gblRow, Teuchos::tuple<GO>(gblRow-2, gblRow-1, gblRow, gblRow+1, gblRow+2), Teuchos::tuple<SC>(negOne, two, two, two, negOne));
}
else {
A_t->insertGlobalValues(gblRow, Teuchos::tuple<GO>(gblRow - 1, gblRow, gblRow + 1), Teuchos::tuple<SC>(negOne, two, negOne));
}
}
A_t->fillComplete();
RCP<CrsMatrix> A_x = rcp(new TpetraCrsMatrix(A_t));
RCP<Matrix> A = rcp(new CrsMatrixWrap(A_x));
fineLevel.Set("A", A);

Teuchos::ParameterList galeriList;
galeriList.set("nx", Teuchos::as<GlobalOrdinal>(36));
galeriList.set("nx", Teuchos::as<GlobalOrdinal>(globalIndices));
RCP<RealValuedMultiVector> coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates<SC, LO, GO, Map, RealValuedMultiVector>("1D", A->getRowMap(), galeriList);
fineLevel.Set("Coordinates", coordinates);

Expand All @@ -1488,19 +1584,51 @@ TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(CoalesceDropFactory, ClassicalUnScaledCut, Sca
const RCP<const Map> myImportMap = graph->GetImportMap(); // < note that the ImportMap is built from the column map of the matrix A WITHOUT dropping!
const RCP<const Map> myDomainMap = graph->GetDomainMap();

TEST_EQUALITY(myImportMap->getMaxAllGlobalIndex(), 35);
TEST_EQUALITY(myImportMap->getMaxAllGlobalIndex(), globalIndices-1);
TEST_EQUALITY(myImportMap->getMinAllGlobalIndex(), 0);
TEST_EQUALITY(myImportMap->getMinLocalIndex(), 0);
TEST_EQUALITY(myImportMap->getGlobalNumElements(), Teuchos::as<size_t>(36 + (comm->getSize() - 1) * 2));
TEST_EQUALITY(myImportMap->getGlobalNumElements(), Teuchos::as<size_t>(globalIndices + (comm->getSize() - 1) * 2));

TEST_EQUALITY(myDomainMap->getMaxAllGlobalIndex(), 35);
TEST_EQUALITY(myDomainMap->getMaxAllGlobalIndex(), globalIndices-1);
TEST_EQUALITY(myDomainMap->getMinAllGlobalIndex(), 0);
TEST_EQUALITY(myDomainMap->getMinLocalIndex(), 0);
TEST_EQUALITY(myDomainMap->getGlobalNumElements(), 36);

TEST_EQUALITY(graph->GetGlobalNumEdges(), 72);

} // SignaledClassical
TEST_EQUALITY(myDomainMap->getGlobalNumElements(), globalIndices);

TEST_EQUALITY(graph->GetGlobalNumEdges(), 28);

int rows[13] = {0, 2, 4, 5, 7, 10, 15, 18, 21, 23, 24, 26, 28};
int columns[28] = {0, 1,
0, 1,
2,
3, 4,
3, 4, 5,
3, 4, 5, 6, 7,
5, 6, 7,
6, 7, 8,
7, 8,
9,
10, 11,
10, 11};
auto rowPtrs = graph->getRowPtrs();
auto entries = graph->getEntries();
size_t rowID = 0;
TEST_EQUALITY(rowPtrs(0), rowID);
for(size_t i = 0; i < rowPtrs.size()-1; i++) {
auto gblID = myDomainMap->getGlobalElement(i);
int rownnz = rows[gblID+1]-rows[gblID];
rowID += rownnz;
TEST_EQUALITY(rowPtrs(i+1), rowID);

std::vector<int> colID;
for(int j = 0; j < rownnz; j++) {
colID.push_back(myImportMap->getGlobalElement(entries(rowPtrs(i)+j)));
}
std::sort(std::begin(colID), std::end(colID));
for(int j = 0; j < rownnz; j++) {
TEST_EQUALITY(colID[j], columns[rows[gblID]+j]);
}
}
} // ClassicalUnScaledCut

TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(CoalesceDropFactory, SignaledClassical, Scalar, LocalOrdinal, GlobalOrdinal, Node) {
#include <MueLu_UseShortNames.hpp>
Expand Down

0 comments on commit ea245d1

Please sign in to comment.