From 2bdbe49e463db0e26899f9e928ac63e1083bad4a Mon Sep 17 00:00:00 2001 From: Alan Williams Date: Fri, 13 Dec 2024 13:08:42 -0700 Subject: [PATCH] STK: Snapshot 12-13-24 13:08 from Sierra 5.23.2-429-g07a311ce Signed-off-by: Alan Williams --- packages/stk/CHANGELOG.md | 7 + .../stk/stk_doc_tests/stk_mesh/howToNgp.cpp | 8 +- .../stk_topology/shell_sides.cpp | 8 +- .../stk/stk_expreval/stk_expreval/Node.cpp | 6 +- .../stk_search/SearchMeshTest.cpp | 73 ++ packages/stk/stk_io/stk_io/InputFile.cpp | 491 ++------------ packages/stk/stk_io/stk_io/InputFile.hpp | 34 +- packages/stk/stk_io/stk_io/InputQuery.cpp | 514 ++++++++++++++ packages/stk/stk_io/stk_io/InputQuery.hpp | 115 ++++ packages/stk/stk_io/stk_io/IossBridge.cpp | 17 +- packages/stk/stk_io/stk_io/IossBridge.hpp | 9 +- packages/stk/stk_io/stk_io/MeshField.cpp | 5 + packages/stk/stk_io/stk_io/MeshField.hpp | 4 + .../stk/stk_io/stk_io/ProcessSetsOrBlocks.cpp | 12 +- .../stk/stk_io/stk_io/StkMeshIoBroker.cpp | 8 +- .../stk/stk_io/stk_io/StkMeshIoBroker.hpp | 4 +- .../stk/stk_mesh/stk_mesh/base/Bucket.cpp | 44 +- .../stk/stk_mesh/stk_mesh/base/Bucket.hpp | 24 +- .../stk/stk_mesh/stk_mesh/base/BulkData.cpp | 30 +- .../stk/stk_mesh/stk_mesh/base/BulkData.hpp | 2 +- .../stk_mesh/stk_mesh/base/DeviceField.hpp | 79 ++- .../stk/stk_mesh/stk_mesh/base/DeviceMesh.hpp | 421 +++++------- .../stk/stk_mesh/stk_mesh/base/FEMHelpers.cpp | 63 +- .../stk/stk_mesh/stk_mesh/base/FEMHelpers.hpp | 4 + .../stk/stk_mesh/stk_mesh/base/FieldBase.cpp | 24 +- .../stk/stk_mesh/stk_mesh/base/FieldBase.hpp | 6 +- .../stk_mesh/stk_mesh/base/GetNgpField.hpp | 32 +- .../stk/stk_mesh/stk_mesh/base/HostField.hpp | 15 +- .../stk/stk_mesh/stk_mesh/base/HostMesh.hpp | 8 + .../stk/stk_mesh/stk_mesh/base/MetaData.cpp | 153 +++-- .../stk/stk_mesh/stk_mesh/base/MetaData.hpp | 30 +- packages/stk/stk_mesh/stk_mesh/base/Ngp.hpp | 35 +- .../stk_mesh/base/NgpFieldSyncDebugger.hpp | 18 +- .../stk/stk_mesh/stk_mesh/base/NgpTypes.hpp | 12 +- .../stk_mesh/stk_mesh/base/SideSetHelper.cpp | 6 +- .../stk_mesh/base/StkFieldSyncDebugger.cpp | 82 ++- .../stk_mesh/base/StkFieldSyncDebugger.hpp | 10 + packages/stk/stk_mesh/stk_mesh/base/Types.hpp | 1 + .../stk_mesh/baseImpl/BucketConnDynamic.hpp | 11 +- .../stk_mesh/baseImpl/BucketRepository.cpp | 81 +-- .../stk_mesh/baseImpl/BucketRepository.hpp | 17 +- .../stk_mesh/baseImpl/MeshImplUtils.cpp | 9 +- .../stk_mesh/baseImpl/MeshModification.cpp | 9 +- .../stk_mesh/baseImpl/NgpMeshHostData.hpp | 5 - .../stk_mesh/stk_mesh/baseImpl/Partition.cpp | 6 +- .../stk_mesh/stk_mesh/baseImpl/Partition.hpp | 37 +- .../abstract_cdt_interface.hpp | 4 + .../stk_middle_mesh_util/create_stk_mesh.cpp | 10 +- .../stk_middle_mesh_util/stk_field_copier.cpp | 10 +- .../stk_ngp_test/GlobalReporter.hpp | 6 + .../stk_ngp_test/NgpTestDeviceMacros.hpp | 6 - .../stk_ngp_test/stk_ngp_test/ngp_test.hpp | 14 - .../stk_mesh/NgpMeshUpdate.cpp | 10 + .../stk_mesh/perfNgpFieldStateRotation.cpp | 136 ++++ .../stk_search/VolumeToOne.cpp | 32 +- .../MortonLBVH_TreeManipulationUtils.hpp | 67 +- .../stk_topology/topology_defn.hpp | 48 +- .../stk_transfer/copy_by_id/SearchById.hpp | 1 + .../copy_by_id/TransferCopyTranslator.hpp | 1 + .../TextMeshStkTopologyMapping.hpp | 13 +- .../stk_io/UnitTestGmeshFixture.cpp | 7 +- .../stk_io/UnitTestReadFieldData.cpp | 37 + .../stk_mesh/UnitTestBucket.cpp | 2 +- .../stk_mesh/UnitTestFieldDataManager.cpp | 74 +- .../stk_mesh/UnitTestFieldImpl.cpp | 13 +- .../stk_mesh/UnitTestMetaData.cpp | 6 +- .../stk_mesh/UnitTestSideSet.cpp | 373 +++++++++- .../ngp/NgpDebugFieldSync_Fixtures.hpp | 25 +- .../stk_mesh/ngp/NgpMeshTest.cpp | 146 +++- .../stk_mesh/ngp/UnitTestNgp.cpp | 14 +- .../ngp/UnitTestNgpDebugFieldSync.cpp | 21 +- ...FieldSync_AccessDuringMeshModification.cpp | 26 +- ...TestNgpDebugFieldSync_MeshModification.cpp | 30 +- ...estNgpDebugFieldSync_PartialAllocation.cpp | 15 +- .../ngp/UnitTestNgpMeshModification.cpp | 29 + .../ngp/UnitTestTransposePinnedMapped.cpp | 4 +- .../stk_mesh/ngp/ngpFieldTest.cpp | 84 ++- .../test_stk_field_copier.cpp | 11 +- .../stk_ngp_test/utest_VirtualFunction.cpp | 1 + .../stk_search/UnitTestCoarseSearchTwoBox.cpp | 40 ++ .../stk_topology/topology_test_utils.hpp | 5 +- .../utest_c/unit_test_shell_quad.cpp | 54 +- .../utest_c/unit_test_shell_tri.cpp | 37 +- .../unit_test_shell_tri_all_face_sides.cpp | 8 +- .../diag/UnitTestParallelTimerImpl.cpp | 255 +++++++ .../parallel/UnitTestParallelComm.cpp | 30 +- .../stk_util/util/UnitTestStridedArray.cpp | 3 +- packages/stk/stk_util/stk_util/Version.hpp | 2 +- .../command_line/CommandLineParser.hpp | 17 +- .../stk_util/diag/ParallelTimerImpl.cpp | 254 +++++++ .../stk_util/diag/ParallelTimerImpl.hpp | 210 ++++++ .../stk/stk_util/stk_util/diag/PrintTimer.cpp | 447 +----------- .../stk/stk_util/stk_util/diag/PrintTimer.hpp | 8 +- packages/stk/stk_util/stk_util/diag/Timer.cpp | 635 +----------------- packages/stk/stk_util/stk_util/diag/Timer.hpp | 93 ++- .../stk/stk_util/stk_util/diag/TimerImpl.cpp | 333 +++++++++ .../stk/stk_util/stk_util/diag/TimerImpl.hpp | 370 ++++++++++ .../stk_util/stk_util/environment/EnvData.cpp | 1 - .../stk_util/stk_util/environment/EnvData.hpp | 1 - .../stk_util/environment/Scheduler.cpp | 2 +- .../stk/stk_util/stk_util/ngp/NgpSpaces.hpp | 2 + .../stk_util/registry/ProductRegistry.cpp | 2 +- .../stk_util/stk_util/util/FPExceptions.hpp | 12 +- .../stk_util/stk_util/util/StkNgpVector.hpp | 19 +- .../stk_util/stk_util/util/StridedArray.hpp | 2 +- 105 files changed, 4215 insertions(+), 2492 deletions(-) create mode 100644 packages/stk/stk_integration_tests/stk_search/SearchMeshTest.cpp create mode 100644 packages/stk/stk_io/stk_io/InputQuery.cpp create mode 100644 packages/stk/stk_io/stk_io/InputQuery.hpp create mode 100644 packages/stk/stk_performance_tests/stk_mesh/perfNgpFieldStateRotation.cpp create mode 100644 packages/stk/stk_unit_tests/stk_util/diag/UnitTestParallelTimerImpl.cpp create mode 100644 packages/stk/stk_util/stk_util/diag/ParallelTimerImpl.cpp create mode 100644 packages/stk/stk_util/stk_util/diag/ParallelTimerImpl.hpp create mode 100644 packages/stk/stk_util/stk_util/diag/TimerImpl.cpp create mode 100644 packages/stk/stk_util/stk_util/diag/TimerImpl.hpp diff --git a/packages/stk/CHANGELOG.md b/packages/stk/CHANGELOG.md index 514f7e831a1a..f74c63a19424 100644 --- a/packages/stk/CHANGELOG.md +++ b/packages/stk/CHANGELOG.md @@ -1,5 +1,12 @@ # CHANGELOG +5.23.2 (STK_VERSION 5230200) 12/11/2024 + misc fixes for AMD/ROCm (ATS-4) + stk_mesh: speedup for device-field multi-state rotation + reduce stacksize (sizeof(DeviceMesh)) from ~2900 to ~470 + stk_search: misc fixes + stk_io: add query for existence of fields on database + 5.21.6-1 (STK_VERSION 5210601) 10/31/2024 stk_mesh, stk_search: more fixes for HIP unified and Cuda no-uvm builds diff --git a/packages/stk/stk_doc_tests/stk_mesh/howToNgp.cpp b/packages/stk/stk_doc_tests/stk_mesh/howToNgp.cpp index 7bcbd60764f4..d45a371d6576 100644 --- a/packages/stk/stk_doc_tests/stk_mesh/howToNgp.cpp +++ b/packages/stk/stk_doc_tests/stk_mesh/howToNgp.cpp @@ -372,13 +372,12 @@ void run_connected_face_test(const stk::mesh::BulkData& bulk) typedef stk::ngp::TeamPolicy::member_type TeamHandleType; const auto& teamPolicy = stk::ngp::TeamPolicy(ngpMesh.num_buckets(stk::topology::ELEM_RANK), Kokkos::AUTO); - Kokkos::parallel_for(teamPolicy, KOKKOS_LAMBDA(const TeamHandleType& team) { const stk::mesh::NgpMesh::BucketType& bucket = ngpMesh.get_bucket(stk::topology::ELEM_RANK, team.league_rank()); - unsigned numElems = bucket.size(); + const unsigned numElems = bucket.size(); Kokkos::parallel_for(Kokkos::TeamThreadRange(team, 0u, numElems), [&] (const int& i) { @@ -409,9 +408,8 @@ void run_connected_face_test(const stk::mesh::BulkData& bulk) TEST_F(NgpHowTo, loopOverElemFaces) { - if (stk::parallel_machine_size(MPI_COMM_WORLD) > 1) { - GTEST_SKIP(); - } + if (stk::parallel_machine_size(MPI_COMM_WORLD) > 1) { GTEST_SKIP(); } + setup_empty_mesh(stk::mesh::BulkData::NO_AUTO_AURA); auto &field = get_meta().declare_field(stk::topology::NODE_RANK, "myField"); stk::mesh::put_field_on_mesh(field, get_meta().universal_part(), nullptr); diff --git a/packages/stk/stk_doc_tests/stk_topology/shell_sides.cpp b/packages/stk/stk_doc_tests/stk_topology/shell_sides.cpp index 3482ca2468c2..ba891a31e780 100644 --- a/packages/stk/stk_doc_tests/stk_topology/shell_sides.cpp +++ b/packages/stk/stk_doc_tests/stk_topology/shell_sides.cpp @@ -71,10 +71,10 @@ TEST(stk_topology, shell_side_topology) { EXPECT_EQ(shell.num_sides(),6u); EXPECT_EQ(shell.side_topology(0), stk::topology::QUAD_4); EXPECT_EQ(shell.side_topology(1), stk::topology::QUAD_4); - EXPECT_EQ(shell.side_topology(2), stk::topology::SHELL_SIDE_BEAM_2); - EXPECT_EQ(shell.side_topology(3), stk::topology::SHELL_SIDE_BEAM_2); - EXPECT_EQ(shell.side_topology(4), stk::topology::SHELL_SIDE_BEAM_2); - EXPECT_EQ(shell.side_topology(5), stk::topology::SHELL_SIDE_BEAM_2); + EXPECT_EQ(shell.side_topology(2), stk::topology::LINE_2); + EXPECT_EQ(shell.side_topology(3), stk::topology::LINE_2); + EXPECT_EQ(shell.side_topology(4), stk::topology::LINE_2); + EXPECT_EQ(shell.side_topology(5), stk::topology::LINE_2); } //end_shell_side_topo diff --git a/packages/stk/stk_expreval/stk_expreval/Node.cpp b/packages/stk/stk_expreval/stk_expreval/Node.cpp index 83916c8c7c9c..8a5ac920c8f1 100644 --- a/packages/stk/stk_expreval/stk_expreval/Node.cpp +++ b/packages/stk/stk_expreval/stk_expreval/Node.cpp @@ -82,7 +82,11 @@ double& Node::setResult() { void Node::eval() { - stk::util::clear_fp_errors(); + if (m_owner->get_fp_error_behavior() != Eval::FPErrorBehavior::Ignore) + { + stk::util::clear_fp_errors(); + } + switch (m_opcode) { case OPCODE_STATEMENT: { setResult() = m_left->getResult(); diff --git a/packages/stk/stk_integration_tests/stk_search/SearchMeshTest.cpp b/packages/stk/stk_integration_tests/stk_search/SearchMeshTest.cpp new file mode 100644 index 000000000000..d0d9c408ea6f --- /dev/null +++ b/packages/stk/stk_integration_tests/stk_search/SearchMeshTest.cpp @@ -0,0 +1,73 @@ +// Copyright 2002 - 2008, 2010, 2011 National Technology Engineering +// Solutions of Sandia, LLC (NTESS). Under the terms of Contract +// DE-NA0003525 with NTESS, the U.S. Government retains certain rights +// in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// * Neither the name of NTESS nor the names of its contributors +// may be used to endorse or promote products derived from this +// software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace +{ + +TEST(StkSearch, NGP_coarse_search_mesh_elem_boxes_MORTON) +{ + using ExecSpace = Kokkos::DefaultExecutionSpace; + MPI_Comm comm = MPI_COMM_WORLD; + if (stk::parallel_machine_size(comm) != 1) { GTEST_SKIP(); } + + stk::mesh::MeshBuilder builder(comm); + std::shared_ptr bulkPtr = builder.create(); + + stk::io::fill_mesh("generated:1x9x19|sideset:xXyYzZ", *bulkPtr); + + Kokkos::View elemBoxes = + createBoundingBoxesForEntities(*bulkPtr, stk::topology::ELEM_RANK); + Kokkos::View faceBoxes = + createBoundingBoxesForEntities(*bulkPtr, stk::topology::FACE_RANK); + + std::cout<<"Num elem-boxes: "< searchResults; + stk::search::local_coarse_search(elemBoxes, faceBoxes, searchMethod, searchResults, ExecSpace{}); + + const size_t expectedSize = 2910; + EXPECT_EQ(expectedSize, searchResults.size())<<"expected results size: "< +#include #include // for exception #include // for copy, sort, max, find #include // for fmod @@ -80,34 +81,6 @@ namespace { bool meshFieldSort(const stk::io::MeshField& f1, const stk::io::MeshField &f2) { return f1.field()->mesh_meta_data_ordinal() < f2.field()->mesh_meta_data_ordinal(); } - - void add_missing_fields(std::vector *missingFields, - std::map missing_fields_collector) { - if (missingFields) - { - std::vector discoveredMissingFields; - for (auto missingStatedFieldIter : missing_fields_collector) - { - discoveredMissingFields.push_back(stk::io::MeshField(missingStatedFieldIter.first, - missingStatedFieldIter.second->db_name())); - } - std::sort(discoveredMissingFields.begin(), discoveredMissingFields.end(), - [](const stk::io::MeshField &a, const stk::io::MeshField &b) { - return (a.db_name() < b.db_name()) - || ((a.db_name() == b.db_name()) && (a.field()->name() < b.field()->name())); }); - - for(stk::io::MeshField &missingField : *missingFields) - { - std::vector::iterator iter = std::find(discoveredMissingFields.begin(), discoveredMissingFields.end(), missingField); - if(iter != discoveredMissingFields.end()) - { - discoveredMissingFields.erase(iter); - } - } - - missingFields->insert(missingFields->end(), discoveredMissingFields.begin(), discoveredMissingFields.end()); - } - } } namespace stk { @@ -156,11 +129,11 @@ namespace io { m_stopTime(std::numeric_limits::max()), m_periodType(CYCLIC), m_fieldsInitialized(false), - m_haveCachedEntityList(false), + m_haveCachedEntityList(false), m_multiStateSuffixes(nullptr) { STK_ThrowErrorMsgIf(m_database == nullptr || !m_database->ok(true), - "ERROR: Invalid Ioss region detected in add_mesh_database"); + "ERROR: Invalid Ioss region detected in add_mesh_database"); Ioss::DatabaseUsage db_usage = m_database->usage(); if (db_usage == Ioss::READ_RESTART) { @@ -178,8 +151,8 @@ namespace io { } STK_ThrowErrorMsgIf(m_region->mesh_type() != Ioss::MeshType::UNSTRUCTURED, - "Mesh type is '" << m_region->mesh_type_string() << "' which is not supported. " - "Only 'Unstructured' mesh is currently supported."); + "Mesh type is '" << m_region->mesh_type_string() << "' which is not supported. " + "Only 'Unstructured' mesh is currently supported."); } @@ -202,8 +175,8 @@ namespace io { m_region = std::shared_ptr(region); STK_ThrowErrorMsgIf(m_region->mesh_type() != Ioss::MeshType::UNSTRUCTURED, - "Mesh type is '" << m_region->mesh_type_string() << "' which is not supported. " - "Only 'Unstructured' mesh is currently supported."); + "Mesh type is '" << m_region->mesh_type_string() << "' which is not supported. " + "Only 'Unstructured' mesh is currently supported."); } } @@ -220,7 +193,7 @@ namespace io { if (!fieldAlreadyExists) { m_fields.push_back(mesh_field); stk::io::set_field_role(*mesh_field.field(), Ioss::Field::TRANSIENT); - m_fieldsInitialized = false; + m_fieldsInitialized = false; } } @@ -278,58 +251,9 @@ namespace io { for (size_t i=0; i < fields.size(); i++) { const Ioss::Field::RoleType* role = stk::io::get_field_role(*fields[i]); if ( role && *role == Ioss::Field::TRANSIENT ) { - add_input_field(MeshField(fields[i], fields[i]->name(), tmo)); - } - } - } - - void InputFile::build_field_part_associations_for_part(Ioss::Region *region, - const stk::mesh::FieldBase *f, - const stk::mesh::Part * part, - stk::io::MeshField &mf) - { - stk::mesh::EntityRank rank = part_primary_entity_rank(*part); - // Get Ioss::GroupingEntity corresponding to this part... - Ioss::GroupingEntity *entity = region->get_entity(part->name()); - - if (entity != nullptr) { - if (f->entity_rank() == rank) { - build_field_part_associations(mf, *part, rank, entity); - process_fields_for_grouping_entity(mf, *part, entity); - - if(entity->type() == Ioss::SIDESET) { - auto io_side_set = dynamic_cast(entity); - STK_ThrowRequire(io_side_set != nullptr); - auto fbs = io_side_set->get_side_blocks(); - - for(auto& io_fblock : fbs) { - build_field_part_associations(mf, *part, rank, io_fblock); - process_fields_for_grouping_entity(mf, *part, io_fblock); - } - } - } - - // If rank is != NODE_RANK, then see if field is defined on the nodes of this part - if (rank != stk::topology::NODE_RANK && f->entity_rank() == stk::topology::NODE_RANK) { - Ioss::GroupingEntity *node_entity = nullptr; - std::string nodes_name = part->name() + "_nodes"; - - node_entity = region->get_entity(nodes_name); - - if (node_entity == nullptr) { - nodes_name = part->name() + "_n"; - node_entity = region->get_entity(nodes_name); - } - - if (node_entity == nullptr) { - node_entity = region->get_entity("nodeblock_1"); - } - if (node_entity != nullptr) { - build_field_part_associations(mf, *part, stk::topology::NODE_RANK, node_entity); - process_fields_for_grouping_entity(mf, *part, node_entity); - } - } + add_input_field(MeshField(fields[i], fields[i]->name(), tmo)); } + } } bool InputFile::read_input_field(stk::io::MeshField &mf, stk::mesh::BulkData &bulk) @@ -356,14 +280,15 @@ namespace io { "ERROR: Input database '" << region->get_database()->get_filename() << "' has no transient data."); + InputQuery iq(*region, bulk.mesh_meta_data(), m_db_purpose, m_multiStateSuffixes); + const stk::mesh::FieldBase *f = mf.field(); - std::vector::iterator P = mf.m_subsetParts.begin(); - while (P != mf.m_subsetParts.end()) { + + for (const stk::mesh::Part* part : mf.m_subsetParts) { // Find the Ioss::GroupingEntity corresponding to this part... mf.set_inactive(); - const stk::mesh::Part *part = *P; ++P; - build_field_part_associations_for_part(region, f, part, mf); + iq.build_field_part_associations_for_part(mf, part); if (mf.is_active()) { mf.restore_field_data(bulk, sti, false, m_multiStateSuffixes); @@ -371,35 +296,30 @@ namespace io { } if(mf.m_subsetParts.empty()) { - mf.set_inactive(); - // Now handle the non-subsetted fields... - - // Check universal_part() NODE_RANK first... - const stk::mesh::MetaData &meta = bulk.mesh_meta_data(); - { - if (f->entity_rank() == stk::topology::NODE_RANK) { - build_field_part_associations(mf, meta.universal_part(), stk::topology::NODE_RANK, - region->get_node_blocks()[0]); - process_fields_for_grouping_entity(mf, meta.universal_part(), region->get_node_blocks()[0]); - } - } - - // Now handle all non-nodeblock parts... - const stk::mesh::PartVector &all_parts = meta.get_parts(); - for ( stk::mesh::PartVector::const_iterator - ip = all_parts.begin(); ip != all_parts.end(); ++ip ) { - - const stk::mesh::Part * part = *ip; + mf.set_inactive(); + // Now handle the non-subsetted fields... - // Check whether this part is an input part... - if (stk::io::is_part_io_part(*part)) { - build_field_part_associations_for_part(region, f, part, mf); - } + // Check universal_part() NODE_RANK first... + const stk::mesh::MetaData &meta = bulk.mesh_meta_data(); + { + if (f->entity_rank() == stk::topology::NODE_RANK) { + Ioss::NodeBlock* nb = region->get_node_blocks()[0]; + iq.build_field_part_associations(mf, meta.universal_part(), stk::topology::NODE_RANK, nb); + iq.process_fields_for_grouping_entity(mf, meta.universal_part(), nb); } + } - if (mf.is_active()) { - mf.restore_field_data(bulk, sti, false, m_multiStateSuffixes); + // Now handle all non-nodeblock parts... + for ( const stk::mesh::Part * part : meta.get_parts() ) { + // Check whether this part is an input part... + if (stk::io::is_part_io_part(*part)) { + iq.build_field_part_associations_for_part(mf, part); } + } + + if (mf.is_active()) { + mf.restore_field_data(bulk, sti, false, m_multiStateSuffixes); + } } return mf.is_active(); @@ -432,318 +352,6 @@ namespace io { return read_defined_input_fields(state_time, missingFields, bulk); } - bool InputFile::build_field_part_associations(stk::io::MeshField &mesh_field, - const stk::mesh::Part &part, - const stk::mesh::EntityRank rank, - Ioss::GroupingEntity *io_entity, - std::map *missing_fields_collector) - { - bool field_is_missing = false; - stk::mesh::FieldBase *f = mesh_field.field(); - // Only add TRANSIENT Fields -- check role; if not present assume transient... - const Ioss::Field::RoleType *role = stk::io::get_field_role(*f); - if (role == nullptr || *role == Ioss::Field::TRANSIENT) { - if (stk::io::is_field_on_part(f, rank, part)) { - const stk::mesh::FieldBase::Restriction &res = stk::mesh::find_restriction(*f, rank, part); - FieldType field_type; - stk::io::get_io_field_type(f, res, &field_type); - if (field_type.type != Ioss::Field::INVALID) { - - const std::string &db_name = mesh_field.db_name(); - unsigned num_states = f->number_of_states(); - std::vector missing_states; - if (num_states > 1) { - bool has_all_states = all_field_states_exist_on_io_entity(db_name, f, io_entity, missing_states, m_multiStateSuffixes); - if(has_all_states == false) { - field_is_missing = true; - if (missing_fields_collector) { - for (stk::mesh::FieldState missing_state : missing_states) - (*missing_fields_collector)[f->field_state(missing_state)] = &mesh_field; - } - } - } - - bool field_exists = io_entity->field_exists(db_name); - if (!field_exists) { - field_is_missing = true; - if (missing_fields_collector) { - (*missing_fields_collector)[f] = &mesh_field; - } - } - - // See if field with that name exists on io_entity... - if (field_exists) { - mesh_field.add_part(rank, part, io_entity); - mesh_field.set_single_state((m_db_purpose == stk::io::READ_RESTART) ? false : true); - mesh_field.set_active(); - } - } - } - } - return field_is_missing; - } - - bool InputFile::process_fields_for_grouping_entity(stk::io::MeshField &mesh_field, - const stk::mesh::Part &part, - Ioss::GroupingEntity *io_entity, - std::map *missing_fields_collector_ptr) - { - STK_ThrowRequireMsg(io_entity != nullptr, "Null IO entity"); - - bool doesFieldExist = false; - - stk::mesh::FieldBase *f = mesh_field.field(); - - stk::mesh::EntityRank rank = part_primary_entity_rank(part); - if(f->entity_rank() == rank) { - const std::string &db_name = mesh_field.db_name(); - unsigned num_states = f->number_of_states(); - std::vector missing_states; - if (num_states > 1) { - bool has_all_states = all_field_states_exist_on_io_entity(db_name, f, io_entity, missing_states, m_multiStateSuffixes); - if(has_all_states == false) { - if (missing_fields_collector_ptr) { - for (stk::mesh::FieldState missing_state : missing_states) - (*missing_fields_collector_ptr)[f->field_state(missing_state)] = &mesh_field; - } - } else { - doesFieldExist = true; - } - } - - if(doesFieldExist == false) { - doesFieldExist = io_entity->field_exists(db_name); - if (!doesFieldExist) { - if (missing_fields_collector_ptr) { - (*missing_fields_collector_ptr)[f] = &mesh_field; - } - } - } - - // See if field with that name exists on io_entity... - if (doesFieldExist) { - mesh_field.add_part(f->entity_rank(), part, io_entity); - mesh_field.set_single_state((m_db_purpose == stk::io::READ_RESTART) ? false : true); - mesh_field.set_active(); - } - } - - return doesFieldExist; - } - - void InputFile::build_field_part_associations_from_grouping_entity(stk::mesh::BulkData &bulk, std::vector *missingFields) - { - Ioss::Region *region = m_region.get(); - size_t num_missing_fields = 0; - const stk::mesh::MetaData &meta = bulk.mesh_meta_data(); - - for (auto &mesh_field : m_fields) - { - if(mesh_field.is_active()) { - continue; - } - - std::map missingFieldCollector; - bool doesFieldExist = false; - stk::mesh::Part &universalPart = meta.universal_part(); - Ioss::GroupingEntity * universalNodeEntity = region->get_entity("nodeblock_1"); - doesFieldExist |= process_fields_for_grouping_entity(mesh_field, universalPart, universalNodeEntity, &missingFieldCollector); - - const stk::mesh::PartVector &all_parts = meta.get_parts(); - for ( stk::mesh::PartVector::const_iterator - ip = all_parts.begin(); ip != all_parts.end(); ++ip ) { - - stk::mesh::Part * const part = *ip; - - // Check whether this part is an input part... - if (stk::io::is_part_io_part(*part)) { - // Get Ioss::GroupingEntity corresponding to this part... - Ioss::GroupingEntity *io_entity = region->get_entity(part->name()); - - if(io_entity == nullptr) { - continue; - } - - doesFieldExist |= process_fields_for_grouping_entity(mesh_field, *part, io_entity, &missingFieldCollector); - - if(io_entity->type() == Ioss::SIDEBLOCK || io_entity->type() == Ioss::SIDESET) - { - static const std::string s_nodeset_suffix("_n"); - - std::string ns_name = part->name(); - ns_name += s_nodeset_suffix; - Ioss::NodeSet *io_node_set = region->get_nodeset(ns_name); - if(io_node_set != nullptr) { - // Process hidden nodesets - doesFieldExist |= process_fields_for_grouping_entity(mesh_field, *part, io_node_set, &missingFieldCollector); - } - } - - if(io_entity->type() == Ioss::SIDESET) - { - Ioss::SideSet* sideSet = dynamic_cast(io_entity); - auto faceBlocks = sideSet->get_side_blocks(); - for (auto faceBlock : faceBlocks) - { - doesFieldExist |= process_fields_for_grouping_entity(mesh_field, *part, faceBlock, &missingFieldCollector); - } - - } - } - } - - if (!doesFieldExist) - { - num_missing_fields += missingFieldCollector.size(); - if (nullptr != missingFields) - { - add_missing_fields(missingFields, missingFieldCollector); - } - else { - for (auto missingField : missingFieldCollector) { - std::cout << "Missing field: " << missingField.second->db_name() << std::endl; - } - } - } - } - - if (num_missing_fields > 0 && missingFields==nullptr) { - std::ostringstream msg; - msg << "ERROR: Input field processing could not find " << num_missing_fields << " fields.\n"; - throw std::runtime_error( msg.str() ); - } - } - - void InputFile::build_field_part_associations(stk::mesh::BulkData &bulk, std::vector *missingFields) - { - std::map missing_fields_collector; - std::map *missing_fields_collector_ptr = - (missingFields ? &missing_fields_collector : 0); - - // Each input field will have a list of the Parts that the field exists on... - // Create this list. - Ioss::Region *region = m_region.get(); - size_t num_missing_fields = 0; - // First handle any fields that are subsetted (restricted to a specified list of parts) - { - std::vector::iterator I = m_fields.begin(); - while (I != m_fields.end()) { - const stk::mesh::FieldBase *f = (*I).field(); - std::vector::iterator P = (*I).m_subsetParts.begin(); - while (P != (*I).m_subsetParts.end()) { - // Find the Ioss::GroupingEntity corresponding to this part... - const stk::mesh::Part *part = *P; ++P; - stk::mesh::EntityRank rank = part_primary_entity_rank(*part); - bool field_is_missing = false; - if (f->entity_rank() == rank) { - Ioss::GroupingEntity *io_entity = region->get_entity(part->name()); - STK_ThrowErrorMsgIf( io_entity == nullptr, - "ERROR: For field '" << (*I).field()->name() - << "' Could not find database entity corresponding to the part named '" - << part->name() << "'."); - field_is_missing = build_field_part_associations(*I, *part, rank, io_entity, missing_fields_collector_ptr); - } - - // If rank is != NODE_RANK, then see if field is defined on the nodes of this part - if (rank != stk::topology::NODE_RANK && f->entity_rank() == stk::topology::NODE_RANK) { - Ioss::GroupingEntity *node_entity = nullptr; - std::string nodes_name = part->name() + "_nodes"; - node_entity = region->get_entity(nodes_name); - if (node_entity == nullptr) { - node_entity = region->get_entity("nodeblock_1"); - } - if (node_entity != nullptr) { - field_is_missing = build_field_part_associations(*I, *part, stk::topology::NODE_RANK, node_entity, - missing_fields_collector_ptr); - } - } - - if (field_is_missing) { - ++num_missing_fields; - } - } - ++I; - } - } - - // Now handle the non-subsetted fields... - - // Check universal_part() NODE_RANK first... - const stk::mesh::MetaData &meta = bulk.mesh_meta_data(); - { - std::vector::iterator I = m_fields.begin(); - while (I != m_fields.end()) { - if ((*I).m_subsetParts.empty()) { - const stk::mesh::FieldBase *f = (*I).field(); - if (f->entity_rank() == stk::topology::NODE_RANK) { - bool field_is_missing = build_field_part_associations(*I, meta.universal_part(), stk::topology::NODE_RANK, - region->get_node_blocks()[0], missing_fields_collector_ptr); - if (field_is_missing) { - ++num_missing_fields; - } - } - } - ++I; - } - } - - // Now handle all non-nodeblock parts... - const stk::mesh::PartVector &all_parts = meta.get_parts(); - for ( stk::mesh::PartVector::const_iterator - ip = all_parts.begin(); ip != all_parts.end(); ++ip ) { - - stk::mesh::Part * const part = *ip; - - // Check whether this part is an input part... - if (stk::io::is_part_io_part(*part)) { - stk::mesh::EntityRank rank = part_primary_entity_rank(*part); - // Get Ioss::GroupingEntity corresponding to this part... - Ioss::GroupingEntity *entity = region->get_entity(part->name()); - if (entity != nullptr && !m_fields.empty() && entity->type() != Ioss::SIDESET) { - std::vector::iterator I = m_fields.begin(); - while (I != m_fields.end()) { - if ((*I).m_subsetParts.empty()) { - const stk::mesh::FieldBase *f = (*I).field(); - bool field_is_missing = false; - if (f->entity_rank() == rank) { - field_is_missing = build_field_part_associations(*I, *part, rank, entity, missing_fields_collector_ptr); - } - - // If rank is != NODE_RANK, then see if field is defined on the nodes of this part - if (rank != stk::topology::NODE_RANK && f->entity_rank() == stk::topology::NODE_RANK) { - Ioss::GroupingEntity *node_entity = nullptr; - std::string nodes_name = part->name() + "_nodes"; - node_entity = region->get_entity(nodes_name); - if (node_entity == nullptr) { - node_entity = region->get_entity("nodeblock_1"); - } - if (node_entity != nullptr) { - field_is_missing = build_field_part_associations(*I, *part, stk::topology::NODE_RANK, node_entity, - missing_fields_collector_ptr); - } - } - - if (field_is_missing) { - ++num_missing_fields; - } - } - ++I; - } - } - } - } - - if (num_missing_fields > 0 && missingFields==nullptr) { - std::ostringstream msg; - msg << "ERROR: Input field processing could not find " << num_missing_fields << " fields.\n"; - throw std::runtime_error( msg.str() ); - } - - add_missing_fields(missingFields, missing_fields_collector); - } - double InputFile::map_analysis_to_db_time(double time) const { double db_time = time; @@ -773,15 +381,16 @@ namespace io { std::sort(m_fields.begin(), m_fields.end(), meshFieldSort); bool ignore_missing_fields = (missingFields != nullptr); + Ioss::Region *region = m_region.get(); if (!m_fieldsInitialized) { - std::vector::iterator I = m_fields.begin(); - while (I != m_fields.end()) { - (*I).set_inactive(); ++I; - } + InputQuery iq(*region, bulk.mesh_meta_data(), m_db_purpose, m_multiStateSuffixes); - build_field_part_associations(bulk, missingFields); - build_field_part_associations_from_grouping_entity(bulk, missingFields); + for (stk::io::MeshField& mf : m_fields) { + mf.set_inactive(); + iq.build_field_part_associations(mf, missingFields); + iq.build_field_part_associations_from_grouping_entity(mf, missingFields); + } m_fieldsInitialized = true; } @@ -796,7 +405,6 @@ namespace io { STK_ThrowErrorMsgIf (m_region.get() == nullptr, "ERROR: There is no Input mesh/restart region associated with this Mesh Data."); - Ioss::Region *region = m_region.get(); // Get struct containing interval of database time(s) containing 'time' DBStepTimeInterval sti(region, db_time); @@ -859,13 +467,15 @@ namespace io { bool ignore_missing_fields = (missingFields != nullptr); + if (!m_fieldsInitialized) { + InputQuery iq(*region, bulk.mesh_meta_data(), m_db_purpose, m_multiStateSuffixes); + for (auto & meshField : m_fields) { meshField.set_inactive(); + iq.build_field_part_associations(meshField, missingFields); } - build_field_part_associations(bulk, missingFields); - m_fieldsInitialized = true; } @@ -903,5 +513,14 @@ namespace io { return time_read; } + void InputFile::initialize_input_fields() + { + for (auto & meshField : m_fields) { + meshField.set_inactive(); + meshField.clear_field_parts(); + } + + m_fieldsInitialized = false; + } } } diff --git a/packages/stk/stk_io/stk_io/InputFile.hpp b/packages/stk/stk_io/stk_io/InputFile.hpp index fe0e5c3ee239..a8c7d37e3e0f 100644 --- a/packages/stk/stk_io/stk_io/InputFile.hpp +++ b/packages/stk/stk_io/stk_io/InputFile.hpp @@ -87,10 +87,6 @@ class Part; stk::mesh::BulkData &bulk, bool useEntityListCache = false); void get_global_variable_names(std::vector &names); - void build_field_part_associations(stk::mesh::BulkData &bulk, std::vector *missing); - - void build_field_part_associations_from_grouping_entity(stk::mesh::BulkData &bulk, std::vector *missingFields); - std::shared_ptr get_input_ioss_region() { if (m_region.get() == nullptr && m_database.get() != nullptr) { @@ -138,22 +134,22 @@ class Part; return true; } + const std::vector& get_multistate_suffixes() const + { + static std::vector emptyVector; + + if(nullptr != m_multiStateSuffixes) { + return *m_multiStateSuffixes; + } + + return emptyVector; + } + + DatabasePurpose get_database_purpose() const { return m_db_purpose; } + + void initialize_input_fields(); + private: - bool process_fields_for_grouping_entity(stk::io::MeshField &mesh_field, - const stk::mesh::Part &part, - Ioss::GroupingEntity *io_entity, - std::map *missing_fields_collector_ptr = nullptr); - - bool build_field_part_associations(stk::io::MeshField &mesh_field, - const stk::mesh::Part &part, - const stk::mesh::EntityRank rank, - Ioss::GroupingEntity *io_entity, - std::map *missing_fields = nullptr); - - void build_field_part_associations_for_part(Ioss::Region *region, - const stk::mesh::FieldBase *f, - const stk::mesh::Part * part, - stk::io::MeshField &mf); DatabasePurpose m_db_purpose; std::shared_ptr m_database; diff --git a/packages/stk/stk_io/stk_io/InputQuery.cpp b/packages/stk/stk_io/stk_io/InputQuery.cpp new file mode 100644 index 000000000000..6d865a2024bf --- /dev/null +++ b/packages/stk/stk_io/stk_io/InputQuery.cpp @@ -0,0 +1,514 @@ +// Copyright 2002 - 2008, 2010, 2011 National Technology Engineering +// Solutions of Sandia, LLC (NTESS). Under the terms of Contract +// DE-NA0003525 with NTESS, the U.S. Government retains certain rights +// in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// * Neither the name of NTESS nor the names of its contributors +// may be used to endorse or promote products derived from this +// software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// + +// ####################### Start Clang Header Tool Managed Headers ######################## +// clang-format off +#include +#include // for exception +#include // for copy, sort, max, find +#include // for fmod +#include // for size_t +#include // for operator<<, basic_ostream +#include // for numeric_limits +#include // for runtime_error +#include // for READ_RESTART, Database... +#include // for DBStepTimeInterval +#include +#include // for is_part_io_part, all_f... +#include // for MeshField, MeshField::... +#include +#include // for FieldBase, FieldBase::... +#include // for find_restriction +#include // for MetaData +#include // for filename_substitution +#include "stk_util/environment/RuntimeWarning.hpp" // for RuntimeWarningAdHoc +#include // for ThrowErrorMsgIf, Throw... +#include // for move, pair +#include "Ioss_DBUsage.h" // for DatabaseUsage, READ_MODEL +#include "Ioss_DatabaseIO.h" // for DatabaseIO +#include "Ioss_EntityType.h" // for SIDESET, EntityType +#include "Ioss_Field.h" // for Field, Field::TRANSIENT +#include "Ioss_GroupingEntity.h" // for GroupingEntity +#include "Ioss_IOFactory.h" // for IOFactory +#include "Ioss_MeshType.h" // for MeshType, MeshType::UN... +#include "Ioss_NodeBlock.h" // for NodeBlock +#include "Ioss_NodeSet.h" // for NodeSet +#include "Ioss_Property.h" // for Property +#include "Ioss_Region.h" // for Region, NodeBlockConta... +#include "Ioss_SideBlock.h" // for SideBlock +#include "Ioss_SideSet.h" // for SideSet +#include "StkIoUtils.hpp" // for part_primary_entity_rank +#include "stk_mesh/base/BulkData.hpp" // for BulkData +#include "stk_mesh/base/FieldState.hpp" // for FieldState +#include "stk_mesh/base/Part.hpp" // for Part +#include "stk_mesh/base/Types.hpp" // for PartVector, EntityRank +#include "stk_topology/topology.hpp" // for topology, topology::NO... +// clang-format on +// ####################### End Clang Header Tool Managed Headers ######################## + +namespace { +void add_missing_fields(std::vector *missingFields, + stk::io::MissingFieldMap& missingFieldsCollector) +{ + if (nullptr != missingFields) { + std::vector discoveredMissingFields; + for (auto missingStatedFieldIter : missingFieldsCollector) + { + discoveredMissingFields.push_back(stk::io::MeshField(missingStatedFieldIter.first, + missingStatedFieldIter.second->db_name())); + } + std::sort(discoveredMissingFields.begin(), discoveredMissingFields.end(), + [](const stk::io::MeshField &a, const stk::io::MeshField &b) { + return (a.db_name() < b.db_name()) + || ((a.db_name() == b.db_name()) && (a.field()->name() < b.field()->name())); }); + + for(stk::io::MeshField &missingField : *missingFields) { + std::vector::iterator iter = std::find(discoveredMissingFields.begin(), discoveredMissingFields.end(), missingField); + if(iter != discoveredMissingFields.end()) { + discoveredMissingFields.erase(iter); + } + } + + missingFields->insert(missingFields->end(), discoveredMissingFields.begin(), discoveredMissingFields.end()); + } +} +} + +namespace stk { +namespace io { + + InputQuery::InputQuery(const Ioss::Region& region, + const stk::mesh::MetaData& meta, + const DatabasePurpose dbPurpose, + const std::vector* multiStateSuffixes) + : m_region(region), + m_meta(meta), + m_dbPurpose(dbPurpose), + m_multiStateSuffixes(multiStateSuffixes) + { + } + + bool InputQuery::build_field_part_associations(stk::io::MeshField &meshField, + const stk::mesh::Part &part, + const stk::mesh::EntityRank rank, + Ioss::GroupingEntity *ioEntity, + MissingFieldMap *missingFieldsCollector) + { + bool fieldIsMissing = false; + stk::mesh::FieldBase *f = meshField.field(); + // Only add TRANSIENT Fields -- check role; if not present assume transient... + const Ioss::Field::RoleType *role = stk::io::get_field_role(*f); + if (role == nullptr || *role == Ioss::Field::TRANSIENT) { + if (stk::io::is_field_on_part(f, rank, part)) { + const stk::mesh::FieldBase::Restriction &res = stk::mesh::find_restriction(*f, rank, part); + FieldType fieldType; + stk::io::get_io_field_type(f, res, &fieldType); + + if (fieldType.type != Ioss::Field::INVALID) { + const std::string &dbName = meshField.db_name(); + unsigned numStates = f->number_of_states(); + std::vector missingStates; + if (numStates > 1) { + bool hasAllStates = all_field_states_exist_on_io_entity(dbName, f, ioEntity, missingStates, m_multiStateSuffixes); + if(hasAllStates == false) { + fieldIsMissing = true; + if (missingFieldsCollector) { + for (stk::mesh::FieldState missingState : missingStates) + (*missingFieldsCollector)[f->field_state(missingState)] = &meshField; + } + } + } + + bool fieldExists = ioEntity->field_exists(dbName); + if (!fieldExists) { + fieldIsMissing = true; + if (missingFieldsCollector) { + (*missingFieldsCollector)[f] = &meshField; + } + } + + // See if field with that name exists on ioEntity... + if (fieldExists) { + meshField.add_part(rank, part, ioEntity); + meshField.set_single_state((m_dbPurpose == stk::io::READ_RESTART) ? false : true); + meshField.set_active(); + } + } + } + } + return fieldIsMissing; + } + + int InputQuery::build_field_part_associations(stk::io::MeshField& mf, + std::vector *missingFields, + const bool throwOnErrorMessage) + { + MissingFieldMap missingFieldsCollector; + MissingFieldMap *missingFieldsCollectorPtr = (missingFields ? &missingFieldsCollector : nullptr); + + // Each input field will have a list of the Parts that the field exists on... + // Create this list. + int numMissingFields = 0; + // First handle any fields that are sub-setted (restricted to a specified list of parts) + + const stk::mesh::FieldBase *f = mf.field(); + + for (const stk::mesh::Part *part : mf.m_subsetParts) { + stk::mesh::EntityRank rank = part_primary_entity_rank(*part); + bool fieldIsMissing = false; + + if (f->entity_rank() == rank) { + Ioss::GroupingEntity *ioEntity = m_region.get_entity(part->name()); + STK_ThrowErrorMsgIf( ioEntity == nullptr, + "ERROR: For field '" << + mf.field()->name() << + "' Could not find database entity corresponding to the part named '" << + part->name() << "'."); + fieldIsMissing |= build_field_part_associations(mf, *part, rank, ioEntity, missingFieldsCollectorPtr); + } + + // If rank is != NODE_RANK, then see if field is defined on the nodes of this part + if (rank != stk::topology::NODE_RANK && f->entity_rank() == stk::topology::NODE_RANK) { + Ioss::GroupingEntity *nodeEntity = nullptr; + std::string nodesName = part->name() + "_nodes"; + nodeEntity = m_region.get_entity(nodesName); + if (nodeEntity == nullptr) { + nodeEntity = m_region.get_entity("nodeblock_1"); + } + if (nodeEntity != nullptr) { + fieldIsMissing |= build_field_part_associations(mf, *part, stk::topology::NODE_RANK, nodeEntity, + missingFieldsCollectorPtr); + } + } + + if (fieldIsMissing) { + ++numMissingFields; + } + } + + + // Now handle the non-subsetted fields... + + // Check universal_part() NODE_RANK first... + if (mf.m_subsetParts.empty()) { + if (f->entity_rank() == stk::topology::NODE_RANK) { + Ioss::GroupingEntity *nodeEntity = m_region.get_node_blocks()[0]; + bool fieldIsMissing = build_field_part_associations(mf, m_meta.universal_part(), stk::topology::NODE_RANK, + nodeEntity, missingFieldsCollectorPtr); + if (fieldIsMissing) { + ++numMissingFields; + } + } + } + + // Now handle all non-nodeblock parts... + for ( stk::mesh::Part * const part : m_meta.get_parts()) { + // Check whether this part is an input part... + if (stk::io::is_part_io_part(*part)) { + stk::mesh::EntityRank rank = part_primary_entity_rank(*part); + // Get Ioss::GroupingEntity corresponding to this part... + Ioss::GroupingEntity *entity = m_region.get_entity(part->name()); + if (entity != nullptr && entity->type() != Ioss::SIDESET) { + + if (mf.m_subsetParts.empty()) { + f = mf.field(); + bool fieldIsMissing = false; + if (f->entity_rank() == rank) { + fieldIsMissing |= build_field_part_associations(mf, *part, rank, entity, missingFieldsCollectorPtr); + } + + // If rank is != NODE_RANK, then see if field is defined on the nodes of this part + if (rank != stk::topology::NODE_RANK && f->entity_rank() == stk::topology::NODE_RANK) { + Ioss::GroupingEntity *nodeEntity = nullptr; + std::string nodesName = part->name() + "_nodes"; + nodeEntity = m_region.get_entity(nodesName); + if (nodeEntity == nullptr) { + nodeEntity = m_region.get_entity("nodeblock_1"); + } + if (nodeEntity != nullptr) { + fieldIsMissing |= build_field_part_associations(mf, *part, stk::topology::NODE_RANK, nodeEntity, + missingFieldsCollectorPtr); + } + } + + if (fieldIsMissing) { + ++numMissingFields; + } + } + } + } + } + + if (numMissingFields > 0 && missingFields==nullptr && throwOnErrorMessage) { + std::ostringstream msg; + msg << "ERROR: Input field processing could not find " << numMissingFields << " fields.\n"; + throw std::runtime_error( msg.str() ); + } + + add_missing_fields(missingFields, missingFieldsCollector); + + return numMissingFields; + } + + bool InputQuery::process_fields_for_grouping_entity(stk::io::MeshField &mf, + const stk::mesh::Part &part, + Ioss::GroupingEntity *ioEntity, + MissingFieldMap *missingFieldsCollectorPtr) + { + STK_ThrowRequireMsg(ioEntity != nullptr, "Null IO entity"); + + bool doesFieldExist = false; + + stk::mesh::FieldBase *f = mf.field(); + + stk::mesh::EntityRank rank = part_primary_entity_rank(part); + if(f->entity_rank() == rank) { + const std::string &dbName = mf.db_name(); + unsigned numStates = f->number_of_states(); + std::vector missingStates; + if (numStates > 1) { + bool hasAllStates = all_field_states_exist_on_io_entity(dbName, f, ioEntity, missingStates, m_multiStateSuffixes); + if(hasAllStates == false) { + if (missingFieldsCollectorPtr) { + for (stk::mesh::FieldState missingState : missingStates) { + (*missingFieldsCollectorPtr)[f->field_state(missingState)] = &mf; + } + } + } else { + doesFieldExist = true; + } + } + + if(doesFieldExist == false) { + doesFieldExist = ioEntity->field_exists(dbName); + if (!doesFieldExist) { + if (missingFieldsCollectorPtr) { + (*missingFieldsCollectorPtr)[f] = &mf; + } + } + } + + // See if field with that name exists on ioEntity... + if (doesFieldExist) { + mf.add_part(f->entity_rank(), part, ioEntity); + mf.set_single_state((m_dbPurpose == stk::io::READ_RESTART) ? false : true); + mf.set_active(); + } + } + + return doesFieldExist; + } + + int InputQuery::build_field_part_associations_from_grouping_entity(stk::io::MeshField& mf, + std::vector *missingFields, + const bool throwOnErrorMessage) + { + int numMissingFields = 0; + + if(mf.is_active()) { + return numMissingFields; + } + + MissingFieldMap missingFieldCollector; + bool doesFieldExist = false; + stk::mesh::Part &universalPart = m_meta.universal_part(); + Ioss::GroupingEntity * universalNodeEntity = m_region.get_entity("nodeblock_1"); + + doesFieldExist |= process_fields_for_grouping_entity(mf, universalPart, universalNodeEntity, &missingFieldCollector); + + for ( stk::mesh::Part * const part : m_meta.get_parts() ) { + // Check whether this part is an input part... + if (stk::io::is_part_io_part(*part)) { + // Get Ioss::GroupingEntity corresponding to this part... + Ioss::GroupingEntity *ioEntity = m_region.get_entity(part->name()); + + if(ioEntity == nullptr) { + continue; + } + + doesFieldExist |= process_fields_for_grouping_entity(mf, *part, ioEntity, &missingFieldCollector); + + if(ioEntity->type() == Ioss::SIDEBLOCK || ioEntity->type() == Ioss::SIDESET) { + static const std::string s_nodeset_suffix("_n"); + + std::string nsName = part->name(); + nsName += s_nodeset_suffix; + Ioss::NodeSet *ioNodeSet = m_region.get_nodeset(nsName); + if(ioNodeSet != nullptr) { + // Process hidden nodesets + doesFieldExist |= process_fields_for_grouping_entity(mf, *part, ioNodeSet, &missingFieldCollector); + } + } + + if(ioEntity->type() == Ioss::SIDESET) { + Ioss::SideSet* sideSet = dynamic_cast(ioEntity); + auto faceBlocks = sideSet->get_side_blocks(); + for (auto faceBlock : faceBlocks) { + doesFieldExist |= process_fields_for_grouping_entity(mf, *part, faceBlock, &missingFieldCollector); + } + } + } + } + + if (!doesFieldExist) { + numMissingFields += missingFieldCollector.size(); + if (nullptr != missingFields) { + add_missing_fields(missingFields, missingFieldCollector); + } + else { + for (auto missingField : missingFieldCollector) { + std::cout << "Missing field: " << missingField.second->db_name() << std::endl; + } + } + } + + if (numMissingFields > 0 && missingFields==nullptr && throwOnErrorMessage) { + std::ostringstream msg; + msg << "ERROR: Input field processing could not find " << numMissingFields << " fields.\n"; + throw std::runtime_error( msg.str() ); + } + + return numMissingFields; + } + + void InputQuery::build_field_part_associations_for_part(stk::io::MeshField &mf, const stk::mesh::Part * part) + { + stk::mesh::FieldBase *f = mf.field(); + stk::mesh::EntityRank rank = part_primary_entity_rank(*part); + // Get Ioss::GroupingEntity corresponding to this part... + Ioss::GroupingEntity *entity = m_region.get_entity(part->name()); + + if (entity != nullptr) { + if (f->entity_rank() == rank) { + build_field_part_associations(mf, *part, rank, entity); + process_fields_for_grouping_entity(mf, *part, entity); + + if(entity->type() == Ioss::SIDESET) { + auto io_side_set = dynamic_cast(entity); + STK_ThrowRequire(io_side_set != nullptr); + auto fbs = io_side_set->get_side_blocks(); + + for(auto& io_fblock : fbs) { + build_field_part_associations(mf, *part, rank, io_fblock); + process_fields_for_grouping_entity(mf, *part, io_fblock); + } + } + } + + // If rank is != NODE_RANK, then see if field is defined on the nodes of this part + if (rank != stk::topology::NODE_RANK && f->entity_rank() == stk::topology::NODE_RANK) { + Ioss::GroupingEntity *nodeEntity = nullptr; + std::string nodes_name = part->name() + "_nodes"; + + nodeEntity = m_region.get_entity(nodes_name); + + if (nodeEntity == nullptr) { + nodes_name = part->name() + "_n"; + nodeEntity = m_region.get_entity(nodes_name); + } + + if (nodeEntity == nullptr) { + nodeEntity = m_region.get_entity("nodeblock_1"); + } + if (nodeEntity != nullptr) { + build_field_part_associations(mf, *part, stk::topology::NODE_RANK, nodeEntity); + process_fields_for_grouping_entity(mf, *part, nodeEntity); + } + } + } + } + + bool verify_field_request(const Ioss::Region& region, const stk::mesh::MetaData& meta, + const DatabasePurpose dbPurpose, const std::vector& multiStateSuffixes, + const stk::io::MeshField &meshField, bool printWarning) + { + stk::io::InputQuery iq(region, meta, dbPurpose, (multiStateSuffixes.empty() ? nullptr : &multiStateSuffixes)); + + stk::io::MeshField mf(meshField.field(), meshField.db_name()); + std::vector missingFields; + + iq.build_field_part_associations(mf, &missingFields, false); + iq.build_field_part_associations_from_grouping_entity(mf, &missingFields, false); + + if(missingFields.size() > 0) { + std::ostringstream oss; + oss << "For input IO field: " + << meshField.db_name() + << " the following associated fields for the requested STK field: " + << meshField.field()->name() + << " of rank: " + << meshField.field()->entity_rank() + << ", are missing in database: " + << region.get_database()->get_filename() + << std::endl; + + for(auto & missingField : missingFields) { + oss << "\t" << missingField.field()->name() << std::endl; + } + + stk::RuntimeWarning() << oss.str(); + } + + return mf.is_active(); + } + + bool verify_field_request(const StkMeshIoBroker &broker, const MeshField &meshField, bool printWarning) + { + auto region = broker.get_input_ioss_region(); + if(!region) { + if(printWarning) { + stk::RuntimeWarning() << "Broker has no input Ioss::Region" << std::endl; + } + + return false; + } + + if(broker.is_meta_data_null()) { + if(printWarning) { + stk::RuntimeWarning() << "Broker has no stk::mesh::MetaData defined" << std::endl; + } + + return false; + } + + const stk::mesh::MetaData &meta = broker.meta_data(); + InputFile& inputFile = broker.get_mesh_database(broker.get_active_mesh()); + + return verify_field_request(*region, meta, inputFile.get_database_purpose(), + inputFile.get_multistate_suffixes(), + meshField, printWarning); + } +} +} + diff --git a/packages/stk/stk_io/stk_io/InputQuery.hpp b/packages/stk/stk_io/stk_io/InputQuery.hpp new file mode 100644 index 000000000000..dfc9128068e1 --- /dev/null +++ b/packages/stk/stk_io/stk_io/InputQuery.hpp @@ -0,0 +1,115 @@ +// Copyright 2002 - 2008, 2010, 2011 National Technology Engineering +// Solutions of Sandia, LLC (NTESS). Under the terms of Contract +// DE-NA0003525 with NTESS, the U.S. Government retains certain rights +// in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// * Neither the name of NTESS nor the names of its contributors +// may be used to endorse or promote products derived from this +// software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// + +#ifndef STK_STK_IO_STK_IO_INPUTQUERY_HPP_ +#define STK_STK_IO_STK_IO_INPUTQUERY_HPP_ + +#include +#include // for DatabasePurpose +#include +#include +#include "Ioss_EntityType.h" + +namespace Ioss { +class PropertyManager; +class GroupingEntity; +class Region; +class DatabaseIO; +} + +namespace stk { +namespace mesh { +class MetaData; +class BulkData; +class Part; +} + +namespace io { +class StkMeshIoBroker; + +using MissingFieldMap = std::map; + +class InputQuery +{ +public: + InputQuery(const Ioss::Region& region, + const stk::mesh::MetaData& meta, + const DatabasePurpose dbPurpose, + const std::vector* multiStateSuffixes = nullptr); + + ~InputQuery() { } + + int build_field_part_associations(stk::io::MeshField& mf, + std::vector *missingFields = nullptr, + const bool throwOnErrorMessage = true); + + int build_field_part_associations_from_grouping_entity(stk::io::MeshField& mf, + std::vector *missingFields = nullptr, + const bool throwOnErrorMessage = true); + + void build_field_part_associations_for_part(stk::io::MeshField &mf, const stk::mesh::Part * part); + + bool process_fields_for_grouping_entity(stk::io::MeshField &mf, + const stk::mesh::Part &part, + Ioss::GroupingEntity *ioEntity, + MissingFieldMap *missingFieldsCollectorPtr = nullptr); + + bool build_field_part_associations(stk::io::MeshField &mesh_field, + const stk::mesh::Part &part, + const stk::mesh::EntityRank rank, + Ioss::GroupingEntity *ioEntity, + MissingFieldMap *missingFields = nullptr); + +private: + const Ioss::Region& m_region; + const stk::mesh::MetaData& m_meta; + DatabasePurpose m_dbPurpose; + const std::vector* m_multiStateSuffixes = nullptr; +}; + +bool verify_field_request(const StkMeshIoBroker &broker, + const stk::io::MeshField &meshField, + bool printWarning = true); + +bool verify_field_request(const Ioss::Region& region, + const stk::mesh::MetaData& meta, + const DatabasePurpose dbPurpose, + const std::vector& multiStateSuffixes, + const stk::io::MeshField &meshField, + bool printWarning = true); + +} +} + +#endif /* STK_STK_IO_STK_IO_INPUTQUERY_HPP_ */ diff --git a/packages/stk/stk_io/stk_io/IossBridge.cpp b/packages/stk/stk_io/stk_io/IossBridge.cpp index a69dc2a801f3..0f72a3976e68 100644 --- a/packages/stk/stk_io/stk_io/IossBridge.cpp +++ b/packages/stk/stk_io/stk_io/IossBridge.cpp @@ -1280,7 +1280,7 @@ const stk::mesh::FieldBase *declare_stk_field_internal(stk::mesh::MetaData &meta return stk::topology::SHELL_TRI_6_ALL_FACE_SIDES; } else if (name == "shell4") { return stk::topology::SHELL_QUAD_4_ALL_FACE_SIDES; - } else if (name == "shel8") { + } else if (name == "shell8") { return stk::topology::SHELL_QUAD_8_ALL_FACE_SIDES; } else if (name == "shell9") { return stk::topology::SHELL_QUAD_9_ALL_FACE_SIDES; @@ -1837,7 +1837,8 @@ const stk::mesh::FieldBase *declare_stk_field_internal(stk::mesh::MetaData &meta } } - const std::string get_suffix_for_field_at_state(enum stk::mesh::FieldState fieldState, std::vector* multiStateSuffixes) + const std::string get_suffix_for_field_at_state(enum stk::mesh::FieldState fieldState, + const std::vector* multiStateSuffixes) { if(nullptr != multiStateSuffixes) { STK_ThrowRequireMsg((multiStateSuffixes->size() >= fieldState), @@ -1872,26 +1873,27 @@ const stk::mesh::FieldBase *declare_stk_field_internal(stk::mesh::MetaData &meta } std::string get_stated_field_name(const std::string &fieldBaseName, stk::mesh::FieldState stateIdentifier, - std::vector* multiStateSuffixes) + const std::vector* multiStateSuffixes) { std::string field_name_with_suffix = fieldBaseName + get_suffix_for_field_at_state(stateIdentifier, multiStateSuffixes); return field_name_with_suffix; } bool field_state_exists_on_io_entity(const std::string& dbName, const stk::mesh::FieldBase* field, stk::mesh::FieldState stateIdentifier, - Ioss::GroupingEntity *ioEntity, std::vector* multiStateSuffixes) + Ioss::GroupingEntity *ioEntity, const std::vector* multiStateSuffixes) { std::string fieldNameWithSuffix = get_stated_field_name(dbName, stateIdentifier, multiStateSuffixes); return ioEntity->field_exists(fieldNameWithSuffix); } - bool all_field_states_exist_on_io_entity(const std::string& dbName, const stk::mesh::FieldBase* field, Ioss::GroupingEntity *ioEntity, - std::vector &missingStates, std::vector* inputMultiStateSuffixes) + bool all_field_states_exist_on_io_entity(const std::string& dbName, const stk::mesh::FieldBase* field, + Ioss::GroupingEntity *ioEntity, std::vector &missingStates, + const std::vector* inputMultiStateSuffixes) { bool allStatesExist = true; size_t stateCount = field->number_of_states(); - std::vector* multiStateSuffixes = stateCount > 2 ? inputMultiStateSuffixes : nullptr; + const std::vector* multiStateSuffixes = stateCount > 2 ? inputMultiStateSuffixes : nullptr; if(nullptr != multiStateSuffixes) { STK_ThrowRequire(multiStateSuffixes->size() >= stateCount); @@ -4415,6 +4417,5 @@ const stk::mesh::FieldBase *declare_stk_field_internal(stk::mesh::MetaData &meta filter_nodes_by_local_connectivity(bulk, params.get_subset_selector(), nodes); } - }//namespace io }//namespace stk diff --git a/packages/stk/stk_io/stk_io/IossBridge.hpp b/packages/stk/stk_io/stk_io/IossBridge.hpp index 0198587b9f04..9a4fa424c936 100644 --- a/packages/stk/stk_io/stk_io/IossBridge.hpp +++ b/packages/stk/stk_io/stk_io/IossBridge.hpp @@ -372,13 +372,14 @@ void delete_selector_property(Ioss::Region &io_region); void delete_selector_property(Ioss::GroupingEntity *io_entity); std::string get_stated_field_name(const std::string &field_base_name, stk::mesh::FieldState state_identifier, - std::vector* multiStateSuffixes=nullptr); + const std::vector* multiStateSuffixes=nullptr); bool field_state_exists_on_io_entity(const std::string& db_name, const stk::mesh::FieldBase* field, stk::mesh::FieldState state_identifier, - Ioss::GroupingEntity *io_entity, std::vector* multiStateSuffixes=nullptr); + Ioss::GroupingEntity *io_entity, const std::vector* multiStateSuffixes=nullptr); -bool all_field_states_exist_on_io_entity(const std::string& db_name, const stk::mesh::FieldBase* field, Ioss::GroupingEntity *io_entity, - std::vector &missing_states, std::vector* multiStateSuffixes=nullptr); +bool all_field_states_exist_on_io_entity(const std::string& db_name, const stk::mesh::FieldBase* field, + Ioss::GroupingEntity *io_entity, std::vector &missing_states, + const std::vector* multiStateSuffixes=nullptr); void multistate_field_data_from_ioss(const stk::mesh::BulkData& mesh, const stk::mesh::FieldBase *field, diff --git a/packages/stk/stk_io/stk_io/MeshField.cpp b/packages/stk/stk_io/stk_io/MeshField.cpp index 77d92f5d39cc..bb4e1dd75a76 100644 --- a/packages/stk/stk_io/stk_io/MeshField.cpp +++ b/packages/stk/stk_io/stk_io/MeshField.cpp @@ -302,6 +302,11 @@ double MeshField::restore_field_data(stk::mesh::BulkData &bulk, return time_read; } +void MeshField::clear_field_parts() +{ + m_fieldParts.clear(); +} + void MeshFieldPart::release_field_data() { m_preStep = 0; diff --git a/packages/stk/stk_io/stk_io/MeshField.hpp b/packages/stk/stk_io/stk_io/MeshField.hpp index 10f320e17f30..5f38142c4c75 100644 --- a/packages/stk/stk_io/stk_io/MeshField.hpp +++ b/packages/stk/stk_io/stk_io/MeshField.hpp @@ -51,6 +51,7 @@ namespace stk { namespace mesh { class Part; } } // clang-format on // ####################### End Clang Header Tool Managed Headers ######################## namespace stk { namespace io { class InputFile; } } +namespace stk { namespace io { class InputQuery; } } namespace stk { namespace io { @@ -91,6 +92,7 @@ class MeshField public: friend class InputFile; + friend class InputQuery; // Options: // * Frequency: @@ -165,6 +167,8 @@ class MeshField bool field_restored() const {return m_fieldRestored;} double time_restored() const {return m_timeRestored;} + void clear_field_parts(); + private: MeshField(); diff --git a/packages/stk/stk_io/stk_io/ProcessSetsOrBlocks.cpp b/packages/stk/stk_io/stk_io/ProcessSetsOrBlocks.cpp index f50450bca6e2..6577523e3730 100644 --- a/packages/stk/stk_io/stk_io/ProcessSetsOrBlocks.cpp +++ b/packages/stk/stk_io/stk_io/ProcessSetsOrBlocks.cpp @@ -112,7 +112,6 @@ void process_surface_entity(Ioss::SideSet *sset, stk::mesh::MetaData &meta) STKIORequire(ss_part != nullptr); stk::mesh::FieldBase *distribution_factors_field = nullptr; - bool surface_df_defined = false; // Has the surface df field been defined yet? size_t block_count = sset->block_count(); for (size_t i=0; i < block_count; i++) { @@ -125,13 +124,14 @@ void process_surface_entity(Ioss::SideSet *sset, stk::mesh::MetaData &meta) } if (sb->field_exists("distribution_factors")) { - if (!surface_df_defined) { - stk::topology::rank_t side_rank = static_cast(stk::io::part_primary_entity_rank(*sb_part)); - std::string field_name = sset->name() + "_df"; - distribution_factors_field = &meta.declare_field(side_rank, field_name); + stk::topology::rank_t side_block_rank = static_cast(stk::io::part_primary_entity_rank(*sb_part)); + std::string field_name = sset->name() + "_df"; + distribution_factors_field = meta.get_field(side_block_rank, field_name); + + if (distribution_factors_field == nullptr) { + distribution_factors_field = &meta.declare_field(side_block_rank, field_name); stk::io::set_field_role(*distribution_factors_field, Ioss::Field::MESH); stk::io::set_distribution_factor_field(*ss_part, *distribution_factors_field); - surface_df_defined = true; } stk::io::set_distribution_factor_field(*sb_part, *distribution_factors_field); int side_node_count = sb->topology()->number_nodes(); diff --git a/packages/stk/stk_io/stk_io/StkMeshIoBroker.cpp b/packages/stk/stk_io/stk_io/StkMeshIoBroker.cpp index fb9f5e4438eb..76e427b1a2b6 100644 --- a/packages/stk/stk_io/stk_io/StkMeshIoBroker.cpp +++ b/packages/stk/stk_io/stk_io/StkMeshIoBroker.cpp @@ -372,7 +372,7 @@ std::shared_ptr StkMeshIoBroker::get_input_ioss_region() const } } -InputFile &StkMeshIoBroker::get_mesh_database(size_t input_file_index) +InputFile &StkMeshIoBroker::get_mesh_database(size_t input_file_index) const { validate_input_file_index(input_file_index); return *m_inputFiles[input_file_index]; @@ -1275,6 +1275,12 @@ bool StkMeshIoBroker::get_throw_on_missing_input_fields() const void StkMeshIoBroker::set_enable_all_face_sides_shell_topo(bool flag) { m_enableAllFaceSidesShellTopo = flag; + if (m_inputFiles.size() > m_activeMeshIndex) { + Ioss::Region *region = m_inputFiles[m_activeMeshIndex]->get_input_ioss_region().get(); + if (nullptr != region) { + region->property_add(Ioss::Property("ENABLE_ALL_FACE_SIDES_SHELL", "YES")); + } + } } bool StkMeshIoBroker::get_enable_all_face_sides_shell_topo() const diff --git a/packages/stk/stk_io/stk_io/StkMeshIoBroker.hpp b/packages/stk/stk_io/stk_io/StkMeshIoBroker.hpp index f58f08df3a40..f7164f11ad61 100644 --- a/packages/stk/stk_io/stk_io/StkMeshIoBroker.hpp +++ b/packages/stk/stk_io/stk_io/StkMeshIoBroker.hpp @@ -287,8 +287,8 @@ namespace stk { // Get a reference to an existing mesh database so it can be modified // Typical modifications deal with - // times: tart/stop/offset/scale/cycle/periodlength. - InputFile &get_mesh_database(size_t input_file_index); + // times: start/stop/offset/scale/cycle/periodlength. + InputFile &get_mesh_database(size_t input_file_index) const; // Remove the specified mesh database from the list of mesh databases. // All files associated with the mesh database will be closed and destructors diff --git a/packages/stk/stk_mesh/stk_mesh/base/Bucket.cpp b/packages/stk/stk_mesh/stk_mesh/base/Bucket.cpp index 4eb2e69024ce..a10eb53b61b7 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/Bucket.cpp +++ b/packages/stk/stk_mesh/stk_mesh/base/Bucket.cpp @@ -245,7 +245,8 @@ unsigned get_default_bucket_capacity() { return impl::default_maximum_bucket_cap unsigned get_default_initial_bucket_capacity() { return impl::default_initial_bucket_capacity; } unsigned get_default_maximum_bucket_capacity() { return impl::default_maximum_bucket_capacity; } -bool raw_part_equal( const unsigned * lhs , const unsigned * rhs ) +#ifndef STK_HIDE_DEPRECATED_CODE // Delete after Jan 1, 2025 +STK_DEPRECATED bool raw_part_equal( const unsigned * lhs , const unsigned * rhs ) { bool result = true ; { @@ -259,21 +260,19 @@ bool raw_part_equal( const unsigned * lhs , const unsigned * rhs ) } inline -bool bucket_key_less( const unsigned * lhs , const unsigned * rhs ) +bool bucket_key_less( const OrdinalVector& lhs , const OrdinalVector& rhs ) { - const unsigned * const last_lhs = lhs + ( *lhs < *rhs ? *lhs : *rhs ); - while ( last_lhs != lhs && *lhs == *rhs ) { ++lhs ; ++rhs ; } - return *lhs < *rhs ; + return lhs < rhs; } -// The part count and part ordinals are less bool BucketLess::operator()( const Bucket * lhs_bucket , - const unsigned * rhs ) const -{ return bucket_key_less( lhs_bucket->key() , rhs ); } + const OrdinalVector& rhs ) const +{ return bucket_key_less( lhs_bucket->key_vector() , rhs ); } -bool BucketLess::operator()( const unsigned * lhs , +bool BucketLess::operator()( const OrdinalVector& lhs , const Bucket * rhs_bucket ) const -{ return bucket_key_less( lhs , rhs_bucket->key() ); } +{ return bucket_key_less( lhs , rhs_bucket->key_vector() ); } +#endif //---------------------------------------------------------------------- @@ -287,7 +286,7 @@ Bucket::Bucket(BulkData & mesh, m_entity_rank(entityRank), m_topology(), m_key(key), - m_partOrdsBeginEnd(m_key.data()+1,m_key.data()+1+m_key[0]), + m_partOrdsBeginEnd(m_key.data(),m_key.data()+m_key.size()), m_capacity(initialCapacity), m_maxCapacity(maximumCapacity), m_size(0), @@ -323,7 +322,7 @@ Bucket::Bucket(BulkData & mesh, setup_connectivity(m_topology, entityRank, stk::topology::FACE_RANK, m_face_kind, m_fixed_face_connectivity); setup_connectivity(m_topology, entityRank, stk::topology::ELEMENT_RANK, m_element_kind, m_fixed_element_connectivity); - m_parts.reserve(m_key.size()-1); + m_parts.reserve(m_key.size()); supersets(m_parts); m_mesh.new_bucket_callback(m_entity_rank, m_parts, m_capacity, this); @@ -654,20 +653,13 @@ unsigned Bucket::get_ngp_field_bucket_is_modified(unsigned fieldOrdinal) const void Bucket::reset_part_ord_begin_end() { - m_partOrdsBeginEnd.first = m_key.data()+1; - m_partOrdsBeginEnd.second = m_key.data()+1+m_key[0]; + m_partOrdsBeginEnd.first = m_key.data(); + m_partOrdsBeginEnd.second = m_key.data()+m_key.size(); } void Bucket::reset_bucket_key(const OrdinalVector& newPartOrdinals) { - unsigned newPartCount = newPartOrdinals.size(); - - m_key.resize(newPartCount + 1); - m_key[0] = newPartCount; - - for(unsigned i = 0; i < newPartCount; i++) { - m_key[i+1] = newPartOrdinals[i]; - } + m_key = newPartOrdinals; } void Bucket::reset_bucket_parts(const OrdinalVector& newPartOrdinals) @@ -748,6 +740,10 @@ bool Bucket::destroy_relation(Entity e_from, Entity e_to, const RelationIdentifi DestroyRelationFunctor functor(from_bucket_ordinal, e_to, static_cast(local_id)); modify_connectivity(functor, m_mesh.entity_rank(e_to)); + if (functor.m_modified) { + mark_for_modification(); + } + return functor.m_modified; } @@ -756,6 +752,10 @@ bool Bucket::declare_relation(unsigned bucket_ordinal, Entity e_to, const Connec DeclareRelationFunctor functor(bucket_ordinal, e_to, ordinal, permutation); modify_connectivity(functor, m_mesh.entity_rank(e_to)); + if (functor.m_modified) { + mark_for_modification(); + } + return functor.m_modified; } diff --git a/packages/stk/stk_mesh/stk_mesh/base/Bucket.hpp b/packages/stk/stk_mesh/stk_mesh/base/Bucket.hpp index 423d3c07eefc..7dfa4941326a 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/Bucket.hpp +++ b/packages/stk/stk_mesh/stk_mesh/base/Bucket.hpp @@ -91,8 +91,10 @@ std::ostream & operator << ( std::ostream & , const Bucket & ); std::ostream & print( std::ostream & , const std::string & indent , const Bucket & ); +#ifndef STK_HIDE_DEPRECATED_CODE // Delete after Jan 1, 2025 // The part count and parts are equal -bool raw_part_equal( const unsigned * lhs , const unsigned * rhs ); +STK_DEPRECATED bool raw_part_equal( const unsigned * lhs , const unsigned * rhs ); +#endif #define CONNECTIVITY_TYPE_SWITCH(entity_kind, fixed_func_sig, dynamic_func_sig, check_invalid) \ switch(entity_kind) { \ @@ -220,9 +222,9 @@ class Bucket std::pair superset_part_ordinals() const { return m_partOrdsBeginEnd; } -#ifndef DOXYGEN_COMPILE + const std::vector & key_vector() const { return m_key; } + const unsigned * key() const { return m_key.data() ; } -#endif /* DOXYGEN_COMPILE */ /** \brief The allocation size, in bytes, of this bucket */ unsigned allocation_size() const { return 0 ; } @@ -460,8 +462,6 @@ class Bucket unsigned maximumCapacity, unsigned bucketId); - const std::vector & key_vector() const { return m_key; } - // Add a new entity to end of bucket void add_entity(Entity entity = Entity()); @@ -598,16 +598,16 @@ bool has_superset( const Bucket & bucket , const Part & p ) */ bool has_superset( const Bucket & bucket , const PartVector & parts ); - -struct BucketLess { - bool operator()( const Bucket * lhs_bucket , const unsigned * rhs ) const ; - bool operator()( const unsigned * lhs , const Bucket * rhs_bucket ) const ; +#ifndef STK_HIDE_DEPRECATED_CODE // Delete after Jan 1 2025 +struct STK_DEPRECATED BucketLess { + bool operator()( const Bucket * lhs_bucket , const OrdinalVector& rhs ) const ; + bool operator()( const OrdinalVector& lhs , const Bucket * rhs_bucket ) const ; }; -inline -BucketVector::iterator -lower_bound( BucketVector & v , const unsigned * key ) +STK_DEPRECATED inline BucketVector::iterator +lower_bound( BucketVector & v , const OrdinalVector& key ) { return std::lower_bound( v.begin() , v.end() , key , BucketLess() ); } +#endif struct BucketIdComparator { diff --git a/packages/stk/stk_mesh/stk_mesh/base/BulkData.cpp b/packages/stk/stk_mesh/stk_mesh/base/BulkData.cpp index 5a1655330001..1b81c06f73b8 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/BulkData.cpp +++ b/packages/stk/stk_mesh/stk_mesh/base/BulkData.cpp @@ -859,12 +859,13 @@ Entity BulkData::declare_element_side_with_id(const stk::mesh::EntityId globalSi } } else { - EntityKey sideKey(mesh_meta_data().side_rank(), globalSideId); + stk::topology sideTop = bucket(elem).topology().side_topology(sideOrd); + EntityKey sideKey(sideTop.rank(), globalSideId); + std::pair result = internal_get_or_create_entity_with_notification(sideKey); side = result.first; const bool newlyCreated = result.second; - stk::topology sideTop = bucket(elem).topology().side_topology(sideOrd); if (newlyCreated) { PARTVECTOR allParts = add_root_topology_part(parts, mesh_meta_data().get_topology_root_part(sideTop)); allParts.push_back(&mesh_meta_data().locally_owned_part()); @@ -4118,12 +4119,6 @@ void BulkData::internal_finish_modification_end(ModEndOptimizationFlag opt) m_meshModification.get_deleted_entity_cache().update_deleted_entities_container(); - for (FieldBase * stkField : mesh_meta_data().get_fields()) { - if (stkField->has_ngp_field()) { - impl::get_ngp_field(*stkField)->debug_modification_end(synchronized_count()); - } - } - for(SelectorBucketMap& selectorBucketMap : m_selector_to_buckets_maps) { for (SelectorBucketMap::iterator itr = selectorBucketMap.begin(), end = selectorBucketMap.end(); itr != end; ++itr) { if (itr->second.empty()) { @@ -4136,6 +4131,14 @@ void BulkData::internal_finish_modification_end(ModEndOptimizationFlag opt) } notify_finished_mod_end(); + + if (mesh_meta_data().is_field_sync_debugger_enabled()) { + for (FieldBase * stkField : mesh_meta_data().get_fields()) { + if (stkField->has_ngp_field()) { + impl::get_ngp_field(*stkField)->debug_modification_end(synchronized_count()); + } + } + } } bool BulkData::internal_modification_end_for_skin_mesh( EntityRank entity_rank, ModEndOptimizationFlag opt, const stk::mesh::Selector& selectedToSkin, @@ -4807,8 +4810,8 @@ void BulkData::internal_change_bucket_parts_without_propagating_to_downward_conn bucket->reset_bucket_parts(newBucketPartList); originalPartition->reset_partition_key(bucket->key_vector()); } else { - if(impl::partition_key_less(originalPartition->key(), partition->key()) || - impl::partition_key_less(partition->key(), originalPartition->key()) ) { + if(originalPartition->get_legacy_partition_id() < partition->get_legacy_partition_id() || + partition->get_legacy_partition_id() < originalPartition->get_legacy_partition_id()) { originalPartition->remove_bucket(bucket); bucket->reset_bucket_parts(newBucketPartList); @@ -5573,7 +5576,14 @@ void BulkData::de_induce_parts_from_nodes(const stk::mesh::EntityVector & deacti unsigned BulkData::num_sides(Entity entity) const { + if (bucket(entity).topology().has_mixed_rank_sides()) { + auto num_connected_edges = num_connectivity(entity, stk::topology::EDGE_RANK); + auto num_connected_faces = num_connectivity(entity, stk::topology::FACE_RANK); + + return num_connected_edges + num_connected_faces; + } else { return num_connectivity(entity, mesh_meta_data().side_rank()); + } } void BulkData::sort_entities(const stk::mesh::EntitySorterBase& sorter) diff --git a/packages/stk/stk_mesh/stk_mesh/base/BulkData.hpp b/packages/stk/stk_mesh/stk_mesh/base/BulkData.hpp index d5f491b8cff8..c752f7255b32 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/BulkData.hpp +++ b/packages/stk/stk_mesh/stk_mesh/base/BulkData.hpp @@ -1464,7 +1464,7 @@ class BulkData { friend class ::stk::io::StkMeshIoBroker; template friend class stk::mesh::DeviceMeshT; friend class stk::mesh::StkFieldSyncDebugger; - template class NgpDebugger> friend class stk::mesh::DeviceField; + template class NgpDebugger> friend class stk::mesh::DeviceField; // friends until it is decided what we're doing with Fields and Parallel and BulkData friend void communicate_field_data(const Ghosting & ghosts, const std::vector & fields); diff --git a/packages/stk/stk_mesh/stk_mesh/base/DeviceField.hpp b/packages/stk/stk_mesh/stk_mesh/base/DeviceField.hpp index 0756cb18a676..f02b07a3c54e 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/DeviceField.hpp +++ b/packages/stk/stk_mesh/stk_mesh/base/DeviceField.hpp @@ -59,7 +59,7 @@ constexpr unsigned NUM_COMPONENTS_INDEX = 0; constexpr unsigned FIRST_DIMENSION_INDEX = 1; constexpr unsigned INVALID_ORDINAL = 9999999; -template class NgpDebugger> class DeviceField; +template class NgpDebugger> class DeviceField; namespace impl { constexpr double OVERALLOCATION_FACTOR = 1.1; @@ -69,19 +69,18 @@ namespace impl { return std::lround(size_requested*OVERALLOCATION_FACTOR); } - template const FieldDataDeviceViewType get_device_data(const DeviceField& deviceField); - template FieldDataDeviceViewType get_device_data(DeviceField&); + template const FieldDataDeviceViewType get_device_data(const DeviceField& deviceField); + template FieldDataDeviceViewType get_device_data(DeviceField&); } -template class NgpDebugger> +template class NgpDebugger> class DeviceField : public NgpFieldBase { -private: - using StkDebugger = typename NgpDebugger::StkFieldSyncDebuggerType; - public: using ExecSpace = stk::ngp::ExecSpace; + using MemSpace = NgpMemSpace; using value_type = T; + using StkDebugger = typename NgpDebugger::StkFieldSyncDebuggerType; KOKKOS_FUNCTION DeviceField() @@ -114,19 +113,19 @@ class DeviceField : public NgpFieldBase initialize(); } - KOKKOS_DEFAULTED_FUNCTION DeviceField(const DeviceField&) = default; - KOKKOS_DEFAULTED_FUNCTION DeviceField(DeviceField&&) = default; + KOKKOS_DEFAULTED_FUNCTION DeviceField(const DeviceField&) = default; + KOKKOS_DEFAULTED_FUNCTION DeviceField(DeviceField&&) = default; KOKKOS_FUNCTION ~DeviceField() {} - KOKKOS_DEFAULTED_FUNCTION DeviceField& operator=(const DeviceField&) = default; - KOKKOS_DEFAULTED_FUNCTION DeviceField& operator=(DeviceField&&) = default; + KOKKOS_DEFAULTED_FUNCTION DeviceField& operator=(const DeviceField&) = default; + KOKKOS_DEFAULTED_FUNCTION DeviceField& operator=(DeviceField&&) = default; void initialize() { hostField->template make_field_sync_debugger(); - fieldSyncDebugger = NgpDebugger(&hostField->get_field_sync_debugger()); + fieldSyncDebugger = NgpDebugger(&hostField->get_field_sync_debugger()); } - void set_field_states(DeviceField* /*fields*/[]) + void set_field_states(DeviceField* /*fields*/[]) { } @@ -359,13 +358,15 @@ class DeviceField : public NgpFieldBase void swap_field_views(NgpFieldBase *other) override { - DeviceField* deviceFieldT = dynamic_cast*>(other); + DeviceField* deviceFieldT = dynamic_cast*>(other); STK_ThrowRequireMsg(deviceFieldT != nullptr, "DeviceField::swap_field_views called with class that can't dynamic_cast to DeviceField"); swap_views(deviceData, deviceFieldT->deviceData); + swap_views(hostBucketPtrData, deviceFieldT->hostBucketPtrData); + swap_views(deviceBucketPtrData, deviceFieldT->deviceBucketPtrData); } KOKKOS_FUNCTION - void swap(DeviceField &other) + void swap(DeviceField &other) { swap_views(deviceData, other.deviceData); } @@ -405,10 +406,15 @@ class DeviceField : public NgpFieldBase private: ExecSpace& get_execution_space() const { return hostField->get_execution_space(); } - void set_execution_space(const ExecSpace& executionSpace) { hostField->set_execution_space(executionSpace); } + void set_execution_space(const ExecSpace& executionSpace) + { + static_assert(Kokkos::SpaceAccessibility::accessible); + hostField->set_execution_space(executionSpace); + } void set_execution_space(ExecSpace&& executionSpace) { + static_assert(Kokkos::SpaceAccessibility::accessible); hostField->set_execution_space(std::forward(executionSpace)); } @@ -468,8 +474,9 @@ class DeviceField : public NgpFieldBase void construct_view(const BucketVector& buckets, const std::string& name, unsigned numPerEntity) { unsigned numBuckets = buckets.size(); - FieldDataDeviceViewType tempDataDeviceView = FieldDataDeviceViewType(Kokkos::view_alloc(Kokkos::WithoutInitializing, name), numBuckets, - ORDER_INDICES(bucketCapacity, numPerEntity)); + FieldDataDeviceViewType tempDataDeviceView = + FieldDataDeviceViewType(Kokkos::view_alloc(Kokkos::WithoutInitializing, name), + numBuckets, ORDER_INDICES(bucketCapacity, numPerEntity)); fieldSyncDebugger.initialize_view(tempDataDeviceView); copy_unmodified_buckets(buckets, tempDataDeviceView, numPerEntity); @@ -609,7 +616,7 @@ class DeviceField : public NgpFieldBase Kokkos::deep_copy(get_execution_space(), deviceBucketPtrData, hostBucketPtrData); } - void copy_unmodified_buckets(const BucketVector& buckets, FieldDataDeviceViewType destDevView, unsigned numPerEntity) + void copy_unmodified_buckets(const BucketVector& buckets, FieldDataDeviceViewType destDevView, unsigned numPerEntity) { for(unsigned i = 0; i < buckets.size(); i++) { unsigned oldBucketId = buckets[i]->get_ngp_field_bucket_id(get_ordinal()); @@ -617,7 +624,7 @@ class DeviceField : public NgpFieldBase if(!buckets[i]->get_ngp_field_bucket_is_modified(get_ordinal())) { STK_ThrowRequire(deviceData.extent(0) != 0 && deviceSelectedBucketOffset.extent(0) != 0); - copy_moved_device_bucket_data, UnmanagedDevInnerView>(destDevView, deviceData, oldBucketId, newBucketId, numPerEntity); + copy_moved_device_bucket_data, UnmanagedDevInnerView>(destDevView, deviceData, oldBucketId, newBucketId, numPerEntity); } } } @@ -682,19 +689,19 @@ class DeviceField : public NgpFieldBase void shift_bucket_forward(unsigned oldBucketId, unsigned newBucketId, unsigned numPerEntity) { - copy_moved_device_bucket_data, UnmanagedDevInnerView>(deviceData, deviceData, - oldBucketId, newBucketId, - numPerEntity); + copy_moved_device_bucket_data, UnmanagedDevInnerView>(deviceData, deviceData, + oldBucketId, newBucketId, + numPerEntity); } void shift_buckets_backward(const std::vector & backwardShiftList, unsigned numPerEntity) { for (auto it = backwardShiftList.rbegin(); it != backwardShiftList.rend(); ++it) { const BackwardShiftIndices& shiftIndices = *it; - copy_moved_device_bucket_data, UnmanagedDevInnerView>(deviceData, deviceData, - shiftIndices.oldIndex, - shiftIndices.newIndex, - numPerEntity); + copy_moved_device_bucket_data, UnmanagedDevInnerView>(deviceData, deviceData, + shiftIndices.oldIndex, + shiftIndices.newIndex, + numPerEntity); } } @@ -784,11 +791,11 @@ class DeviceField : public NgpFieldBase host = Kokkos::create_mirror_view(view); } - friend NgpDebugger; - friend const FieldDataDeviceViewType impl::get_device_data(const DeviceField&); - friend FieldDataDeviceViewType impl::get_device_data(DeviceField&); + friend NgpDebugger; + friend const FieldDataDeviceViewType impl::get_device_data(const DeviceField&); + friend FieldDataDeviceViewType impl::get_device_data(DeviceField&); - FieldDataDeviceViewType deviceData; + FieldDataDeviceViewType deviceData; FieldDataPointerHostViewType hostBucketPtrData; FieldDataPointerDeviceViewType deviceBucketPtrData; @@ -814,20 +821,20 @@ class DeviceField : public NgpFieldBase UnsignedViewType deviceFieldBucketsNumComponentsPerEntity; UnsignedViewType deviceFieldBucketsMarkedModified; - NgpDebugger fieldSyncDebugger; + NgpDebugger fieldSyncDebugger; }; namespace impl { //not for public consumption. calling this will void your warranty. -template -const FieldDataDeviceViewType get_device_data(const DeviceField& deviceField) +template +const FieldDataDeviceViewType get_device_data(const DeviceField& deviceField) { return deviceField.deviceData; } -template -FieldDataDeviceViewType get_device_data(DeviceField& deviceField) +template +FieldDataDeviceViewType get_device_data(DeviceField& deviceField) { return deviceField.deviceData; } diff --git a/packages/stk/stk_mesh/stk_mesh/base/DeviceMesh.hpp b/packages/stk/stk_mesh/stk_mesh/base/DeviceMesh.hpp index 27faedc82158..ece362a2e4f9 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/DeviceMesh.hpp +++ b/packages/stk/stk_mesh/stk_mesh/base/DeviceMesh.hpp @@ -97,6 +97,9 @@ struct DeviceBucketT { KOKKOS_INLINE_FUNCTION ConnectedOrdinals get_connected_ordinals(unsigned offsetIntoBucket, stk::mesh::EntityRank connectedRank) const; + KOKKOS_INLINE_FUNCTION + Permutations get_connected_permutations(unsigned offsetIntoBucket, stk::mesh::EntityRank connectedRank) const; + KOKKOS_FUNCTION ConnectedNodes get_nodes(unsigned offsetIntoBucket) const { return get_connected_entities(offsetIntoBucket, stk::topology::NODE_RANK); @@ -136,6 +139,7 @@ struct DeviceBucketT { void initialize_bucket_attributes(const stk::mesh::Bucket &bucket); void initialize_fixed_data_from_host(const stk::mesh::Bucket &bucket); void update_entity_data_from_host(const stk::mesh::Bucket &bucket); + void update_sparse_connectivity_from_host(const stk::mesh::Bucket &bucket); void resize_device_views(const stk::mesh::Bucket &bucket); std::pair scan_entities_for_nodal_connectivity(const stk::mesh::Bucket & bucket); @@ -146,6 +150,11 @@ struct DeviceBucketT { OrdinalViewType m_nodeOrdinals; + Unsigned2dViewType m_sparseConnectivityOffsets; + BucketConnectivityType m_sparseConnectivity; + OrdinalViewType m_sparseConnectivityOrdinals; + PermutationViewType m_sparseConnectivityPermutations; + PartOrdinalViewType m_partOrdinals; const stk::mesh::DeviceMeshT* m_owningMesh; @@ -244,69 +253,39 @@ class DeviceMeshT : public NgpMeshBase } KOKKOS_FUNCTION - ConnectedEntities get_connected_entities(stk::mesh::EntityRank rank, const stk::mesh::FastMeshIndex &entity, stk::mesh::EntityRank connectedRank) const + ConnectedEntities get_connected_entities(stk::mesh::EntityRank rank, const stk::mesh::FastMeshIndex &entityIndex, stk::mesh::EntityRank connectedRank) const { - if (connectedRank == stk::topology::NODE_RANK) - { - return buckets[rank](entity.bucket_id).get_connected_entities(entity.bucket_ord, connectedRank); - } - - int entityOffset = bucketEntityOffsets[rank](entity.bucket_id) + entity.bucket_ord; - int connectivityOffset = entityConnectivityOffset[rank][connectedRank](entityOffset); - size_t numConnected = entityConnectivityOffset[rank][connectedRank](entityOffset+1) - - connectivityOffset; - ConnectedEntities connectedEntities(nullptr, 0); - if (numConnected > 0) { - int stride = 1; - connectedEntities = - ConnectedEntities(&(sparseConnectivity[rank][connectedRank](connectivityOffset)), numConnected, stride); - } - return connectedEntities; + return buckets[rank](entityIndex.bucket_id).get_connected_entities(entityIndex.bucket_ord, connectedRank); } KOKKOS_FUNCTION - ConnectedNodes get_nodes(stk::mesh::EntityRank rank, const stk::mesh::FastMeshIndex &entity) const + ConnectedNodes get_nodes(stk::mesh::EntityRank rank, const stk::mesh::FastMeshIndex &entityIndex) const { - return buckets[rank](entity.bucket_id).get_nodes(entity.bucket_ord); + return buckets[rank](entityIndex.bucket_id).get_nodes(entityIndex.bucket_ord); } KOKKOS_FUNCTION - ConnectedEntities get_edges(stk::mesh::EntityRank rank, const stk::mesh::FastMeshIndex &entity) const + ConnectedEntities get_edges(stk::mesh::EntityRank rank, const stk::mesh::FastMeshIndex &entityIndex) const { - return get_connected_entities(rank, entity, stk::topology::EDGE_RANK); + return get_connected_entities(rank, entityIndex, stk::topology::EDGE_RANK); } KOKKOS_FUNCTION - ConnectedEntities get_faces(stk::mesh::EntityRank rank, const stk::mesh::FastMeshIndex &entity) const + ConnectedEntities get_faces(stk::mesh::EntityRank rank, const stk::mesh::FastMeshIndex &entityIndex) const { - return get_connected_entities(rank, entity, stk::topology::FACE_RANK); + return get_connected_entities(rank, entityIndex, stk::topology::FACE_RANK); } KOKKOS_FUNCTION - ConnectedEntities get_elements(stk::mesh::EntityRank rank, const stk::mesh::FastMeshIndex &entity) const + ConnectedEntities get_elements(stk::mesh::EntityRank rank, const stk::mesh::FastMeshIndex &entityIndex) const { - return get_connected_entities(rank, entity, stk::topology::ELEM_RANK); + return get_connected_entities(rank, entityIndex, stk::topology::ELEM_RANK); } KOKKOS_FUNCTION - ConnectedOrdinals get_connected_ordinals(stk::mesh::EntityRank rank, const stk::mesh::FastMeshIndex &entity, stk::mesh::EntityRank connectedRank) const + ConnectedOrdinals get_connected_ordinals(stk::mesh::EntityRank rank, const stk::mesh::FastMeshIndex &entityIndex, stk::mesh::EntityRank connectedRank) const { - if (connectedRank == stk::topology::NODE_RANK) { - return buckets[rank](entity.bucket_id).get_connected_ordinals(entity.bucket_ord, connectedRank); - } - - int entityOffset = bucketEntityOffsets[rank](entity.bucket_id) + entity.bucket_ord; - int connectivityOffset = entityConnectivityOffset[rank][connectedRank](entityOffset); - size_t numConnected = entityConnectivityOffset[rank][connectedRank](entityOffset+1) - - connectivityOffset; - ConnectedOrdinals connectedOrdinals(nullptr, 0); - if (numConnected > 0) - { - int stride = 1; - connectedOrdinals = ConnectedOrdinals( - &(sparseConnectivityOrdinals[rank][connectedRank](connectivityOffset)), numConnected, stride); - } - return connectedOrdinals; + return buckets[rank](entityIndex.bucket_id).get_connected_ordinals(entityIndex.bucket_ord, connectedRank); } KOKKOS_FUNCTION @@ -334,24 +313,9 @@ class DeviceMeshT : public NgpMeshBase } KOKKOS_FUNCTION - Permutations get_permutations(stk::mesh::EntityRank rank, const stk::mesh::FastMeshIndex &entity, stk::mesh::EntityRank connectedRank) const + Permutations get_permutations(stk::mesh::EntityRank rank, const stk::mesh::FastMeshIndex &entityIndex, stk::mesh::EntityRank connectedRank) const { - Permutations permutations(nullptr, 0); - if (connectedRank == stk::topology::NODE_RANK) - { - return permutations; - } - - int entityOffset = bucketEntityOffsets[rank](entity.bucket_id) + entity.bucket_ord; - int connectivityOffset = entityConnectivityOffset[rank][connectedRank](entityOffset); - size_t numConnected = entityConnectivityOffset[rank][connectedRank](entityOffset+1) - - connectivityOffset; - if (numConnected > 0) - { - int stride = 1; - permutations = Permutations(&(sparsePermutations[rank][connectedRank](connectivityOffset)), numConnected, stride); - } - return permutations; + return buckets[rank](entityIndex.bucket_id).get_connected_permutations(entityIndex.bucket_ord, connectedRank); } KOKKOS_FUNCTION @@ -522,12 +486,29 @@ class DeviceMeshT : public NgpMeshBase return m_needSyncToHost; } -private: - void set_entity_keys(const stk::mesh::BulkData& bulk_in); + template + void impl_batch_change_entity_parts(const Kokkos::View& entities, + const Kokkos::View& addPartOrdinals, + const Kokkos::View& removePartOrdinals) + { + using EntitiesMemorySpace = typename std::remove_reference::type::memory_space; + using AddPartOrdinalsMemorySpace = typename std::remove_reference::type::memory_space; + using RemovePartOrdinalsMemorySpace = typename std::remove_reference::type::memory_space; + + static_assert(Kokkos::SpaceAccessibility::accessible, + "The memory space of the 'entities' View is inaccessible from the DeviceMesh execution space"); + static_assert(Kokkos::SpaceAccessibility::accessible, + "The memory space of the 'addPartOrdinals' View is inaccessible from the DeviceMesh execution space"); + static_assert(Kokkos::SpaceAccessibility::accessible, + "The memory space of the 'removePartOrdinals' View is inaccessible from the DeviceMesh execution space"); - void set_bucket_entity_offsets(const stk::mesh::BulkData& bulk_in); + using HostEntitiesType = typename std::remove_reference::type::HostMirror; + using HostAddPartOrdinalsType = typename std::remove_reference::type::HostMirror; + using HostRemovePartOrdinalsType = typename std::remove_reference::type::HostMirror; + } - void fill_sparse_connectivities(const stk::mesh::BulkData& bulk_in); +private: + void set_entity_keys(const stk::mesh::BulkData& bulk_in); KOKKOS_FUNCTION bool is_last_bucket_reference(unsigned rank = stk::topology::NODE_RANK) const @@ -557,8 +538,6 @@ class DeviceMeshT : public NgpMeshBase void copy_mesh_indices_to_device(); - void copy_bucket_entity_offsets_to_device(); - void copy_sparse_connectivities_to_device(); void copy_volatile_fast_shared_comm_map_to_device(); @@ -578,11 +557,6 @@ class DeviceMeshT : public NgpMeshBase HostMeshIndexType hostMeshIndices; MeshIndexType deviceMeshIndices; - BucketEntityOffsetsViewType bucketEntityOffsets[stk::topology::NUM_RANKS]; - UnsignedViewType entityConnectivityOffset[stk::topology::NUM_RANKS][stk::topology::NUM_RANKS]; - EntityViewType sparseConnectivity[stk::topology::NUM_RANKS][stk::topology::NUM_RANKS]; - OrdinalViewType sparseConnectivityOrdinals[stk::topology::NUM_RANKS][stk::topology::NUM_RANKS]; - PermutationViewType sparsePermutations[stk::topology::NUM_RANKS][stk::topology::NUM_RANKS]; UnsignedViewType volatileFastSharedCommMapOffset[stk::topology::NUM_RANKS]; FastSharedCommMapViewType volatileFastSharedCommMap[stk::topology::NUM_RANKS]; }; @@ -599,9 +573,10 @@ DeviceBucketT::get_connected_entities(unsigned offsetIntoBuck const size_t nodeOffset = m_nodeConnectivityOffsets(offsetIntoBucket); return ConnectedEntities(&m_nodeConnectivity(nodeOffset), numNodes, 1); } - STK_NGP_ThrowAssert(m_owningMesh != nullptr); - stk::mesh::FastMeshIndex meshIndex{bucket_id(), offsetIntoBucket}; - return m_owningMesh->get_connected_entities(entity_rank(), meshIndex, connectedRank); + + const unsigned offset = m_sparseConnectivityOffsets(connectedRank,offsetIntoBucket); + const unsigned length = m_sparseConnectivityOffsets(connectedRank,offsetIntoBucket+1) - offset; + return ConnectedEntities(&m_sparseConnectivity(offset), length, 1); } template @@ -613,10 +588,26 @@ DeviceBucketT::get_connected_ordinals(unsigned offsetIntoBuck const unsigned numNodes = m_nodeConnectivityOffsets(offsetIntoBucket+1)-m_nodeConnectivityOffsets(offsetIntoBucket); return ConnectedOrdinals(m_nodeOrdinals.data(), numNodes, 1); } - STK_NGP_ThrowAssert(m_owningMesh != nullptr); - stk::mesh::FastMeshIndex meshIndex{bucket_id(), offsetIntoBucket}; - return m_owningMesh->get_connected_ordinals(entity_rank(), meshIndex, connectedRank); + + const unsigned offset = m_sparseConnectivityOffsets(connectedRank,offsetIntoBucket); + const unsigned length = m_sparseConnectivityOffsets(connectedRank,offsetIntoBucket+1) - offset; + return ConnectedOrdinals(&m_sparseConnectivityOrdinals(offset), length, 1); +} + +template +KOKKOS_INLINE_FUNCTION +typename DeviceBucketT::Permutations +DeviceBucketT::get_connected_permutations(unsigned offsetIntoBucket, stk::mesh::EntityRank connectedRank) const { + STK_NGP_ThrowAssert(connectedRank < stk::topology::NUM_RANKS); + const unsigned offset = m_sparseConnectivityOffsets(connectedRank,offsetIntoBucket); + const unsigned length = m_sparseConnectivityOffsets(connectedRank,offsetIntoBucket+1) - offset; + if (m_sparseConnectivityPermutations.size() <= offset) { + return Permutations(nullptr, 0); + } + + return Permutations(&m_sparseConnectivityPermutations(offset), length, 1); } + template void DeviceBucketT::initialize_bucket_attributes(const stk::mesh::Bucket &bucket) { @@ -660,33 +651,37 @@ void DeviceBucketT::resize_device_views(const stk::mesh::Buck { Kokkos::Profiling::pushRegion("resize_device_views()"); + Kokkos::Profiling::pushRegion("set node ordinals"); + const auto [maxNodesPerEntity, totalNumConnectedNodes] = scan_entities_for_nodal_connectivity(bucket); if (m_nodeOrdinals.size() != maxNodesPerEntity) { - m_nodeOrdinals = OrdinalViewType(Kokkos::view_alloc(Kokkos::WithoutInitializing, "NodeOrdinals"), - static_cast(maxNodesPerEntity)); + Kokkos::resize(Kokkos::WithoutInitializing, m_nodeOrdinals, static_cast(maxNodesPerEntity)); OrdinalViewType& nodeOrds = m_nodeOrdinals; //local var to avoid implicit this capture Kokkos::parallel_for(Kokkos::RangePolicy(0, maxNodesPerEntity), KOKKOS_LAMBDA(const int i) { nodeOrds(i) = static_cast(i); }); } + Kokkos::Profiling::popRegion(); + Kokkos::Profiling::pushRegion("bucket entities"); if (m_entities.size() != m_bucketCapacity) { - m_entities = EntityViewType(Kokkos::view_alloc(Kokkos::WithoutInitializing, "BucketEntities"), m_bucketCapacity); + Kokkos::resize(Kokkos::WithoutInitializing, m_entities, m_bucketCapacity); STK_ThrowRequireMsg(m_bucketCapacity > 0, "bucket capacity must be greater than 0"); } + Kokkos::Profiling::popRegion(); + Kokkos::Profiling::pushRegion("nodal connectivity"); if (m_nodeConnectivity.size() != totalNumConnectedNodes) { - m_nodeConnectivity = BucketConnectivityType(Kokkos::view_alloc(Kokkos::WithoutInitializing, - "NodeConnectivity"), totalNumConnectedNodes); + Kokkos::resize(Kokkos::WithoutInitializing, m_nodeConnectivity, totalNumConnectedNodes); } if (m_nodeConnectivityOffsets.size() != m_bucketCapacity+1) { - m_nodeConnectivityOffsets = OrdinalViewType(Kokkos::view_alloc(Kokkos::WithoutInitializing, - "NodeConnectivityOffsets"), m_bucketCapacity+1); + Kokkos::resize(Kokkos::WithoutInitializing, m_nodeConnectivityOffsets, m_bucketCapacity+1); } Kokkos::Profiling::popRegion(); + Kokkos::Profiling::popRegion(); } template @@ -725,6 +720,87 @@ void DeviceBucketT::update_entity_data_from_host(const stk::m Kokkos::Profiling::popRegion(); } +constexpr double RESIZE_FACTOR = 0.05; + +template +inline void reallocate_views(DEVICE_VIEW & deviceView, HOST_VIEW & hostView, size_t requiredSize, double resizeFactor = 0.0) +{ + const size_t currentSize = deviceView.extent(0); + const size_t shrinkThreshold = currentSize - static_cast(2*resizeFactor*currentSize); + const bool needGrowth = (requiredSize > currentSize); + const bool needShrink = (requiredSize < shrinkThreshold); + + if (needGrowth || needShrink) { + const size_t newSize = requiredSize + static_cast(resizeFactor*requiredSize); + deviceView = DEVICE_VIEW(Kokkos::view_alloc(Kokkos::WithoutInitializing, deviceView.label()), newSize); + hostView = Kokkos::create_mirror_view(Kokkos::WithoutInitializing, deviceView); + } +} + +template +void DeviceBucketT::update_sparse_connectivity_from_host(const stk::mesh::Bucket &bucket) +{ + Kokkos::Profiling::pushRegion("update_sparse_connectivity_from_host()"); + + Unsigned2dViewType::HostMirror hostConnectivityOffsets("hostConnectivityOffsets", 0,0); + Kokkos::resize(Kokkos::WithoutInitializing, hostConnectivityOffsets, stk::topology::NUM_RANKS, bucket.size()+1); + Kokkos::resize(Kokkos::WithoutInitializing, m_sparseConnectivityOffsets, stk::topology::NUM_RANKS, bucket.size()+1); + BucketConnectivityType::HostMirror hostConnectivity("hostConnectivity", 0); + OrdinalViewType::HostMirror hostConnectivityOrdinals("hostConnectivityOrdinals", 0); + PermutationViewType::HostMirror hostConnectivityPermutations("hostConnectivityPermutations", 0); + + const stk::mesh::EntityRank endRank = static_cast(bucket.mesh().mesh_meta_data().entity_rank_count()); + + unsigned offset = 0; + for(stk::mesh::EntityRank connectedRank=stk::topology::EDGE_RANK; connectedRank void DeviceMeshT::update_mesh() { @@ -738,15 +814,23 @@ void DeviceMeshT::update_mesh() const bool anyChanges = fill_buckets(*bulk); if (anyChanges) { + Kokkos::Profiling::pushRegion("anyChanges stuff"); + + Kokkos::Profiling::pushRegion("entity-keys"); set_entity_keys(*bulk); copy_entity_keys_to_device(); - set_bucket_entity_offsets(*bulk); - copy_bucket_entity_offsets_to_device(); - fill_sparse_connectivities(*bulk); - copy_sparse_connectivities_to_device(); + Kokkos::Profiling::popRegion(); + + Kokkos::Profiling::pushRegion("volatile-fast-shared-comm-map"); copy_volatile_fast_shared_comm_map_to_device(); + Kokkos::Profiling::popRegion(); + + Kokkos::Profiling::pushRegion("mesh-indices"); fill_mesh_indices(*bulk); copy_mesh_indices_to_device(); + Kokkos::Profiling::popRegion(); + + Kokkos::Profiling::popRegion(); } synchronizedCount = bulk->synchronized_count(); @@ -780,6 +864,7 @@ bool DeviceMeshT::fill_buckets(const stk::mesh::BulkData& bulk_in) bucketBuffer[iBucket].initialize_bucket_attributes(stkBucket); bucketBuffer[iBucket].initialize_fixed_data_from_host(stkBucket); bucketBuffer[iBucket].update_entity_data_from_host(stkBucket); + bucketBuffer[iBucket].update_sparse_connectivity_from_host(stkBucket); anyBucketChanges = true; Kokkos::Profiling::popRegion(); } @@ -789,6 +874,7 @@ bool DeviceMeshT::fill_buckets(const stk::mesh::BulkData& bulk_in) new (&bucketBuffer[iBucket]) DeviceBucketT(buckets[rank][ngpBucketId]); if (stkBucket.is_modified()) { bucketBuffer[iBucket].update_entity_data_from_host(stkBucket); + bucketBuffer[iBucket].update_sparse_connectivity_from_host(stkBucket); anyBucketChanges = true; } bucketBuffer[iBucket].m_bucketId = stkBucket.bucket_id(); @@ -811,23 +897,6 @@ bool DeviceMeshT::fill_buckets(const stk::mesh::BulkData& bulk_in) return anyBucketChanges; } -constexpr double RESIZE_FACTOR = 0.05; - -template -inline void reallocate_views(DEVICE_VIEW & deviceView, HOST_VIEW & hostView, size_t requiredSize, double resizeFactor = 0.0) -{ - const size_t currentSize = deviceView.extent(0); - const size_t shrinkThreshold = currentSize - static_cast(2*resizeFactor*currentSize); - const bool needGrowth = (requiredSize > currentSize); - const bool needShrink = (requiredSize < shrinkThreshold); - - if (needGrowth || needShrink) { - const size_t newSize = requiredSize + static_cast(resizeFactor*requiredSize); - deviceView = DEVICE_VIEW(Kokkos::view_alloc(Kokkos::WithoutInitializing, deviceView.label()), newSize); - hostView = Kokkos::create_mirror_view(Kokkos::WithoutInitializing, deviceView); - } -} - template void DeviceMeshT::set_entity_keys(const stk::mesh::BulkData& bulk_in) { @@ -848,119 +917,6 @@ void DeviceMeshT::set_entity_keys(const stk::mesh::BulkData& bulk_i } } -template -void DeviceMeshT::set_bucket_entity_offsets(const stk::mesh::BulkData& bulk_in) -{ - auto& hostBucketEntityOffsets = deviceMeshHostData->hostBucketEntityOffsets; - - for(stk::mesh::EntityRank rank=stk::topology::NODE_RANK; ranksize(); - } - for (unsigned i = stkBuckets.size(); i < hostBucketEntityOffsets[rank].extent(0); ++i) { - hostBucketEntityOffsets[rank](i) = bucketOffsetIntoEntities; - } - } -} - -template -void DeviceMeshT::fill_sparse_connectivities(const stk::mesh::BulkData& bulk_in) -{ - auto& hostEntityConnectivityOffset = deviceMeshHostData->hostEntityConnectivityOffset; - auto& hostBucketEntityOffsets = deviceMeshHostData->hostBucketEntityOffsets; - auto& hostSparseConnectivity = deviceMeshHostData->hostSparseConnectivity; - auto& hostSparseConnectivityOrdinals = deviceMeshHostData->hostSparseConnectivityOrdinals; - auto& hostSparsePermutations = deviceMeshHostData->hostSparsePermutations; - - unsigned totalNumConnectedEntities[stk::topology::NUM_RANKS][stk::topology::NUM_RANKS] = {{0}, {0}, {0}, {0}, {0}}; - unsigned totalNumPermutations[stk::topology::NUM_RANKS][stk::topology::NUM_RANKS] = {{0}, {0}, {0}, {0}, {0}}; - - for(stk::mesh::EntityRank rank=stk::topology::NODE_RANK; rank 0) { - - const stk::mesh::Entity* connectedEntities = stkBucket.begin(iEntity, connectedRank); - const stk::mesh::ConnectivityOrdinal* connectedOrdinals = stkBucket.begin_ordinals(iEntity, connectedRank); - const stk::mesh::Permutation* permutations = hasPermutation ? stkBucket.begin_permutations(iEntity, connectedRank) : nullptr; - for(unsigned i=0; i void DeviceMeshT::fill_mesh_indices(const stk::mesh::BulkData& bulk_in) { @@ -999,37 +955,6 @@ void DeviceMeshT::copy_mesh_indices_to_device() deviceMeshIndices = nonconst_device_mesh_indices; } -template -void DeviceMeshT::copy_bucket_entity_offsets_to_device() -{ - auto& hostBucketEntityOffsets = deviceMeshHostData->hostBucketEntityOffsets; - - for(stk::mesh::EntityRank rank=stk::topology::NODE_RANK; rank -void DeviceMeshT::copy_sparse_connectivities_to_device() -{ - auto& hostEntityConnectivityOffset = deviceMeshHostData->hostEntityConnectivityOffset; - auto& hostSparseConnectivity = deviceMeshHostData->hostSparseConnectivity; - auto& hostSparseConnectivityOrdinals = deviceMeshHostData->hostSparseConnectivityOrdinals; - auto& hostSparsePermutations = deviceMeshHostData->hostSparsePermutations; - - for(stk::mesh::EntityRank rank=stk::topology::NODE_RANK; rank void DeviceMeshT::copy_volatile_fast_shared_comm_map_to_device() { diff --git a/packages/stk/stk_mesh/stk_mesh/base/FEMHelpers.cpp b/packages/stk/stk_mesh/stk_mesh/base/FEMHelpers.cpp index ca5a1b61ba11..16dafae9617b 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/FEMHelpers.cpp +++ b/packages/stk/stk_mesh/stk_mesh/base/FEMHelpers.cpp @@ -71,7 +71,7 @@ void verify_declare_element_edge( ? elem_top.edge_topology(local_edge_id) : invalid; STK_ThrowErrorMsgIf( elem_top!=stk::topology::INVALID_TOPOLOGY && local_edge_id >= elem_top.num_edges(), - "For elem " << mesh.identifier(elem) << ", local_edge_id " << local_edge_id << ", " << + "For elem " << mesh.identifier(elem) << " ("<(s)); m_field_meta_data.swap(sField->m_field_meta_data); @@ -517,15 +518,21 @@ void FieldBase::rotate_multistate_data(bool rotateNgpFieldViews) std::swap(m_modifiedOnHost, sField->m_modifiedOnHost); std::swap(m_modifiedOnDevice, sField->m_modifiedOnDevice); } - - for(int s = 0; s < numStates; ++s) { - NgpFieldBase* ngpField = field_state(static_cast(s))->get_ngp_field(); - if (ngpField != nullptr) { - ngpField->update_bucket_pointer_view(); - ngpField->fence(); + Kokkos::Profiling::popRegion(); + + if (!(rotateNgpFieldViews && allStatesHaveNgpFields)) { + Kokkos::Profiling::pushRegion("ngpField update_bucket_pointer_view"); + for(int s = 0; s < numStates; ++s) { + NgpFieldBase* ngpField = field_state(static_cast(s))->get_ngp_field(); + if (ngpField != nullptr) { + ngpField->update_bucket_pointer_view(); + ngpField->fence(); + } } + Kokkos::Profiling::popRegion(); } + Kokkos::Profiling::pushRegion("ngpField swap_field_views"); if (rotateNgpFieldViews && allStatesHaveNgpFields) { for (int s = 1; s < numStates; ++s) { NgpFieldBase* ngpField_sminus1 = field_state(static_cast(s-1))->get_ngp_field(); @@ -533,12 +540,13 @@ void FieldBase::rotate_multistate_data(bool rotateNgpFieldViews) ngpField_s->swap_field_views(ngpField_sminus1); } } + Kokkos::Profiling::popRegion(); } } void FieldBase::modify_on_host() const -{ +{ STK_ThrowRequireMsg(m_modifiedOnDevice == false, "Modify on host called for Field: " << name() << " but it has an uncleared modified_on_device"); @@ -556,7 +564,7 @@ FieldBase::modify_on_device() const void FieldBase::modify_on_host(const Selector& s) const -{ +{ modify_on_host(); } diff --git a/packages/stk/stk_mesh/stk_mesh/base/FieldBase.hpp b/packages/stk/stk_mesh/stk_mesh/base/FieldBase.hpp index 99c56712f671..59b907ca36c1 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/FieldBase.hpp +++ b/packages/stk/stk_mesh/stk_mesh/base/FieldBase.hpp @@ -62,7 +62,7 @@ class BulkData; class MetaData; class UnitTestFieldImpl; class FieldBase; -template class NgpDebugger> class DeviceField; +template class NgpDebugger> class DeviceField; namespace impl { class FieldRepository; @@ -340,8 +340,8 @@ class FieldBase friend NgpFieldBase* impl::get_ngp_field(const FieldBase & stkField); friend void impl::set_ngp_field(const FieldBase & stkField, NgpFieldBase * ngpField); - template class NgpDebugger> friend class HostField; - template class NgpDebugger> friend class DeviceField; + template class NgpDebugger> friend class HostField; + template class NgpDebugger> friend class DeviceField; template friend class Field; protected: diff --git a/packages/stk/stk_mesh/stk_mesh/base/GetNgpField.hpp b/packages/stk/stk_mesh/stk_mesh/base/GetNgpField.hpp index 920e47fde7d0..fb219d103710 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/GetNgpField.hpp +++ b/packages/stk/stk_mesh/stk_mesh/base/GetNgpField.hpp @@ -41,13 +41,16 @@ namespace stk { namespace mesh { -template class NgpDebugger = DefaultNgpFieldSyncDebugger> -NgpField & get_updated_ngp_field_async(const FieldBase & stkField, const stk::ngp::ExecSpace& execSpace) +template class NgpDebugger = DefaultNgpFieldSyncDebugger> +NgpField & get_updated_ngp_field_async(const FieldBase & stkField, const stk::ngp::ExecSpace& execSpace) { + static_assert(Kokkos::SpaceAccessibility::accessible); + NgpFieldBase * ngpField = impl::get_ngp_field(stkField); if (ngpField == nullptr) { - ngpField = new NgpField(stkField.get_mesh(), stkField, true); + ngpField = new NgpField(stkField.get_mesh(), stkField, true); ngpField->update_field(execSpace); ngpField->debug_initialize_debug_views(); impl::set_ngp_field(stkField, ngpField); @@ -59,16 +62,19 @@ NgpField & get_updated_ngp_field_async(const FieldBase & stkFiel } } - return dynamic_cast< NgpField& >(*ngpField); + return dynamic_cast< NgpField& >(*ngpField); } -template class NgpDebugger = DefaultNgpFieldSyncDebugger> -NgpField & get_updated_ngp_field_async(const FieldBase & stkField, stk::ngp::ExecSpace&& execSpace) +template class NgpDebugger = DefaultNgpFieldSyncDebugger> +NgpField & get_updated_ngp_field_async(const FieldBase & stkField, stk::ngp::ExecSpace&& execSpace) { + static_assert(Kokkos::SpaceAccessibility::accessible); + NgpFieldBase * ngpField = impl::get_ngp_field(stkField); if (ngpField == nullptr) { - ngpField = new NgpField(stkField.get_mesh(), stkField, true); + ngpField = new NgpField(stkField.get_mesh(), stkField, true); ngpField->update_field(std::forward(execSpace)); ngpField->debug_initialize_debug_views(); impl::set_ngp_field(stkField, ngpField); @@ -80,13 +86,17 @@ NgpField & get_updated_ngp_field_async(const FieldBase & stkFiel } } - return dynamic_cast< NgpField& >(*ngpField); + return dynamic_cast< NgpField& >(*ngpField); } -template class NgpDebugger = DefaultNgpFieldSyncDebugger> -NgpField & get_updated_ngp_field(const FieldBase & stkField) +template class NgpDebugger = DefaultNgpFieldSyncDebugger> +NgpField & get_updated_ngp_field(const FieldBase & stkField) { - auto& ngpFieldRef = get_updated_ngp_field_async(stkField, Kokkos::DefaultExecutionSpace()); + using ExecSpace = Kokkos::DefaultExecutionSpace; + static_assert(Kokkos::SpaceAccessibility::accessible); + + auto& ngpFieldRef = get_updated_ngp_field_async(stkField, ExecSpace()); ngpFieldRef.fence(); return ngpFieldRef; } diff --git a/packages/stk/stk_mesh/stk_mesh/base/HostField.hpp b/packages/stk/stk_mesh/stk_mesh/base/HostField.hpp index fe70d29c9009..279b38e9f6dc 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/HostField.hpp +++ b/packages/stk/stk_mesh/stk_mesh/base/HostField.hpp @@ -54,13 +54,14 @@ namespace stk { namespace mesh { -template class NgpDebugger> +template class NgpDebugger> class HostField : public NgpFieldBase { public: using ExecSpace = stk::ngp::ExecSpace; + using MemSpace = NgpMemSpace; using value_type = T; - using StkDebugger = typename NgpDebugger::StkFieldSyncDebuggerType; + using StkDebugger = typename NgpDebugger::StkFieldSyncDebuggerType; HostField() : NgpFieldBase(), @@ -79,10 +80,10 @@ class HostField : public NgpFieldBase field->template make_field_sync_debugger(); } - HostField(const HostField&) = default; - HostField(HostField&&) = default; - HostField& operator=(const HostField&) = default; - HostField& operator=(HostField&&) = default; + HostField(const HostField&) = default; + HostField(HostField&&) = default; + HostField& operator=(const HostField&) = default; + HostField& operator=(HostField&&) = default; void update_field(const ExecSpace& newExecSpace) override { @@ -96,7 +97,7 @@ class HostField : public NgpFieldBase update_field(); } - void set_field_states(HostField* fields[]) {} + void set_field_states(HostField* fields[]) {} size_t num_syncs_to_host() const override { return field->num_syncs_to_host(); } size_t num_syncs_to_device() const override { return field->num_syncs_to_device(); } diff --git a/packages/stk/stk_mesh/stk_mesh/base/HostMesh.hpp b/packages/stk/stk_mesh/stk_mesh/base/HostMesh.hpp index 36823b3ede6f..04eba0b2be61 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/HostMesh.hpp +++ b/packages/stk/stk_mesh/stk_mesh/base/HostMesh.hpp @@ -302,6 +302,14 @@ class HostMeshT : public NgpMeshBase return false; } + template + void impl_batch_change_entity_parts(const Kokkos::View& entities, + const Kokkos::View& addPartOrdinals, + const Kokkos::View& removePartOrdinals) + { + batch_change_entity_parts(entities, addPartOrdinals, removePartOrdinals); + } + private: stk::mesh::BulkData *bulk; size_t m_syncCountWhenUpdated; diff --git a/packages/stk/stk_mesh/stk_mesh/base/MetaData.cpp b/packages/stk/stk_mesh/stk_mesh/base/MetaData.cpp index 67ac6017ab89..d1a0173ee887 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/MetaData.cpp +++ b/packages/stk/stk_mesh/stk_mesh/base/MetaData.cpp @@ -48,6 +48,7 @@ #include "stk_mesh/base/Part.hpp" // for Part, etc #include "stk_mesh/base/Selector.hpp" // for Selector #include "stk_mesh/base/Types.hpp" // for PartVector, EntityRank, etc +#include "stk_mesh/base/StkFieldSyncDebugger.hpp" #include "stk_mesh/baseImpl/PartRepository.hpp" // for PartRepository #include "stk_topology/topology.hpp" // for topology, etc #include "stk_util/parallel/Parallel.hpp" // for parallel_machine_rank, etc @@ -145,20 +146,21 @@ void MetaData::require_valid_entity_rank( EntityRank rank ) const //---------------------------------------------------------------------- MetaData::MetaData(size_t spatial_dimension, const std::vector& entity_rank_names) - : m_bulk_data(NULL), - m_commit( false ), - m_are_late_fields_enabled( false ), - m_part_repo( this ), + : m_bulk_data(nullptr), + m_part_repo(this), m_attributes(), - m_universal_part( NULL ), - m_owns_part( NULL ), - m_shares_part( NULL ), - m_aura_part(NULL), + m_universal_part(nullptr), + m_owns_part(nullptr), + m_shares_part(nullptr), + m_aura_part(nullptr), m_field_repo(*this), - m_coord_field(NULL), - m_entity_rank_names( ), - m_spatial_dimension( 0 /*invalid spatial dimension*/), - m_surfaceToBlock() + m_coord_field(nullptr), + m_entity_rank_names(), + m_spatial_dimension(0 /*invalid spatial dimension*/), + m_surfaceToBlock(), + m_commit(false), + m_are_late_fields_enabled(false), + m_isFieldSyncDebuggerEnabled(false) { const size_t numRanks = stk::topology::NUM_RANKS; STK_ThrowRequireMsg(entity_rank_names.size() <= numRanks, "MetaData: number of entity-ranks (" << entity_rank_names.size() << ") exceeds limit of stk::topology::NUM_RANKS (" << numRanks <<")"); @@ -172,20 +174,21 @@ MetaData::MetaData(size_t spatial_dimension, const std::vector& ent } MetaData::MetaData() - : m_bulk_data(NULL), - m_commit( false ), - m_are_late_fields_enabled( false ), - m_part_repo( this ), + : m_bulk_data(nullptr), + m_part_repo(this), m_attributes(), - m_universal_part( NULL ), - m_owns_part( NULL ), - m_shares_part( NULL ), - m_aura_part(NULL), + m_universal_part(nullptr), + m_owns_part(nullptr), + m_shares_part(nullptr), + m_aura_part(nullptr), m_field_repo(*this), - m_coord_field(NULL), - m_entity_rank_names( ), - m_spatial_dimension( 0 /*invalid spatial dimension*/), - m_surfaceToBlock() + m_coord_field(nullptr), + m_entity_rank_names(), + m_spatial_dimension(0 /*invalid spatial dimension*/), + m_surfaceToBlock(), + m_commit(false), + m_are_late_fields_enabled(false), + m_isFieldSyncDebuggerEnabled(false) { // Declare the predefined parts @@ -444,59 +447,51 @@ void MetaData::internal_declare_part_subset( Part & superset , Part & subset, bo //---------------------------------------------------------------------- -void MetaData::declare_field_restriction( - FieldBase & arg_field , - const Part & arg_part , - const unsigned arg_num_scalars_per_entity , - const unsigned arg_first_dimension , - const void * arg_init_value ) +void MetaData::declare_field_restriction(FieldBase& field, + const Part& part, + const unsigned numScalarsPerEntity, + const unsigned firstDimension, + const void* initValue) { - static const char method[] = - "std::mesh::MetaData::declare_field_restriction" ; - - require_same_mesh_meta_data( MetaData::get(arg_field) ); - require_same_mesh_meta_data( MetaData::get(arg_part) ); - - m_field_repo.declare_field_restriction( - method, - arg_field, - arg_part, - m_part_repo.get_all_parts(), - arg_num_scalars_per_entity, - arg_first_dimension, - arg_init_value - ); + require_same_mesh_meta_data(MetaData::get(field)); + require_same_mesh_meta_data(MetaData::get(part)); + + m_field_repo.declare_field_restriction("std::mesh::MetaData::declare_field_restriction", + field, + part, + m_part_repo.get_all_parts(), + numScalarsPerEntity, + firstDimension, + initValue); if (is_commit()) { - m_bulk_data->reallocate_field_data(arg_field); + m_bulk_data->reallocate_field_data(field); } + + FieldSyncDebugger::declare_field_restriction(field, part, numScalarsPerEntity, firstDimension); } -void MetaData::declare_field_restriction( - FieldBase & arg_field , - const Selector & arg_selector , - const unsigned arg_num_scalars_per_entity , - const unsigned arg_first_dimension , - const void * arg_init_value ) +void MetaData::declare_field_restriction(FieldBase& field, + const Selector& selector, + const unsigned numScalarsPerEntity, + const unsigned firstDimension, + const void* initValue) { - static const char method[] = - "std::mesh::MetaData::declare_field_restriction" ; - - require_same_mesh_meta_data( MetaData::get(arg_field) ); - - m_field_repo.declare_field_restriction( - method, - arg_field, - arg_selector, - m_part_repo.get_all_parts(), - arg_num_scalars_per_entity, - arg_first_dimension, - arg_init_value - ); + require_same_mesh_meta_data(MetaData::get(field)); + + m_field_repo.declare_field_restriction("std::mesh::MetaData::declare_field_restriction", + field, + selector, + m_part_repo.get_all_parts(), + numScalarsPerEntity, + firstDimension, + initValue); if (is_commit()) { - m_bulk_data->reallocate_field_data(arg_field); + m_bulk_data->reallocate_field_data(field); } + + FieldSyncDebugger::declare_field_restriction(field, selector, numScalarsPerEntity, firstDimension); } //---------------------------------------------------------------------- @@ -754,6 +749,13 @@ std::vector MetaData::get_part_aliases(const Part& part) const return std::vector(); } + +void MetaData::declare_field_sync_debugger_field(stk::mesh::FieldBase& field) +{ + FieldSyncDebugger::declare_field(field); +} + + //---------------------------------------------------------------------- //---------------------------------------------------------------------- // Verify parallel consistency of fields and parts @@ -1216,7 +1218,7 @@ get_topology(const MetaData& meta_data, EntityRank entity_rank, const std::pair< } -stk::topology get_topology( shards::CellTopology shards_topology, unsigned spatial_dimension) +stk::topology get_topology( shards::CellTopology shards_topology, unsigned spatial_dimension, bool useAllFaceSideShell) { stk::topology t; @@ -1269,8 +1271,7 @@ stk::topology get_topology( shards::CellTopology shards_topology, unsigned spati // t = stk::topology::SPRING_3; else if ( shards_topology == shards::CellTopology(shards::getCellTopologyData< shards::ShellTriangle<3> >()) ) { - t = stk::topology::SHELL_TRI_3; - // t = stk::topology::SHELL_TRI_3_ALL_FACE_SIDES; + t = (useAllFaceSideShell) ? stk::topology::SHELL_TRI_3_ALL_FACE_SIDES : stk::topology::SHELL_TRI_3; } //NOTE: shards does not define a shell triangle 4 @@ -1278,21 +1279,17 @@ stk::topology get_topology( shards::CellTopology shards_topology, unsigned spati // t = stk::topology::SHELL_TRI_4; else if ( shards_topology == shards::CellTopology(shards::getCellTopologyData< shards::ShellTriangle<6> >()) ) { - t = stk::topology::SHELL_TRI_6; - // t = stk::topology::SHELL_TRI_6_ALL_FACE_SIDES; + t = (useAllFaceSideShell) ? stk::topology::SHELL_TRI_6_ALL_FACE_SIDES : stk::topology::SHELL_TRI_6; } else if ( shards_topology == shards::CellTopology(shards::getCellTopologyData< shards::ShellQuadrilateral<4> >()) ) { - t = stk::topology::SHELL_QUAD_4; - // t = stk::topology::SHELL_QUAD_4_ALL_FACE_SIDES; + t = (useAllFaceSideShell) ? stk::topology::SHELL_QUAD_4_ALL_FACE_SIDES : stk::topology::SHELL_QUAD_4; } else if ( shards_topology == shards::CellTopology(shards::getCellTopologyData< shards::ShellQuadrilateral<8> >()) ) { - t = stk::topology::SHELL_QUAD_8; - // t = stk::topology::SHELL_QUAD_8_ALL_FACE_SIDES; + t = (useAllFaceSideShell) ? stk::topology::SHELL_QUAD_8_ALL_FACE_SIDES : stk::topology::SHELL_QUAD_8; } else if ( shards_topology == shards::CellTopology(shards::getCellTopologyData< shards::ShellQuadrilateral<9> >()) ) { - t = stk::topology::SHELL_QUAD_9; - // t = stk::topology::SHELL_QUAD_9_ALL_FACE_SIDES; + t = (useAllFaceSideShell) ? stk::topology::SHELL_QUAD_9_ALL_FACE_SIDES : stk::topology::SHELL_QUAD_9; } else if ( shards_topology == shards::CellTopology(shards::getCellTopologyData< shards::Tetrahedron<4> >()) ) diff --git a/packages/stk/stk_mesh/stk_mesh/base/MetaData.hpp b/packages/stk/stk_mesh/stk_mesh/base/MetaData.hpp index 0585b10fbd52..8b1e13486f5f 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/MetaData.hpp +++ b/packages/stk/stk_mesh/stk_mesh/base/MetaData.hpp @@ -108,7 +108,7 @@ inline void set_topology(Part & part) stk::topology get_topology(const MetaData& meta_data, EntityRank entity_rank, const std::pair& supersets); /** get the stk::topology given a Shards Cell Topology */ -stk::topology get_topology(shards::CellTopology shards_topology, unsigned spatial_dimension = 3); +stk::topology get_topology(shards::CellTopology shards_topology, unsigned spatial_dimension = 3, bool useAllFaceSideShell = false); /** Get the Shards Cell Topology given a stk::topology */ shards::CellTopology get_cell_topology(stk::topology topo); @@ -590,14 +590,28 @@ class MetaData { bool delete_part_alias_case_insensitive(Part& part, const std::string& alias); std::vector get_part_aliases(const Part& part) const; + // To enable the Field Sync Debugger in a production run, add the STK_DEBUG_FIELD_SYNC + // define to your build. This function is solely used to flip external parts of the + // debugger on for unit testing when it is not enabled globally. + // + void enable_field_sync_debugger() { + m_isFieldSyncDebuggerEnabled = true; + } + + bool is_field_sync_debugger_enabled() { +#ifdef STK_DEBUG_FIELD_SYNC + return true; +#else + return m_isFieldSyncDebuggerEnabled; +#endif + } + protected: Part & declare_internal_part( const std::string & p_name); /** \} */ private: - // Functions - MetaData( const MetaData & ); ///< \brief Not allowed MetaData & operator = ( const MetaData & ); ///< \brief Not allowed @@ -611,11 +625,9 @@ class MetaData { void assign_topology(Part& part, stk::topology stkTopo); - // Members + void declare_field_sync_debugger_field(stk::mesh::FieldBase& field); BulkData* m_bulk_data; - bool m_commit ; - bool m_are_late_fields_enabled; impl::PartRepository m_part_repo ; CSet m_attributes ; @@ -641,6 +653,10 @@ class MetaData { std::map > m_partAlias; std::map> m_partReverseAlias; + bool m_commit; + bool m_are_late_fields_enabled; + bool m_isFieldSyncDebuggerEnabled; + /** \name Invariants/preconditions for MetaData. * \{ */ @@ -878,6 +894,8 @@ MetaData::declare_field(stk::topology::rank_t arg_entity_rank, f[0]->set_mesh(m_bulk_data); + declare_field_sync_debugger_field(*f[0]); + return *f[0]; } diff --git a/packages/stk/stk_mesh/stk_mesh/base/Ngp.hpp b/packages/stk/stk_mesh/stk_mesh/base/Ngp.hpp index 063f2a2616cd..3821c5932c40 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/Ngp.hpp +++ b/packages/stk/stk_mesh/stk_mesh/base/Ngp.hpp @@ -51,34 +51,45 @@ template class DeviceMeshT; class StkFieldSyncDebugger; class EmptyStkFieldSyncDebugger; -template class NgpFieldSyncDebugger; -template class EmptyNgpFieldSyncDebugger; +template class NgpFieldSyncDebugger; +template class EmptyNgpFieldSyncDebugger; #ifdef STK_DEBUG_FIELD_SYNC using DefaultStkFieldSyncDebugger = StkFieldSyncDebugger; - template using DefaultNgpFieldSyncDebugger = NgpFieldSyncDebugger; + template + using DefaultNgpFieldSyncDebugger = NgpFieldSyncDebugger; #else using DefaultStkFieldSyncDebugger = EmptyStkFieldSyncDebugger; - template using DefaultNgpFieldSyncDebugger = EmptyNgpFieldSyncDebugger; + template + using DefaultNgpFieldSyncDebugger = EmptyNgpFieldSyncDebugger; #endif -template class NgpDebugger = DefaultNgpFieldSyncDebugger> class HostField; -template class NgpDebugger = DefaultNgpFieldSyncDebugger> class DeviceField; - #ifdef STK_USE_DEVICE_MESH using NgpMeshDefaultMemSpace = stk::ngp::MemSpace; +#else + using NgpMeshDefaultMemSpace = stk::ngp::HostMemSpace; +#endif + +template class NgpDebugger = DefaultNgpFieldSyncDebugger> +class HostField; +template class NgpDebugger = DefaultNgpFieldSyncDebugger> +class DeviceField; + +#ifdef STK_USE_DEVICE_MESH template using NgpMeshT = stk::mesh::DeviceMeshT; using NgpMesh = NgpMeshT; - template class NgpDebugger = DefaultNgpFieldSyncDebugger> - using NgpField = stk::mesh::DeviceField; + + template class NgpDebugger = DefaultNgpFieldSyncDebugger> + using NgpField = stk::mesh::DeviceField; + #else - using NgpMeshDefaultMemSpace = typename stk::ngp::HostExecSpace::memory_space; template using NgpMeshT = stk::mesh::HostMeshT; using NgpMesh = NgpMeshT; - template class NgpDebugger = DefaultNgpFieldSyncDebugger> - using NgpField = stk::mesh::HostField; + + template class NgpDebugger = DefaultNgpFieldSyncDebugger> + using NgpField = stk::mesh::HostField; #endif } diff --git a/packages/stk/stk_mesh/stk_mesh/base/NgpFieldSyncDebugger.hpp b/packages/stk/stk_mesh/stk_mesh/base/NgpFieldSyncDebugger.hpp index 3f42f77c62b9..5a5ce4e6de72 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/NgpFieldSyncDebugger.hpp +++ b/packages/stk/stk_mesh/stk_mesh/base/NgpFieldSyncDebugger.hpp @@ -52,7 +52,7 @@ namespace stk { namespace mesh { //============================================================================== -template +template class EmptyNgpFieldSyncDebugger { public: @@ -111,7 +111,7 @@ class EmptyNgpFieldSyncDebugger }; //============================================================================== -template +template class NgpFieldSyncDebugger { public: @@ -293,8 +293,8 @@ class NgpFieldSyncDebugger stk::mesh::Selector fieldSelector(*(ngpField->hostField)); UnsignedViewType & localDeviceNumComponentsPerEntity = ngpField->deviceFieldBucketsNumComponentsPerEntity; - FieldDataDeviceViewType & localDeviceData = ngpField->deviceData; - FieldDataDeviceViewType & localLastFieldValue = lastFieldValue; + FieldDataDeviceViewType & localDeviceData = ngpField->deviceData; + FieldDataDeviceViewType & localLastFieldValue = lastFieldValue; LastFieldModLocationType & localLastFieldModLocation = lastFieldModLocation; ScalarUvmType & localLostDeviceFieldData = lostDeviceFieldData; UnsignedViewType & localDebugDeviceSelectedBucketOffset = debugDeviceSelectedBucketOffset; @@ -349,8 +349,8 @@ class NgpFieldSyncDebugger const stk::mesh::BulkData & bulk = *ngpField->hostBulk; stk::mesh::NgpMesh & ngpMesh = stk::mesh::get_updated_ngp_mesh(bulk); UnsignedViewType & localDeviceNumComponentsPerEntity = ngpField->deviceFieldBucketsNumComponentsPerEntity; - FieldDataDeviceViewType & localDeviceData = ngpField->deviceData; - FieldDataDeviceViewType & localLastFieldValue = lastFieldValue; + FieldDataDeviceViewType & localDeviceData = ngpField->deviceData; + FieldDataDeviceViewType & localLastFieldValue = lastFieldValue; UnsignedViewType & localDebugDeviceSelectedBucketOffset = debugDeviceSelectedBucketOffset; stk::mesh::for_each_entity_run(ngpMesh, ngpField->rank, modifiedSelector, @@ -371,8 +371,8 @@ class NgpFieldSyncDebugger const stk::mesh::FieldBase & stkField = *ngpField->hostField; if (buckets.size() != 0) { - lastFieldValue = FieldDataDeviceViewType(stkField.name()+"_lastValue", buckets.size(), - ORDER_INDICES(ngpField->bucketCapacity, numPerEntity)); + lastFieldValue = FieldDataDeviceViewType(stkField.name()+"_lastValue", buckets.size(), + ORDER_INDICES(ngpField->bucketCapacity, numPerEntity)); lastFieldModLocation = LastFieldModLocationType(stkField.name()+"_lastModLocation", buckets.size(), ORDER_INDICES(ngpField->bucketCapacity, numPerEntity)); } @@ -490,7 +490,7 @@ class NgpFieldSyncDebugger ScalarUvmType lostDeviceFieldData; ScalarUvmType anyPotentialDeviceFieldModification; LastFieldModLocationType lastFieldModLocation; - FieldDataDeviceViewType lastFieldValue; + FieldDataDeviceViewType lastFieldValue; typename UnsignedViewType::HostMirror debugHostSelectedBucketOffset; UnsignedViewType debugDeviceSelectedBucketOffset; }; diff --git a/packages/stk/stk_mesh/stk_mesh/base/NgpTypes.hpp b/packages/stk/stk_mesh/stk_mesh/base/NgpTypes.hpp index 85099a167005..b0465dd85342 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/NgpTypes.hpp +++ b/packages/stk/stk_mesh/stk_mesh/base/NgpTypes.hpp @@ -75,14 +75,18 @@ using HostMeshIndexType = MeshIndexType::HostMirror; using BucketEntityOffsetsViewType = Kokkos::View; template using BucketEntityOffsetsViewTypeT = Kokkos::View; -template using FieldDataDeviceViewType = Kokkos::View; -template using FieldDataHostViewType = Kokkos::View; +template +using FieldDataDeviceViewType = Kokkos::View; +template +using FieldDataHostViewType = Kokkos::View; using FieldDataPointerHostViewType = Kokkos::View; using FieldDataPointerDeviceViewType = Kokkos::View; -template using UnmanagedHostInnerView = Kokkos::View>; -template using UnmanagedDevInnerView = Kokkos::View>; +template +using UnmanagedDevInnerView = Kokkos::View>; +template +using UnmanagedHostInnerView = Kokkos::View>; #ifdef STK_USE_DEVICE_MESH #define ORDER_INDICES(i,j) j,i diff --git a/packages/stk/stk_mesh/stk_mesh/base/SideSetHelper.cpp b/packages/stk/stk_mesh/stk_mesh/base/SideSetHelper.cpp index 3930bae52dec..341264f6ca51 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/SideSetHelper.cpp +++ b/packages/stk/stk_mesh/stk_mesh/base/SideSetHelper.cpp @@ -473,11 +473,11 @@ void SideSetHelper::add_sideset_entry_for_element_selected_by_sidesets(Entity en { if(mesh.bucket_ptr(entity) == nullptr) { return; } - const unsigned numSides = mesh.num_sides(entity); + const unsigned numSides = stk::mesh::num_sides(mesh, entity); if(sidesetsAndSelectors.size() > 0 && mesh.entity_rank(entity) == stk::topology::ELEM_RANK && numSides > 0) { - const stk::mesh::ConnectivityOrdinal* ordinals = mesh.begin_ordinals(entity, mesh.mesh_meta_data().side_rank()); - const stk::mesh::Entity* sides = mesh.begin(entity, mesh.mesh_meta_data().side_rank()); + const std::vector ordinals = stk::mesh::get_side_ordinals(mesh, entity); + const stk::mesh::EntityVector sides = stk::mesh::get_sides(mesh, entity); stk::mesh::SideSetEntry entry(entity); diff --git a/packages/stk/stk_mesh/stk_mesh/base/StkFieldSyncDebugger.cpp b/packages/stk/stk_mesh/stk_mesh/base/StkFieldSyncDebugger.cpp index 12e5f4e0d4a4..b4a3875e5073 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/StkFieldSyncDebugger.cpp +++ b/packages/stk/stk_mesh/stk_mesh/base/StkFieldSyncDebugger.cpp @@ -38,10 +38,61 @@ #include "MetaData.hpp" #include "FieldRestriction.hpp" #include "stk_mesh/baseImpl/BucketRepository.hpp" +#include "stk_util/util/string_utils.hpp" namespace stk { namespace mesh { +const static std::string s_lastFieldModLocationPrefix = "DEBUG_lastFieldModLocation_"; + +namespace FieldSyncDebugger { + +void declare_field(stk::mesh::FieldBase& field) +{ + MetaData& meta = field.mesh_meta_data(); + if (meta.is_field_sync_debugger_enabled()) { + if (not stk::string_starts_with(field.name(), s_lastFieldModLocationPrefix)) { + meta.declare_field(field.entity_rank(), + s_lastFieldModLocationPrefix + field.name(), + field.number_of_states()); + } + } +} + +void declare_field_restriction(stk::mesh::FieldBase& field, const Part& part, + const unsigned numScalarsPerEntity, const unsigned firstDimension) +{ + MetaData& meta = field.mesh_meta_data(); + if (meta.is_field_sync_debugger_enabled()) { + if (not stk::string_starts_with(field.name(), s_lastFieldModLocationPrefix)) { + stk::mesh::FieldBase* lastModLocationField = meta.get_field(field.entity_rank(), + s_lastFieldModLocationPrefix + field.name()); + STK_ThrowRequire(lastModLocationField != nullptr); + std::vector initValue(numScalarsPerEntity, LastModLocation::HOST_OR_DEVICE); + meta.declare_field_restriction(*lastModLocationField, part, numScalarsPerEntity, firstDimension, + initValue.data()); + } + } +} + +void declare_field_restriction(stk::mesh::FieldBase& field, const Selector& selector, + const unsigned numScalarsPerEntity, const unsigned firstDimension) +{ + MetaData& meta = field.mesh_meta_data(); + if (meta.is_field_sync_debugger_enabled()) { + if (not stk::string_starts_with(field.name(), s_lastFieldModLocationPrefix)) { + stk::mesh::FieldBase* lastModLocationField = meta.get_field(field.entity_rank(), + s_lastFieldModLocationPrefix + field.name()); + STK_ThrowRequire(lastModLocationField != nullptr); + std::vector initValue(numScalarsPerEntity, LastModLocation::HOST_OR_DEVICE); + meta.declare_field_restriction(*lastModLocationField, selector, numScalarsPerEntity, firstDimension, + initValue.data()); + } + } +} + +} + StkFieldSyncDebugger::StkFieldSyncDebugger(const FieldBase* stkField) : m_stkField(stkField), m_isDataInitialized(false) @@ -133,7 +184,8 @@ StkFieldSyncDebugger::fill_last_mod_location_field_from_device() for (unsigned ordinal = 0; ordinal < bucket->size(); ++ordinal) { const Entity & entity = (*bucket)[ordinal]; const unsigned numComponents = field_scalars_per_entity(lastModLocationField, entity); - uint8_t * lastModLocation = reinterpret_cast(field_data(lastModLocationField, entity)); + uint8_t * lastModLocation = reinterpret_cast(field_data(lastModLocationField, + entity)); for (unsigned component = 0; component < numComponents; ++component) { const unsigned bucketOffset = ngpField.debug_get_bucket_offset(bucket->bucket_id()); lastModLocation[component] = m_debugFieldLastModification(bucketOffset, ORDER_INDICES(ordinal, component)); @@ -147,30 +199,10 @@ StkFieldSyncDebugger::get_last_mod_location_field() const { if (m_lastModLocationField == nullptr) { STK_ThrowRequire(impl::get_ngp_field(*m_stkField) != nullptr); - BulkData & bulk = m_stkField->get_mesh(); - MetaData & meta = bulk.mesh_meta_data(); - meta.enable_late_fields(); - FieldState state = m_stkField->state(); - FieldBase* fieldWithStateNew = m_stkField->field_state(stk::mesh::StateNew); - Field & lastModLocationField = - meta.declare_field(m_stkField->entity_rank(), - "DEBUG_lastFieldModLocation_"+fieldWithStateNew->name(), - m_stkField->number_of_states()); - - meta.set_mesh_on_fields(&bulk); - const FieldBase::RestrictionVector & fieldRestrictions = m_stkField->restrictions(); - if (not fieldRestrictions.empty()) { - for (const FieldBase::Restriction & restriction : fieldRestrictions) { - const unsigned numComponents = restriction.num_scalars_per_entity(); - std::vector initLastModLocation(numComponents, LastModLocation::HOST_OR_DEVICE); - put_field_on_mesh(lastModLocationField, restriction.selector(), numComponents, initLastModLocation.data()); - } - } - else { - bulk.reallocate_field_data(lastModLocationField); - } - - m_lastModLocationField = lastModLocationField.field_state(state); + MetaData& meta = m_stkField->mesh_meta_data(); + m_lastModLocationField = meta.get_field(m_stkField->entity_rank(), + s_lastFieldModLocationPrefix + m_stkField->name()); + STK_ThrowRequire(m_lastModLocationField != nullptr); } return *m_lastModLocationField; } diff --git a/packages/stk/stk_mesh/stk_mesh/base/StkFieldSyncDebugger.hpp b/packages/stk/stk_mesh/stk_mesh/base/StkFieldSyncDebugger.hpp index b9c2be4e2189..184d77d1be88 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/StkFieldSyncDebugger.hpp +++ b/packages/stk/stk_mesh/stk_mesh/base/StkFieldSyncDebugger.hpp @@ -48,6 +48,16 @@ namespace mesh { class Bucket; class FieldBase; +namespace FieldSyncDebugger { + +void declare_field(stk::mesh::FieldBase& field); +void declare_field_restriction(stk::mesh::FieldBase& field, const Part& part, + const unsigned numScalarsPerEntity, const unsigned firstDimension); +void declare_field_restriction(stk::mesh::FieldBase& field, const Selector& selector, + const unsigned numScalarsPerEntity, const unsigned firstDimension); + +} + class EmptyStkFieldSyncDebugger { public: diff --git a/packages/stk/stk_mesh/stk_mesh/base/Types.hpp b/packages/stk/stk_mesh/stk_mesh/base/Types.hpp index 4457df3f1796..ebf41a176298 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/Types.hpp +++ b/packages/stk/stk_mesh/stk_mesh/base/Types.hpp @@ -116,6 +116,7 @@ struct MeshIndex Bucket* bucket; unsigned bucket_ordinal; + STK_FUNCTION MeshIndex(Bucket *bucketIn, size_t ordinal) : bucket(bucketIn), bucket_ordinal(ordinal) {} }; diff --git a/packages/stk/stk_mesh/stk_mesh/baseImpl/BucketConnDynamic.hpp b/packages/stk/stk_mesh/stk_mesh/baseImpl/BucketConnDynamic.hpp index f5fa92071b4a..a703517b4c6f 100644 --- a/packages/stk/stk_mesh/stk_mesh/baseImpl/BucketConnDynamic.hpp +++ b/packages/stk/stk_mesh/stk_mesh/baseImpl/BucketConnDynamic.hpp @@ -65,7 +65,7 @@ class BucketConnDynamic m_ordinals(), m_permutations(), m_numUnusedEntries(0), - m_compressionThreshold(0.5) + m_compressionThreshold(2) { STK_ThrowRequireMsg(bucketCapacity > 0, "BucketConnDynamic must have bucketCapacity strictly greater than 0"); } @@ -258,7 +258,7 @@ class BucketConnDynamic size_t total_num_connectivity() const { return m_connectivity.size() - m_numUnusedEntries; } size_t num_unused_entries() const { return m_numUnusedEntries; } - void compress_connectivity(unsigned suggestedCapacity = 0) + void compress_connectivity() { if (m_numUnusedEntries == 0) { return; @@ -356,9 +356,10 @@ class BucketConnDynamic Permutation perm = INVALID_PERMUTATION) { static constexpr unsigned minSizeHeuristic = 256; - if (total_num_connectivity() > minSizeHeuristic && (static_cast(m_numUnusedEntries)/total_num_connectivity()) > m_compressionThreshold) + if ((total_num_connectivity() > minSizeHeuristic) && + (total_num_connectivity() < m_numUnusedEntries*m_compressionThreshold)) { - compress_connectivity(total_num_connectivity()+m_numUnusedEntries/2); + compress_connectivity(); } grow_if_necessary(bktOrdinal); @@ -501,7 +502,7 @@ class BucketConnDynamic std::vector m_ordinals; std::vector m_permutations; unsigned m_numUnusedEntries; - double m_compressionThreshold; + int m_compressionThreshold; }; } // namespace impl diff --git a/packages/stk/stk_mesh/stk_mesh/baseImpl/BucketRepository.cpp b/packages/stk/stk_mesh/stk_mesh/baseImpl/BucketRepository.cpp index 9f58a66dd2fc..afde826b7583 100644 --- a/packages/stk/stk_mesh/stk_mesh/baseImpl/BucketRepository.cpp +++ b/packages/stk/stk_mesh/stk_mesh/baseImpl/BucketRepository.cpp @@ -37,10 +37,10 @@ #include // for operator new #include // for operator<<, etc #include // for runtime_error -#include // for Bucket, raw_part_equal +#include // for Bucket #include // for BulkData, etc #include -#include // for Partition, lower_bound +#include // for Partition, upper_bound #include #include #include "stk_mesh/base/BucketConnectivity.hpp" // for BucketConnectivity @@ -162,85 +162,31 @@ void BucketRepository::ensure_data_structures_sized() } } -//// -//// Note that we need to construct a key vector that the particular -//// format so we can use the lower_bound(..) function to lookup the -//// partition. Because we are using partitions now instead of -//// buckets, it should be possible to do without that vector and -//// instead do the lookup directly from the OrdinalVector. -//// - Partition *BucketRepository::get_or_create_partition( const EntityRank arg_entity_rank , const OrdinalVector &parts) { - const unsigned maxKeyTmpBufferSize = 64; - PartOrdinal keyTmpBuffer[maxKeyTmpBufferSize]; - OrdinalVector keyTmpVec; - - PartOrdinal* keyPtr = nullptr; - PartOrdinal* keyEnd = nullptr; - - fill_key_ptr(parts, &keyPtr, &keyEnd, maxKeyTmpBufferSize, keyTmpBuffer, keyTmpVec); - std::vector::iterator ik; - Partition* partition = get_partition(arg_entity_rank, parts, ik, keyPtr, keyEnd); + Partition* partition = get_partition(arg_entity_rank, parts, ik); if(partition == nullptr) { - partition = create_partition(arg_entity_rank, parts, ik, keyPtr, keyEnd); + partition = create_partition(arg_entity_rank, parts, ik); } return partition; } -void BucketRepository::fill_key_ptr(const OrdinalVector& parts, PartOrdinal** keyPtr, PartOrdinal** keyEnd, - const unsigned maxKeyTmpBufferSize, PartOrdinal* keyTmpBuffer, OrdinalVector& keyTmpVec) -{ - const size_t part_count = parts.size(); - - const size_t keyLen = 1 + part_count; - - *keyPtr = keyTmpBuffer; - *keyEnd = *keyPtr+keyLen; - - if (keyLen >= maxKeyTmpBufferSize) { - keyTmpVec.resize(keyLen); - *keyPtr = keyTmpVec.data(); - *keyEnd = *keyPtr+keyLen; - } - - //---------------------------------- - // Key layout: - // { part_count , { part_ordinals } } - // - (*keyPtr)[0] = part_count; - - for ( unsigned i = 0 ; i < part_count ; ++i ) { - (*keyPtr)[i+1] = parts[i]; - } -} - Partition *BucketRepository::get_partition(const EntityRank arg_entity_rank, const OrdinalVector &parts) { - PartOrdinal* keyPtr = nullptr; - PartOrdinal* keyEnd = nullptr; std::vector::iterator ik; - const unsigned maxKeyTmpBufferSize = 64; - PartOrdinal keyTmpBuffer[maxKeyTmpBufferSize]; - OrdinalVector keyTmpVec; - - fill_key_ptr(parts, &keyPtr, &keyEnd, maxKeyTmpBufferSize, keyTmpBuffer, keyTmpVec); - - return get_partition(arg_entity_rank, parts, ik, keyPtr, keyEnd); + return get_partition(arg_entity_rank, parts, ik); } Partition *BucketRepository::get_partition( const EntityRank arg_entity_rank , const OrdinalVector &parts, - std::vector::iterator& ik, - PartOrdinal* keyPtr, - PartOrdinal* keyEnd) + std::vector::iterator& ik) { STK_ThrowAssertMsg(m_mesh.mesh_meta_data().check_rank(arg_entity_rank), "Entity rank " << arg_entity_rank << " is invalid"); @@ -249,12 +195,12 @@ Partition *BucketRepository::get_partition( std::vector & partitions = m_partitions[ arg_entity_rank ]; - ik = lower_bound( partitions , keyPtr ); - const bool partition_exists = (ik != partitions.end()) && raw_part_equal( (*ik)->key() , keyPtr ); + ik = upper_bound( partitions , parts ); + const bool partition_exists = (ik != partitions.begin() && (ik[-1])->get_legacy_partition_id() == parts ); if (partition_exists) { - return *ik; + return ik[-1]; } return nullptr; @@ -263,11 +209,9 @@ Partition *BucketRepository::get_partition( Partition* BucketRepository::create_partition( const EntityRank arg_entity_rank, const OrdinalVector& parts, - std::vector::iterator& ik, - PartOrdinal* keyPtr, - PartOrdinal* keyEnd) + std::vector::iterator& ik) { - Partition *partition = new Partition(m_mesh, this, arg_entity_rank, keyPtr, keyEnd); + Partition *partition = new Partition(m_mesh, this, arg_entity_rank, parts.data(), parts.data()+parts.size()); STK_ThrowRequire(partition != nullptr); m_need_sync_from_partitions[arg_entity_rank] = true; @@ -411,8 +355,7 @@ Bucket *BucketRepository::allocate_bucket(EntityRank entityRank, unsigned initialCapacity, unsigned maximumCapacity) { - std::vector tmp(key.begin()+1,key.end()); - STK_ThrowAssertMsg(stk::util::is_sorted_and_unique(tmp,std::less()), + STK_ThrowAssertMsg(stk::util::is_sorted_and_unique(key,std::less()), "bucket created with 'key' vector that's not sorted and unique"); BucketVector &bucket_vec = m_buckets[entityRank]; const unsigned bucket_id = bucket_vec.size(); diff --git a/packages/stk/stk_mesh/stk_mesh/baseImpl/BucketRepository.hpp b/packages/stk/stk_mesh/stk_mesh/baseImpl/BucketRepository.hpp index 27366e248101..801eb0076cf8 100644 --- a/packages/stk/stk_mesh/stk_mesh/baseImpl/BucketRepository.hpp +++ b/packages/stk/stk_mesh/stk_mesh/baseImpl/BucketRepository.hpp @@ -120,15 +120,11 @@ class BucketRepository Partition *get_partition(const EntityRank arg_entity_rank , const OrdinalVector &parts, - std::vector::iterator& ik, - PartOrdinal* keyPtr, - PartOrdinal* keyEnd); + std::vector::iterator& ik); Partition *create_partition(const EntityRank arg_entity_rank , const OrdinalVector &parts, - std::vector::iterator& ik, - PartOrdinal* keyPtr, - PartOrdinal* keyEnd); + std::vector::iterator& ik); // For use by BulkData::internal_modification_end(). void internal_modification_end(); @@ -168,14 +164,9 @@ class BucketRepository void ensure_data_structures_sized(); - void fill_key_ptr(const OrdinalVector& parts, PartOrdinal** keyPtr, PartOrdinal** keyEnd, - const unsigned maxKeyTmpBufferSize, PartOrdinal* keyTmpBuffer, OrdinalVector& keyTmpVec); + BulkData & m_mesh ; - - BulkData & m_mesh ; // Associated Bulk Data Aggregate - - // Vector of bucket pointers by rank. This is now a cache and no longer the primary - // location of Buckets when USE_STK_MESH_IMPL_PARTITION is #defined. + // Vector of bucket pointers for each rank. std::vector< BucketVector > m_buckets ; std::vector > m_partitions; diff --git a/packages/stk/stk_mesh/stk_mesh/baseImpl/MeshImplUtils.cpp b/packages/stk/stk_mesh/stk_mesh/baseImpl/MeshImplUtils.cpp index a1a0cbfd8afe..51feaff89523 100644 --- a/packages/stk/stk_mesh/stk_mesh/baseImpl/MeshImplUtils.cpp +++ b/packages/stk/stk_mesh/stk_mesh/baseImpl/MeshImplUtils.cpp @@ -640,11 +640,11 @@ Entity connect_element_to_entity(BulkData & mesh, Entity elem, Entity entity, OrdinalVector entity_node_ordinals(entity_top.num_nodes()); elem_top.sub_topology_node_ordinals(mesh.entity_rank(entity), relationOrdinal, entity_node_ordinals.data()); - const stk::mesh::Entity *elem_nodes = mesh.begin_nodes(elem); + stk::mesh::EntityVector elem_nodes(mesh.begin_nodes(elem),mesh.end_nodes(elem)); EntityVector entity_top_nodes(entity_top.num_nodes()); - elem_top.sub_topology_nodes(elem_nodes, mesh.entity_rank(entity), relationOrdinal, entity_top_nodes.data()); + elem_top.sub_topology_nodes(elem_nodes.data(), mesh.entity_rank(entity), relationOrdinal, entity_top_nodes.data()); - Permutation perm = stk::mesh::find_permutation(mesh, elem_top, elem_nodes, entity_top, entity_top_nodes.data(), relationOrdinal); + Permutation perm = stk::mesh::find_permutation(mesh, elem_top, elem_nodes.data(), entity_top, entity_top_nodes.data(), relationOrdinal); OrdinalVector scratch1, scratch2, scratch3; @@ -676,10 +676,9 @@ Entity connect_element_to_entity(BulkData & mesh, Entity elem, Entity entity, if(0 == num_side_nodes) { Permutation node_perm = stk::mesh::Permutation::INVALID_PERMUTATION; - Entity const *elem_nodes_local = mesh.begin_nodes(elem); for(unsigned i = 0; i < entity_top.num_nodes(); ++i) { - Entity node = elem_nodes_local[entity_node_ordinals[i]]; + Entity node = elem_nodes[entity_node_ordinals[i]]; mesh.declare_relation(entity, node, i, node_perm, scratch1, scratch2, scratch3); } } diff --git a/packages/stk/stk_mesh/stk_mesh/baseImpl/MeshModification.cpp b/packages/stk/stk_mesh/stk_mesh/baseImpl/MeshModification.cpp index 2566e59bb9ee..a1fa3b4eecd8 100644 --- a/packages/stk/stk_mesh/stk_mesh/baseImpl/MeshModification.cpp +++ b/packages/stk/stk_mesh/stk_mesh/baseImpl/MeshModification.cpp @@ -55,8 +55,13 @@ bool MeshModification::modification_begin(const std::string description) const stk::mesh::FieldVector allFields = m_bulkData.mesh_meta_data().get_fields(); for (FieldBase * stkField : allFields) { stkField->sync_to_host(); - if (stkField->has_ngp_field()) { - impl::get_ngp_field(*stkField)->debug_modification_begin(); + } + + if (m_bulkData.mesh_meta_data().is_field_sync_debugger_enabled()) { + for (FieldBase * stkField : allFields) { + if (stkField->has_ngp_field()) { + impl::get_ngp_field(*stkField)->debug_modification_begin(); + } } } diff --git a/packages/stk/stk_mesh/stk_mesh/baseImpl/NgpMeshHostData.hpp b/packages/stk/stk_mesh/stk_mesh/baseImpl/NgpMeshHostData.hpp index 7af403e25567..8f3f1d0f49ea 100644 --- a/packages/stk/stk_mesh/stk_mesh/baseImpl/NgpMeshHostData.hpp +++ b/packages/stk/stk_mesh/stk_mesh/baseImpl/NgpMeshHostData.hpp @@ -53,11 +53,6 @@ template struct NgpMeshHostData : NgpMeshHostDataBase { typename EntityKeyViewTypeT::HostMirror hostEntityKeys; - typename BucketEntityOffsetsViewTypeT::HostMirror hostBucketEntityOffsets[stk::topology::NUM_RANKS]; - typename UnsignedViewTypeT::HostMirror hostEntityConnectivityOffset[stk::topology::NUM_RANKS][stk::topology::NUM_RANKS]; - typename EntityViewTypeT::HostMirror hostSparseConnectivity[stk::topology::NUM_RANKS][stk::topology::NUM_RANKS]; - typename OrdinalViewTypeT::HostMirror hostSparseConnectivityOrdinals[stk::topology::NUM_RANKS][stk::topology::NUM_RANKS]; - typename PermutationViewTypeT::HostMirror hostSparsePermutations[stk::topology::NUM_RANKS][stk::topology::NUM_RANKS]; typename UnsignedViewTypeT::HostMirror hostVolatileFastSharedCommMapOffset[stk::topology::NUM_RANKS]; typename NgpCommMapIndicesT::HostMirror hostVolatileFastSharedCommMap[stk::topology::NUM_RANKS]; unsigned volatileFastSharedCommMapSyncCount = 0; diff --git a/packages/stk/stk_mesh/stk_mesh/baseImpl/Partition.cpp b/packages/stk/stk_mesh/stk_mesh/baseImpl/Partition.cpp index cc95d6223656..8d9a0387d908 100644 --- a/packages/stk/stk_mesh/stk_mesh/baseImpl/Partition.cpp +++ b/packages/stk/stk_mesh/stk_mesh/baseImpl/Partition.cpp @@ -326,7 +326,7 @@ stk::mesh::FieldVector get_fields_for_bucket(const stk::mesh::BulkData& mesh, void Partition::sort(const EntitySorterBase& sorter) { - std::vector partition_key = get_legacy_partition_id(); + const std::vector& partition_key = get_legacy_partition_id(); std::vector entities(m_size); @@ -540,7 +540,7 @@ stk::mesh::Bucket *Partition::get_bucket_for_adds() clear_pending_removes_by_filling_from_end(); if (no_buckets()) { - std::vector partition_key = get_legacy_partition_id(); + const std::vector& partition_key = get_legacy_partition_id(); Bucket *bucket = m_repository->allocate_bucket(m_rank, partition_key, m_repository->get_initial_bucket_capacity(), m_repository->get_maximum_bucket_capacity()); @@ -554,7 +554,7 @@ stk::mesh::Bucket *Partition::get_bucket_for_adds() if (bucket->size() == bucket->capacity()) { if (bucket->size() == m_repository->get_maximum_bucket_capacity()) { - std::vector partition_key = get_legacy_partition_id(); + const std::vector& partition_key = get_legacy_partition_id(); bucket = m_repository->allocate_bucket(m_rank, partition_key, m_repository->get_initial_bucket_capacity(), m_repository->get_maximum_bucket_capacity()); diff --git a/packages/stk/stk_mesh/stk_mesh/baseImpl/Partition.hpp b/packages/stk/stk_mesh/stk_mesh/baseImpl/Partition.hpp index a10a754fb2a2..97916dc9525e 100644 --- a/packages/stk/stk_mesh/stk_mesh/baseImpl/Partition.hpp +++ b/packages/stk/stk_mesh/stk_mesh/baseImpl/Partition.hpp @@ -209,35 +209,26 @@ class Partition std::ostream &operator<<(std::ostream &, const stk::mesh::impl::Partition &); -inline -bool partition_key_less( const unsigned * lhs , const unsigned * rhs ) -{ -// The following (very old) code is clever... So I'm adding some comments. -// -// A partition key is an array of unsigned, laid out like this: -// key[num-part-ordinals, first-part-ordinal, ..., last-part-ordinal] - - if (*lhs == *rhs) { //num-part-ordinals is equal for lhs and rhs... - const unsigned * const last_lhs = lhs + *lhs; - do { - ++lhs ; ++rhs ; - } while ( last_lhs != lhs && *lhs == *rhs ); - } - return *lhs < *rhs; -} - struct PartitionLess { - bool operator()( const Partition * lhs_Partition , const unsigned * rhs ) const - { return partition_key_less( lhs_Partition->key() , rhs ); } + bool operator()( const Partition * lhs_Partition , const OrdinalVector& rhs ) const + { + return lhs_Partition->get_legacy_partition_id().size() != rhs.size() ? + lhs_Partition->get_legacy_partition_id().size() < rhs.size() : + lhs_Partition->get_legacy_partition_id() < rhs; + } - bool operator()( const unsigned * lhs , const Partition * rhs_Partition ) const - { return partition_key_less( lhs , rhs_Partition->key() ); } + bool operator()( const OrdinalVector& lhs , const Partition * rhs_Partition ) const + { + return lhs.size() != rhs_Partition->get_legacy_partition_id().size() ? + lhs.size() < rhs_Partition->get_legacy_partition_id().size() : + lhs < rhs_Partition->get_legacy_partition_id(); + } }; inline std::vector::iterator -lower_bound( std::vector & v , const unsigned * key ) -{ return std::lower_bound( v.begin() , v.end() , key , PartitionLess() ); } +upper_bound( std::vector & v , const OrdinalVector& key ) +{ return std::upper_bound( v.begin() , v.end() , key , PartitionLess() ); } } // impl } // mesh diff --git a/packages/stk/stk_middle_mesh/stk_middle_mesh/abstract_cdt_interface.hpp b/packages/stk/stk_middle_mesh/stk_middle_mesh/abstract_cdt_interface.hpp index 4f19954f4e60..6953533e6049 100644 --- a/packages/stk/stk_middle_mesh/stk_middle_mesh/abstract_cdt_interface.hpp +++ b/packages/stk/stk_middle_mesh/stk_middle_mesh/abstract_cdt_interface.hpp @@ -10,6 +10,10 @@ namespace impl { class AbstractCDTInterface { + public: + virtual ~AbstractCDTInterface() = default; + + private: virtual void triangulate(const utils::impl::Projection& proj) = 0; }; diff --git a/packages/stk/stk_middle_mesh_util/stk_middle_mesh_util/create_stk_mesh.cpp b/packages/stk/stk_middle_mesh_util/stk_middle_mesh_util/create_stk_mesh.cpp index bddbe88eb692..de5dea0daa9a 100644 --- a/packages/stk/stk_middle_mesh_util/stk_middle_mesh_util/create_stk_mesh.cpp +++ b/packages/stk/stk_middle_mesh_util/stk_middle_mesh_util/create_stk_mesh.cpp @@ -288,20 +288,20 @@ void StkMeshCreator::setup_edge_sharing(std::shared_ptr mesh, MeshFi constexpr unsigned maxNumEdgeNodes = 3; std::vector edgeNodes(maxNumEdgeNodes); std::vector edgeVerts(maxNumEdgeNodes); - + const std::vector& surfaceElems = mesh->get_elements(); for(const mesh::MeshEntityPtr& elem : surfaceElems) { if (elem) { const stk::mesh::SideSetEntry& ssetEntry = (*stkElsField)(elem, 0, 0); stk::mesh::Entity stkEl = ssetEntry.element; - + const bool stkElemIsFace = ssetEntry.side != stk::mesh::INVALID_CONNECTIVITY_ORDINAL; if (stkElemIsFace) { stkEl = stk::mesh::get_side_entity_for_elem_side_pair(bulk, stkEl, ssetEntry.side); } - + stk::topology stkTopo = bulk.bucket(stkEl).topology(); - + const stk::mesh::Entity* nodes = bulk.begin_nodes(stkEl); for(int dn=0; dncount_down(); ++dn) { @@ -309,7 +309,7 @@ void StkMeshCreator::setup_edge_sharing(std::shared_ptr mesh, MeshFi STK_ThrowRequire((edgeEnt && edgeEnt->get_type() == mesh::MeshEntityType::Edge)); edgeNodes.resize(edgeEnt->count_down()); stkTopo.edge_nodes(nodes, dn, edgeNodes.data()); - + edgeVerts.resize(edgeEnt->count_down()); for(int n=0; ncount_down(); ++n) { diff --git a/packages/stk/stk_middle_mesh_util/stk_middle_mesh_util/stk_field_copier.cpp b/packages/stk/stk_middle_mesh_util/stk_middle_mesh_util/stk_field_copier.cpp index f66daa9cacae..075445307309 100644 --- a/packages/stk/stk_middle_mesh_util/stk_middle_mesh_util/stk_field_copier.cpp +++ b/packages/stk/stk_middle_mesh_util/stk_middle_mesh_util/stk_field_copier.cpp @@ -28,7 +28,7 @@ stk::mesh::Field* StkFieldCopier::create_stk_field(mesh::FieldPtrmesh_meta_data_ptr(); stk::mesh::Field* stkField = &(metaData->declare_field(stk::topology::NODE_RANK, name)); - stk::mesh::put_field_on_mesh(*stkField, *m_part, middleMeshField->get_num_comp(), + stk::mesh::put_field_on_mesh(*stkField, *m_part, middleMeshField->get_num_comp(), middleMeshField->get_field_shape().get_num_nodes(0), 0); return stkField; @@ -38,7 +38,8 @@ void StkFieldCopier::copy(const stk::mesh::Field& stkField, mesh::FieldP { check_field_shapes(stkField, middleMeshFieldPtr); - stk::mesh::Selector selector(stkField); + auto meshMetaDataPtr = m_bulkDataPtr->mesh_meta_data_ptr(); + stk::mesh::Selector selector(stkField & (meshMetaDataPtr->locally_owned_part() | meshMetaDataPtr->globally_shared_part())); const stk::mesh::BucketVector& buckets = m_bulkDataPtr->get_buckets(stk::topology::NODE_RANK, selector); int numNodesPerEntity = middleMeshFieldPtr->get_field_shape().get_num_nodes(0); @@ -62,7 +63,8 @@ void StkFieldCopier::copy(const mesh::FieldPtr middleMeshFieldPtr, stk:: { check_field_shapes(stkField, middleMeshFieldPtr); - stk::mesh::Selector selector(stkField); + auto meshMetaDataPtr = m_bulkDataPtr->mesh_meta_data_ptr(); + stk::mesh::Selector selector(stkField & (meshMetaDataPtr->locally_owned_part() | meshMetaDataPtr->globally_shared_part())); const stk::mesh::BucketVector& buckets = m_bulkDataPtr->get_buckets(stk::topology::NODE_RANK, selector); int numNodesPerEntity = middleMeshFieldPtr->get_field_shape().get_num_nodes(0); @@ -109,7 +111,7 @@ void StkFieldCopier::check_field_shapes(const stk::mesh::Field& stkField std::string("Field shapes not compatible: stk field has ") + std::to_string(stk_field_dims.second) + " components per node, while the middle mesh field has " + std::to_string(meshField->get_num_comp()) ); - } + } } std::pair StkFieldCopier::get_field_shape_and_num_components(const stk::mesh::Field& stkField) diff --git a/packages/stk/stk_ngp_test/stk_ngp_test/GlobalReporter.hpp b/packages/stk/stk_ngp_test/stk_ngp_test/GlobalReporter.hpp index 617d1d095b46..f318776a5239 100644 --- a/packages/stk/stk_ngp_test/stk_ngp_test/GlobalReporter.hpp +++ b/packages/stk/stk_ngp_test/stk_ngp_test/GlobalReporter.hpp @@ -5,6 +5,12 @@ #include "stk_util/ngp/NgpSpaces.hpp" #include "NgpTestDeviceMacros.hpp" +// RDC is required for HIP build since registering a static global variable +// on an inline variable is not functional as of rocm 6.2.7 +#if defined(KOKKOS_ENABLE_HIP) && !defined(KOKKOS_ENABLE_HIP_RELOCATABLE_DEVICE_CODE) +#error "Kokkos_ENABLE_HIP_RELOCATABLE_DEVICE_CODE is required for HIP build" +#endif + namespace ngp_testing { template diff --git a/packages/stk/stk_ngp_test/stk_ngp_test/NgpTestDeviceMacros.hpp b/packages/stk/stk_ngp_test/stk_ngp_test/NgpTestDeviceMacros.hpp index 0aac6f0aa609..605d54f699dc 100644 --- a/packages/stk/stk_ngp_test/stk_ngp_test/NgpTestDeviceMacros.hpp +++ b/packages/stk/stk_ngp_test/stk_ngp_test/NgpTestDeviceMacros.hpp @@ -6,10 +6,4 @@ #define NGP_TEST_FUNCTION KOKKOS_FUNCTION #define NGP_TEST_INLINE KOKKOS_INLINE_FUNCTION -#ifdef STK_ENABLE_GPU -#define NGP_TEST_DEVICE_ONLY __device__ -#else -#define NGP_TEST_DEVICE_ONLY -#endif - #endif diff --git a/packages/stk/stk_ngp_test/stk_ngp_test/ngp_test.hpp b/packages/stk/stk_ngp_test/stk_ngp_test/ngp_test.hpp index 0f25e3439677..744c2666c36e 100644 --- a/packages/stk/stk_ngp_test/stk_ngp_test/ngp_test.hpp +++ b/packages/stk/stk_ngp_test/stk_ngp_test/ngp_test.hpp @@ -63,12 +63,6 @@ bool expect_near(const T a, const T b, const T tolerance) { #define NUM_TO_STR(x) NGP_TEST_STRINGIZE(x) #define LOCATION __FILE__ ":" NUM_TO_STR(__LINE__) -#ifdef __HIP_DEVICE_COMPILE__ -//FIXME: unsupported indirect call to function on HIP-Clang -#define NGP_EXPECT_TRUE(cond) -#define NGP_ASSERT_TRUE(cond) - -#else #define NGP_EXPECT_TRUE(cond) \ do { \ if (!(cond)) { \ @@ -83,7 +77,6 @@ bool expect_near(const T a, const T b, const T tolerance) { return; \ } \ } while (false) -#endif #define NGP_EXPECT_FALSE(cond) NGP_EXPECT_TRUE(!(cond)) #define NGP_ASSERT_FALSE(cond) NGP_ASSERT_TRUE(!(cond)) @@ -106,12 +99,6 @@ bool expect_near(const T a, const T b, const T tolerance) { #define NGP_EXPECT_GE(a, b) NGP_EXPECT_TRUE((a) >= (b)) #define NGP_ASSERT_GE(a, b) NGP_ASSERT_TRUE((a) >= (b)) -#ifdef __HIP_DEVICE_COMPILE__ -//FIXME: unsupported indirect call to function on HIP-Clang -#define NGP_EXPECT_NEAR(a, b, tolerance) -#define NGP_ASSERT_NEAR(a, b, tolerance) - -#else #define NGP_EXPECT_NEAR(a, b, tolerance) \ do { \ if (!::ngp_testing::internal::expect_near(a, b, tolerance)) { \ @@ -126,7 +113,6 @@ bool expect_near(const T a, const T b, const T tolerance) { return; \ } \ } while (false) -#endif namespace ngp_testing { diff --git a/packages/stk/stk_performance_tests/stk_mesh/NgpMeshUpdate.cpp b/packages/stk/stk_performance_tests/stk_mesh/NgpMeshUpdate.cpp index 42dbab9fc055..05a429c2a069 100644 --- a/packages/stk/stk_performance_tests/stk_mesh/NgpMeshUpdate.cpp +++ b/packages/stk/stk_performance_tests/stk_mesh/NgpMeshUpdate.cpp @@ -74,9 +74,14 @@ class NgpMeshChangeElementPartMembership : public stk::unit_test_util::MeshFixtu void batch_change_element_part_membership(int cycle) { + Kokkos::Profiling::pushRegion("BulkData::batch_change_entity_parts"); get_bulk().batch_change_entity_parts(stk::mesh::EntityVector{get_element(cycle)}, stk::mesh::PartVector{get_part()}, {}); + Kokkos::Profiling::popRegion(); + + Kokkos::Profiling::pushRegion("get_updated_ngp_mesh"); stk::mesh::get_updated_ngp_mesh(get_bulk()); + Kokkos::Profiling::popRegion(); } private: @@ -232,9 +237,14 @@ TEST_F( NgpMeshChangeElementPartMembership, TimingBatch ) batchTimer.start_batch_timer(); setup_host_mesh(stk::mesh::BulkData::NO_AUTO_AURA); + Kokkos::Profiling::pushRegion("batch_change_element_part_membership"); + for (int i = 0; i < NUM_ITERS; i++) { batch_change_element_part_membership(i); } + + Kokkos::Profiling::popRegion(); + batchTimer.stop_batch_timer(); reset_mesh(); } diff --git a/packages/stk/stk_performance_tests/stk_mesh/perfNgpFieldStateRotation.cpp b/packages/stk/stk_performance_tests/stk_mesh/perfNgpFieldStateRotation.cpp new file mode 100644 index 000000000000..44a7a65b42ce --- /dev/null +++ b/packages/stk/stk_performance_tests/stk_mesh/perfNgpFieldStateRotation.cpp @@ -0,0 +1,136 @@ +// Copyright 2002 - 2008, 2010, 2011 National Technology Engineering +// Solutions of Sandia, LLC (NTESS). Under the terms of Contract +// DE-NA0003525 with NTESS, the U.S. Government retains certain rights +// in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// * Neither the name of NTESS nor the names of its contributors +// may be used to endorse or promote products derived from this +// software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace +{ + +TEST(StkNgpField, multiStateRotation) +{ + stk::ParallelMachine comm = stk::parallel_machine_world(); + if (stk::parallel_machine_size(comm) > 1) { GTEST_SKIP(); } + + const unsigned NUM_RUNS = 5; + const unsigned NUM_ITERS = 3000; + std::string meshSpec = "generated:80x80x80"; + + std::cout << "Using mesh-spec: " << meshSpec << std::endl; + + stk::unit_test_util::BatchTimer batchTimer(comm); + + batchTimer.initialize_batch_timer(); + + std::unique_ptr bulkPtr = stk::mesh::MeshBuilder(comm) + .set_aura_option(stk::mesh::BulkData::NO_AUTO_AURA) + .set_spatial_dimension(3) + .create(); + + stk::mesh::MetaData& meta = bulkPtr->mesh_meta_data(); + const int numFieldStates = 3; + stk::mesh::Field& tensorField1 = meta.declare_field(stk::topology::ELEM_RANK, "tensorField1", numFieldStates); + stk::mesh::Field& tensorField2 = meta.declare_field(stk::topology::ELEM_RANK, "tensorField2", numFieldStates); + stk::mesh::Field& vectorField1 = meta.declare_field(stk::topology::ELEM_RANK, "vectorField1", numFieldStates); + stk::mesh::Field& vectorField2 = meta.declare_field(stk::topology::ELEM_RANK, "vectorField2", numFieldStates); + stk::mesh::put_field_on_mesh(tensorField1, meta.universal_part(), 9, nullptr); + stk::mesh::put_field_on_mesh(tensorField2, meta.universal_part(), 9, nullptr); + stk::mesh::put_field_on_mesh(vectorField1, meta.universal_part(), 3, nullptr); + stk::mesh::put_field_on_mesh(vectorField2, meta.universal_part(), 3, nullptr); + + stk::io::fill_mesh(meshSpec, *bulkPtr); + + Kokkos::Profiling::pushRegion("get_updated_ngp_mesh"); + stk::mesh::NgpMesh& ngpMesh = stk::mesh::get_updated_ngp_mesh(*bulkPtr); + EXPECT_FALSE(ngpMesh.need_sync_to_host()); + Kokkos::Profiling::popRegion(); + + Kokkos::Profiling::pushRegion("initialize fields"); + stk::ngp::ExecSpace execSpace; + constexpr double initValue1 = 1.14; + constexpr double initValue2 = 3.14; + for(int s=0; s(s); + stk::mesh::Field& tensorField1_state = tensorField1.field_of_state(state); + stk::mesh::Field& tensorField2_state = tensorField2.field_of_state(state); + stk::mesh::Field& vectorField1_state = vectorField1.field_of_state(state); + stk::mesh::Field& vectorField2_state = vectorField2.field_of_state(state); + stk::mesh::field_fill(initValue1, tensorField1_state, execSpace); + stk::mesh::field_fill(initValue2, tensorField2_state, execSpace); + stk::mesh::field_fill(initValue1, vectorField1_state, execSpace); + stk::mesh::field_fill(initValue2, vectorField2_state, execSpace); + } + Kokkos::Profiling::popRegion(); + + Kokkos::Profiling::pushRegion("multiStateRotation test"); + + for (unsigned j = 0; j < NUM_RUNS; j++) { + + batchTimer.start_batch_timer(); + + for(unsigned i=0; iupdate_field_data_states(rotateNgpFieldViews); + Kokkos::Profiling::popRegion(); + } + + batchTimer.stop_batch_timer(); + } + + Kokkos::Profiling::popRegion(); + batchTimer.print_batch_timing(NUM_ITERS); +} + +} diff --git a/packages/stk/stk_performance_tests/stk_search/VolumeToOne.cpp b/packages/stk/stk_performance_tests/stk_search/VolumeToOne.cpp index 3cc7847388fd..a4b6c074c663 100644 --- a/packages/stk/stk_performance_tests/stk_search/VolumeToOne.cpp +++ b/packages/stk/stk_performance_tests/stk_search/VolumeToOne.cpp @@ -114,14 +114,18 @@ void run_volume_to_one_test_with_views(const std::string& meshFileName, stk::io::fill_mesh_with_auto_decomp(meshFileName, *bulkPtr); Kokkos::View elemBoxes = createBoundingBoxesForEntities(*bulkPtr, stk::topology::ELEM_RANK); + auto elemBoxesHost = Kokkos::create_mirror_view(elemBoxes); + Kokkos::deep_copy(elemBoxesHost, elemBoxes); Kokkos::View supersetBoxes("Range Boxes", 1); - supersetBoxes(0) = {elemBoxes[0].box, IdentProc(pRank, pRank)}; + auto supersetBoxesHost = Kokkos::create_mirror_view(supersetBoxes); + supersetBoxesHost(0) = {elemBoxesHost[0].box, IdentProc(pRank, pRank)}; - for (unsigned i = 0; i != elemBoxes.extent(0); ++i) { - stk::search::add_to_box(supersetBoxes(0).box, elemBoxes(i).box); + for (unsigned i = 0; i != elemBoxesHost.extent(0); ++i) { + stk::search::add_to_box(supersetBoxesHost(0).box, elemBoxesHost(i).box); } + Kokkos::deep_copy(supersetBoxes, supersetBoxesHost); batchTimer.start_batch_timer(); for (unsigned i = 0; i < NUM_ITERS; ++i) { @@ -219,14 +223,19 @@ void run_volume_to_one_test_local_with_views(const std::string& meshFileName, stk::io::fill_mesh_with_auto_decomp(meshFileName, *bulkPtr); Kokkos::View elemBoxes = createBoundingBoxesForEntities(*bulkPtr, stk::topology::ELEM_RANK); + auto elemBoxesHost = Kokkos::create_mirror_view(elemBoxes); + Kokkos::deep_copy(elemBoxesHost, elemBoxes); Kokkos::View supersetBoxes("Range Boxes", 1); - supersetBoxes(0) = {elemBoxes[0].box, stk::parallel_machine_rank(comm)}; - + auto supersetBoxesHost = Kokkos::create_mirror_view(supersetBoxes); + supersetBoxesHost(0) = {elemBoxesHost[0].box, stk::parallel_machine_rank(comm)}; + for (unsigned i = 0; i != elemBoxes.extent(0); ++i) { - stk::search::add_to_box(supersetBoxes(0).box, elemBoxes(i).box); + stk::search::add_to_box(supersetBoxesHost(0).box, elemBoxesHost(i).box); } + Kokkos::deep_copy(supersetBoxes, supersetBoxesHost); + batchTimer.start_batch_timer(); for (unsigned i = 0; i < NUM_ITERS; ++i) { Kokkos::View searchResults; @@ -259,14 +268,19 @@ void run_one_to_volume_test_local_with_views(const std::string& meshFileName, stk::io::fill_mesh_with_auto_decomp(meshFileName, *bulkPtr); Kokkos::View elemBoxes = createBoundingBoxesForEntities(*bulkPtr, stk::topology::ELEM_RANK); + auto elemBoxesHost = Kokkos::create_mirror_view(elemBoxes); + Kokkos::deep_copy(elemBoxesHost, elemBoxes); Kokkos::View supersetBoxes("Range Boxes", 1); - supersetBoxes(0) = {elemBoxes[0].box, stk::parallel_machine_rank(comm)}; + auto supersetBoxesHost = Kokkos::create_mirror_view(supersetBoxes); + supersetBoxesHost(0) = {elemBoxesHost[0].box, stk::parallel_machine_rank(comm)}; - for (unsigned i = 0; i != elemBoxes.extent(0); ++i) { - stk::search::add_to_box(supersetBoxes(0).box, elemBoxes(i).box); + for (unsigned i = 0; i != elemBoxesHost.extent(0); ++i) { + stk::search::add_to_box(supersetBoxesHost(0).box, elemBoxesHost(i).box); } + Kokkos::deep_copy(supersetBoxes, supersetBoxesHost); + batchTimer.start_batch_timer(); for (unsigned i = 0; i < NUM_ITERS; ++i) { Kokkos::View searchResults; diff --git a/packages/stk/stk_search/stk_search/morton_lbvh/MortonLBVH_TreeManipulationUtils.hpp b/packages/stk/stk_search/stk_search/morton_lbvh/MortonLBVH_TreeManipulationUtils.hpp index ff76f6ad9c5d..0b6adaae9421 100644 --- a/packages/stk/stk_search/stk_search/morton_lbvh/MortonLBVH_TreeManipulationUtils.hpp +++ b/packages/stk/stk_search/stk_search/morton_lbvh/MortonLBVH_TreeManipulationUtils.hpp @@ -628,6 +628,8 @@ struct UpdateInteriorNodeBVs KOKKOS_INLINE_FUNCTION void operator()(unsigned argIdx) const; + KOKKOS_INLINE_FUNCTION + void check_tree(unsigned argIdx) const; KOKKOS_FORCEINLINE_FUNCTION void get_box(RealType bvMinMax[6], LocalOrdinal idx, const bboxes_3d_view_amt &boxesMinMax) const; @@ -661,10 +663,71 @@ template void UpdateInteriorNodeBVs::apply(const MortonAabbTree &tree, ExecutionSpace const& execSpace) { const UpdateInteriorNodeBVs op(tree); - const size_t numLeaves = tree.hm_numLeaves(); + const size_t numLeaves = tree.hm_numLeaves(); auto policy = Kokkos::RangePolicy(execSpace, 0, numLeaves); - Kokkos::parallel_for(policy, op); + Kokkos::parallel_for("UpdateInteriorNodeBVs", policy, op); + Kokkos::parallel_for("check_tree", policy, KOKKOS_LAMBDA(const unsigned& argIdx){op.check_tree(argIdx);}); +} + +template +KOKKOS_INLINE_FUNCTION +void UpdateInteriorNodeBVs::check_tree(unsigned argIdx) const +{ + if (m_numLeaves > 1) { + LocalOrdinal idx = static_cast(argIdx); + + RealType bvMinMax[6]; + + LocalOrdinal parent = tm_nodeParents(idx); + RealType sibMinMax[6]; + + constexpr RealType tol = std::numeric_limits::epsilon(); + bool fixedBox = false; + + while (idx != parent) { + const LocalOrdinal parentIdx = parent - m_numLeaves; + + const bool boxIsAllZeros = ((m_nodeMinMaxs(parentIdx, 0) < tol) + &&(m_nodeMinMaxs(parentIdx, 1) < tol) + &&(m_nodeMinMaxs(parentIdx, 2) < tol) + &&(m_nodeMinMaxs(parentIdx, 3) < tol) + &&(m_nodeMinMaxs(parentIdx, 4) < tol) + &&(m_nodeMinMaxs(parentIdx, 5) < tol)); + if (boxIsAllZeros || fixedBox) { + const LocalOrdinal sib0 = tm_nodeChildren(parent, 0); + const LocalOrdinal sib1 = tm_nodeChildren(parent, 1); + + if (sib0 < m_numLeaves) { + get_stk_box(bvMinMax, sib0, m_leafMinMaxs); + } + else { + get_box(bvMinMax, sib0-m_numLeaves, m_nodeMinMaxs); + } + + if (sib1 < m_numLeaves) { + get_stk_box(sibMinMax, sib1, m_leafMinMaxs); + } + else { + get_box(sibMinMax, sib1-m_numLeaves, m_nodeMinMaxs); + } + + m_nodeMinMaxs(parentIdx, 0) = AABB_MIN(bvMinMax[0], sibMinMax[0]); + m_nodeMinMaxs(parentIdx, 1) = AABB_MIN(bvMinMax[1], sibMinMax[1]); + m_nodeMinMaxs(parentIdx, 2) = AABB_MIN(bvMinMax[2], sibMinMax[2]); + m_nodeMinMaxs(parentIdx, 3) = AABB_MAX(bvMinMax[3], sibMinMax[3]); + m_nodeMinMaxs(parentIdx, 4) = AABB_MAX(bvMinMax[4], sibMinMax[4]); + m_nodeMinMaxs(parentIdx, 5) = AABB_MAX(bvMinMax[5], sibMinMax[5]); + fixedBox = true; + } + + idx = parent; + parent = tm_nodeParents(parent); + if (idx == parent) { + return; + } + } + } } template diff --git a/packages/stk/stk_topology/stk_topology/topology_defn.hpp b/packages/stk/stk_topology/stk_topology/topology_defn.hpp index 319635eb70bf..e72e05cb7db2 100644 --- a/packages/stk/stk_topology/stk_topology/topology_defn.hpp +++ b/packages/stk/stk_topology/stk_topology/topology_defn.hpp @@ -76,13 +76,7 @@ void topology::sub_topology_node_ordinals(unsigned sub_rank, unsigned sub_ordina { case NODE_RANK: *output_ordinals = sub_ordinal; break; case EDGE_RANK: edge_node_ordinals(sub_ordinal, output_ordinals); break; - case FACE_RANK: - if (has_mixed_rank_sides() && sub_ordinal >= num_faces()) { - edge_node_ordinals(sub_ordinal - num_faces(), output_ordinals); - } else { - face_node_ordinals(sub_ordinal, output_ordinals); - } - break; + case FACE_RANK: face_node_ordinals(sub_ordinal, output_ordinals); break; default: break; } } @@ -95,13 +89,7 @@ void topology::sub_topology_nodes(const NodeArray & nodes, unsigned sub_rank, un { case NODE_RANK: *output_nodes = nodes[sub_ordinal]; break; case EDGE_RANK: edge_nodes(nodes, sub_ordinal, output_nodes); break; - case FACE_RANK: - if (has_mixed_rank_sides() && sub_ordinal >= num_faces()) { - edge_nodes(nodes, sub_ordinal - num_faces(), output_nodes); - } else { - face_nodes(nodes, sub_ordinal, output_nodes); - } - break; + case FACE_RANK: face_nodes(nodes, sub_ordinal, output_nodes); break; default: break; } } @@ -126,11 +114,7 @@ topology topology::sub_topology(unsigned sub_rank, unsigned sub_ordinal) const { case NODE_RANK: return NODE; case EDGE_RANK: return edge_topology(sub_ordinal); - case FACE_RANK: - if (has_mixed_rank_sides() && sub_ordinal >= num_faces()) { - return edge_topology(sub_ordinal - num_faces()); - } - return face_topology(sub_ordinal); + case FACE_RANK: return face_topology(sub_ordinal); default: break; } return INVALID_TOPOLOGY; @@ -140,22 +124,20 @@ template STK_INLINE_FUNCTION void topology::side_node_ordinals(unsigned side_ordinal, OrdinalOutputIterator output_ordinals) const { - if (has_mixed_rank_sides() && is_shell_side_ordinal(side_ordinal)) { - sub_topology_node_ordinals(EDGE_RANK, side_ordinal-num_faces(), output_ordinals); - } else { - sub_topology_node_ordinals( side_rank(), side_ordinal, output_ordinals); - } + auto fix_ordinal = has_mixed_rank_sides() && side_ordinal >= num_sub_topology(side_rank()); + auto adjusted_ordinal = (fix_ordinal) ? side_ordinal - num_sub_topology(side_rank()) : side_ordinal; + + sub_topology_node_ordinals(side_rank(side_ordinal), adjusted_ordinal, output_ordinals); } template STK_INLINE_FUNCTION void topology::side_nodes(const NodeArray & nodes, unsigned side_ordinal, NodeOutputIterator output_nodes) const { - if (has_mixed_rank_sides() && is_shell_side_ordinal(side_ordinal)) { - sub_topology_nodes( nodes, EDGE_RANK, side_ordinal-num_faces(), output_nodes); - } else { - sub_topology_nodes( nodes, side_rank(), side_ordinal, output_nodes); - } + auto fix_ordinal = has_mixed_rank_sides() && side_ordinal >= num_sub_topology(side_rank()); + auto adjusted_ordinal = (fix_ordinal) ? side_ordinal - num_sub_topology(side_rank()) : side_ordinal; + + sub_topology_nodes(nodes, side_rank(side_ordinal), adjusted_ordinal, output_nodes); } STK_INLINE_FUNCTION @@ -165,7 +147,7 @@ unsigned topology::num_sides() const if (side_rank() != INVALID_RANK) { num_sides_out = side_rank() > NODE_RANK ? num_sub_topology(side_rank()) : num_vertices(); - if (has_mixed_rank_sides()) { + if (has_mixed_rank_sides() && side_rank() > EDGE_RANK) { num_sides_out += num_sub_topology(EDGE_RANK); } } @@ -175,10 +157,10 @@ unsigned topology::num_sides() const STK_INLINE_FUNCTION topology topology::side_topology(unsigned side_ordinal) const { - if (has_mixed_rank_sides() && is_shell_side_ordinal(side_ordinal)) - return shell_side_topology(side_ordinal-num_faces()); + auto fix_ordinal = has_mixed_rank_sides() && side_ordinal >= num_sub_topology(side_rank()); + auto adjusted_ordinal = (fix_ordinal) ? side_ordinal - num_sub_topology(side_rank()) : side_ordinal; - return sub_topology(side_rank(), side_ordinal); + return sub_topology(side_rank(side_ordinal), adjusted_ordinal); } STK_INLINE_FUNCTION diff --git a/packages/stk/stk_transfer/stk_transfer/copy_by_id/SearchById.hpp b/packages/stk/stk_transfer/stk_transfer/copy_by_id/SearchById.hpp index 49ec7291e66f..383d4132448b 100644 --- a/packages/stk/stk_transfer/stk_transfer/copy_by_id/SearchById.hpp +++ b/packages/stk/stk_transfer/stk_transfer/copy_by_id/SearchById.hpp @@ -50,6 +50,7 @@ class SearchById { using KeyToTargetProcessor = std::vector>; using MeshIDSet = std::set; + virtual ~SearchById() = default; virtual void intialize(const TransferCopyByIdMeshAdapter & mesha, const TransferCopyByIdMeshAdapter & meshb) =0; virtual void do_search(const TransferCopyByIdMeshAdapter & mesha, const TransferCopyByIdMeshAdapter & meshb, diff --git a/packages/stk/stk_transfer/stk_transfer/copy_by_id/TransferCopyTranslator.hpp b/packages/stk/stk_transfer/stk_transfer/copy_by_id/TransferCopyTranslator.hpp index 02edd92df1c3..1648da606e66 100644 --- a/packages/stk/stk_transfer/stk_transfer/copy_by_id/TransferCopyTranslator.hpp +++ b/packages/stk/stk_transfer/stk_transfer/copy_by_id/TransferCopyTranslator.hpp @@ -111,6 +111,7 @@ class TranslatorBase TranslatorBase() {} virtual void translate(const void* srcAddr, unsigned srcDataByteSize, DataTypeKey::data_t destType, void* destAddr, unsigned destDataByteSize) const = 0; + virtual ~TranslatorBase() = default; }; struct TranslatorInfo diff --git a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/TextMeshStkTopologyMapping.hpp b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/TextMeshStkTopologyMapping.hpp index 1f52825509ad..977e40aa4fca 100644 --- a/packages/stk/stk_unit_test_utils/stk_unit_test_utils/TextMeshStkTopologyMapping.hpp +++ b/packages/stk/stk_unit_test_utils/stk_unit_test_utils/TextMeshStkTopologyMapping.hpp @@ -45,7 +45,18 @@ struct StkTopologyMapEntry { bool operator!=(const StkTopologyMapEntry &rhs) const { return !(*this == rhs); } - int num_sides() const { return topology.num_sides(); } + int num_face_sides() const { + return 2; // FIXME: Number of stackable faces for a 3D shell is always 2 in STK + } + + int num_sides() const { + if (topology.is_shell()) { + if (topology.dimension() == 3) { + return num_face_sides(); // FIXME: Number of stackable faces for a 3D shell is always 2 in STK + } + } + return topology.num_sides(); + } bool valid_side(unsigned side) const { diff --git a/packages/stk/stk_unit_tests/stk_io/UnitTestGmeshFixture.cpp b/packages/stk/stk_unit_tests/stk_io/UnitTestGmeshFixture.cpp index cd873a952e1c..a53ab4ac9fdb 100644 --- a/packages/stk/stk_unit_tests/stk_io/UnitTestGmeshFixture.cpp +++ b/packages/stk/stk_unit_tests/stk_io/UnitTestGmeshFixture.cpp @@ -33,14 +33,12 @@ // #include // for to_string -#include // for Utils #include // for size_t -#include // for ostream +#include #include #include // for Field #include // for MetaData #include -#include // for allocator, operator+, etc #include // for vector #include "gtest/gtest.h" // for AssertHelper #include "stk_mesh/base/Types.hpp" // for PartVector @@ -49,9 +47,10 @@ enum { SpaceDim = 3 }; TEST(UnitTestGmeshFixture, testUnit) { + if (stk::parallel_machine_size(MPI_COMM_WORLD) > 4) { GTEST_SKIP(); } const size_t num_x = 1; const size_t num_y = 2; - const size_t num_z = 3; + const size_t num_z = 4; const size_t num_surf = 6; std::string config_mesh = std::to_string(num_x) + "x" + std::to_string(num_y) + "x" + diff --git a/packages/stk/stk_unit_tests/stk_io/UnitTestReadFieldData.cpp b/packages/stk/stk_unit_tests/stk_io/UnitTestReadFieldData.cpp index c2957d78e73e..0a07a1462b7b 100644 --- a/packages/stk/stk_unit_tests/stk_io/UnitTestReadFieldData.cpp +++ b/packages/stk/stk_unit_tests/stk_io/UnitTestReadFieldData.cpp @@ -33,6 +33,7 @@ // #include "gtest/gtest.h" // for AssertHelper, ASSERT_TRUE +#include #include // for is_part_io_part #include // for StkMeshIoBroker #include // for BulkData @@ -130,4 +131,40 @@ TEST(StkMeshIoBroker, missingInputField) { unlink(fieldDataFile.c_str()); } +TEST(StkMeshIoBroker, testMissingInputField) { + const std::string fieldDataFile = "testMeshWithMissingFieldData.e"; + std::vector transientTimeSteps = {0.0, 1.0, 2.0}; + std::string transientFieldName = "transient_field"; + + write_mesh_with_transient_field_data(fieldDataFile, transientTimeSteps, transientFieldName); + + std::unique_ptr bulk = stk::mesh::MeshBuilder(MPI_COMM_WORLD).set_spatial_dimension(3).create(); + stk::mesh::MetaData& meta = bulk->mesh_meta_data(); + + const stk::mesh::EntityRank rank = stk::topology::NODE_RANK; + + const std::string fieldNameBad = transientFieldName+"_scalar_bad_field"; + stk::mesh::Field &scalarFieldBad = meta.declare_field(rank, fieldNameBad, 1); + stk::mesh::put_field_on_mesh(scalarFieldBad, meta.universal_part(), nullptr); + stk::io::MeshField meshFieldBad(&scalarFieldBad, fieldNameBad); + + const std::string fieldNameGood = transientFieldName+"_scalar"; + stk::mesh::Field &scalarFieldGood = meta.declare_field(rank, fieldNameGood, 1); + stk::mesh::put_field_on_mesh(scalarFieldGood, meta.universal_part(), nullptr); + stk::io::MeshField meshFieldGood(&scalarFieldGood, fieldNameGood); + + stk::io::StkMeshIoBroker broker(MPI_COMM_WORLD); + + broker.set_bulk_data(*bulk); + broker.add_mesh_database(fieldDataFile, stk::io::READ_MESH); + broker.create_input_mesh(); + + EXPECT_FALSE(stk::io::verify_field_request(broker, meshFieldBad)); + EXPECT_TRUE(stk::io::verify_field_request(broker, meshFieldGood)); + + broker.populate_bulk_data(); + + unlink(fieldDataFile.c_str()); +} + } diff --git a/packages/stk/stk_unit_tests/stk_mesh/UnitTestBucket.cpp b/packages/stk/stk_unit_tests/stk_mesh/UnitTestBucket.cpp index dd064bd2b7a7..6aa92e3d9685 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/UnitTestBucket.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/UnitTestBucket.cpp @@ -125,7 +125,7 @@ TEST(UnitTestingOfBucket, testBucket) std::stringstream out1_str; out1_str << (*b1); bool equal = (gold1 == out1_str.str()); - ASSERT_TRUE(equal); + ASSERT_TRUE(equal)<<"expected str="< bulk = stk::mesh::MeshBuilder(MPI_COMM_WORLD).set_spatial_dimension(3).create(); +/* shell-tri-3 mesh: */ +/* 3 */ +/* * */ +/* /|\ */ +/* / | \ */ +/* 1* | *4 */ +/* \ | / */ +/* \|/ */ +/* * */ +/* 2 */ +/* */ + const std::string meshDesc = + "0,1,SHELL_TRI_3_ALL_FACE_SIDES, 1,2,3, block_1\n\ + 0,2,SHELL_TRI_3_ALL_FACE_SIDES, 2,4,3, block_1"; + + std::vector coords = {0,1,0, 1,0,0, 1,2,0, 2,1,0}; + +//FIXME! text-mesh doesn't recognize the all-face-sides topologies. + stk::unit_test_util::setup_text_mesh(*bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coords)); + + EXPECT_EQ(0u, stk::mesh::count_selected_entities(bulk->mesh_meta_data().universal_part(), bulk->buckets(stk::topology::FACE_RANK))); + + bulk->modification_begin(); + + stk::mesh::Entity elem1 = bulk->get_entity(stk::topology::ELEM_RANK, 1); + const unsigned sideOrdinal = 3; + stk::mesh::PartVector emptySideParts; + stk::mesh::Entity side = bulk->declare_element_side(elem1, sideOrdinal, emptySideParts); + bulk->modification_end(); + + EXPECT_EQ(stk::topology::SHELL_SIDE_BEAM_2, bulk->bucket(side).topology()); +} + +void check_ordinal_and_permutation(const stk::mesh::BulkData& bulk, + stk::mesh::Entity elem, + stk::mesh::EntityRank rank, + const stk::mesh::EntityVector& sideNodes, + stk::mesh::ConnectivityOrdinal expectedSideOrdinal, + stk::mesh::Permutation expectedPerm) +{ + stk::mesh::OrdinalAndPermutation ordPerm = + stk::mesh::get_ordinal_and_permutation(bulk, elem, rank, sideNodes); + EXPECT_EQ(expectedSideOrdinal, ordPerm.first); + EXPECT_EQ(expectedPerm, ordPerm.second); +} + +TEST(DeclareElementSide, shell_tri_3_all_face_sides_no_elem_graph) +{ + if (stk::parallel_machine_size(MPI_COMM_WORLD) != 1) { GTEST_SKIP(); } + + std::shared_ptr bulk = stk::mesh::MeshBuilder(MPI_COMM_WORLD).set_spatial_dimension(3).create(); +/* shell-tri-3 mesh: */ +/* 3 */ +/* * */ +/* /|\ */ +/* / | \ */ +/* 1* | *4 */ +/* \ | / */ +/* \|/ */ +/* * */ +/* 2 */ +/* */ + bulk->modification_begin(); + + stk::mesh::Part& shellPart = bulk->mesh_meta_data().declare_part_with_topology("shell_part", stk::topology::SHELL_TRI_3_ALL_FACE_SIDES); + + stk::mesh::EntityId elemId = 1; + stk::mesh::EntityIdVector nodeIds = {1, 2, 3}; + stk::mesh::declare_element(*bulk, shellPart, elemId, nodeIds); + + elemId = 2; + nodeIds = {2, 4, 3}; + stk::mesh::declare_element(*bulk, shellPart, elemId, nodeIds); + + bulk->modification_end(); + + EXPECT_EQ(0u, stk::mesh::count_selected_entities(bulk->mesh_meta_data().universal_part(), bulk->buckets(stk::topology::FACE_RANK))); + + bulk->modification_begin(); + + stk::mesh::Entity elem1 = bulk->get_entity(stk::topology::ELEM_RANK, 1); + const unsigned sideOrdinal = 3; + stk::mesh::PartVector emptySideParts; + + stk::mesh::EntityVector sideNodes = { + bulk->get_entity(stk::topology::NODE_RANK, 2), + bulk->get_entity(stk::topology::NODE_RANK, 3) + }; + stk::mesh::ConnectivityOrdinal expectedSideOrdinal = sideOrdinal; + stk::mesh::Permutation expectedPerm = static_cast(0); + std::cout<<"checking elem1/sideNodes"<get_entity(stk::topology::ELEM_RANK, 2); + expectedSideOrdinal = 4; + stk::mesh::EntityVector reversedSideNodes = { + bulk->get_entity(stk::topology::NODE_RANK, 3), + bulk->get_entity(stk::topology::NODE_RANK, 2) + }; + expectedPerm = static_cast(0); + std::cout<<"checking elem2/reversedSideNodes"<(1); + std::cout<<"checking elem2/sideNodes"<declare_element_side(elem1, sideOrdinal, emptySideParts); + bulk->modification_end(); + + EXPECT_EQ(stk::topology::SHELL_SIDE_BEAM_2, bulk->bucket(side).topology()); + +//FIXME! +//The following expect should be '2u' but that fails because the side is only +//onnected to 1 element. (Note that this is the 'no-graph' version of this +//test, so the issue is not related to the face-adjacent-elem-graph.) + EXPECT_EQ(1u, bulk->num_connectivity(side, stk::topology::ELEM_RANK)); +} + +TEST(DeclareElementSide, shell_tri_3_all_face_sides_with_elem_graph) +{ + if (stk::parallel_machine_size(MPI_COMM_WORLD) != 1) { GTEST_SKIP(); } + + std::shared_ptr bulk = stk::mesh::MeshBuilder(MPI_COMM_WORLD).set_spatial_dimension(3).create(); +/* shell-tri-3 mesh: */ +/* 3 */ +/* * */ +/* /|\ */ +/* / | \ */ +/* 1* | *4 */ +/* \ | / */ +/* \|/ */ +/* * */ +/* 2 */ +/* */ + bulk->modification_begin(); + + stk::mesh::Part& shellPart = bulk->mesh_meta_data().declare_part_with_topology("shell_part", stk::topology::SHELL_TRI_3_ALL_FACE_SIDES); + + stk::mesh::EntityId elemId = 1; + stk::mesh::EntityIdVector nodeIds = {1, 2, 3}; + stk::mesh::declare_element(*bulk, shellPart, elemId, nodeIds); + + elemId = 2; + nodeIds = {2, 4, 3}; + stk::mesh::declare_element(*bulk, shellPart, elemId, nodeIds); + + bulk->modification_end(); + + bulk->initialize_face_adjacent_element_graph(); + + EXPECT_EQ(0u, stk::mesh::count_selected_entities(bulk->mesh_meta_data().universal_part(), bulk->buckets(stk::topology::FACE_RANK))); + + bulk->modification_begin(); + + stk::mesh::Entity elem1 = bulk->get_entity(stk::topology::ELEM_RANK, 1); + const unsigned sideOrdinal = 3; + stk::mesh::PartVector emptySideParts; + stk::mesh::Entity side = bulk->declare_element_side(elem1, sideOrdinal, emptySideParts); + bulk->modification_end(); + + EXPECT_EQ(stk::topology::SHELL_SIDE_BEAM_2, bulk->bucket(side).topology()); + EXPECT_EQ(2u, bulk->num_connectivity(side, stk::topology::ELEM_RANK)); +} + +TEST(GetSides, hex8) +{ + if (stk::parallel_machine_size(MPI_COMM_WORLD) > 1) { GTEST_SKIP(); } + std::shared_ptr bulk = stk::mesh::MeshBuilder(MPI_COMM_WORLD).set_spatial_dimension(3).create(); + + stk::io::fill_mesh("generated:1x1x1|sideset:xXyYzZ", *bulk); + + stk::mesh::Entity elem1 = bulk->get_entity(stk::topology::ELEM_RANK, 1); + EXPECT_TRUE(bulk->is_valid(elem1)); + EXPECT_EQ(stk::topology::HEX_8, bulk->bucket(elem1).topology()); + + EXPECT_EQ(6u, stk::mesh::num_sides(*bulk, elem1)); + + stk::mesh::EntityVector sides = stk::mesh::get_sides(*bulk, elem1); + std::vector sideOrds = stk::mesh::get_side_ordinals(*bulk, elem1); + ASSERT_EQ(6u, sides.size()); + ASSERT_EQ(6u, sideOrds.size()); + EXPECT_EQ(stk::topology::FACE_RANK, bulk->entity_rank(sides[0])); + EXPECT_EQ(stk::topology::FACE_RANK, bulk->entity_rank(sides[1])); +} + +TEST(GetSides, textmesh_shell_quad_4_EdgeSides) +{ + if (stk::parallel_machine_size(MPI_COMM_WORLD) > 1) { GTEST_SKIP(); } + std::shared_ptr bulk = stk::mesh::MeshBuilder(MPI_COMM_WORLD).set_spatial_dimension(3).create(); //shell-quad-4 mesh: // 6 @@ -1165,7 +1359,18 @@ TEST(CreateAndWrite, DISABLED_textmesh_shell_quad_4_EdgeSides) stk::unit_test_util::setup_text_mesh(*bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coords)); - stk::io::write_mesh("shellq4_edge_sides.g", *bulk); + stk::mesh::Entity elem1 = bulk->get_entity(stk::topology::ELEM_RANK, 1); + EXPECT_TRUE(bulk->is_valid(elem1)); + EXPECT_EQ(stk::topology::SHELL_QUAD_4, bulk->bucket(elem1).topology()); + + EXPECT_EQ(2u, stk::mesh::num_sides(*bulk, elem1)); + + stk::mesh::EntityVector sides = stk::mesh::get_sides(*bulk, elem1); + std::vector sideOrds = stk::mesh::get_side_ordinals(*bulk, elem1); + ASSERT_EQ(2u, sides.size()); + ASSERT_EQ(2u, sideOrds.size()); + EXPECT_EQ(stk::topology::EDGE_RANK, bulk->entity_rank(sides[0])); + EXPECT_EQ(stk::topology::EDGE_RANK, bulk->entity_rank(sides[1])); } TEST(CreateAndWrite, DISABLED_textmesh_shell_quad_4_FullExteriorSkin) @@ -1607,7 +1812,6 @@ TEST(Skinning, createSidesForShellQuad4Block) // 1*----*----*7 // 4 // - stk::mesh::Part& skinPart = bulk->mesh_meta_data().declare_part("mySkin"); const std::string meshDesc = "0,1,SHELL_QUAD_4, 1,4,5,2, block_1\n\ 0,2,SHELL_QUAD_4, 2,5,6,3, block_1\n\ @@ -1620,8 +1824,42 @@ TEST(Skinning, createSidesForShellQuad4Block) stk::unit_test_util::setup_text_mesh(*bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coords)); + auto skinPart = bulk->mesh_meta_data().get_part("surface_1"); + EXPECT_EQ(0u, stk::mesh::count_entities(*bulk, stk::topology::FACE_RANK, *skinPart)); + EXPECT_EQ(8u, stk::mesh::count_entities(*bulk, stk::topology::EDGE_RANK, *skinPart)); +} + +TEST(Skinning, createSidesForShellQuad4BlockExposedBoundary) +{ + if (stk::parallel_machine_size(MPI_COMM_WORLD) != 1) { GTEST_SKIP(); } + std::unique_ptr bulk = stk::mesh::MeshBuilder(MPI_COMM_WORLD).set_spatial_dimension(3).create(); +//shell-quad-4 mesh: +// 6 +// 3*----*----*9 +// | E2 | E4 | +// | | | +// 2*---5*----*8 +// | E1 | E3 | +// | | | +// 1*----*----*7 +// 4 +// + stk::mesh::Part& skinPart = bulk->mesh_meta_data().declare_part("mySkin"); + const std::string meshDesc = + "0,1,SHELL_QUAD_4, 1,4,5,2, block_1\n\ + 0,2,SHELL_QUAD_4, 2,5,6,3, block_1\n\ + 0,3,SHELL_QUAD_4, 4,7,8,5, block_1\n\ + 0,4,SHELL_QUAD_4, 5,8,9,6, block_1|sideset:name=surface_1"; + + std::vector coords = {0,0,0, 0,1,0, 0,2,0, + 1,0,0, 1,1,0, 1,2,0, + 2,0,0, 2,1,0, 2,2,0}; + + stk::unit_test_util::setup_text_mesh(*bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coords)); + stk::mesh::create_exposed_block_boundary_sides(*bulk, bulk->mesh_meta_data().universal_part(), stk::mesh::PartVector{&skinPart}); EXPECT_EQ(8u, stk::mesh::count_entities(*bulk, stk::topology::FACE_RANK, skinPart)); + EXPECT_EQ(0u, stk::mesh::count_entities(*bulk, stk::topology::EDGE_RANK, skinPart)); } TEST(Skinning, createSidesForShellQuad8Block) @@ -1768,3 +2006,132 @@ TEST(CreateAndConvert, read_write_shell_4_all_face_sides) unlink(fileName.c_str()); } + +class CreateReadAndWrite : public stk::unit_test_util::MeshFixture +{ + protected: + std::string get_meshspec_single_shell_quad4_with_all_sides() { + //shell-quad-4 mesh: + // + // 4*---3* + // | E1 | + // | | + // 1*---2* + // + // + const std::string meshDesc = + "0,1,SHELL_QUAD_4, 1,2,3,4, block_1\n\ + |sideset:name=surface_1; data=1,1, 1,2, 1,3, 1,4, 1,5, 1,6; split=topology"; + + std::vector coords = {0,0,0, 1,0,0, 1,1,0, 0,1,0}; + + return stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coords); + } + + std::string get_meshspec_four_shell_quad4_with_sideset() { + //shell-quad-4 mesh: + // 6 + // 3*----*----*9 + // | E2 | E4 | + // | | | + // 2*---5*----*8 + // | E1 | E3 | + // | | | + // 1*----*----*7 + // 4 + // + const std::string meshDesc = + "0,1,SHELL_QUAD_4, 1,4,5,2, block_1\n\ + 0,2,SHELL_QUAD_4, 2,5,6,3, block_1\n\ + 0,3,SHELL_QUAD_4, 4,7,8,5, block_1\n\ + 0,4,SHELL_QUAD_4, 5,8,9,6, block_1\ + |sideset:name=surface_1; data=1,3, 3,3, 3,4, 4,4, 4,5, 2,5, 2,6, 1,6; split=topology"; + + std::vector coords = {0,0,0, 0,1,0, 0,2,0, + 1,0,0, 1,1,0, 1,2,0, + 2,0,0, 2,1,0, 2,2,0}; + + return stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coords); + } + + void create_1_shell_using_ioss_text_mesh(stk::mesh::BulkData& bulk) { + stk::io::fill_mesh("textmesh:" + get_meshspec_single_shell_quad4_with_all_sides(), bulk); + } + + void create_4_shells_using_stk_text_mesh(stk::mesh::BulkData& bulk) { + stk::unit_test_util::setup_text_mesh(bulk, get_meshspec_four_shell_quad4_with_sideset()); + } + + void create_4_shells_using_ioss_text_mesh(stk::mesh::BulkData& bulk) { + stk::io::fill_mesh("textmesh:" + get_meshspec_four_shell_quad4_with_sideset(), bulk); + } + + void check_mesh_properties(stk::mesh::BulkData& bulk, std::vector val) { + stk::mesh::EntityVector entities; + stk::mesh::get_entities(bulk, stk::topology::ELEM_RANK, entities); + + for (auto entity : entities) { + EXPECT_EQ(val[0], bulk.num_nodes(entity)) << bulk.entity_key(entity); + EXPECT_EQ(val[1], bulk.num_edges(entity)) << bulk.entity_key(entity); + EXPECT_EQ(val[2], bulk.num_faces(entity)) << bulk.entity_key(entity); + EXPECT_EQ(val[3], bulk.num_sides(entity)) << bulk.entity_key(entity); + } + + EXPECT_EQ(val[4], stk::mesh::count_selected_entities(bulk.mesh_meta_data().locally_owned_part(), bulk.buckets(stk::topology::EDGE_RANK))); + EXPECT_EQ(val[5], stk::mesh::count_selected_entities(bulk.mesh_meta_data().locally_owned_part(), bulk.buckets(stk::topology::FACE_RANK))); + } +}; + +TEST_F(CreateReadAndWrite, DISABLED_stk_textmesh_shell_quad_4_EdgeSides) +{ + if (stk::parallel_machine_size(MPI_COMM_WORLD) != 1) { GTEST_SKIP(); } + + std::shared_ptr bulk1 = stk::mesh::MeshBuilder(MPI_COMM_WORLD).set_spatial_dimension(3).create(); + std::shared_ptr bulk2 = stk::mesh::MeshBuilder(MPI_COMM_WORLD).set_spatial_dimension(3).create(); + + std::string fileName("shell_quad4_edge_sides_test.g"); + create_4_shells_using_stk_text_mesh(*bulk1); + stk::io::write_mesh(fileName, *bulk1); + check_mesh_properties(*bulk1, {4, 2, 0, 2, 8, 0}); + + stk::io::fill_mesh(fileName, *bulk2); + check_mesh_properties(*bulk2, {4, 2, 0, 2, 8, 0}); + + unlink(fileName.c_str()); +} + +TEST_F(CreateReadAndWrite, ioss_textmesh_shell_quad_4_EdgeSides) +{ + if (stk::parallel_machine_size(MPI_COMM_WORLD) != 1) { GTEST_SKIP(); } + + std::shared_ptr bulk1 = stk::mesh::MeshBuilder(MPI_COMM_WORLD).set_spatial_dimension(3).create(); + std::shared_ptr bulk2 = stk::mesh::MeshBuilder(MPI_COMM_WORLD).set_spatial_dimension(3).create(); + + std::string fileName("shell_quad4_edge_sides_test.g"); + create_4_shells_using_ioss_text_mesh(*bulk1); + stk::io::write_mesh(fileName, *bulk1); + check_mesh_properties(*bulk1, {4, 2, 0, 2, 8, 0}); + + stk::io::fill_mesh(fileName, *bulk2); + check_mesh_properties(*bulk2, {4, 2, 0, 2, 8, 0}); + + unlink(fileName.c_str()); +} + +TEST_F(CreateReadAndWrite, ioss_textmesh_shell_quad_4_FaceAndEdgeSides) +{ + if (stk::parallel_machine_size(MPI_COMM_WORLD) != 1) { GTEST_SKIP(); } + + std::shared_ptr bulk1 = stk::mesh::MeshBuilder(MPI_COMM_WORLD).set_spatial_dimension(3).create(); + std::shared_ptr bulk2 = stk::mesh::MeshBuilder(MPI_COMM_WORLD).set_spatial_dimension(3).create(); + + std::string fileName("shell_quad4_face_and_edge_sides_test.g"); + create_1_shell_using_ioss_text_mesh(*bulk1); + stk::io::write_mesh(fileName, *bulk1); + check_mesh_properties(*bulk1, {4, 4, 2, 6, 4, 2}); + + stk::io::fill_mesh(fileName, *bulk2); + check_mesh_properties(*bulk2, {4, 4, 2, 6, 4, 2}); + + unlink(fileName.c_str()); +} diff --git a/packages/stk/stk_unit_tests/stk_mesh/ngp/NgpDebugFieldSync_Fixtures.hpp b/packages/stk/stk_unit_tests/stk_mesh/ngp/NgpDebugFieldSync_Fixtures.hpp index cb07bb52eb81..df30626384ae 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/ngp/NgpDebugFieldSync_Fixtures.hpp +++ b/packages/stk/stk_unit_tests/stk_mesh/ngp/NgpDebugFieldSync_Fixtures.hpp @@ -43,8 +43,11 @@ #include #include -template using NgpDebugger = stk::mesh::NgpFieldSyncDebugger; -template using StkDebugger = typename NgpDebugger::StkFieldSyncDebuggerType; +template +using NgpDebugger = stk::mesh::NgpFieldSyncDebugger; + +template +using StkDebugger = typename NgpDebugger::StkFieldSyncDebuggerType; void extract_warning(std::string & stdoutString, int numExpectedOccurrences, const std::string & warningString); @@ -107,6 +110,7 @@ class NgpDebugFieldSyncFixture : public stk::unit_test_util::MeshFixture stk::mesh::Selector & fieldParts, unsigned numStates = 1) { + get_meta().enable_field_sync_debugger(); const T init = 1; stk::mesh::Field & field = get_meta().declare_field(rank, name, numStates); stk::mesh::put_field_on_mesh(field, fieldParts, &init); @@ -119,6 +123,7 @@ class NgpDebugFieldSyncFixture : public stk::unit_test_util::MeshFixture unsigned numComponents, stk::mesh::Selector & fieldParts) { + get_meta().enable_field_sync_debugger(); unsigned numStates = 1; const std::vector init(numComponents, 1); stk::mesh::Field & field = get_meta().declare_field(rank, name, numStates); @@ -200,13 +205,13 @@ class NgpDebugFieldSyncFixture : public stk::unit_test_util::MeshFixture template void initialize_ngp_field(stk::mesh::Field & stkField) { - stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::get_updated_ngp_field(stkField); } template void initialize_ngp_field(stk::mesh::FieldBase & stkField) { - stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::get_updated_ngp_field(stkField); } template @@ -393,7 +398,7 @@ class NgpDebugFieldSyncFixture : public stk::unit_test_util::MeshFixture { const int component = 0; stk::mesh::NgpMesh & ngpMesh = stk::mesh::get_updated_ngp_mesh(get_bulk()); - stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); ngpField.sync_to_device(); stk::mesh::for_each_entity_run(ngpMesh, stk::topology::ELEM_RANK, selector, @@ -412,7 +417,7 @@ class NgpDebugFieldSyncFixture : public stk::unit_test_util::MeshFixture void write_vector_field_on_device(stk::mesh::FieldBase & stkField, const stk::mesh::Selector& selector, T value) { stk::mesh::NgpMesh & ngpMesh = stk::mesh::get_updated_ngp_mesh(get_bulk()); - stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); ngpField.sync_to_device(); stk::mesh::for_each_entity_run(ngpMesh, stk::topology::ELEM_RANK, selector, @@ -434,7 +439,7 @@ class NgpDebugFieldSyncFixture : public stk::unit_test_util::MeshFixture void device_field_set_all(stk::mesh::Field & stkField, T value) { stk::mesh::NgpMesh & ngpMesh = stk::mesh::get_updated_ngp_mesh(get_bulk()); - stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); ngpField.set_all(ngpMesh, value); } @@ -490,7 +495,7 @@ class NgpDebugFieldSyncFixture : public stk::unit_test_util::MeshFixture void read_field_on_device(stk::mesh::FieldBase & stkField, const stk::mesh::Selector& selector) { stk::mesh::NgpMesh & ngpMesh = stk::mesh::get_updated_ngp_mesh(get_bulk()); - stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); stk::NgpVector bucketIds = ngpMesh.get_bucket_ids(stkField.entity_rank(), selector); stk::mesh::EntityRank rank = ngpField.get_rank(); @@ -568,7 +573,7 @@ class NgpDebugFieldSyncFixture : public stk::unit_test_util::MeshFixture void read_field_on_device_using_entity_field_data(stk::mesh::Field & stkField) { stk::mesh::NgpMesh & ngpMesh = stk::mesh::get_updated_ngp_mesh(get_bulk()); - stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); stk::NgpVector bucketIds = ngpMesh.get_bucket_ids(stkField.entity_rank(), stkField); stk::mesh::EntityRank rank = ngpField.get_rank(); @@ -592,7 +597,7 @@ class NgpDebugFieldSyncFixture : public stk::unit_test_util::MeshFixture void read_field_on_device_using_mesh_index(stk::mesh::Field & stkField) { stk::mesh::NgpMesh & ngpMesh = stk::mesh::get_updated_ngp_mesh(get_bulk()); - stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); stk::NgpVector bucketIds = ngpMesh.get_bucket_ids(stkField.entity_rank(), stkField); stk::mesh::EntityRank rank = ngpField.get_rank(); diff --git a/packages/stk/stk_unit_tests/stk_mesh/ngp/NgpMeshTest.cpp b/packages/stk/stk_unit_tests/stk_mesh/ngp/NgpMeshTest.cpp index e96770e3cbe3..712c0b47d652 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/ngp/NgpMeshTest.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/ngp/NgpMeshTest.cpp @@ -40,6 +40,8 @@ #include #include #include +#include +#include #include #include #include @@ -47,6 +49,7 @@ #include #include #include +#include #include #include #include @@ -96,18 +99,82 @@ class NgpMeshTest : public stk::mesh::fixtures::TestHexFixture numNodesVec.copy_device_to_host(); ASSERT_EQ(8u, numNodesVec[0]); } + + void run_edge_check(unsigned numExpectedEdgesPerElem) + { + stk::mesh::NgpMesh& ngpMesh = stk::mesh::get_updated_ngp_mesh(get_bulk()); + stk::mesh::for_each_entity_run(ngpMesh, stk::topology::ELEM_RANK, get_meta().universal_part(), + KOKKOS_LAMBDA(const stk::mesh::FastMeshIndex& entityIndex) { + stk::mesh::ConnectedEntities edges = ngpMesh.get_edges(stk::topology::ELEM_RANK, entityIndex); + NGP_EXPECT_EQ(numExpectedEdgesPerElem, edges.size()); + } + ); + } + + void delete_edge_on_each_element() + { + get_bulk().modification_begin(); + + stk::mesh::Entity elem1 = get_bulk().get_entity(stk::topology::ELEM_RANK, 1); + stk::mesh::ConnectedEntities edges = get_bulk().get_connected_entities(elem1, stk::topology::EDGE_RANK); + stk::mesh::ConnectedEntities edgeElems = get_bulk().get_connected_entities(edges[0], stk::topology::ELEM_RANK); + EXPECT_EQ(1u, edgeElems.size()); + EXPECT_EQ(elem1, edgeElems[0]); + + const stk::mesh::ConnectivityOrdinal* edgeElemOrds = get_bulk().begin_ordinals(edges[0], stk::topology::ELEM_RANK); + stk::mesh::Entity edge = edges[0]; + EXPECT_TRUE(get_bulk().destroy_relation(elem1, edge, edgeElemOrds[0])); + EXPECT_TRUE(get_bulk().destroy_entity(edge)); + + stk::mesh::Entity elem2 = get_bulk().get_entity(stk::topology::ELEM_RANK, 2); + edges = get_bulk().get_connected_entities(elem2, stk::topology::EDGE_RANK); + EXPECT_EQ(12u, edges.size()); + edgeElems = get_bulk().get_connected_entities(edges[5], stk::topology::ELEM_RANK); + EXPECT_EQ(1u, edgeElems.size()); + EXPECT_EQ(elem2, edgeElems[0]); + edgeElemOrds = get_bulk().begin_ordinals(edges[5], stk::topology::ELEM_RANK); + edge = edges[5]; + EXPECT_TRUE(get_bulk().destroy_relation(elem2, edge, edgeElemOrds[0])); + EXPECT_TRUE(get_bulk().destroy_entity(edge)); + + get_bulk().modification_end(); + } }; -TEST_F(NgpMeshTest, get_nodes_using_FastMeshIndex) +NGP_TEST_F(NgpMeshTest, get_nodes_using_FastMeshIndex) { run_get_nodes_using_FastMeshIndex_test(); } -TEST_F(NgpMeshTest, get_nodes_using_FastMeshIndex_custom_NgpMemSpace) +NGP_TEST_F(NgpMeshTest, get_nodes_using_FastMeshIndex_custom_NgpMemSpace) { run_get_nodes_using_FastMeshIndex_test(); } +NGP_TEST_F(NgpMeshTest, hexes_with_edges_update_connectivity) +{ + if (stk::parallel_machine_size(MPI_COMM_WORLD) > 1) { GTEST_SKIP(); } + + setup_mesh(1,1,2); + stk::mesh::get_updated_ngp_mesh(get_bulk()); + + stk::mesh::Part& edgePart = get_meta().declare_part("edges", stk::topology::EDGE_RANK); + + stk::mesh::create_edges(get_bulk(), get_meta().universal_part(), &edgePart); + stk::mesh::get_updated_ngp_mesh(get_bulk()); + + EXPECT_EQ(20u, stk::mesh::count_entities(get_bulk(), stk::topology::EDGE_RANK, edgePart)); + + unsigned numExpectedEdgesPerElement = 12; + run_edge_check(numExpectedEdgesPerElement); + + delete_edge_on_each_element(); + EXPECT_EQ(18u, stk::mesh::count_entities(get_bulk(), stk::topology::EDGE_RANK, edgePart)); + + numExpectedEdgesPerElement = 11; + run_edge_check(numExpectedEdgesPerElement); +} + class NgpMeshRankLimit : public stk::mesh::fixtures::TestHexFixture {}; TEST_F(NgpMeshRankLimit, tooManyRanksThrowWithMessage) @@ -247,6 +314,60 @@ NGP_TEST_F(NgpMeshTest, volatileFastSharedCommMap_custom_NgpMemSpace) } } +void test_ngp_permutations_1side_2perms(const stk::mesh::BulkData& mesh, + const stk::mesh::Part& sidePart) +{ + stk::mesh::NgpMesh& ngpMesh = stk::mesh::get_updated_ngp_mesh(mesh); + + stk::mesh::EntityRank sideRank = mesh.mesh_meta_data().side_rank(); + stk::mesh::EntityVector sides; + stk::mesh::get_entities(mesh, sideRank, sidePart, sides); + EXPECT_EQ(1u, sides.size()); + EXPECT_EQ(2u, mesh.num_connectivity(sides[0], stk::topology::ELEM_RANK)); + const stk::mesh::Permutation* hostPerms = mesh.begin_permutations(sides[0], stk::topology::ELEM_RANK); + stk::mesh::Permutation expectedPerm1 = hostPerms[0]; + stk::mesh::Permutation expectedPerm2 = hostPerms[1]; + + stk::mesh::for_each_entity_run(ngpMesh, sideRank, sidePart, + KOKKOS_LAMBDA(const stk::mesh::FastMeshIndex& sideIndex) { + stk::mesh::NgpMesh::Permutations perms = ngpMesh.get_permutations(sideRank, sideIndex, stk::topology::ELEM_RANK); + NGP_EXPECT_EQ(2u, perms.size()); + NGP_EXPECT_EQ(expectedPerm1, perms[0]); + NGP_EXPECT_EQ(expectedPerm2, perms[1]); + }); +} + +NGP_TEST(TestNgpMesh, permutations) +{ + if (stk::parallel_machine_size(MPI_COMM_WORLD) > 1) { GTEST_SKIP(); } + + std::string meshDesc = + "0,1,TRI_3_2D,1,2,3,block_1\n" + "0,2,TRI_3_2D,2,4,3,block_2\n" + "|dimension:2|sideset:name=surface_1; data=1,2"; + + std::shared_ptr mesh = stk::mesh::MeshBuilder(MPI_COMM_WORLD) + .set_spatial_dimension(2).create(); + stk::unit_test_util::setup_text_mesh(*mesh, meshDesc); + + stk::mesh::EntityRank sideRank = mesh->mesh_meta_data().side_rank(); + stk::mesh::Part* sidePart = mesh->mesh_meta_data().get_part("surface_1"); + STK_ThrowAssertMsg(sidePart != nullptr, "failed to find part for surface_1"); + + stk::mesh::EntityVector sides; + stk::mesh::get_entities(*mesh, sideRank, *sidePart, sides); + EXPECT_EQ(1u, sides.size()); + EXPECT_EQ(2u, mesh->num_connectivity(sides[0], stk::topology::ELEM_RANK)); + + stk::mesh::Permutation expectedPerm1 = static_cast(0); + stk::mesh::Permutation expectedPerm2 = static_cast(1); + const stk::mesh::Permutation* permutations = mesh->begin_permutations(sides[0], stk::topology::ELEM_RANK); + EXPECT_EQ(expectedPerm1, permutations[0]); + EXPECT_EQ(expectedPerm2, permutations[1]); + + test_ngp_permutations_1side_2perms(*mesh, *sidePart); +} + namespace { double reduce_on_host(stk::mesh::BulkData& bulk) { @@ -279,6 +400,27 @@ TEST(NgpHostMesh, FieldForEachEntityReduceOnHost_fromTylerVoskuilen) EXPECT_EQ(1.0, maxZ); } +TEST(NgpDeviceMesh, dont_let_stacksize_get_out_of_control) +{ + constexpr size_t tol = 50; + +#ifdef SIERRA_MIGRATION + constexpr size_t expectedBulkDataSize = 1320; +#else + constexpr size_t expectedBulkDataSize = 1256; +#endif + EXPECT_NEAR(expectedBulkDataSize, sizeof(stk::mesh::BulkData), tol); + + constexpr size_t expectedBucketSize = 1120; + EXPECT_NEAR(expectedBucketSize, sizeof(stk::mesh::Bucket), tol); + + constexpr size_t expectedDeviceMeshSize = 472; + EXPECT_NEAR(expectedDeviceMeshSize, sizeof(stk::mesh::DeviceMesh), tol); + + constexpr size_t expectedDeviceBucketSize = 264; + EXPECT_NEAR(expectedDeviceBucketSize, sizeof(stk::mesh::DeviceBucket), tol); +} + void add_elements(std::unique_ptr& bulk) { stk::mesh::MetaData& meta = bulk->mesh_meta_data(); diff --git a/packages/stk/stk_unit_tests/stk_mesh/ngp/UnitTestNgp.cpp b/packages/stk/stk_unit_tests/stk_mesh/ngp/UnitTestNgp.cpp index 4c869fb317de..89ef24dfc2ca 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/ngp/UnitTestNgp.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/ngp/UnitTestNgp.cpp @@ -16,7 +16,7 @@ namespace { -using IntDualViewType = Kokkos::DualView; +using UnsignedDualViewType = Kokkos::DualView; void test_view_of_fields(const stk::mesh::BulkData& bulk, stk::mesh::Field& field1, @@ -39,19 +39,19 @@ void test_view_of_fields(const stk::mesh::BulkData& bulk, Kokkos::deep_copy(fields, hostFields); unsigned numResults = 2; - IntDualViewType result = ngp_unit_test_utils::create_dualview("result",numResults); + UnsignedDualViewType result = ngp_unit_test_utils::create_dualview("result",numResults); Kokkos::parallel_for(stk::ngp::DeviceRangePolicy(0, 2), KOKKOS_LAMBDA(const unsigned& i) { - result.d_view(i) = fields(i).get_ordinal() == i ? 1 : 0; + result.d_view(i) = fields(i).get_ordinal(); }); - result.modify(); - result.sync(); + result.modify(); + result.sync(); - EXPECT_EQ(1, result.h_view(0)); - EXPECT_EQ(1, result.h_view(1)); + EXPECT_EQ(hostFields(0).get_ordinal(), result.h_view(0)); + EXPECT_EQ(hostFields(1).get_ordinal(), result.h_view(1)); #if !defined(KOKKOS_ENABLE_CUDA) && !defined(KOKKOS_ENABLE_HIP) for (unsigned i = 0; i < 2; ++i) { diff --git a/packages/stk/stk_unit_tests/stk_mesh/ngp/UnitTestNgpDebugFieldSync.cpp b/packages/stk/stk_unit_tests/stk_mesh/ngp/UnitTestNgpDebugFieldSync.cpp index 78a62cadccb6..2ac1ae71994c 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/ngp/UnitTestNgpDebugFieldSync.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/ngp/UnitTestNgpDebugFieldSync.cpp @@ -172,8 +172,8 @@ class NgpDebugFieldSync : public NgpDebugFieldSyncFixture } } - template class NgpDebugger = stk::mesh::DefaultNgpFieldSyncDebugger> - void write_scalar_host_field_on_device(stk::mesh::HostField & hostField, T value) + template class NgpDebugger = stk::mesh::DefaultNgpFieldSyncDebugger> + void write_scalar_host_field_on_device(stk::mesh::HostField & hostField, T value) { const int component = 0; stk::mesh::HostMesh hostMesh(get_bulk()); @@ -205,7 +205,7 @@ class NgpDebugFieldSync : public NgpDebugFieldSyncFixture const int component = 1; // Just write to the second component stk::mesh::NgpMesh & ngpMesh = stk::mesh::get_updated_ngp_mesh(get_bulk()); const stk::mesh::MetaData & meta = get_bulk().mesh_meta_data(); - stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); stk::NgpVector bucketIds = ngpMesh.get_bucket_ids(stkField.entity_rank(), meta.locally_owned_part()); stk::mesh::EntityRank rank = ngpField.get_rank(); @@ -226,7 +226,7 @@ class NgpDebugFieldSync : public NgpDebugFieldSyncFixture const int component = 1; // Just write to the second component stk::mesh::NgpMesh & ngpMesh = stk::mesh::get_updated_ngp_mesh(get_bulk()); const stk::mesh::MetaData & meta = get_bulk().mesh_meta_data(); - stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); stk::mesh::for_each_entity_run(ngpMesh, stk::topology::ELEM_RANK, meta.locally_owned_part(), KOKKOS_LAMBDA(const stk::mesh::FastMeshIndex& entity) { @@ -269,7 +269,7 @@ class NgpDebugFieldSync : public NgpDebugFieldSyncFixture { stk::mesh::NgpMesh & ngpMesh = stk::mesh::get_updated_ngp_mesh(get_bulk()); const stk::mesh::MetaData & meta = get_bulk().mesh_meta_data(); - stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); stk::NgpVector bucketIds = ngpMesh.get_bucket_ids(stkField.entity_rank(), meta.locally_owned_part()); stk::mesh::EntityRank rank = ngpField.get_rank(); @@ -334,6 +334,7 @@ class NgpDebugFieldSync : public NgpDebugFieldSyncFixture template stk::mesh::Field & create_scalar_field(stk::topology::rank_t rank, const std::string & name) { + get_meta().enable_field_sync_debugger(); unsigned numStates = 1; const T init = 1; stk::mesh::Field & field = get_meta().declare_field(rank, name, numStates); @@ -344,6 +345,7 @@ class NgpDebugFieldSync : public NgpDebugFieldSyncFixture template stk::mesh::Field & create_scalar_multistate_field(stk::topology::rank_t rank, const std::string & name) { + get_meta().enable_field_sync_debugger(); unsigned numStates = 2; const T init = 1; stk::mesh::Field & field = get_meta().declare_field(rank, name, numStates); @@ -354,6 +356,7 @@ class NgpDebugFieldSync : public NgpDebugFieldSyncFixture template stk::mesh::Field & create_vector_field(stk::topology::rank_t rank, const std::string & name) { + get_meta().enable_field_sync_debugger(); unsigned numStates = 1; unsigned numScalarsPerEntity = 3; const T init[] = {1, 2, 3}; @@ -375,6 +378,7 @@ class NgpDebugFieldSync : public NgpDebugFieldSyncFixture const std::vector> & numElemsInEachPart) { setup_empty_mesh(stk::mesh::BulkData::NO_AUTO_AURA); + get_meta().enable_field_sync_debugger(); stk::mesh::Field & stkField = create_scalar_field(stk::topology::ELEM_RANK, fieldName); create_parts(numElemsInEachPart); @@ -395,6 +399,7 @@ class NgpDebugFieldSync : public NgpDebugFieldSyncFixture const std::vector> & numElemsInEachPart) { setup_empty_mesh(stk::mesh::BulkData::NO_AUTO_AURA); + get_meta().enable_field_sync_debugger(); stk::mesh::Field & stkField = create_scalar_field(stk::topology::ELEM_RANK, fieldName); create_parts(numElemsInEachPart); @@ -416,6 +421,7 @@ class NgpDebugFieldSync : public NgpDebugFieldSyncFixture const std::vector> & numElemsInEachPart) { setup_empty_mesh(stk::mesh::BulkData::NO_AUTO_AURA); + get_meta().enable_field_sync_debugger(); stk::mesh::Field & stkField = create_vector_field(stk::topology::ELEM_RANK, fieldName); create_parts(numElemsInEachPart); @@ -2237,7 +2243,7 @@ TEST_F(NgpDebugFieldSync, ForcedDebugger_HostField_UsageNotProblematic_UsingEnti { if (stk::parallel_machine_size(MPI_COMM_WORLD) != 1) return; stk::mesh::Field & stkField = build_mesh_with_scalar_field("doubleScalarField", {{2, "Part1"}}); - stk::mesh::HostField hostField(get_bulk(), stkField); + stk::mesh::HostField hostField(get_bulk(), stkField); testing::internal::CaptureStdout(); write_scalar_host_field_on_device(hostField, 3.14); @@ -2251,7 +2257,7 @@ TEST_F(NgpDebugFieldSync, ForcedDebugger_HostField_UsageNotProblematic_UsingBuck { if (stk::parallel_machine_size(MPI_COMM_WORLD) != 1) return; stk::mesh::Field & stkField = build_mesh_with_scalar_field("doubleScalarField", {{2, "Part1"}}); - stk::mesh::HostField hostField(get_bulk(), stkField); + stk::mesh::HostField hostField(get_bulk(), stkField); testing::internal::CaptureStdout(); write_scalar_host_field_on_device(hostField, 3.14); @@ -2267,6 +2273,7 @@ class NgpDebugFieldSync_SeparateFieldRestrictions : public NgpDebugFieldSyncFixt void setup_mesh_and_field_with_multiple_restrictions(const std::string& fieldName) { setup_empty_mesh(stk::mesh::BulkData::NO_AUTO_AURA); + get_meta().enable_field_sync_debugger(); stk::mesh::Part& part1 = get_meta().declare_part_with_topology("Part1", stk::topology::HEX_8); stk::mesh::Part& part2 = get_meta().declare_part_with_topology("Part2", stk::topology::HEX_8); diff --git a/packages/stk/stk_unit_tests/stk_mesh/ngp/UnitTestNgpDebugFieldSync_AccessDuringMeshModification.cpp b/packages/stk/stk_unit_tests/stk_mesh/ngp/UnitTestNgpDebugFieldSync_AccessDuringMeshModification.cpp index 815c3f86c40b..e8cf1e4bb8d8 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/ngp/UnitTestNgpDebugFieldSync_AccessDuringMeshModification.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/ngp/UnitTestNgpDebugFieldSync_AccessDuringMeshModification.cpp @@ -102,7 +102,7 @@ class NgpDebugFieldSync_AccessDuringMeshModification : public NgpDebugFieldSyncF { stk::mesh::NgpMesh & ngpMesh = stk::mesh::get_updated_ngp_mesh(get_bulk()); const stk::mesh::MetaData & meta = get_bulk().mesh_meta_data(); - stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); stk::NgpVector bucketIds = ngpMesh.get_bucket_ids(stkField.entity_rank(), meta.locally_owned_part()); stk::mesh::EntityRank rank = ngpField.get_rank(); @@ -1372,7 +1372,7 @@ TEST_F(NgpDebugFieldSync_AccessDuringMeshModification, ScalarAccessUsingEntity_C declare_scalar_field("doubleScalarField", {"Part1", "Part2"}); build_mesh({{"Part1", 2}, {"Part2", 1}}); stk::mesh::Field & stkField = initialized_field("doubleScalarField"); - stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); testing::internal::CaptureStdout(); modify_element_part_membership_with_scalar_field_write_using_entity({{2, "Part2", "Part1"}}, stkField, 3.14); @@ -1392,7 +1392,7 @@ TEST_F(NgpDebugFieldSync_AccessDuringMeshModification, ScalarAccessUsingEntity_C declare_scalar_field("doubleScalarField", {"Part1", "Part2"}); build_mesh({{"Part1", 1}, {"Part2", 1}}); stk::mesh::Field & stkField = initialized_field("doubleScalarField"); - stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); testing::internal::CaptureStdout(); create_element_with_scalar_field_write_using_entity({{3, "Part1"}, {4, "Part1"}}, stkField, 3.14); @@ -1413,7 +1413,7 @@ TEST_F(NgpDebugFieldSync_AccessDuringMeshModification, ScalarAccessUsingEntity_D declare_scalar_field("doubleScalarField", {"Part1", "Part2"}); build_mesh({{"Part1", 2}, {"Part2", 1}}); stk::mesh::Field & stkField = initialized_field("doubleScalarField"); - stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); testing::internal::CaptureStdout(); delete_element_with_scalar_field_write_using_entity({2}, stkField, 3.14); @@ -1433,7 +1433,7 @@ TEST_F(NgpDebugFieldSync_AccessDuringMeshModification, ScalarAccessUsingEntity_M declare_scalar_field("doubleScalarField", {"Part1", "Part2"}); build_mesh({{"Part1", 2}, {"Part2", 1}}); stk::mesh::Field & stkField = initialized_field("doubleScalarField"); - stk::mesh::NgpField ngpFieldCopy = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField ngpFieldCopy = stk::mesh::get_updated_ngp_field(stkField); testing::internal::CaptureStdout(); modify_element_part_membership_with_scalar_field_write_using_entity({{2, "Part2", "Part1"}}, stkField, 3.14); @@ -1459,7 +1459,7 @@ TEST_F(NgpDebugFieldSync_AccessDuringMeshModification, ScalarAccessUsingEntity_C declare_scalar_field("doubleScalarField", {"Part1", "Part2"}); build_mesh({{"Part1", 1}, {"Part2", 1}}); stk::mesh::Field & stkField = initialized_field("doubleScalarField"); - stk::mesh::NgpField ngpFieldCopy = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField ngpFieldCopy = stk::mesh::get_updated_ngp_field(stkField); testing::internal::CaptureStdout(); create_element_with_scalar_field_write_using_entity({{3, "Part1"}, {4, "Part1"}}, stkField, 3.14); @@ -1486,7 +1486,7 @@ TEST_F(NgpDebugFieldSync_AccessDuringMeshModification, ScalarAccessUsingEntity_D declare_scalar_field("doubleScalarField", {"Part1", "Part2"}); build_mesh({{"Part1", 2}, {"Part2", 1}}); stk::mesh::Field & stkField = initialized_field("doubleScalarField"); - stk::mesh::NgpField ngpFieldCopy = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField ngpFieldCopy = stk::mesh::get_updated_ngp_field(stkField); testing::internal::CaptureStdout(); delete_element_with_scalar_field_write_using_entity({2}, stkField, 3.14); @@ -2111,7 +2111,7 @@ TEST_F(NgpDebugFieldSync_AccessDuringMeshModification, TwoConsecutiveMods_Change declare_scalar_field("doubleScalarField", {"Part1", "Part2"}); build_mesh({{"Part1", 3}, {"Part2", 1}}); stk::mesh::Field & stkField = initialized_field("doubleScalarField"); - stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); testing::internal::CaptureStdout(); @@ -2133,7 +2133,7 @@ TEST_F(NgpDebugFieldSync_AccessDuringMeshModification, TwoConsecutiveMods_Create declare_scalar_field("doubleScalarField", {"Part1", "Part2"}); build_mesh({{"Part1", 1}, {"Part2", 1}}); stk::mesh::Field & stkField = initialized_field("doubleScalarField"); - stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); testing::internal::CaptureStdout(); create_element_with_scalar_field_write_using_entity({{3, "Part1"}, {4, "Part1"}}, stkField, 3.14); @@ -2155,7 +2155,7 @@ TEST_F(NgpDebugFieldSync_AccessDuringMeshModification, TwoConsecutiveMods_Delete declare_scalar_field("doubleScalarField", {"Part1", "Part2"}); build_mesh({{"Part1", 3}, {"Part2", 1}}); stk::mesh::Field & stkField = initialized_field("doubleScalarField"); - stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); testing::internal::CaptureStdout(); delete_element_with_scalar_field_write_using_entity({2}, stkField, 3.14); @@ -2446,7 +2446,7 @@ TEST_F(NgpDebugFieldSync_AccessDuringMeshModification, ScalarAccessUsingEntity_T declare_scalar_field("doubleScalarField", {"Part1", "Part2"}); build_mesh({{"Part1", 3}, {"Part2", 1}}); stk::mesh::Field & stkField = initialized_field("doubleScalarField"); - stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); testing::internal::CaptureStdout(); @@ -2469,7 +2469,7 @@ TEST_F(NgpDebugFieldSync_AccessDuringMeshModification, ScalarAccessUsingEntity_T declare_scalar_field("doubleScalarField", {"Part1", "Part2"}); build_mesh({{"Part1", 1}, {"Part2", 1}}); stk::mesh::Field & stkField = initialized_field("doubleScalarField"); - stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); testing::internal::CaptureStdout(); const stk::mesh::EntityId maxIdToRead = 1; // Avoid memory corruption due to accessing old Field after new bucket allocation @@ -2493,7 +2493,7 @@ TEST_F(NgpDebugFieldSync_AccessDuringMeshModification, ScalarAccessUsingEntity_T declare_scalar_field("doubleScalarField", {"Part1", "Part2"}); build_mesh({{"Part1", 3}, {"Part2", 1}}); stk::mesh::Field & stkField = initialized_field("doubleScalarField"); - stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); testing::internal::CaptureStdout(); diff --git a/packages/stk/stk_unit_tests/stk_mesh/ngp/UnitTestNgpDebugFieldSync_MeshModification.cpp b/packages/stk/stk_unit_tests/stk_mesh/ngp/UnitTestNgpDebugFieldSync_MeshModification.cpp index a4064c4fa6fc..d7784e7b2e12 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/ngp/UnitTestNgpDebugFieldSync_MeshModification.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/ngp/UnitTestNgpDebugFieldSync_MeshModification.cpp @@ -389,7 +389,7 @@ TEST_F(NgpDebugFieldSync_MeshModification, ChangeBucket_MissingDeviceFieldUpdate declare_scalar_field("doubleScalarField", {"Part1", "Part2"}); build_mesh({{"Part1", 2}, {"Part2", 1}}); stk::mesh::Field & stkField = initialized_field("doubleScalarField"); - stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); testing::internal::CaptureStdout(); modify_element_part_membership({{2, "Part2", "Part1"}}); @@ -411,7 +411,7 @@ TEST_F(NgpDebugFieldSync_MeshModification, CreateBucket_MissingDeviceFieldUpdate declare_scalar_field("doubleScalarField", {"Part1", "Part2"}); build_mesh({{"Part1", 1}, {"Part2", 1}}); stk::mesh::Field & stkField = initialized_field("doubleScalarField"); - stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); testing::internal::CaptureStdout(); create_element({{3, "Part1"}}, stkField); @@ -434,7 +434,7 @@ TEST_F(NgpDebugFieldSync_MeshModification, DeleteBucket_MissingDeviceFieldUpdate declare_scalar_field("doubleScalarField", {"Part1", "Part2"}); build_mesh({{"Part1", 2}, {"Part2", 1}}); stk::mesh::Field & stkField = initialized_field("doubleScalarField"); - stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); testing::internal::CaptureStdout(); delete_element({2}); @@ -455,7 +455,7 @@ TEST_F(NgpDebugFieldSync_MeshModification, ModifyBucket_StaleDeviceFieldCopy_Acc declare_scalar_field("doubleScalarField", {"Part1", "Part2"}); build_mesh({{"Part1", 2}, {"Part2", 1}}); stk::mesh::Field & stkField = initialized_field("doubleScalarField"); - stk::mesh::NgpField ngpFieldCopy = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField ngpFieldCopy = stk::mesh::get_updated_ngp_field(stkField); testing::internal::CaptureStdout(); modify_element_part_membership({{2, "Part2", "Part1"}}); @@ -482,7 +482,7 @@ TEST_F(NgpDebugFieldSync_MeshModification, CreateBucket_StaleDeviceFieldCopy_Acc declare_scalar_field("doubleScalarField", {"Part1", "Part2"}); build_mesh({{"Part1", 1}, {"Part2", 1}}); stk::mesh::Field & stkField = initialized_field("doubleScalarField"); - stk::mesh::NgpField ngpFieldCopy = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField ngpFieldCopy = stk::mesh::get_updated_ngp_field(stkField); testing::internal::CaptureStdout(); create_element({{3, "Part1"}}, stkField); @@ -510,7 +510,7 @@ TEST_F(NgpDebugFieldSync_MeshModification, DeleteBucket_StaleDeviceFieldCopy_Acc declare_scalar_field("doubleScalarField", {"Part1", "Part2"}); build_mesh({{"Part1", 2}, {"Part2", 1}}); stk::mesh::Field & stkField = initialized_field("doubleScalarField"); - stk::mesh::NgpField ngpFieldCopy = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField ngpFieldCopy = stk::mesh::get_updated_ngp_field(stkField); testing::internal::CaptureStdout(); delete_element({2}); @@ -536,7 +536,7 @@ TEST_F(NgpDebugFieldSync_MeshModification, ModifyBucket_StaleDeviceFieldCopy_Cle declare_scalar_field("doubleScalarField", {"Part1", "Part2"}); build_mesh({{"Part1", 2}, {"Part2", 1}}); stk::mesh::Field & stkField = initialized_field("doubleScalarField"); - stk::mesh::NgpField ngpFieldCopy = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField ngpFieldCopy = stk::mesh::get_updated_ngp_field(stkField); testing::internal::CaptureStdout(); modify_element_part_membership({{2, "Part2", "Part1"}}); @@ -563,7 +563,7 @@ TEST_F(NgpDebugFieldSync_MeshModification, CreateBucket_StaleDeviceFieldCopy_Cle declare_scalar_field("doubleScalarField", {"Part1", "Part2"}); build_mesh({{"Part1", 1}, {"Part2", 1}}); stk::mesh::Field & stkField = initialized_field("doubleScalarField"); - stk::mesh::NgpField ngpFieldCopy = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField ngpFieldCopy = stk::mesh::get_updated_ngp_field(stkField); testing::internal::CaptureStdout(); create_element({{3, "Part1"}}, stkField); @@ -591,7 +591,7 @@ TEST_F(NgpDebugFieldSync_MeshModification, DeleteBucket_StaleDeviceFieldCopy_Cle declare_scalar_field("doubleScalarField", {"Part1", "Part2"}); build_mesh({{"Part1", 2}, {"Part2", 1}}); stk::mesh::Field & stkField = initialized_field("doubleScalarField"); - stk::mesh::NgpField ngpFieldCopy = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField ngpFieldCopy = stk::mesh::get_updated_ngp_field(stkField); testing::internal::CaptureStdout(); delete_element({2}); @@ -1138,7 +1138,7 @@ TEST_F(NgpDebugFieldSync_MeshModification, TwoConsecutiveMods_ChangeBucket_Chang declare_scalar_field("doubleScalarField", {"Part1", "Part2"}); build_mesh({{"Part1", 3}, {"Part2", 1}}); stk::mesh::Field & stkField = initialized_field("doubleScalarField"); - stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); testing::internal::CaptureStdout(); @@ -1162,7 +1162,7 @@ TEST_F(NgpDebugFieldSync_MeshModification, TwoConsecutiveMods_CreateBucket_Creat declare_scalar_field("doubleScalarField", {"Part1", "Part2"}); build_mesh({{"Part1", 1}, {"Part2", 1}}); stk::mesh::Field & stkField = initialized_field("doubleScalarField"); - stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); testing::internal::CaptureStdout(); create_element({{3, "Part1"}}, stkField); @@ -1186,7 +1186,7 @@ TEST_F(NgpDebugFieldSync_MeshModification, TwoConsecutiveMods_DeleteBucket_Delet declare_scalar_field("doubleScalarField", {"Part1", "Part2"}); build_mesh({{"Part1", 3}, {"Part2", 1}}); stk::mesh::Field & stkField = initialized_field("doubleScalarField"); - stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); testing::internal::CaptureStdout(); delete_element({2}); @@ -1788,7 +1788,7 @@ TEST_F(NgpDebugFieldSync_MeshModification, TwoMods_ChangeBucket_ChangeBucket_Mis declare_scalar_field("doubleScalarField", {"Part1", "Part2"}); build_mesh({{"Part1", 3}, {"Part2", 1}}); stk::mesh::Field & stkField = initialized_field("doubleScalarField"); - stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); testing::internal::CaptureStdout(); @@ -1813,7 +1813,7 @@ TEST_F(NgpDebugFieldSync_MeshModification, TwoMods_CreateBucket_CreateBucket_Mis declare_scalar_field("doubleScalarField", {"Part1", "Part2"}); build_mesh({{"Part1", 1}, {"Part2", 1}}); stk::mesh::Field & stkField = initialized_field("doubleScalarField"); - stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); testing::internal::CaptureStdout(); const stk::mesh::EntityId maxIdToRead = 1; // Avoid memory corruption due to accessing old Field after new bucket allocation @@ -1839,7 +1839,7 @@ TEST_F(NgpDebugFieldSync_MeshModification, TwoMods_DeleteBucket_DeleteBucket_Mis declare_scalar_field("doubleScalarField", {"Part1", "Part2"}); build_mesh({{"Part1", 3}, {"Part2", 1}}); stk::mesh::Field & stkField = initialized_field("doubleScalarField"); - stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); testing::internal::CaptureStdout(); diff --git a/packages/stk/stk_unit_tests/stk_mesh/ngp/UnitTestNgpDebugFieldSync_PartialAllocation.cpp b/packages/stk/stk_unit_tests/stk_mesh/ngp/UnitTestNgpDebugFieldSync_PartialAllocation.cpp index 8c7e854e1c9e..455fe72e0f43 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/ngp/UnitTestNgpDebugFieldSync_PartialAllocation.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/ngp/UnitTestNgpDebugFieldSync_PartialAllocation.cpp @@ -45,7 +45,7 @@ class NgpDebugFieldSync_PartialAllocation : public NgpDebugFieldSyncFixture { const int component = 0; stk::mesh::NgpMesh & ngpMesh = stk::mesh::get_updated_ngp_mesh(get_bulk()); - stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); stk::mesh::Selector fieldSelector(stkField); stk::mesh::for_each_entity_run(ngpMesh, stk::topology::ELEM_RANK, fieldSelector, @@ -64,7 +64,7 @@ class NgpDebugFieldSync_PartialAllocation : public NgpDebugFieldSyncFixture { const int component = 0; stk::mesh::NgpMesh & ngpMesh = stk::mesh::get_updated_ngp_mesh(get_bulk()); - stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); stk::NgpVector bucketIds = ngpMesh.get_bucket_ids(stkField.entity_rank(), stkField); stk::mesh::EntityRank rank = ngpField.get_rank(); @@ -1324,6 +1324,7 @@ TEST_F(NgpDebugFieldSync_PartialAllocation, EmptyField_MeshModification_Properly if (stk::parallel_machine_size(MPI_COMM_WORLD) != 1) return; setup_empty_mesh(stk::mesh::BulkData::NO_AUTO_AURA); create_parts({"Part1", "Part2", "Part3"}); + get_meta().enable_field_sync_debugger(); get_meta().declare_field(stk::topology::ELEM_RANK, "doubleScalarField", 1); build_mesh({{"Part1", 1}, {"Part2", 1}, {"Part3", 3}}); stk::mesh::Field & stkField = initialized_field("doubleScalarField"); @@ -1560,7 +1561,7 @@ TEST_F(NgpDebugFieldSync_PartialAllocation, SecondBlock_ScalarAccessUsingEntity_ declare_scalar_field("doubleScalarField", {"Part2", "Part3"}); build_mesh({{"Part1", 1}, {"Part2", 1}, {"Part3", 3}}); stk::mesh::Field & stkField = initialized_field("doubleScalarField"); - stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); testing::internal::CaptureStdout(); modify_element_part_membership({{2, "Part1", "Part2"}, {3, "Part1", "Part3"}}); @@ -1582,7 +1583,7 @@ TEST_F(NgpDebugFieldSync_PartialAllocation, SecondBlock_ScalarAccessUsingEntity_ declare_scalar_field("doubleScalarField", {"Part2", "Part3"}); build_mesh({{"Part1", 1}, {"Part2", 1}, {"Part3", 3}}); stk::mesh::Field & stkField = initialized_field("doubleScalarField"); - stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); testing::internal::CaptureStdout(); modify_element_part_membership_with_scalar_field_write_using_entity({{2, "Part1", "Part2"}, {3, "Part1", "Part3"}}, stkField, 3.14); @@ -1603,7 +1604,7 @@ TEST_F(NgpDebugFieldSync_PartialAllocation, SecondBlock_ScalarAccessUsingEntity_ declare_scalar_field("doubleScalarField", {"Part2", "Part3"}); build_mesh({{"Part1", 1}, {"Part2", 1}, {"Part3", 3}}); stk::mesh::Field & stkField = initialized_field("doubleScalarField"); - stk::mesh::NgpField ngpFieldCopy = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField ngpFieldCopy = stk::mesh::get_updated_ngp_field(stkField); testing::internal::CaptureStdout(); modify_element_part_membership({{2, "Part1", "Part2"}, {3, "Part1", "Part3"}}); @@ -1628,7 +1629,7 @@ TEST_F(NgpDebugFieldSync_PartialAllocation, SecondBlock_ScalarAccessUsingEntity_ declare_scalar_field("doubleScalarField", {"Part2", "Part3"}); build_mesh({{"Part1", 1}, {"Part2", 2}}); stk::mesh::Field & stkField = initialized_field("doubleScalarField"); - stk::mesh::NgpField ngpFieldCopy = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField ngpFieldCopy = stk::mesh::get_updated_ngp_field(stkField); testing::internal::CaptureStdout(); batch_modify_element_part_membership({{3, "Part3", "Part2"}}); @@ -1654,7 +1655,7 @@ TEST_F(NgpDebugFieldSync_PartialAllocation, SecondBlock_ScalarAccessUsingEntity_ declare_scalar_field("doubleScalarField", {"Part2", "Part3"}); build_mesh({{"Part1", 1}, {"Part2", 1}, {"Part3", 3}}); stk::mesh::Field & stkField = initialized_field("doubleScalarField"); - stk::mesh::NgpField ngpFieldCopy = stk::mesh::get_updated_ngp_field(stkField); + stk::mesh::NgpField ngpFieldCopy = stk::mesh::get_updated_ngp_field(stkField); testing::internal::CaptureStdout(); modify_element_part_membership_with_scalar_field_write_using_entity({{2, "Part1", "Part2"}, {3, "Part1", "Part3"}}, stkField, 3.14); diff --git a/packages/stk/stk_unit_tests/stk_mesh/ngp/UnitTestNgpMeshModification.cpp b/packages/stk/stk_unit_tests/stk_mesh/ngp/UnitTestNgpMeshModification.cpp index 7e70384d89f9..a9edbc02b662 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/ngp/UnitTestNgpMeshModification.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/ngp/UnitTestNgpMeshModification.cpp @@ -441,4 +441,33 @@ TEST_F(NgpBatchChangeEntityParts, failedHostAccessAfterDeviceMeshMod) } } +TEST_F(NgpBatchChangeEntityParts, impl_addPartToNode_ngpDevice) +{ + if (stk::parallel_machine_size(MPI_COMM_WORLD) != 1) GTEST_SKIP(); + + build_empty_mesh(1, 1); + + stk::mesh::Part & part1 = m_meta->declare_part_with_topology("part1", stk::topology::NODE); + stk::mesh::Part & part2 = m_meta->declare_part_with_topology("part2", stk::topology::NODE); + const unsigned nodeId = 1; + const stk::mesh::Entity node1 = create_node(*m_bulk, nodeId, {&part1}); + check_bucket_layout(*m_bulk, {{{"part1"}, {nodeId}}}, stk::topology::NODE_RANK); + + DeviceEntitiesType entities("deviceEntities", 1); + DevicePartOrdinalsType addPartOrdinals("deviceAddParts", 1); + DevicePartOrdinalsType removePartOrdinals("deviceRemoveParts", 0); + + stk::mesh::NgpMesh & ngpMesh = stk::mesh::get_updated_ngp_mesh(*m_bulk); + fill_device_views_add_remove_part_from_node(entities, addPartOrdinals, removePartOrdinals, ngpMesh, + node1, &part2, nullptr); + + ngpMesh.impl_batch_change_entity_parts(entities, addPartOrdinals, removePartOrdinals); +// confirm_host_mesh_is_not_synchronized_from_device(ngpMesh); +// +// ngpMesh.sync_to_host(); +// confirm_host_mesh_is_synchronized_from_device(ngpMesh); +// +// check_bucket_layout(*m_bulk, {{{"part1", "part2"}, {nodeId}}}, stk::topology::NODE_RANK); +} + } // namespace diff --git a/packages/stk/stk_unit_tests/stk_mesh/ngp/UnitTestTransposePinnedMapped.cpp b/packages/stk/stk_unit_tests/stk_mesh/ngp/UnitTestTransposePinnedMapped.cpp index 6031e1b1ac84..d0b61df0ec13 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/ngp/UnitTestTransposePinnedMapped.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/ngp/UnitTestTransposePinnedMapped.cpp @@ -108,7 +108,7 @@ class TestTranspose : public ::testing::Test void setup_views(unsigned numBuckets, double overallocationFactor) { - deviceFieldData = stk::mesh::FieldDataDeviceViewType(Kokkos::view_alloc(Kokkos::WithoutInitializing, "deviceFieldData"), numBuckets, ORDER_INDICES(bucketCapacity, numPerEntity)); + deviceFieldData = stk::mesh::FieldDataDeviceViewType(Kokkos::view_alloc(Kokkos::WithoutInitializing, "deviceFieldData"), numBuckets, ORDER_INDICES(bucketCapacity, numPerEntity)); goldHostFieldData = stk::mesh::FieldDataHostViewType(Kokkos::view_alloc(Kokkos::WithoutInitializing, "goldHostFieldData"), numBuckets, ORDER_INDICES(bucketCapacity,numPerEntity)); fill_gold_host_field_data(numBuckets); @@ -171,7 +171,7 @@ class TestTranspose : public ::testing::Test stk::mesh::FieldDataPointerHostViewType hostBucketPtrData; stk::mesh::FieldDataPointerDeviceViewType deviceBucketPtrData; - stk::mesh::FieldDataDeviceViewType deviceFieldData; + stk::mesh::FieldDataDeviceViewType deviceFieldData; stk::mesh::FieldDataHostViewType goldHostFieldData; stk::mesh::UnsignedViewType deviceBucketSizes; diff --git a/packages/stk/stk_unit_tests/stk_mesh/ngp/ngpFieldTest.cpp b/packages/stk/stk_unit_tests/stk_mesh/ngp/ngpFieldTest.cpp index 8cd3fb2dafad..40fa498d66fb 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/ngp/ngpFieldTest.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/ngp/ngpFieldTest.cpp @@ -1768,6 +1768,7 @@ TEST_F(NgpFieldFixture, LateFieldUsage) get_meta().enable_late_fields(); stk::mesh::Field & stkLateIntField = create_field(stk::topology::ELEM_RANK, "lateIntField"); + initialize_ngp_field(stkIntField); // Must update early fields after adding late field initialize_ngp_field(stkLateIntField); int multiplier = 2; @@ -2039,17 +2040,6 @@ TEST(DeviceField, checkSizeof) EXPECT_TRUE(sizeof(stk::mesh::DeviceField) <= expectedNumBytes); } -TEST(DeviceBucket, checkSizeof) -{ -#ifndef STK_HIDE_DEPRECATED_CODE // Delete after 2024/06/26 - size_t expectedNumBytes = 176; -#else - size_t expectedNumBytes = 152; // Value after removing DeviceBucket::m_hostEntities -#endif - std::cout << "sizeof(stk::mesh::DeviceBucket): " << sizeof(stk::mesh::DeviceBucket) << std::endl; - EXPECT_TRUE(sizeof(stk::mesh::DeviceBucket) <= expectedNumBytes); -} - enum PartIds : int { part_1 = 1, @@ -2632,4 +2622,76 @@ TEST_F(NgpFieldUpdate, MoveBackwardForwardBackward) check_field_values(); } +class NgpFieldExecSpaceTestFixture : public stk::unit_test_util::MeshFixture +{ +public: + void setup_empty_mesh_and_field() + { + setup_empty_mesh(stk::mesh::BulkData::NO_AUTO_AURA); + + const std::vector init(1, 1); + stk::mesh::Field& field = get_meta().declare_field(stk::topology::ELEM_RANK, "", 1); + stk::mesh::put_field_on_mesh(field, get_meta().universal_part(), 1, init.data()); + } + + auto get_default_field() + { + return get_meta().get_field(stk::topology::ELEM_RANK, ""); + } +}; + +TEST_F(NgpFieldExecSpaceTestFixture, CheckValidMemSpace) +{ + if (get_parallel_size() != 1) GTEST_SKIP(); + setup_empty_mesh_and_field(); + auto field = get_default_field(); + + EXPECT_NO_THROW((stk::mesh::get_updated_ngp_field(*field))); + + EXPECT_NO_THROW((stk::mesh::get_updated_ngp_field(*field))); + +#ifdef STK_ENABLE_GPU + EXPECT_ANY_THROW( +#else + EXPECT_NO_THROW( +#endif + (stk::mesh::get_updated_ngp_field(*field))); +} + +TEST_F(NgpFieldExecSpaceTestFixture, CheckSameMemSpace) +{ + if (get_parallel_size() != 1) GTEST_SKIP(); + setup_empty_mesh_and_field(); + auto field = get_default_field(); + + auto& ngpField1 = stk::mesh::get_updated_ngp_field(*field); + auto& ngpField2 = stk::mesh::get_updated_ngp_field(*field); + + EXPECT_TRUE((std::is_same_v::MemSpace, stk::mesh::NgpMeshDefaultMemSpace>)); + EXPECT_TRUE((std::is_same_v::MemSpace, std::remove_reference_t::MemSpace>)); +} + +TEST_F(NgpFieldExecSpaceTestFixture, UseNonDefaultMemSpace) +{ + if (get_parallel_size() != 1) GTEST_SKIP(); + setup_empty_mesh_and_field(); + auto field = get_default_field(); + + EXPECT_NO_THROW((stk::mesh::get_updated_ngp_field(*field))); + +#ifdef STK_ENABLE_GPU + EXPECT_ANY_THROW( +#else + EXPECT_NO_THROW( +#endif + (stk::mesh::get_updated_ngp_field(*field))); + +#ifdef STK_ENABLE_GPU + EXPECT_ANY_THROW( +#else + EXPECT_NO_THROW( +#endif + (stk::mesh::get_updated_ngp_field(*field))); +} + } // namespace ngp_field_test diff --git a/packages/stk/stk_unit_tests/stk_middle_mesh_util/test_stk_field_copier.cpp b/packages/stk/stk_unit_tests/stk_middle_mesh_util/test_stk_field_copier.cpp index ec39418446e8..0089d3514845 100644 --- a/packages/stk/stk_unit_tests/stk_middle_mesh_util/test_stk_field_copier.cpp +++ b/packages/stk/stk_unit_tests/stk_middle_mesh_util/test_stk_field_copier.cpp @@ -74,7 +74,8 @@ void check_field(std::shared_ptr bulkDataPtr, stk::mesh::Fi { const stk::mesh::FieldBase& coordField = *(bulkDataPtr->mesh_meta_data_ptr()->coordinate_field()); - stk::mesh::Selector selector(field); + auto meshMetaDataPtr = bulkDataPtr->mesh_meta_data_ptr(); + stk::mesh::Selector selector(field & (meshMetaDataPtr->locally_owned_part() | meshMetaDataPtr->globally_shared_part())); const stk::mesh::BucketVector& buckets = bulkDataPtr->get_buckets(stk::topology::NODE_RANK, selector); for (stk::mesh::Bucket* bucket : buckets) @@ -100,9 +101,9 @@ void check_field(std::shared_ptr bulkDataPtr, stk::mesh::Fi TEST(StkFieldCopier, MiddleMeshToStk) { - std::string meshFileName1 = "generated:3x3x1|sideset:Z|bbox:0,0,0,1,1,1"; + std::string meshFileName1 = "generated:3x3x2|sideset:Z|bbox:0,0,0,1,1,1"; std::string partName1 = "surface_1"; - stk_interface::StkMeshCreator creator1(meshFileName1, "NONE", MPI_COMM_WORLD); + stk_interface::StkMeshCreator creator1(meshFileName1, "RCB", MPI_COMM_WORLD); stk_interface::MeshPart meshPart = creator1.create_mesh_from_part(partName1); mesh::FieldPtr meshField = mesh::create_field(meshPart.mesh, mesh::FieldShape(2, 0, 0), 3); @@ -121,7 +122,7 @@ TEST(StkFieldCopier, MiddleMeshToStk) TEST(StkFieldCopier, StkToMiddleMesh) { - std::string meshFileName1 = "generated:3x3x1|sideset:Z|bbox:0,0,0,1,1,1"; + std::string meshFileName1 = "generated:3x3x2|sideset:Z|bbox:0,0,0,1,1,1"; std::string partName1 = "surface_1"; stk_interface::StkMeshCreator creator1(meshFileName1, "NONE", MPI_COMM_WORLD); stk_interface::MeshPart meshPart = creator1.create_mesh_from_part(partName1); @@ -139,4 +140,4 @@ TEST(StkFieldCopier, StkToMiddleMesh) copier.copy(meshField, *stkField); check_field(creator1.get_bulk_data_ptr(), *stkField); -} \ No newline at end of file +} diff --git a/packages/stk/stk_unit_tests/stk_ngp_test/utest_VirtualFunction.cpp b/packages/stk/stk_unit_tests/stk_ngp_test/utest_VirtualFunction.cpp index 0f975a2adcb6..811691769c87 100644 --- a/packages/stk/stk_unit_tests/stk_ngp_test/utest_VirtualFunction.cpp +++ b/packages/stk/stk_unit_tests/stk_ngp_test/utest_VirtualFunction.cpp @@ -70,6 +70,7 @@ struct SimpleStruct { }; struct BaseStruct { + virtual ~BaseStruct() = default; virtual void set_i(const int) = 0; KOKKOS_FUNCTION virtual void print() { diff --git a/packages/stk/stk_unit_tests/stk_search/UnitTestCoarseSearchTwoBox.cpp b/packages/stk/stk_unit_tests/stk_search/UnitTestCoarseSearchTwoBox.cpp index 25ced2655de8..8ac8f984a259 100644 --- a/packages/stk/stk_unit_tests/stk_search/UnitTestCoarseSearchTwoBox.cpp +++ b/packages/stk/stk_unit_tests/stk_search/UnitTestCoarseSearchTwoBox.cpp @@ -401,4 +401,44 @@ TEST(CoarseSearchCorrectness, Ngp_Local_NotQuiteEdgeOverlappingBoxes_ARBORX) device_local_runTwoBoxTest(stk::search::ARBORX, distanceBetweenBoxCenters, boxSize, expectedNumOverlap); } +TEST(CoarseSearchCorrectness, UpdateInteriorNodeBVsAtomicsIssueReproducer) +{ + std::vector> boxes(256); + + double coord_min = -2.1; + double coord_max = 2.1; + + int x_points = 5; + int y_points = 5; + int z_points = 9; + + for (int i=0; i < z_points; i++) { + double z_coord_min = coord_min + 1*i; + double z_coord_max = coord_max + 1*i; + + for (int j=0; j < y_points; j++) { + double y_coord_min = coord_min + 1*j; + double y_coord_max = coord_max + 1*j; + + for (int k=0; k < x_points; k++) { + double x_coord_min = coord_min + 1*k; + double x_coord_max = coord_max + 1*k; + + int index = k + x_points*j + x_points*y_points*i; + boxes[index] = stk::search::Box(x_coord_min, y_coord_min, z_coord_min, + x_coord_max, y_coord_max, z_coord_max); + } + } + } + + using ExecSpace = Kokkos::DefaultExecutionSpace; + stk::search::CollisionList collisions("collision_list"); + stk::search::morton_lbvh_search::value_type, ExecSpace, stk::search::Box>(boxes, boxes, collisions); + collisions.sync_from_device(); + + int numExpectedCollisions = 38125; + EXPECT_EQ(collisions.get_num_collisions(), numExpectedCollisions); + +} + } diff --git a/packages/stk/stk_unit_tests/stk_topology/topology_test_utils.hpp b/packages/stk/stk_unit_tests/stk_topology/topology_test_utils.hpp index 5266109ae525..04074a484175 100644 --- a/packages/stk/stk_unit_tests/stk_topology/topology_test_utils.hpp +++ b/packages/stk/stk_unit_tests/stk_topology/topology_test_utils.hpp @@ -130,7 +130,10 @@ inline void check_side_node_ordinals(stk::topology topology, const std::vector 0) ? sideTopo.num_nodes() : 1; std::vector side_node_ordinals(numSideNodes); topology.side_node_ordinals(side, side_node_ordinals.data()); - EXPECT_EQ(gold_side_node_ordinals[side], side_node_ordinals); + + for (unsigned i = 0; i < numSideNodes; ++i) { + EXPECT_EQ(gold_side_node_ordinals[side][i], side_node_ordinals[i]); + } } } diff --git a/packages/stk/stk_unit_tests/stk_topology/utest_c/unit_test_shell_quad.cpp b/packages/stk/stk_unit_tests/stk_topology/utest_c/unit_test_shell_quad.cpp index 8ed599943345..10aa5aa01785 100644 --- a/packages/stk/stk_unit_tests/stk_topology/utest_c/unit_test_shell_quad.cpp +++ b/packages/stk/stk_unit_tests/stk_topology/utest_c/unit_test_shell_quad.cpp @@ -71,7 +71,7 @@ std::vector> get_gold_side_node_ordinals_shell_quad4() { }; } -TEST(stk_topology, shell_shell_quad4) +TEST(stk_topology, shell_quad_4) { stk::topology t = stk::topology::SHELL_QUAD_4; @@ -104,10 +104,10 @@ TEST(stk_topology, shell_shell_quad4) EXPECT_EQ(t.side_topology(0), stk::topology::QUAD_4); EXPECT_EQ(t.side_topology(1), stk::topology::QUAD_4); - EXPECT_EQ(t.side_topology(2), stk::topology::SHELL_SIDE_BEAM_2); - EXPECT_EQ(t.side_topology(3), stk::topology::SHELL_SIDE_BEAM_2); - EXPECT_EQ(t.side_topology(4), stk::topology::SHELL_SIDE_BEAM_2); - EXPECT_EQ(t.side_topology(5), stk::topology::SHELL_SIDE_BEAM_2); + EXPECT_EQ(t.side_topology(2), stk::topology::LINE_2); + EXPECT_EQ(t.side_topology(3), stk::topology::LINE_2); + EXPECT_EQ(t.side_topology(4), stk::topology::LINE_2); + EXPECT_EQ(t.side_topology(5), stk::topology::LINE_2); check_edge_node_ordinals(t, get_gold_edge_node_ordinals_shell_quad4()); check_edge_nodes(t, get_gold_edge_node_ordinals_shell_quad4()); @@ -159,10 +159,10 @@ void check_shell_quad_4_on_device() NGP_EXPECT_EQ(t.side_topology(0), stk::topology::QUAD_4); NGP_EXPECT_EQ(t.side_topology(1), stk::topology::QUAD_4); - NGP_EXPECT_EQ(t.side_topology(2), stk::topology::SHELL_SIDE_BEAM_2); - NGP_EXPECT_EQ(t.side_topology(3), stk::topology::SHELL_SIDE_BEAM_2); - NGP_EXPECT_EQ(t.side_topology(4), stk::topology::SHELL_SIDE_BEAM_2); - NGP_EXPECT_EQ(t.side_topology(5), stk::topology::SHELL_SIDE_BEAM_2); + NGP_EXPECT_EQ(t.side_topology(2), stk::topology::LINE_2); + NGP_EXPECT_EQ(t.side_topology(3), stk::topology::LINE_2); + NGP_EXPECT_EQ(t.side_topology(4), stk::topology::LINE_2); + NGP_EXPECT_EQ(t.side_topology(5), stk::topology::LINE_2); check_edge_node_ordinals_ngp(t, goldEdgeNodeOrdinals); check_edge_nodes_ngp(t, goldEdgeNodeOrdinals); @@ -210,7 +210,7 @@ std::vector> get_gold_side_node_ordinals_shell_quad8() { }; } -TEST(stk_topology, shell_shell_quad8) +TEST(stk_topology, shell_quad_8) { stk::topology t = stk::topology::SHELL_QUAD_8; @@ -243,10 +243,10 @@ TEST(stk_topology, shell_shell_quad8) EXPECT_EQ(t.side_topology(0), stk::topology::QUAD_8); EXPECT_EQ(t.side_topology(1), stk::topology::QUAD_8); - EXPECT_EQ(t.side_topology(2), stk::topology::SHELL_SIDE_BEAM_3); - EXPECT_EQ(t.side_topology(3), stk::topology::SHELL_SIDE_BEAM_3); - EXPECT_EQ(t.side_topology(4), stk::topology::SHELL_SIDE_BEAM_3); - EXPECT_EQ(t.side_topology(5), stk::topology::SHELL_SIDE_BEAM_3); + EXPECT_EQ(t.side_topology(2), stk::topology::LINE_3); + EXPECT_EQ(t.side_topology(3), stk::topology::LINE_3); + EXPECT_EQ(t.side_topology(4), stk::topology::LINE_3); + EXPECT_EQ(t.side_topology(5), stk::topology::LINE_3); check_edge_node_ordinals(t, get_gold_edge_node_ordinals_shell_quad8()); check_edge_nodes(t, get_gold_edge_node_ordinals_shell_quad8()); @@ -298,10 +298,10 @@ void check_shell_quad_8_on_device() NGP_EXPECT_EQ(t.side_topology(0), stk::topology::QUAD_8); NGP_EXPECT_EQ(t.side_topology(1), stk::topology::QUAD_8); - NGP_EXPECT_EQ(t.side_topology(2), stk::topology::SHELL_SIDE_BEAM_3); - NGP_EXPECT_EQ(t.side_topology(3), stk::topology::SHELL_SIDE_BEAM_3); - NGP_EXPECT_EQ(t.side_topology(4), stk::topology::SHELL_SIDE_BEAM_3); - NGP_EXPECT_EQ(t.side_topology(5), stk::topology::SHELL_SIDE_BEAM_3); + NGP_EXPECT_EQ(t.side_topology(2), stk::topology::LINE_3); + NGP_EXPECT_EQ(t.side_topology(3), stk::topology::LINE_3); + NGP_EXPECT_EQ(t.side_topology(4), stk::topology::LINE_3); + NGP_EXPECT_EQ(t.side_topology(5), stk::topology::LINE_3); check_edge_node_ordinals_ngp(t, goldEdgeNodeOrdinals); check_edge_nodes_ngp(t, goldEdgeNodeOrdinals); @@ -349,7 +349,7 @@ std::vector> get_gold_side_node_ordinals_shell_quad9() { }; } -TEST(stk_topology, shell_shell_quad9) +TEST(stk_topology, shell_quad_9) { stk::topology t = stk::topology::SHELL_QUAD_9; @@ -382,10 +382,10 @@ TEST(stk_topology, shell_shell_quad9) EXPECT_EQ(t.side_topology(0), stk::topology::QUAD_9); EXPECT_EQ(t.side_topology(1), stk::topology::QUAD_9); - EXPECT_EQ(t.side_topology(2), stk::topology::SHELL_SIDE_BEAM_3); - EXPECT_EQ(t.side_topology(3), stk::topology::SHELL_SIDE_BEAM_3); - EXPECT_EQ(t.side_topology(4), stk::topology::SHELL_SIDE_BEAM_3); - EXPECT_EQ(t.side_topology(5), stk::topology::SHELL_SIDE_BEAM_3); + EXPECT_EQ(t.side_topology(2), stk::topology::LINE_3); + EXPECT_EQ(t.side_topology(3), stk::topology::LINE_3); + EXPECT_EQ(t.side_topology(4), stk::topology::LINE_3); + EXPECT_EQ(t.side_topology(5), stk::topology::LINE_3); check_edge_node_ordinals(t, get_gold_edge_node_ordinals_shell_quad9()); check_edge_nodes(t, get_gold_edge_node_ordinals_shell_quad9()); @@ -437,10 +437,10 @@ void check_shell_quad_9_on_device() NGP_EXPECT_EQ(t.side_topology(0), stk::topology::QUAD_9); NGP_EXPECT_EQ(t.side_topology(1), stk::topology::QUAD_9); - NGP_EXPECT_EQ(t.side_topology(2), stk::topology::SHELL_SIDE_BEAM_3); - NGP_EXPECT_EQ(t.side_topology(3), stk::topology::SHELL_SIDE_BEAM_3); - NGP_EXPECT_EQ(t.side_topology(4), stk::topology::SHELL_SIDE_BEAM_3); - NGP_EXPECT_EQ(t.side_topology(5), stk::topology::SHELL_SIDE_BEAM_3); + NGP_EXPECT_EQ(t.side_topology(2), stk::topology::LINE_3); + NGP_EXPECT_EQ(t.side_topology(3), stk::topology::LINE_3); + NGP_EXPECT_EQ(t.side_topology(4), stk::topology::LINE_3); + NGP_EXPECT_EQ(t.side_topology(5), stk::topology::LINE_3); check_edge_node_ordinals_ngp(t, goldEdgeNodeOrdinals); check_edge_nodes_ngp(t, goldEdgeNodeOrdinals); diff --git a/packages/stk/stk_unit_tests/stk_topology/utest_c/unit_test_shell_tri.cpp b/packages/stk/stk_unit_tests/stk_topology/utest_c/unit_test_shell_tri.cpp index 5bf526b6ec65..ff1cad2ab650 100644 --- a/packages/stk/stk_unit_tests/stk_topology/utest_c/unit_test_shell_tri.cpp +++ b/packages/stk/stk_unit_tests/stk_topology/utest_c/unit_test_shell_tri.cpp @@ -76,6 +76,7 @@ TEST(stk_topology, shell_tri_3) EXPECT_TRUE(t.is_valid()); EXPECT_TRUE(t.has_homogeneous_faces()); EXPECT_TRUE(t.is_shell()); + EXPECT_TRUE(t.is_shell_with_face_sides()); //FIXME this will become false EXPECT_EQ(t.rank(),stk::topology::ELEMENT_RANK); EXPECT_EQ(t.side_rank(),stk::topology::FACE_RANK); @@ -101,9 +102,9 @@ TEST(stk_topology, shell_tri_3) EXPECT_EQ(t.side_topology(0), stk::topology::TRI_3); EXPECT_EQ(t.side_topology(1), stk::topology::TRI_3); - EXPECT_EQ(t.side_topology(2), stk::topology::SHELL_SIDE_BEAM_2); - EXPECT_EQ(t.side_topology(3), stk::topology::SHELL_SIDE_BEAM_2); - EXPECT_EQ(t.side_topology(4), stk::topology::SHELL_SIDE_BEAM_2); + EXPECT_EQ(t.side_topology(2), stk::topology::LINE_2); + EXPECT_EQ(t.side_topology(3), stk::topology::LINE_2); + EXPECT_EQ(t.side_topology(4), stk::topology::LINE_2); check_edge_node_ordinals(t, get_gold_edge_node_ordinals_shell_tri3()); check_edge_nodes(t, get_gold_edge_node_ordinals_shell_tri3()); @@ -155,9 +156,9 @@ void check_shell_tri_3_on_device() NGP_EXPECT_EQ(t.side_topology(0), stk::topology::TRI_3); NGP_EXPECT_EQ(t.side_topology(1), stk::topology::TRI_3); - NGP_EXPECT_EQ(t.side_topology(2), stk::topology::SHELL_SIDE_BEAM_2); - NGP_EXPECT_EQ(t.side_topology(3), stk::topology::SHELL_SIDE_BEAM_2); - NGP_EXPECT_EQ(t.side_topology(4), stk::topology::SHELL_SIDE_BEAM_2); + NGP_EXPECT_EQ(t.side_topology(2), stk::topology::LINE_2); + NGP_EXPECT_EQ(t.side_topology(3), stk::topology::LINE_2); + NGP_EXPECT_EQ(t.side_topology(4), stk::topology::LINE_2); check_edge_node_ordinals_ngp(t, goldEdgeNodeOrdinals); check_edge_nodes_ngp(t, goldEdgeNodeOrdinals); @@ -235,9 +236,9 @@ TEST(stk_topology, shell_tri_4) EXPECT_EQ(t.side_topology(0), stk::topology::TRI_4); EXPECT_EQ(t.side_topology(1), stk::topology::TRI_4); - EXPECT_EQ(t.side_topology(2), stk::topology::SHELL_SIDE_BEAM_2); - EXPECT_EQ(t.side_topology(3), stk::topology::SHELL_SIDE_BEAM_2); - EXPECT_EQ(t.side_topology(4), stk::topology::SHELL_SIDE_BEAM_2); + EXPECT_EQ(t.side_topology(2), stk::topology::LINE_2); + EXPECT_EQ(t.side_topology(3), stk::topology::LINE_2); + EXPECT_EQ(t.side_topology(4), stk::topology::LINE_2); check_edge_node_ordinals(t, get_gold_edge_node_ordinals_shell_tri4()); check_edge_nodes(t, get_gold_edge_node_ordinals_shell_tri4()); @@ -289,9 +290,9 @@ void check_shell_tri_4_on_device() NGP_EXPECT_EQ(t.side_topology(0), stk::topology::TRI_4); NGP_EXPECT_EQ(t.side_topology(1), stk::topology::TRI_4); - NGP_EXPECT_EQ(t.side_topology(2), stk::topology::SHELL_SIDE_BEAM_2); - NGP_EXPECT_EQ(t.side_topology(3), stk::topology::SHELL_SIDE_BEAM_2); - NGP_EXPECT_EQ(t.side_topology(4), stk::topology::SHELL_SIDE_BEAM_2); + NGP_EXPECT_EQ(t.side_topology(2), stk::topology::LINE_2); + NGP_EXPECT_EQ(t.side_topology(3), stk::topology::LINE_2); + NGP_EXPECT_EQ(t.side_topology(4), stk::topology::LINE_2); check_edge_node_ordinals_ngp(t, goldEdgeNodeOrdinals); check_edge_nodes_ngp(t, goldEdgeNodeOrdinals); @@ -369,9 +370,9 @@ TEST(stk_topology, shell_tri_6) EXPECT_EQ(t.side_topology(0), stk::topology::TRI_6); EXPECT_EQ(t.side_topology(1), stk::topology::TRI_6); - EXPECT_EQ(t.side_topology(2), stk::topology::SHELL_SIDE_BEAM_3); - EXPECT_EQ(t.side_topology(3), stk::topology::SHELL_SIDE_BEAM_3); - EXPECT_EQ(t.side_topology(4), stk::topology::SHELL_SIDE_BEAM_3); + EXPECT_EQ(t.side_topology(2), stk::topology::LINE_3); + EXPECT_EQ(t.side_topology(3), stk::topology::LINE_3); + EXPECT_EQ(t.side_topology(4), stk::topology::LINE_3); check_edge_node_ordinals(t, get_gold_edge_node_ordinals_shell_tri6()); check_edge_nodes(t, get_gold_edge_node_ordinals_shell_tri6()); @@ -427,9 +428,9 @@ void check_shell_tri_6_on_device() NGP_EXPECT_EQ(t.side_topology(0), stk::topology::TRI_6); NGP_EXPECT_EQ(t.side_topology(1), stk::topology::TRI_6); - NGP_EXPECT_EQ(t.side_topology(2), stk::topology::SHELL_SIDE_BEAM_3); - NGP_EXPECT_EQ(t.side_topology(3), stk::topology::SHELL_SIDE_BEAM_3); - NGP_EXPECT_EQ(t.side_topology(4), stk::topology::SHELL_SIDE_BEAM_3); + NGP_EXPECT_EQ(t.side_topology(2), stk::topology::LINE_3); + NGP_EXPECT_EQ(t.side_topology(3), stk::topology::LINE_3); + NGP_EXPECT_EQ(t.side_topology(4), stk::topology::LINE_3); check_edge_node_ordinals_ngp(t, goldEdgeNodeOrdinals); check_edge_nodes_ngp(t, goldEdgeNodeOrdinals); diff --git a/packages/stk/stk_unit_tests/stk_topology/utest_c/unit_test_shell_tri_all_face_sides.cpp b/packages/stk/stk_unit_tests/stk_topology/utest_c/unit_test_shell_tri_all_face_sides.cpp index eb357fe92c4a..dad3e3e13ae4 100644 --- a/packages/stk/stk_unit_tests/stk_topology/utest_c/unit_test_shell_tri_all_face_sides.cpp +++ b/packages/stk/stk_unit_tests/stk_topology/utest_c/unit_test_shell_tri_all_face_sides.cpp @@ -35,7 +35,7 @@ #include "Kokkos_Core.hpp" // for parallel_for, KOKKOS_LAMBDA #include "gtest/gtest.h" // for AssertionResult, Message, TestPartResult #include "stk_ngp_test/ngp_test.hpp" // for NGP_EXPECT_EQ, NGP_EXPECT_FALSE, NGP_EXPECT_... -#include "stk_topology/topology.hpp" // for topology, topology::QUAD_4, topology::QUAD_8 +#include "stk_topology/topology.hpp" #include "topology_test_utils.hpp" // for check_edge_node_ordinals, check_edge_node_or... #include // for size_t #include // for operator<<, basic_ostream, basic_ostream<>::... @@ -78,6 +78,7 @@ TEST(stk_topology, shell_tri_3_all_face_sides) EXPECT_TRUE(t.is_valid()); EXPECT_FALSE(t.has_homogeneous_faces()); EXPECT_TRUE(t.is_shell()); + EXPECT_TRUE(t.is_shell_with_face_sides()); EXPECT_EQ(t.rank(),stk::topology::ELEMENT_RANK); EXPECT_EQ(t.side_rank(),stk::topology::FACE_RANK); @@ -135,6 +136,7 @@ void check_shell_tri_3_all_face_sides_on_device() NGP_EXPECT_TRUE(t.is_valid()); NGP_EXPECT_FALSE(t.has_homogeneous_faces()); NGP_EXPECT_TRUE(t.is_shell()); + NGP_EXPECT_TRUE(t.is_shell_with_face_sides()); NGP_EXPECT_EQ(t.rank(),stk::topology::ELEMENT_RANK); NGP_EXPECT_EQ(t.side_rank(),stk::topology::FACE_RANK); @@ -221,6 +223,7 @@ TEST(stk_topology, shell_tri_4_all_face_sides) EXPECT_TRUE(t.is_valid()); EXPECT_FALSE(t.has_homogeneous_faces()); EXPECT_TRUE(t.is_shell()); + EXPECT_TRUE(t.is_shell_with_face_sides()); EXPECT_EQ(t.rank(),stk::topology::ELEMENT_RANK); EXPECT_EQ(t.side_rank(),stk::topology::FACE_RANK); @@ -278,6 +281,7 @@ void check_shell_tri_4_all_face_sides_on_device() NGP_EXPECT_TRUE(t.is_valid()); NGP_EXPECT_FALSE(t.has_homogeneous_faces()); NGP_EXPECT_TRUE(t.is_shell()); + NGP_EXPECT_TRUE(t.is_shell_with_face_sides()); NGP_EXPECT_EQ(t.rank(),stk::topology::ELEMENT_RANK); NGP_EXPECT_EQ(t.side_rank(),stk::topology::FACE_RANK); @@ -364,6 +368,7 @@ TEST(stk_topology, shell_tri_6_all_face_sides) EXPECT_TRUE(t.is_valid()); EXPECT_FALSE(t.has_homogeneous_faces()); EXPECT_TRUE(t.is_shell()); + EXPECT_TRUE(t.is_shell_with_face_sides()); EXPECT_EQ(t.rank(),stk::topology::ELEMENT_RANK); EXPECT_EQ(t.side_rank(),stk::topology::FACE_RANK); @@ -421,6 +426,7 @@ void check_shell_tri_6_all_face_sides_on_device() NGP_EXPECT_TRUE(t.is_valid()); NGP_EXPECT_FALSE(t.has_homogeneous_faces()); NGP_EXPECT_TRUE(t.is_shell()); + NGP_EXPECT_TRUE(t.is_shell_with_face_sides()); NGP_EXPECT_EQ(t.rank(),stk::topology::ELEMENT_RANK); NGP_EXPECT_EQ(t.side_rank(),stk::topology::FACE_RANK); diff --git a/packages/stk/stk_unit_tests/stk_util/diag/UnitTestParallelTimerImpl.cpp b/packages/stk/stk_unit_tests/stk_util/diag/UnitTestParallelTimerImpl.cpp new file mode 100644 index 000000000000..d28a0568e7d8 --- /dev/null +++ b/packages/stk/stk_unit_tests/stk_util/diag/UnitTestParallelTimerImpl.cpp @@ -0,0 +1,255 @@ +#include "gtest/gtest.h" +#include "stk_util/diag/ParallelTimerImpl.hpp" +#include "stk_util/diag/Timer.hpp" +#include "stk_util/diag/TimerImpl.hpp" +#include "stk_util/diag/TimerMetricTraits.hpp" +#include "stk_util/parallel/Parallel.hpp" + +namespace { +stk::diag::impl::ParallelTimer create_timer(const std::string& name, double val) +{ + stk::diag::impl::ParallelTimer timer; + timer.m_name = name; + timer.m_cpuTime.m_value = val; + timer.m_cpuTime.m_sum = val; + timer.m_cpuTime.m_min = val; + timer.m_cpuTime.m_max = val; + + return timer; +} +} + +namespace stk::diag { + +class TimerTester +{ + public: + TimerTester(Timer& timer) : + m_timer(timer) + {} + + double getCPUTime() const + { + return m_timer.getMetric().m_accumulatedLap; + } + + void setCPUTime(double val) + { + m_timer.m_timerImpl->m_cpuTime.m_accumulatedLap = val; + } + + private: + Timer& m_timer; +}; +} + +TEST(ParallelTimer, MergeSingleLevelTimers) +{ + double val1 = 1.0, val2 = 2.0; + stk::diag::impl::ParallelTimer t1 = create_timer("timer1", val1); + stk::diag::impl::ParallelTimer t2 = create_timer("timer2", val2); + + stk::diag::impl::merge_parallel_timer(t1, t2, false); + + EXPECT_EQ(t1.m_cpuTime.m_value, val1); + EXPECT_EQ(t1.m_cpuTime.m_sum, val1 + val2); + EXPECT_EQ(t1.m_cpuTime.m_min, val1); + EXPECT_EQ(t1.m_cpuTime.m_max, val2); +} + +TEST(ParallelTimer, MergeTwoLevelTimers) +{ + double val1 = 1.0, val2 = 2.0, val3 = 3.0, val4 = 4.0; + stk::diag::impl::ParallelTimer t1 = create_timer("timer1", val1); + stk::diag::impl::ParallelTimer t2 = create_timer("timer2", val2); + stk::diag::impl::ParallelTimer t3 = create_timer("timer3", val3); + stk::diag::impl::ParallelTimer t4 = create_timer("timer2", val4); + + t1.m_subtimerList.push_back(t2); + t3.m_subtimerList.push_back(t4); + + stk::diag::impl::merge_parallel_timer(t1, t3, false); + + EXPECT_EQ(t1.m_cpuTime.m_value, val1); + EXPECT_EQ(t1.m_cpuTime.m_sum, val1 + val3); + EXPECT_EQ(t1.m_cpuTime.m_min, val1); + EXPECT_EQ(t1.m_cpuTime.m_max, val3); + EXPECT_EQ(t1.m_subtimerList.size(), 1U); + + stk::diag::impl::ParallelTimer t2Merged = t1.m_subtimerList.front(); + EXPECT_EQ(t2Merged.m_cpuTime.m_value, val2); + EXPECT_EQ(t2Merged.m_cpuTime.m_sum, val2 + val4); + EXPECT_EQ(t2Merged.m_cpuTime.m_min, val2); + EXPECT_EQ(t2Merged.m_cpuTime.m_max, val4); +} + +TEST(ParallelTimer, MergeTwoLevelTimersDifferentNames) +{ + double val1 = 1.0, val2 = 2.0, val3 = 3.0, val4 = 4.0; + stk::diag::impl::ParallelTimer t1 = create_timer("timer1", val1); + stk::diag::impl::ParallelTimer t2 = create_timer("timer2", val2); + stk::diag::impl::ParallelTimer t3 = create_timer("timer3", val3); + stk::diag::impl::ParallelTimer t4 = create_timer("timer4", val4); + + t1.m_subtimerList.push_back(t2); + t3.m_subtimerList.push_back(t4); + + stk::diag::impl::merge_parallel_timer(t1, t3, false); + + EXPECT_EQ(t1.m_cpuTime.m_value, val1); + EXPECT_EQ(t1.m_cpuTime.m_sum, val1 + val3); + EXPECT_EQ(t1.m_cpuTime.m_min, val1); + EXPECT_EQ(t1.m_cpuTime.m_max, val3); + EXPECT_EQ(t1.m_subtimerList.size(), 2U); + + stk::diag::impl::ParallelTimer t2Copy = t1.m_subtimerList.front(); + EXPECT_EQ(t2Copy.m_cpuTime.m_value, val2); + EXPECT_EQ(t2Copy.m_cpuTime.m_sum, val2); + EXPECT_EQ(t2Copy.m_cpuTime.m_min, val2); + EXPECT_EQ(t2Copy.m_cpuTime.m_max, val2); + + stk::diag::impl::ParallelTimer t4Copy = *(++t1.m_subtimerList.begin()); + EXPECT_EQ(t4Copy.m_cpuTime.m_value, val4); + EXPECT_EQ(t4Copy.m_cpuTime.m_sum, val4); + EXPECT_EQ(t4Copy.m_cpuTime.m_min, val4); + EXPECT_EQ(t4Copy.m_cpuTime.m_max, val4); +} + +TEST(ParallelTimer, MergeThreeLevelTimers) +{ + double val1 = 1.0, val2 = 2.0, val3 = 3.0, val4 = 4.0; + double val5 = 5.0, val6 = 6.0; + stk::diag::impl::ParallelTimer t1 = create_timer("timer1", val1); + stk::diag::impl::ParallelTimer t2 = create_timer("timer2", val2); + stk::diag::impl::ParallelTimer t3 = create_timer("timer3", val3); + stk::diag::impl::ParallelTimer t4 = create_timer("timer1", val4); + stk::diag::impl::ParallelTimer t5 = create_timer("timer2", val5); + stk::diag::impl::ParallelTimer t6 = create_timer("timer3", val6); + + t2.m_subtimerList.push_back(t3); + t1.m_subtimerList.push_back(t2); + + t5.m_subtimerList.push_back(t6); + t4.m_subtimerList.push_back(t5); + + stk::diag::impl::merge_parallel_timer(t1, t4, false); + + EXPECT_EQ(t1.m_cpuTime.m_value, val1); + EXPECT_EQ(t1.m_cpuTime.m_sum, val1 + val4); + EXPECT_EQ(t1.m_cpuTime.m_min, val1); + EXPECT_EQ(t1.m_cpuTime.m_max, val4); + EXPECT_EQ(t1.m_subtimerList.size(), 1U); + + stk::diag::impl::ParallelTimer t2Merged = t1.m_subtimerList.front(); + EXPECT_EQ(t2Merged.m_cpuTime.m_value, val2); + EXPECT_EQ(t2Merged.m_cpuTime.m_sum, val2 + val5); + EXPECT_EQ(t2Merged.m_cpuTime.m_min, val2); + EXPECT_EQ(t2Merged.m_cpuTime.m_max, val5); + EXPECT_EQ(t2Merged.m_subtimerList.size(), 1U); + + stk::diag::impl::ParallelTimer t3Merged = t2Merged.m_subtimerList.front(); + EXPECT_EQ(t3Merged.m_cpuTime.m_value, val3); + EXPECT_EQ(t3Merged.m_cpuTime.m_sum, val3 + val6); + EXPECT_EQ(t3Merged.m_cpuTime.m_min, val3); + EXPECT_EQ(t3Merged.m_cpuTime.m_max, val6); + EXPECT_EQ(t3Merged.m_subtimerList.size(), 0U); +} + +TEST(ParallelTimer, MergeThreeLevelTimersDifferentNames) +{ + double val1 = 1.0, val2 = 2.0, val3 = 3.0, val4 = 4.0; + double val5 = 5.0, val6 = 6.0; + stk::diag::impl::ParallelTimer t1 = create_timer("timer1", val1); + stk::diag::impl::ParallelTimer t2 = create_timer("timer2", val2); + stk::diag::impl::ParallelTimer t3 = create_timer("timer3", val3); + stk::diag::impl::ParallelTimer t4 = create_timer("timer4", val4); + stk::diag::impl::ParallelTimer t5 = create_timer("timer5", val5); + stk::diag::impl::ParallelTimer t6 = create_timer("timer6", val6); + + t2.m_subtimerList.push_back(t3); + t1.m_subtimerList.push_back(t2); + + t5.m_subtimerList.push_back(t6); + t4.m_subtimerList.push_back(t5); + + stk::diag::impl::merge_parallel_timer(t1, t4, false); + + EXPECT_EQ(t1.m_cpuTime.m_value, val1); + EXPECT_EQ(t1.m_cpuTime.m_sum, val1 + val4); + EXPECT_EQ(t1.m_cpuTime.m_min, val1); + EXPECT_EQ(t1.m_cpuTime.m_max, val4); + EXPECT_EQ(t1.m_subtimerList.size(), 2U); + + stk::diag::impl::ParallelTimer t2Copy = t1.m_subtimerList.front(); + EXPECT_EQ(t2Copy.m_cpuTime.m_value, val2); + EXPECT_EQ(t2Copy.m_cpuTime.m_sum, val2); + EXPECT_EQ(t2Copy.m_cpuTime.m_min, val2); + EXPECT_EQ(t2Copy.m_cpuTime.m_max, val2); + EXPECT_EQ(t2Copy.m_subtimerList.size(), 1U); + + stk::diag::impl::ParallelTimer t5Copy = t1.m_subtimerList.back(); + EXPECT_EQ(t5Copy.m_cpuTime.m_value, val5); + EXPECT_EQ(t5Copy.m_cpuTime.m_sum, val5); + EXPECT_EQ(t5Copy.m_cpuTime.m_min, val5); + EXPECT_EQ(t5Copy.m_cpuTime.m_max, val5); + EXPECT_EQ(t5Copy.m_subtimerList.size(), 1U); + + stk::diag::impl::ParallelTimer t3Copy = t2Copy.m_subtimerList.front(); + EXPECT_EQ(t3Copy.m_cpuTime.m_value, val3); + EXPECT_EQ(t3Copy.m_cpuTime.m_sum, val3); + EXPECT_EQ(t3Copy.m_cpuTime.m_min, val3); + EXPECT_EQ(t3Copy.m_cpuTime.m_max, val3); + EXPECT_EQ(t3Copy.m_subtimerList.size(), 0U); + + stk::diag::impl::ParallelTimer t6Copy = t5Copy.m_subtimerList.front(); + EXPECT_EQ(t6Copy.m_cpuTime.m_value, val6); + EXPECT_EQ(t6Copy.m_cpuTime.m_sum, val6); + EXPECT_EQ(t6Copy.m_cpuTime.m_min, val6); + EXPECT_EQ(t6Copy.m_cpuTime.m_max, val6); + EXPECT_EQ(t6Copy.m_subtimerList.size(), 0U); +} + + +TEST(ParallelTimer, CollectTimersChunkSize1) +{ + stk::ParallelMachine comm = stk::parallel_machine_world(); + int commSize = stk::parallel_machine_size(comm); + int commRank = stk::parallel_machine_rank(comm); + double cpuTimeVal = commRank; + stk::diag::Timer rootTimer = stk::diag::createRootTimer("rootTimer", stk::diag::TimerSet(stk::diag::getEnabledTimerMetricsMask())); + stk::diag::TimerTester(rootTimer).setCPUTime(cpuTimeVal); + + const int maxProcsPerGather = 1; + stk::diag::impl::ParallelTimer parallelTimer = stk::diag::impl::collect_timers(rootTimer, false, comm, maxProcsPerGather); + + if (commRank == 0) + { + EXPECT_EQ(parallelTimer.m_cpuTime.m_min, 0.0); + EXPECT_EQ(parallelTimer.m_cpuTime.m_max, commSize - 1); + EXPECT_EQ(parallelTimer.m_cpuTime.m_sum, commSize * (commSize - 1) / 2.0); + } + + stk::diag::deleteRootTimer(rootTimer); +} + +TEST(ParallelTimer, CollectTimersChunkSize2) +{ + stk::ParallelMachine comm = stk::parallel_machine_world(); + int commSize = stk::parallel_machine_size(comm); + int commRank = stk::parallel_machine_rank(comm); + double cpuTimeVal = commRank + 1; + stk::diag::Timer rootTimer = stk::diag::createRootTimer("rootTimer", stk::diag::TimerSet(stk::diag::getEnabledTimerMetricsMask())); + stk::diag::TimerTester(rootTimer).setCPUTime(cpuTimeVal); + + const int maxProcsPerGather = 2; + stk::diag::impl::ParallelTimer parallelTimer = stk::diag::impl::collect_timers(rootTimer, false, comm, maxProcsPerGather); + + if (commRank == 0) + { + EXPECT_EQ(parallelTimer.m_cpuTime.m_min, 1.0); + EXPECT_EQ(parallelTimer.m_cpuTime.m_max, commSize); + EXPECT_EQ(parallelTimer.m_cpuTime.m_sum, commSize * (1 + commSize) / 2.0); + } + + stk::diag::deleteRootTimer(rootTimer); +} \ No newline at end of file diff --git a/packages/stk/stk_unit_tests/stk_util/parallel/UnitTestParallelComm.cpp b/packages/stk/stk_unit_tests/stk_util/parallel/UnitTestParallelComm.cpp index 64098c3ae44f..bdf5390c59e2 100644 --- a/packages/stk/stk_unit_tests/stk_util/parallel/UnitTestParallelComm.cpp +++ b/packages/stk/stk_unit_tests/stk_util/parallel/UnitTestParallelComm.cpp @@ -245,10 +245,10 @@ class DenseParallelCommTesterBase : public ParallelCommTester set_send_buffers_values(); } - void set_recv_buffer_sizes(std::vector< std::vector >& recvLists) + void set_recv_buffer_sizes(std::vector< std::vector >& rcvLists) { for (int src=0; src < commSize; ++src) { - recvLists[src].resize(this->get_size(src, myrank)); + rcvLists[src].resize(this->get_size(src, myrank)); } } @@ -256,10 +256,10 @@ class DenseParallelCommTesterBase : public ParallelCommTester virtual int get_num_recvs() override { return commSize; } - void test_results(std::vector< std::vector >& recvLists) + void test_results(std::vector< std::vector >& rcvLists) { for (int src=0; src < commSize; ++src) { - test_recv_vals(recvLists[src], src); + test_recv_vals(rcvLists[src], src); } } @@ -275,9 +275,9 @@ class DenseParallelCommTesterBase : public ParallelCommTester } } - void test_send_ranks(std::vector< std::vector >& sendLists) + void test_send_ranks(std::vector< std::vector >& sndLists) { - test_ranks_inner(sendLists); + test_ranks_inner(sndLists); } @@ -348,22 +348,22 @@ class NeighborParallelCommTesterBase : public ParallelCommTester set_send_buffers_values(); } - void set_recv_buffer_sizes(std::vector< std::vector >& recvLists) + void set_recv_buffer_sizes(std::vector< std::vector >& rcvLists) { int src1 = (myrank - 1 + commSize) % commSize; int src2 = (myrank - 2 + commSize) % commSize; - recvLists[src1].resize(this->get_size(src1, myrank)); - recvLists[src2].resize(this->get_size(src2, myrank)); + rcvLists[src1].resize(this->get_size(src1, myrank)); + rcvLists[src2].resize(this->get_size(src2, myrank)); } virtual int get_num_sends() override { return std::min(2, commSize); } virtual int get_num_recvs() override { return std::min(2, commSize); } - void test_results(std::vector< std::vector >& recvLists) + void test_results(std::vector< std::vector >& rcvLists) { for (int src=0; src < commSize; ++src) { - test_recv_vals(recvLists[src], src); + test_recv_vals(rcvLists[src], src); } } @@ -382,10 +382,10 @@ class NeighborParallelCommTesterBase : public ParallelCommTester } } - void test_send_ranks(std::vector>& sendLists) + void test_send_ranks(std::vector>& sndLists) { - std::vector sendRanks = get_ranks(sendLists); + std::vector sendRanks = get_ranks(sndLists); int len = sendRanks.size(); int dest1 = (myrank + 1) % commSize; @@ -404,9 +404,9 @@ class NeighborParallelCommTesterBase : public ParallelCommTester } } - void test_recv_ranks(std::vector>& recvLists) + void test_recv_ranks(std::vector>& rcvLists) { - auto recvRanks = get_ranks(recvLists); + auto recvRanks = get_ranks(rcvLists); int len = recvRanks.size(); int dest1 = (myrank - 1 + commSize) % commSize; diff --git a/packages/stk/stk_unit_tests/stk_util/util/UnitTestStridedArray.cpp b/packages/stk/stk_unit_tests/stk_util/util/UnitTestStridedArray.cpp index eb5d63d7f9f5..3fdf583b1926 100644 --- a/packages/stk/stk_unit_tests/stk_util/util/UnitTestStridedArray.cpp +++ b/packages/stk/stk_unit_tests/stk_util/util/UnitTestStridedArray.cpp @@ -32,8 +32,9 @@ // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // -#include "gtest/gtest.h" #include "stk_util/util/StridedArray.hpp" +#include "Kokkos_Core.hpp" +#include "gtest/gtest.h" #include TEST( StridedArray, ptr_and_size) diff --git a/packages/stk/stk_util/stk_util/Version.hpp b/packages/stk/stk_util/stk_util/Version.hpp index 4f4efaf1655d..6d7b47a1ec5a 100644 --- a/packages/stk/stk_util/stk_util/Version.hpp +++ b/packages/stk/stk_util/stk_util/Version.hpp @@ -44,7 +44,7 @@ //See the file CHANGELOG.md for a listing that shows the //correspondence between version numbers and API changes. -#define STK_VERSION 5210601 +#define STK_VERSION 5230200 namespace stk diff --git a/packages/stk/stk_util/stk_util/command_line/CommandLineParser.hpp b/packages/stk/stk_util/stk_util/command_line/CommandLineParser.hpp index a3c4a8c17459..a5316a474ee6 100644 --- a/packages/stk/stk_util/stk_util/command_line/CommandLineParser.hpp +++ b/packages/stk/stk_util/stk_util/command_line/CommandLineParser.hpp @@ -52,15 +52,14 @@ struct CommandLineOption class CommandLineParser { public: - enum ParseState { ParseComplete, ParseError, ParseHelpOnly, ParseVersionOnly }; - CommandLineParser() : CommandLineParser("Options") {} - explicit CommandLineParser(const std::string &usagePreamble) - : optionsSpec(usagePreamble), - parsedOptions(), - positionalIndex(0) - { - add_flag("help,h", "display this help message and exit"); - add_flag("version,v", "display version information and exit"); + virtual ~CommandLineParser() = default; + enum ParseState { ParseComplete, ParseError, ParseHelpOnly, ParseVersionOnly }; + CommandLineParser() : CommandLineParser("Options") {} + explicit CommandLineParser(const std::string &usagePreamble) + : optionsSpec(usagePreamble), parsedOptions(), positionalIndex(0) + { + add_flag("help,h", "display this help message and exit"); + add_flag("version,v", "display version information and exit"); } void disallow_unrecognized() diff --git a/packages/stk/stk_util/stk_util/diag/ParallelTimerImpl.cpp b/packages/stk/stk_util/stk_util/diag/ParallelTimerImpl.cpp new file mode 100644 index 000000000000..e8dca41f2527 --- /dev/null +++ b/packages/stk/stk_util/stk_util/diag/ParallelTimerImpl.cpp @@ -0,0 +1,254 @@ +#include "ParallelTimerImpl.hpp" +#include "stk_util/util/Marshal.hpp" + +namespace stk::diag::impl { + +ParallelTimer::ParallelTimer() + : m_name(), + m_timerMask(0), + m_subtimerLapCount(0), + m_lapCount(), + m_cpuTime(), + m_wallTime(), + m_MPICount(), + m_MPIByteCount(), + m_heapAlloc(), + m_subtimerList() +{} + +ParallelTimer::ParallelTimer(const ParallelTimer ¶llel_timer) + : m_name(parallel_timer.m_name), + m_timerMask(parallel_timer.m_timerMask), + m_subtimerLapCount(parallel_timer.m_subtimerLapCount), + m_lapCount(parallel_timer.m_lapCount), + m_cpuTime(parallel_timer.m_cpuTime), + m_wallTime(parallel_timer.m_wallTime), + m_MPICount(parallel_timer.m_MPICount), + m_MPIByteCount(parallel_timer.m_MPIByteCount), + m_heapAlloc(parallel_timer.m_heapAlloc), + m_subtimerList(parallel_timer.m_subtimerList) +{} + +ParallelTimer &ParallelTimer::operator=(const ParallelTimer ¶llel_timer) { + m_name = parallel_timer.m_name; + m_timerMask = parallel_timer.m_timerMask; + m_subtimerLapCount = parallel_timer.m_subtimerLapCount; + m_lapCount = parallel_timer.m_lapCount; + m_cpuTime = parallel_timer.m_cpuTime; + m_wallTime = parallel_timer.m_wallTime; + m_MPICount = parallel_timer.m_MPICount; + m_heapAlloc = parallel_timer.m_heapAlloc; + m_subtimerList = parallel_timer.m_subtimerList; + + return *this; +} + + +Writer & +ParallelTimer::dump(Writer &dout) const { + if (dout.shouldPrint()) { + dout << "ParallelTimer " << m_name << push << dendl; + dout << "m_name " << m_name << dendl; + dout << "m_timerMask " << hex << m_timerMask << dendl; + dout << "m_subtimerLapCount " << m_subtimerLapCount << dendl; + dout << "m_lapCount " << m_lapCount << dendl; + dout << "m_cpuTime " << m_cpuTime << dendl; + dout << "m_wallTime " << m_wallTime << dendl; + dout << "m_MPICount " << m_MPICount << dendl; + dout << "m_MPIByteCount " << m_MPIByteCount << dendl; + dout << "m_heapAlloc " << m_heapAlloc << dendl; + dout << "m_subtimerList " << m_subtimerList << dendl; + dout << pop; + } + return dout; +} + +void +merge_parallel_timer( + ParallelTimer & p0, + const ParallelTimer & p1, + bool checkpoint) +{ + p0.m_timerMask = p1.m_timerMask; + p0.m_subtimerLapCount += p1.m_subtimerLapCount; + p0.m_lapCount.accumulate(p1.m_lapCount, checkpoint); + p0.m_cpuTime.accumulate(p1.m_cpuTime, checkpoint); + p0.m_wallTime.accumulate(p1.m_wallTime, checkpoint); + p0.m_MPICount.accumulate(p1.m_MPICount, checkpoint); + p0.m_MPIByteCount.accumulate(p1.m_MPIByteCount, checkpoint); + p0.m_heapAlloc.accumulate(p1.m_heapAlloc, checkpoint); + + + for (std::list::const_iterator p1_it = p1.m_subtimerList.begin(); p1_it != p1.m_subtimerList.end(); ++p1_it) { + std::list::iterator p0_it = std::find_if(p0.m_subtimerList.begin(), p0.m_subtimerList.end(), finder((*p1_it).m_name)); + if (p0_it == p0.m_subtimerList.end()) { + p0.m_subtimerList.push_back((*p1_it)); + } + else + merge_parallel_timer(*p0_it, *p1_it, checkpoint); + } +} + +stk::Marshal &operator>>(stk::Marshal &min, ParallelTimer &t) { + min >> t.m_name >> t.m_timerMask >> t.m_subtimerLapCount + >> t.m_lapCount.m_value + >> t.m_lapCount.m_checkpoint + >> t.m_cpuTime.m_value + >> t.m_cpuTime.m_checkpoint + >> t.m_wallTime.m_value + >> t.m_wallTime.m_checkpoint + >> t.m_MPICount.m_value + >> t.m_MPICount.m_checkpoint + >> t.m_MPIByteCount.m_value + >> t.m_MPIByteCount.m_checkpoint + >> t.m_heapAlloc.m_value + >> t.m_heapAlloc.m_checkpoint; + + min >> t.m_subtimerList; + + return min; +} + + +#ifdef STK_HAS_MPI +size_t round_up_to_next_word(size_t value) +{ + const size_t SIZE_OF_WORD = 4; + size_t remainder = value % SIZE_OF_WORD; + if (remainder == 0) { + return value; + } + return value + SIZE_OF_WORD - remainder; +} +#endif + +ParallelTimer +collect_timers( + const Timer & root_timer, + bool checkpoint, + ParallelMachine comm, + const int max_procs_per_gather) +{ + Marshal mout; + mout << root_timer; + impl::ParallelTimer root_parallel_timer; + +#ifdef STK_HAS_MPI + const int parallel_root = 0 ; + const int parallel_size = parallel_machine_size(comm); + const int parallel_rank = parallel_machine_rank(comm); + + // Gather the send counts on root processor + std::string send_string(mout.str()); + int send_count = send_string.size(); + send_string.resize(round_up_to_next_word(send_count)); + int padded_send_count = send_string.size(); + + + //We need to gather the timer data in a number of 'cycles' where we + //only receive from a portion of the other processors each cycle. + //This is because buffer allocation-failures have been observed for + //runs on very large numbers of processors if the 'root' processor tries + //to allocate a buffer large enough to hold timing data from all other + //procesors. + //We will set an arbitrary limit for now, making sure that no more than + //a given number of processors' worth of timer data is gathered at a time. + int num_cycles = parallel_size/max_procs_per_gather; + if (parallel_size < max_procs_per_gather || num_cycles < 1) { + num_cycles = 1; + } + + std::vector recv_buffer; + + for(int ii=0; ii recv_count(parallel_size, 0); + std::vector padded_recv_count(parallel_size, 0); + + { + int result = MPI_Gather(&send_count_this_cycle, 1, MPI_INT, + recv_count.data(), 1, MPI_INT, + parallel_root, comm); + if (MPI_SUCCESS != result) { + std::ostringstream message ; + message << "stk::diag::collect_timers FAILED: send_count MPI_Gather = " << result ; + throw std::runtime_error(message.str()); + } + } + + { + int result = MPI_Gather(&padded_send_count_this_cycle, 1, MPI_INT, + padded_recv_count.data(), 1, MPI_INT, + parallel_root, comm); + if (MPI_SUCCESS != result) { + std::ostringstream message ; + message << "stk::diag::collect_timers FAILED: padded_send_count MPI_Gather = " << result ; + throw std::runtime_error(message.str()); + } + } + + // Receive counts are only non-zero on the root processor: + std::vector recv_displ(parallel_size + 1, 0); + std::vector recv_end(parallel_size + 1, 0); + + for (int i = 0 ; i < parallel_size ; ++i) { + recv_displ[i + 1] = recv_displ[i] + padded_recv_count[i] ; + recv_end[i] = recv_displ[i] + recv_count[i] ; + } + + const int recv_size = recv_displ[parallel_size] ; + + recv_buffer.assign(recv_size, 0); + + { + int result = MPI_Gatherv(send_string.data(), padded_send_count_this_cycle, MPI_CHAR, + recv_buffer.data(), padded_recv_count.data(), recv_displ.data(), MPI_CHAR, + parallel_root, comm); + if (MPI_SUCCESS != result) { + std::ostringstream message ; + message << "stk::diag::collect_timers FAILED: MPI_Gatherv = " << result ; + throw std::runtime_error(message.str()); + } + + std::vector parallel_timer_vector; + parallel_timer_vector.reserve(parallel_size); + + if (parallel_rank == parallel_root) { + for (int j = 0; j < parallel_size; ++j) { + int received_count = recv_displ[j+1] - recv_displ[j]; + if (received_count > 0) { + //grow parallel_timer_vector by 1: + parallel_timer_vector.resize(parallel_timer_vector.size()+1); + Marshal min(std::string(recv_buffer.data() + recv_displ[j], recv_buffer.data() + recv_end[j])); + //put this data into the last entry of parallel_timer_vector: + min >> parallel_timer_vector[parallel_timer_vector.size()-1]; + } + } + + if (parallel_rank==parallel_root && send_count_this_cycle>0) + { + root_parallel_timer = parallel_timer_vector[0]; + } + + for (size_t j = 0; j < parallel_timer_vector.size(); ++j) + { + merge_parallel_timer(root_parallel_timer, parallel_timer_vector[j], checkpoint); + } + } + } + } +#else + Marshal min(mout.str()); + min >> root_parallel_timer; + merge_parallel_timer(root_parallel_timer, root_parallel_timer, checkpoint); +#endif + + return root_parallel_timer; +} + +} \ No newline at end of file diff --git a/packages/stk/stk_util/stk_util/diag/ParallelTimerImpl.hpp b/packages/stk/stk_util/stk_util/diag/ParallelTimerImpl.hpp new file mode 100644 index 000000000000..c18de9b4774b --- /dev/null +++ b/packages/stk/stk_util/stk_util/diag/ParallelTimerImpl.hpp @@ -0,0 +1,210 @@ +// Copyright 2002 - 2008, 2010, 2011 National Technology Engineering +// Solutions of Sandia, LLC (NTESS). Under the terms of Contract +// DE-NA0003525 with NTESS, the U.S. Government retains certain rights +// in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// * Neither the name of NTESS nor the names of its contributors +// may be used to endorse or promote products derived from this +// software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// + +#ifndef STK_UTIL_DIAG_ParallelTimerImpl_hpp +#define STK_UTIL_DIAG_ParallelTimerImpl_hpp + +#include "stk_util/diag/Timer.hpp" +#include "stk_util/util/Writer.hpp" +#include "WriterExt.hpp" +#include "stk_util/util/string_case_compare.hpp" // for equal_case +#include "TimerMetricTraits.hpp" +#include +#include +#include + +namespace stk { struct Marshal; } + +namespace stk::diag { +namespace impl { + +struct ParallelTimer +{ + template + struct Metric + { + Metric() + : m_value(0), + m_sum(0.0), + m_min(std::numeric_limits::max()), + m_max(0.0) + {} + + typename MetricTraits::Type m_value; ///< Metric value + typename MetricTraits::Type m_checkpoint; ///< Metric checkpointed value + double m_sum; ///< Reduction sum + double m_min; ///< Reduction min + double m_max; ///< Reduction max + + void accumulate(const Metric &metric, bool checkpoint) { + double value = static_cast(metric.m_value); + if (checkpoint) + value -= static_cast(metric.m_checkpoint); + + m_sum += value; + m_min = std::min(m_min, value); + m_max = std::max(m_max, value); + } + + Writer &dump(Writer &dout) const { + if (dout.shouldPrint()) { + dout << "Metric<" << typeid(typename MetricTraits::Type) << ">" << push << dendl; + dout << "m_value " << m_value << dendl; + dout << "m_checkpoint " << m_value << dendl; + dout << "m_sum " << m_sum << dendl; + dout << "m_min " << m_min << dendl; + dout << "m_max " << m_max << dendl; + dout << pop; + } + return dout; + } + }; + + ParallelTimer(); + + ParallelTimer(const ParallelTimer ¶llel_timer); + + ParallelTimer &operator=(const ParallelTimer ¶llel_timer); + + template + const Metric &getMetric() const; + + std::string m_name; ///< Name of the timer + TimerMask m_timerMask; + double m_subtimerLapCount; ///< Sum of subtimer lap counts and m_lapCount + + Metric m_lapCount; ///< Number of laps accumulated + Metric m_cpuTime; ///< CPU time + Metric m_wallTime; ///< Wall time + Metric m_MPICount; ///< MPI call count + Metric m_MPIByteCount; ///< MPI byte count + Metric m_heapAlloc; ///< MPI byte count + + std::list m_subtimerList; ///< Sub timers + + Writer &dump(Writer &dout) const; +}; + +template<> +inline const ParallelTimer::Metric & +ParallelTimer::getMetric() const { + return m_lapCount; +} + + +template<> +inline const ParallelTimer::Metric & +ParallelTimer::getMetric() const { + return m_cpuTime; +} + + +template<> +inline const ParallelTimer::Metric & +ParallelTimer::getMetric() const { + return m_wallTime; +} + + +template<> +inline const ParallelTimer::Metric & +ParallelTimer::getMetric() const { + return m_MPICount; +} + + +template<> +inline const ParallelTimer::Metric & +ParallelTimer::getMetric() const { + return m_MPIByteCount; +} + + +template<> +inline const ParallelTimer::Metric & +ParallelTimer::getMetric() const { + return m_heapAlloc; +} + + +template +Writer &operator<<(Writer &dout, const ParallelTimer::Metric &t) { + return t.dump(dout); +} + +inline Writer &operator<<(Writer &dout, const ParallelTimer ¶llel_timer) { + return parallel_timer.dump(dout); +} + +stk::Marshal &operator>>(stk::Marshal &min, ParallelTimer &t); + +#ifdef __INTEL_COMPILER +#pragma warning(push) +#pragma warning(disable: 444) +#endif +class finder +{ +public: + finder(const std::string &name) + : m_name(name) + {} + + bool operator()(const ParallelTimer ¶llel_timer) const { + return equal_case(parallel_timer.m_name, m_name); + } + +private: + std::string m_name; +}; +#ifdef __INTEL_COMPILER +#pragma warning(pop) +#endif + +void +merge_parallel_timer( + ParallelTimer & p0, + const ParallelTimer & p1, + bool checkpoint); + +ParallelTimer +collect_timers( + const Timer & root_timer, + bool checkpoint, + ParallelMachine comm, + const int max_procs_per_gather = 64); + +} +} + +#endif diff --git a/packages/stk/stk_util/stk_util/diag/PrintTimer.cpp b/packages/stk/stk_util/stk_util/diag/PrintTimer.cpp index 209a947bf996..48b2ec3a988a 100644 --- a/packages/stk/stk_util/stk_util/diag/PrintTimer.cpp +++ b/packages/stk/stk_util/stk_util/diag/PrintTimer.cpp @@ -6,15 +6,15 @@ // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: -// +// // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. -// +// // * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following // disclaimer in the documentation and/or other materials provided // with the distribution. -// +// // * Neither the name of NTESS nor the names of its contributors // may be used to endorse or promote products derived from this // software without specific prior written permission. @@ -30,19 +30,19 @@ // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// +// #include "stk_util/diag/PrintTimer.hpp" #include "stk_util/diag/PrintTable.hpp" // for operator<<, PrintTable, end_col, PrintT... #include "stk_util/diag/Timer.hpp" // for getEnabledTimerMetricsMask, Timer, Time... #include "stk_util/diag/WriterExt.hpp" // for operator<< +#include "stk_util/diag/ParallelTimerImpl.hpp" #include "stk_util/environment/WallTime.hpp" // for wall_time #include "stk_util/parallel/Parallel.hpp" // for parallel_machine_rank, MPI_Gather, para... #include "stk_util/stk_config.h" // for STK_HAS_MPI #include "stk_util/util/Marshal.hpp" // for operator>>, Marshal, operator<< #include "stk_util/util/Writer.hpp" // for operator<<, Writer, dendl, pop, push #include "stk_util/util/WriterManip.hpp" // for hex -#include "stk_util/util/string_case_compare.hpp" // for equal_case #include // for size_t #include // for find_if, max, min #include // for unary_function @@ -54,17 +54,6 @@ #include // for string, char_traits, operator<< #include // for vector -namespace stk { namespace diag { namespace { struct ParallelTimer; } } } - -namespace stk { - -template -Marshal &operator<<(Marshal &mout, const diag::Timer::Metric &t); - -Marshal &operator<<(Marshal &mout, const diag::Timer &t); - -Marshal &operator>>(Marshal &min, diag::ParallelTimer &t); -} namespace stk { namespace diag { @@ -120,7 +109,7 @@ Percent::operator()( strout << "(" << std::setw(5) << std::setprecision(1) << std::fixed << ratio << "%)"; else if (ratio >= 10.0) strout << "(" << std::setw(5) << std::setprecision(2) << std::fixed << ratio << "%)"; - else + else strout << "(" << std::setw(5) << std::setprecision(3) << std::fixed << ratio << "%)"; } @@ -131,370 +120,6 @@ inline std::ostream &operator<<(std::ostream &os, const Percent &p) { return p(os); } -struct ParallelTimer -{ - template - struct Metric - { - Metric() - : m_value(0), - m_sum(0.0), - m_min(std::numeric_limits::max()), - m_max(0.0) - {} - - typename MetricTraits::Type m_value; ///< Metric value - typename MetricTraits::Type m_checkpoint; ///< Metric checkpointed value - double m_sum; ///< Reduction sum - double m_min; ///< Reduction min - double m_max; ///< Reduction max - - void accumulate(const Metric &metric, bool checkpoint) { - double value = static_cast(metric.m_value); - if (checkpoint) - value -= static_cast(metric.m_checkpoint); - - m_sum += value; - m_min = std::min(m_min, value); - m_max = std::max(m_max, value); - } - - Writer &dump(Writer &dout) const { - if (dout.shouldPrint()) { - dout << "Metric<" << typeid(typename MetricTraits::Type) << ">" << push << dendl; - dout << "m_value " << m_value << dendl; - dout << "m_checkpoint " << m_value << dendl; - dout << "m_sum " << m_sum << dendl; - dout << "m_min " << m_min << dendl; - dout << "m_max " << m_max << dendl; - dout << pop; - } - return dout; - } - }; - - ParallelTimer() - : m_name(), - m_timerMask(0), - m_subtimerLapCount(0), - m_lapCount(), - m_cpuTime(), - m_wallTime(), - m_MPICount(), - m_MPIByteCount(), - m_heapAlloc(), - m_subtimerList() - {} - - ParallelTimer(const ParallelTimer ¶llel_timer) - : m_name(parallel_timer.m_name), - m_timerMask(parallel_timer.m_timerMask), - m_subtimerLapCount(parallel_timer.m_subtimerLapCount), - m_lapCount(parallel_timer.m_lapCount), - m_cpuTime(parallel_timer.m_cpuTime), - m_wallTime(parallel_timer.m_wallTime), - m_MPICount(parallel_timer.m_MPICount), - m_MPIByteCount(parallel_timer.m_MPIByteCount), - m_heapAlloc(parallel_timer.m_heapAlloc), - m_subtimerList(parallel_timer.m_subtimerList) - {} - - ParallelTimer &operator=(const ParallelTimer ¶llel_timer) { - m_name = parallel_timer.m_name; - m_timerMask = parallel_timer.m_timerMask; - m_subtimerLapCount = parallel_timer.m_subtimerLapCount; - m_lapCount = parallel_timer.m_lapCount; - m_cpuTime = parallel_timer.m_cpuTime; - m_wallTime = parallel_timer.m_wallTime; - m_MPICount = parallel_timer.m_MPICount; - m_heapAlloc = parallel_timer.m_heapAlloc; - m_subtimerList = parallel_timer.m_subtimerList; - - return *this; - } - - template - const Metric &getMetric() const; - - std::string m_name; ///< Name of the timer - TimerMask m_timerMask; - double m_subtimerLapCount; ///< Sum of subtimer lap counts and m_lapCount - - Metric m_lapCount; ///< Number of laps accumulated - Metric m_cpuTime; ///< CPU time - Metric m_wallTime; ///< Wall time - Metric m_MPICount; ///< MPI call count - Metric m_MPIByteCount; ///< MPI byte count - Metric m_heapAlloc; ///< MPI byte count - - std::list m_subtimerList; ///< Sub timers - - Writer &dump(Writer &dout) const; -}; - -template<> -const ParallelTimer::Metric & -ParallelTimer::getMetric() const { - return m_lapCount; -} - - -template<> -const ParallelTimer::Metric & -ParallelTimer::getMetric() const { - return m_cpuTime; -} - - -template<> -const ParallelTimer::Metric & -ParallelTimer::getMetric() const { - return m_wallTime; -} - - -template<> -const ParallelTimer::Metric & -ParallelTimer::getMetric() const { - return m_MPICount; -} - - -template<> -const ParallelTimer::Metric & -ParallelTimer::getMetric() const { - return m_MPIByteCount; -} - - -template<> -const ParallelTimer::Metric & -ParallelTimer::getMetric() const { - return m_heapAlloc; -} - - -template -Writer &operator<<(Writer &dout, const ParallelTimer::Metric &t) { - return t.dump(dout); -} - -Writer &operator<<(Writer &dout, const ParallelTimer ¶llel_timer) { - return parallel_timer.dump(dout); -} - -Writer & -ParallelTimer::dump(Writer &dout) const { - if (dout.shouldPrint()) { - dout << "ParallelTimer " << m_name << push << dendl; - dout << "m_name " << m_name << dendl; - dout << "m_timerMask " << hex << m_timerMask << dendl; - dout << "m_subtimerLapCount " << m_subtimerLapCount << dendl; - dout << "m_lapCount " << m_lapCount << dendl; - dout << "m_cpuTime " << m_cpuTime << dendl; - dout << "m_wallTime " << m_wallTime << dendl; - dout << "m_MPICount " << m_MPICount << dendl; - dout << "m_MPIByteCount " << m_MPIByteCount << dendl; - dout << "m_heapAlloc " << m_heapAlloc << dendl; - dout << "m_subtimerList " << m_subtimerList << dendl; - dout << pop; - } - return dout; -} - -#ifdef __INTEL_COMPILER -#pragma warning(push) -#pragma warning(disable: 444) -#endif -class finder -{ -public: - finder(const std::string &name) - : m_name(name) - {} - - bool operator()(const ParallelTimer ¶llel_timer) const { - return equal_case(parallel_timer.m_name, m_name); - } - -private: - std::string m_name; -}; -#ifdef __INTEL_COMPILER -#pragma warning(pop) -#endif - - -void -merge_parallel_timer( - ParallelTimer & p0, - const ParallelTimer & p1, - bool checkpoint) -{ - p0.m_timerMask = p1.m_timerMask; - p0.m_subtimerLapCount += p1.m_subtimerLapCount; - p0.m_lapCount.accumulate(p1.m_lapCount, checkpoint); - p0.m_cpuTime.accumulate(p1.m_cpuTime, checkpoint); - p0.m_wallTime.accumulate(p1.m_wallTime, checkpoint); - p0.m_MPICount.accumulate(p1.m_MPICount, checkpoint); - p0.m_MPIByteCount.accumulate(p1.m_MPIByteCount, checkpoint); - p0.m_heapAlloc.accumulate(p1.m_heapAlloc, checkpoint); - - - for (std::list::const_iterator p1_it = p1.m_subtimerList.begin(); p1_it != p1.m_subtimerList.end(); ++p1_it) { - std::list::iterator p0_it = std::find_if(p0.m_subtimerList.begin(), p0.m_subtimerList.end(), finder((*p1_it).m_name)); - if (p0_it == p0.m_subtimerList.end()) { - p0.m_subtimerList.push_back((*p1_it)); - p0_it = --p0.m_subtimerList.end(); - merge_parallel_timer(*p0_it, *p1_it, checkpoint); - } - else - merge_parallel_timer(*p0_it, *p1_it, checkpoint); - } -} - -#ifdef STK_HAS_MPI -size_t round_up_to_next_word(size_t value) -{ - const size_t SIZE_OF_WORD = 4; - size_t remainder = value % SIZE_OF_WORD; - if (remainder == 0) { - return value; - } - return value + SIZE_OF_WORD - remainder; -} -#endif - -void -collect_timers( - Timer & root_timer, - ParallelTimer & parallel_timer, - bool checkpoint, - ParallelMachine comm) -{ - Marshal mout; - mout << root_timer; - -#ifdef STK_HAS_MPI - const int parallel_root = 0 ; - const int parallel_size = parallel_machine_size(comm); - const int parallel_rank = parallel_machine_rank(comm); - - // Gather the send counts on root processor - std::string send_string(mout.str()); - - ParallelTimer root_parallel_timer; - - //We need to gather the timer data in a number of 'cycles' where we - //only receive from a portion of the other processors each cycle. - //This is because buffer allocation-failures have been observed for - //runs on very large numbers of processors if the 'root' processor tries - //to allocate a buffer large enough to hold timing data from all other - //procesors. - //We will set an arbitrary limit for now, making sure that no more than - //64 processors' worth of timer data is gathered at a time. - const int max_procs_per_gather = 64; - int num_cycles = parallel_size/max_procs_per_gather; - if (parallel_size < max_procs_per_gather || num_cycles < 1) { - num_cycles = 1; - } - - std::vector buffer; - - for(int ii=0; ii recv_count(parallel_size, 0); - int * const recv_count_ptr = recv_count.data() ; - std::vector padded_recv_count(parallel_size, 0); - int * const padded_recv_count_ptr = padded_recv_count.data() ; - - //should this processor send on the current cycle ? If not, set send_count to 0. - if ((parallel_rank+ii)%num_cycles!=0) { - send_count = 0; - } - - { - int result = MPI_Gather(&send_count, 1, MPI_INT, - recv_count_ptr, 1, MPI_INT, - parallel_root, comm); - if (MPI_SUCCESS != result) { - std::ostringstream message ; - message << "stk::diag::collect_timers FAILED: send_count MPI_Gather = " << result ; - throw std::runtime_error(message.str()); - } - } - - { - int result = MPI_Gather(&padded_send_count, 1, MPI_INT, - padded_recv_count_ptr, 1, MPI_INT, - parallel_root, comm); - if (MPI_SUCCESS != result) { - std::ostringstream message ; - message << "stk::diag::collect_timers FAILED: padded_send_count MPI_Gather = " << result ; - throw std::runtime_error(message.str()); - } - } - - // Receive counts are only non-zero on the root processor: - std::vector recv_displ(parallel_size + 1, 0); - std::vector recv_end(parallel_size + 1, 0); - - for (int i = 0 ; i < parallel_size ; ++i) { - recv_displ[i + 1] = recv_displ[i] + padded_recv_count[i] ; - recv_end[i] = recv_displ[i] + recv_count[i] ; - } - - const int recv_size = recv_displ[parallel_size] ; - - buffer.assign(recv_size, 0); - - { - const char * const send_ptr = send_string.data(); - char * const recv_ptr = recv_size ? buffer.data() : nullptr; - int * const recv_displ_ptr = recv_displ.data() ; - - int result = MPI_Gatherv(const_cast(send_ptr), padded_send_count, MPI_CHAR, - recv_ptr, padded_recv_count_ptr, recv_displ_ptr, MPI_CHAR, - parallel_root, comm); -// int result = MPI_Gather(const_cast(send_ptr), padded_send_count, MPI_CHAR, -// recv_ptr, padded_send_count, MPI_CHAR, -// parallel_root, comm); - if (MPI_SUCCESS != result) { - std::ostringstream message ; - message << "stk::diag::collect_timers FAILED: MPI_Gatherv = " << result ; - throw std::runtime_error(message.str()); - } - - std::vector parallel_timer_vector; - parallel_timer_vector.reserve(parallel_size); - - if (parallel_rank == parallel_root) { - for (int j = 0; j < parallel_size; ++j) { - int received_count = recv_displ[j+1] - recv_displ[j]; - if (received_count > 0) { - //grow parallel_timer_vector by 1: - parallel_timer_vector.resize(parallel_timer_vector.size()+1); - Marshal min(std::string(recv_ptr + recv_displ[j], recv_ptr + recv_end[j])); - //put this data into the last entry of parallel_timer_vector: - min >> parallel_timer_vector[parallel_timer_vector.size()-1]; - } - } - - if (parallel_rank==parallel_root && send_count>0) root_parallel_timer = parallel_timer_vector[0]; - - for (size_t j = 0; j < parallel_timer_vector.size(); ++j) - merge_parallel_timer(root_parallel_timer, parallel_timer_vector[j], checkpoint); - } - } - } - parallel_timer = root_parallel_timer; -#endif -} - // PrintTable &printTable(PrintTable &table, MPI_Comm mpi_comm, MetricsMask metrics_mask) const; PrintTable & @@ -546,8 +171,8 @@ printSubtable( PrintTable & printSubtable( PrintTable & table, - const ParallelTimer & root_timer, - const ParallelTimer & timer, + const impl::ParallelTimer & root_timer, + const impl::ParallelTimer & timer, MetricsMask metrics_mask, int depth, bool timer_checkpoint) @@ -593,14 +218,14 @@ printSubtable( << justify(PrintTable::Cell::RIGHT) << std::setw(12) << MetricTraits::format(timer.getMetric().m_max) << " " << std::setw(8) << Percent(timer.getMetric().m_max, root_timer.getMetric().m_sum) << end_col; } - else + else table << justify(PrintTable::Cell::LEFT) << indent(depth) << span << timer.m_name << end_col; table << end_row; depth++; } - for (std::list::const_iterator it = timer.m_subtimerList.begin(); it != timer.m_subtimerList.end(); ++it) + for (std::list::const_iterator it = timer.m_subtimerList.begin(); it != timer.m_subtimerList.end(); ++it) printSubtable(table, root_timer, *it, metrics_mask, depth, timer_checkpoint); return table; @@ -661,9 +286,7 @@ printTable( root_timer.accumulateSubtimerLapCounts(); - ParallelTimer parallel_timer; - - stk::diag::collect_timers(root_timer, parallel_timer, timer_checkpoint, parallel_machine); + impl::ParallelTimer parallel_timer = stk::diag::impl::collect_timers(root_timer, timer_checkpoint, parallel_machine); int parallel_rank = parallel_machine_rank(parallel_machine); if (parallel_rank == 0) { @@ -671,7 +294,7 @@ printTable( table.setAutoEndCol(false); table << end_col << end_col; - + if (metrics_mask & getEnabledTimerMetricsMask() & MetricTraits::METRIC) table << justify(PrintTable::Cell::CENTER) << MetricTraits::table_header() << end_col << justify(PrintTable::Cell::CENTER) << MetricTraits::table_header() << end_col @@ -722,7 +345,7 @@ printTable( printSubtable(table, parallel_timer, parallel_timer, metrics_mask, 0, timer_checkpoint); } - + if (timer_checkpoint) root_timer.checkpoint(); } @@ -756,15 +379,15 @@ std::ostream &printTimersTable(std::ostream& os, Timer root_timer, MetricsMask m { double startTimeToPrintTable = stk::wall_time(); stk::PrintTable print_table; - + int parallel_size = parallel_machine_size(parallel_machine); if (parallel_size == 1) printTable(print_table, root_timer, metrics_mask, 40, timer_checkpoint); else printTable(print_table, root_timer, metrics_mask, 40, timer_checkpoint, parallel_machine); - + os << print_table; - + double durationToPrintTable = stk::wall_time() - startTimeToPrintTable; if (parallel_machine_rank(parallel_machine) == 0) printTimeToPrintTable(os, durationToPrintTable); @@ -773,43 +396,5 @@ std::ostream &printTimersTable(std::ostream& os, Timer root_timer, MetricsMask m } // namespace diag -Marshal &operator<<(stk::Marshal &mout, const diag::Timer &t); - -template -Marshal &operator<<(Marshal &mout, const diag::Timer::Metric &t) { - mout << t.getAccumulatedLap(false) << t.getAccumulatedLap(true); - - return mout; -} - -Marshal &operator<<(Marshal &mout, const diag::Timer &t) { - mout << t.getName() << t.getTimerMask() << t.getSubtimerLapCount() - << t.getMetric() << t.getMetric() << t.getMetric() - << t.getMetric() << t.getMetric() << t.getMetric(); - - mout << t.getTimerList(); - - return mout; -} - -Marshal &operator>>(Marshal &min, diag::ParallelTimer &t) { - min >> t.m_name >> t.m_timerMask >> t.m_subtimerLapCount - >> t.m_lapCount.m_value - >> t.m_lapCount.m_checkpoint - >> t.m_cpuTime.m_value - >> t.m_cpuTime.m_checkpoint - >> t.m_wallTime.m_value - >> t.m_wallTime.m_checkpoint - >> t.m_MPICount.m_value - >> t.m_MPICount.m_checkpoint - >> t.m_MPIByteCount.m_value - >> t.m_MPIByteCount.m_checkpoint - >> t.m_heapAlloc.m_value - >> t.m_heapAlloc.m_checkpoint; - - min >> t.m_subtimerList; - - return min; -} } // namespace stk diff --git a/packages/stk/stk_util/stk_util/diag/PrintTimer.hpp b/packages/stk/stk_util/stk_util/diag/PrintTimer.hpp index fe381e7b2900..8743068c96c2 100644 --- a/packages/stk/stk_util/stk_util/diag/PrintTimer.hpp +++ b/packages/stk/stk_util/stk_util/diag/PrintTimer.hpp @@ -6,15 +6,15 @@ // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: -// +// // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. -// +// // * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following // disclaimer in the documentation and/or other materials provided // with the distribution. -// +// // * Neither the name of NTESS nor the names of its contributors // may be used to endorse or promote products derived from this // software without specific prior written permission. @@ -30,7 +30,7 @@ // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// +// #ifndef STK_UTIL_DIAG_PrintTimer_hpp #define STK_UTIL_DIAG_PrintTimer_hpp diff --git a/packages/stk/stk_util/stk_util/diag/Timer.cpp b/packages/stk/stk_util/stk_util/diag/Timer.cpp index ef85027841ba..2f43b4e378b7 100644 --- a/packages/stk/stk_util/stk_util/diag/Timer.cpp +++ b/packages/stk/stk_util/stk_util/diag/Timer.cpp @@ -6,15 +6,15 @@ // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: -// +// // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. -// +// // * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following // disclaimer in the documentation and/or other materials provided // with the distribution. -// +// // * Neither the name of NTESS nor the names of its contributors // may be used to endorse or promote products derived from this // software without specific prior written permission. @@ -30,13 +30,13 @@ // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// +// #include "stk_util/diag/Timer.hpp" +#include "stk_util/diag/TimerImpl.hpp" #include "stk_util/diag/WriterExt.hpp" // for operator<< #include "stk_util/stk_config.h" // for STK_HAS_MPI #include "stk_util/util/Writer.hpp" // for operator<<, Writer, dendl, pop, push -#include "stk_util/util/string_case_compare.hpp" // for equal_case #include // for find_if #include // for exception #include // for unary_function @@ -47,22 +47,8 @@ namespace stk { namespace diag { -namespace { - MetricsMask s_enabledMetricsMask = METRICS_LAP_COUNT | METRICS_CPU_TIME | METRICS_WALL_TIME; ///< Bit mask of enabled metrics -template -typename MetricTraits::Type -value_now() { - if (MetricTraits::METRIC & getEnabledTimerMetricsMask()) - return MetricTraits::value_now(); - else - return 0; -} - -} // namespace - - MetricsMask getEnabledTimerMetricsMask() { return s_enabledMetricsMask; @@ -75,284 +61,6 @@ setEnabledTimerMetricsMask( s_enabledMetricsMask = timer_mask | METRICS_LAP_COUNT; } - -/** - * Class TimerImpl is the core timer class. The Timer class is a - * wrapper around TimerImpl so that the buried references can be constructed more easily. - * - * Each timer has a lap counter, cpu timer, wall timer and other metrics. Each time a timer is - * started, the cpu start time, wall start time and other metrics, set to the process' current - * values. When the timer is stopped, the lap counter is incremented, and the cpu, wall, and other - * values are accumulated with the difference between now and the start time. - * - * Each timer may have a list of subordinate timers. The relationship is purely - * hierarchical in that a there is no timing relationship assumed between the timers other - * than the grouping. There is no relation between the starting and stopping of parent - * and subordinate timers. - * - * The subordinate timers are stored as pointers to a new timer on the heap, since the - * calling function will be receiving a reference to this memory which can never change - * location. The subordinate timers are not sorted in the list as they should very - * rarely be created or looked up by name, rather the calling function stores the - * reference via the Timer class. - * - */ -class TimerImpl -{ - friend class Timer; - -public: - static void updateRootTimer(TimerImpl *root_timer); - - static Timer createRootTimer(const std::string &name, const TimerSet &timer_set); - - static void deleteRootTimer(TimerImpl *root_timer); - - static void findTimer(TimerImpl *timer, std::vector &path_tail_vector, std::vector &found_timers); - -private: - /** - * Static function reg returns a reference to an existing timer or newly - * created timer of the specified name which is subordinate to the - * parent timer. - * - * @return a TimerImpl reference to the timer with the - * specified name that is subordinate to the - * parent timer. - */ - static TimerImpl *reg(const std::string &name, TimerMask timer_mask, TimerImpl *parent_timer, const TimerSet &timer_set) { - return parent_timer->addSubtimer(name, timer_mask, timer_set); - } - - /** - * Creates a new Timer instance. - * - * @param name a std::string const reference to the name of - * the timer. - * - */ - TimerImpl(const std::string &name, TimerMask timer_mask, TimerImpl *parent_timer, const TimerSet &timer_set); - - /** - * Destroys a TimerImpl instance. - * - */ - ~TimerImpl(); - - TimerImpl(const TimerImpl &TimerImpl); - TimerImpl &operator=(const TimerImpl &TimerImpl); - - /** - * Class finder is a binary predicate for finding a subordinate timer. - * - * Note that the subordinate timer is an unsorted list as there are very few timers - * created and should rarely be looked up by name. - */ -#ifdef __INTEL_COMPILER -#pragma warning(push) -#pragma warning(disable: 444) -#endif - class finder - { - public: - explicit finder(const std::string &name) - : m_name(name) - {} - - bool operator()(Timer timer) const { - return equal_case(timer.getName(), m_name); - } - - private: - std::string m_name; - }; -#ifdef __INTEL_COMPILER -#pragma warning(pop) -#endif - -public: - /** - * Member function getName returns the name of the timer. - * - * @return a std::string const reference to the timer's - * name. - */ - const std::string &getName() const { - return m_name; - } - - /** - * Member function getTimerMask returns the timer mask of the timer. - * - * @return a TimerMask value to the timer mask. - */ - TimerMask getTimerMask() const { - return m_timerMask; - } - - /** - * Member function getTimerSet returns the timer set of the timer. - * - * @return a TimerSet const reference to the timer set. - */ - const TimerSet &getTimerSet() const { - return m_timerSet; - } - - /** - * Member function shouldRecord returns true if any of the specified timer - * bit masks are set in the enable timer bit mask. - */ - bool shouldRecord() const { - return m_timerSet.shouldRecord(m_timerMask) && s_enabledMetricsMask; - } - - /** - * Member function getSubtimerLapCount returns the subtimer lap counter. - * - * @return a Counter value of the subtimer lap counter. - */ - double getSubtimerLapCount() const { - return m_subtimerLapCount; - } - - void setSubtimerLapCount(double value) { - m_subtimerLapCount = value; - } - - /** - * Member function getLapCount returns the lap counter metric. The lap - * count metric is the number of times the stop function has been executed. - * - * @return a CounterMetric const reference of the lap counter - * metric. - */ - template - const Timer::Metric &getMetric() const; - - /** - * Member function getTimerList returns the subtimers associated with - * this timer. - * - * @return a TimerList const reference to the sub - * time list. - */ - const TimerList &getTimerList() const { - return m_subtimerList; - } - - TimerList::iterator begin() { - return m_subtimerList.begin(); - } - - TimerList::const_iterator begin() const { - return m_subtimerList.begin(); - } - - TimerList::iterator end() { - return m_subtimerList.end(); - } - - TimerList::const_iterator end() const { - return m_subtimerList.end(); - } - - /** - * Member function reset resets the accumulated time and lap times. - * - */ - void reset(); - - /** - * Member function checkpoint checkpoints the timer and all subtimers. - * - */ - void checkpoint() const; - - /** - * Member function start sets the start timer. - * - * @return a TimerImpl reference to the timer. - */ - TimerImpl &start(); - - /** - * Member function lap sets the stop timer. - * - * @return a TimerImpl reference to the timer. - */ - TimerImpl &lap(); - - /** - * Member function stop sets the stop timer and sums the just completed lap - * time to the timer. - * - * @return a TimerImpl reference to the timer. - */ - TimerImpl &stop(); - - /** - * Member function accumulateSubtimerLapCounts sums the lap counter of all - * subordinate timers. This is used to determin which timers have been activated at all. - * - * @return an int value of the number of subordinate - * timer laps. - */ - double accumulateSubtimerLapCounts() const; - - Timer getSubtimer(const std::string &name); - -public: - /** - * Member function dump writes the timer to the specified - * diagnostic writer. - * - * @param dout a Writer variable reference to write the timer to. - * - * @return a Writer reference to dout. - */ - Writer &dump(Writer &dout) const; - -private: - /** - * Member function addSubtimer returns a reference to an existing or new - * subtimer with the specified name. - * - * @param name a std::string value of the timer's name. - * - * @param timer_mask a TimerMask value of the class of the timer. - * - * @return a TimerImpl reference to the timer with - * specified name. - */ - TimerImpl *addSubtimer(const std::string &name, TimerMask timer_mask, const TimerSet &timer_set); - TimerImpl & child_notifies_of_start(); - TimerImpl & child_notifies_of_stop(); - -private: - std::string m_name; ///< Name of the timer - TimerMask m_timerMask; ///< Bit mask to enable timer - TimerImpl * m_parentTimer; ///< Parent timer - mutable double m_subtimerLapCount; ///< Sum of subtimer lap counts and m_lapCount - unsigned m_lapStartCount; ///< Number of pending lap stops - unsigned m_activeChildCount; ///< How many children timers have been started - bool m_childCausedStart; ///< Was this timer started because a child was started? - - TimerList m_subtimerList; ///< List of subordinate timers - - const TimerSet & m_timerSet; ///< Timer enabled mask - Timer::Metric m_lapCount; ///< Number of laps accumulated - Timer::Metric m_cpuTime; ///< CPU time - Timer::Metric m_wallTime; ///< Wall time - Timer::Metric m_MPICount; ///< MPI call count - Timer::Metric m_MPIByteCount; ///< MPI byte count - Timer::Metric m_heapAlloc; ///< Heap allocated -}; - -inline Writer &operator<<(Writer &dout, const TimerImpl &timer) { - return timer.dump(dout); -} - void updateRootTimer( Timer root_timer) @@ -379,321 +87,6 @@ deleteRootTimer( } -TimerImpl::TimerImpl( - const std::string & name, - TimerMask timer_mask, - TimerImpl * parent_timer, - const TimerSet & timer_set) - : m_name(name), - m_timerMask(timer_mask), - m_parentTimer(parent_timer), - m_subtimerLapCount(0.0), - m_lapStartCount(0), - m_activeChildCount(0), - m_childCausedStart(false), - m_subtimerList(), - m_timerSet(timer_set) -{} - - -TimerImpl::~TimerImpl() -{ - try { - for (TimerList::iterator it = m_subtimerList.begin(); it != m_subtimerList.end(); ++it) - delete (*it).m_timerImpl; - } - catch (std::exception &) { - } -} - - -template<> -const Timer::Metric & -TimerImpl::getMetric() const { - return m_lapCount; -} - - -template<> -const Timer::Metric & -TimerImpl::getMetric() const { - return m_cpuTime; -} - - -template<> -const Timer::Metric & -TimerImpl::getMetric() const { - return m_wallTime; -} - - -template<> -const Timer::Metric & -TimerImpl::getMetric() const { - return m_MPICount; -} - - -template<> -const Timer::Metric & -TimerImpl::getMetric() const { - return m_MPIByteCount; -} - - -template<> -const Timer::Metric & -TimerImpl::getMetric() const { - return m_heapAlloc; -} - - -void -TimerImpl::reset() -{ - m_lapStartCount = 0; - m_childCausedStart = false; - m_activeChildCount = 0; - - m_lapCount.reset(); - m_cpuTime.reset(); - m_wallTime.reset(); - m_MPICount.reset(); - m_MPIByteCount.reset(); - m_heapAlloc.reset(); -} - - -Timer -TimerImpl::getSubtimer( - const std::string & name) -{ - TimerList::iterator it = std::find_if(m_subtimerList.begin(), m_subtimerList.end(), finder(name)); - - if (it == m_subtimerList.end()) - throw std::runtime_error("Timer not found"); - else - return *it; -} - - -TimerImpl * -TimerImpl::addSubtimer( - const std::string & name, - TimerMask timer_mask, - const TimerSet & timer_set) -{ - TimerList::iterator it = std::find_if(m_subtimerList.begin(), m_subtimerList.end(), finder(name)); - - if (it == m_subtimerList.end()) { - TimerImpl *timer_impl = new TimerImpl(name, timer_mask, this, timer_set); - m_subtimerList.push_back(Timer(timer_impl)); - return timer_impl; - } - else - return (*it).m_timerImpl; -} - - -TimerImpl & -TimerImpl::start() -{ - if (shouldRecord()) { - if (m_lapStartCount == 0) { - ++m_lapStartCount; - m_lapCount.m_lapStart = m_lapCount.m_lapStop; - - m_cpuTime.m_lapStop = m_cpuTime.m_lapStart = value_now(); - m_wallTime.m_lapStop = m_wallTime.m_lapStart = value_now(); - m_MPICount.m_lapStop = m_MPICount.m_lapStart = value_now(); - m_MPIByteCount.m_lapStop = m_MPIByteCount.m_lapStart = value_now(); - m_heapAlloc.m_lapStop = m_heapAlloc.m_lapStart = value_now(); - if(m_parentTimer) - m_parentTimer->child_notifies_of_start(); - } - } - - return *this; -} - - -TimerImpl & -TimerImpl::lap() -{ - if (shouldRecord()) { - if (m_lapStartCount > 0) { - m_cpuTime.m_lapStop = value_now(); - m_wallTime.m_lapStop = value_now(); - m_MPICount.m_lapStop = value_now(); - m_MPIByteCount.m_lapStop = value_now(); - m_heapAlloc.m_lapStop = value_now(); - } - } - - return *this; -} - -TimerImpl & TimerImpl::child_notifies_of_start() -{ - //Start only if not already started and this isn't a root timer - if(m_lapStartCount == 0 && m_parentTimer) - { - start(); - m_childCausedStart = true; - } - m_activeChildCount++; - - return *this; -} - -TimerImpl & TimerImpl::child_notifies_of_stop() -{ - m_activeChildCount--; - if(m_activeChildCount == 0 && m_childCausedStart) - { - stop(); - } - return *this; -} - -TimerImpl & -TimerImpl::stop() -{ - if (shouldRecord()) { - if (m_lapStartCount > 0) { - m_lapStartCount = 0; - m_lapCount.m_lapStop++; - m_childCausedStart = false; - m_activeChildCount = 0; - - m_cpuTime.m_lapStop = value_now(); - m_wallTime.m_lapStop = value_now(); - m_MPICount.m_lapStop = value_now(); - m_MPIByteCount.m_lapStop = value_now(); - m_heapAlloc.m_lapStop = value_now(); - - m_lapCount.addLap(); - m_cpuTime.addLap(); - m_wallTime.addLap(); - m_MPICount.addLap(); - m_MPIByteCount.addLap(); - m_heapAlloc.addLap(); - if(m_parentTimer) - m_parentTimer->child_notifies_of_stop(); - } - } - - return *this; -} - - -double -TimerImpl::accumulateSubtimerLapCounts() const -{ - m_subtimerLapCount = m_lapCount.getAccumulatedLap(false); - - for (TimerList::const_iterator it = m_subtimerList.begin(); it != m_subtimerList.end(); ++it) - (*it).m_timerImpl->accumulateSubtimerLapCounts(); - - for (TimerList::const_iterator it = m_subtimerList.begin(); it != m_subtimerList.end(); ++it) - m_subtimerLapCount += (*it).m_timerImpl->m_subtimerLapCount; - - return m_subtimerLapCount; -} - - -void -TimerImpl::checkpoint() const -{ - m_lapCount.checkpoint(); - m_cpuTime.checkpoint(); - m_wallTime.checkpoint(); - m_MPICount.checkpoint(); - m_MPIByteCount.checkpoint(); - m_heapAlloc.checkpoint(); - - for (TimerList::const_iterator it = m_subtimerList.begin(); it != m_subtimerList.end(); ++it) - (*it).m_timerImpl->checkpoint(); -} - - -void -TimerImpl::updateRootTimer(TimerImpl *root_timer) -{ - root_timer->m_lapCount.m_lapStop = value_now(); - root_timer->m_cpuTime.m_lapStop = value_now(); - root_timer->m_wallTime.m_lapStop = value_now(); - root_timer->m_MPICount.m_lapStop = value_now(); - root_timer->m_MPIByteCount.m_lapStop = value_now(); - root_timer->m_heapAlloc.m_lapStop = value_now(); - - root_timer->m_lapCount.m_accumulatedLap = root_timer->m_lapCount.m_lapStop - root_timer->m_lapCount.m_lapStart; - root_timer->m_cpuTime.m_accumulatedLap = root_timer->m_cpuTime.m_lapStop - root_timer->m_cpuTime.m_lapStart; - root_timer->m_wallTime.m_accumulatedLap = root_timer->m_wallTime.m_lapStop - root_timer->m_wallTime.m_lapStart; - root_timer->m_MPICount.m_accumulatedLap = root_timer->m_MPICount.m_lapStop - root_timer->m_MPICount.m_lapStart; - root_timer->m_MPIByteCount.m_accumulatedLap = root_timer->m_MPIByteCount.m_lapStop - root_timer->m_MPIByteCount.m_lapStart; - root_timer->m_heapAlloc.m_accumulatedLap = root_timer->m_heapAlloc.m_lapStop - root_timer->m_heapAlloc.m_lapStart; -} - - - -Timer -TimerImpl::createRootTimer( - const std::string & name, - const TimerSet & timer_set) -{ - TimerImpl *timer_impl = new TimerImpl(name, 0, 0, timer_set); - return Timer(timer_impl); -} - - -void -TimerImpl::deleteRootTimer( - TimerImpl * root_timer) -{ - delete root_timer; -} - - -void -TimerImpl::findTimer( - TimerImpl * timer, - std::vector & path_tail_vector, - std::vector & found_timers) -{ - if (timer->begin() == timer->end()) { // at leaf - } - else - for (TimerList::const_iterator it = timer->begin(); it != timer->end(); ++it) - findTimer((*it).m_timerImpl, path_tail_vector, found_timers); -} - - -Writer & -TimerImpl::dump( - Writer & dout) const -{ - if (dout.shouldPrint()) { - dout << "TimerImpl" << push << dendl; - dout << "m_name, " << m_name << dendl; - dout << "m_timerMask, " << m_timerMask << dendl; - dout << "m_subtimerLapCount, " << m_subtimerLapCount << dendl; - dout << "m_lapStartCount, " << m_lapStartCount << dendl; - - dout << "m_lapCount, " << m_lapCount << dendl; - dout << "m_cpuTime, " << m_cpuTime << dendl; - dout << "m_wallTime, " << m_wallTime << dendl; - dout << "m_MPICount, " << m_MPICount << dendl; - dout << "m_MPIByteCount, " << m_MPIByteCount << dendl; - dout << "m_heapAlloc, " << m_heapAlloc << dendl; - - dout << "m_subtimerList, " << m_subtimerList << dendl; - dout << pop; - } - - return dout; -} Timer::~Timer() {} @@ -765,25 +158,25 @@ Timer::begin() { return m_timerImpl->begin(); } - + TimerList::const_iterator Timer::begin() const { return m_timerImpl->begin(); } - + TimerList::iterator Timer::end() { return m_timerImpl->end(); } - + TimerList::const_iterator Timer::end() const { return m_timerImpl->end(); } - + double Timer::accumulateSubtimerLapCounts() const { return m_timerImpl->accumulateSubtimerLapCounts(); @@ -891,9 +284,9 @@ TimeBlockSynchronized::stop() namespace sierra { namespace Diag { -// +// // SierraRootTimer member functions: -// +// SierraRootTimer::SierraRootTimer() : m_sierraTimer(stk::diag::createRootTimer("Sierra", sierraTimerSet())) { } @@ -1057,14 +450,14 @@ TimerParser::parse( m_metricsSetMask = 0; m_metricsMask = 0; m_optionMask = getEnabledTimerMask(); - + m_optionMask = OptionMaskParser::parse(option_mask); setEnabledTimerMask(m_optionMask); - + if (m_metricsSetMask != 0) stk::diag::setEnabledTimerMetricsMask(m_metricsMask); - + return m_optionMask; } diff --git a/packages/stk/stk_util/stk_util/diag/Timer.hpp b/packages/stk/stk_util/stk_util/diag/Timer.hpp index 466c06d52e75..f4f9c391d4e0 100644 --- a/packages/stk/stk_util/stk_util/diag/Timer.hpp +++ b/packages/stk/stk_util/stk_util/diag/Timer.hpp @@ -6,15 +6,15 @@ // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: -// +// // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. -// +// // * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following // disclaimer in the documentation and/or other materials provided // with the distribution. -// +// // * Neither the name of NTESS nor the names of its contributors // may be used to endorse or promote products derived from this // software without specific prior written permission. @@ -30,7 +30,7 @@ // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// +// #ifndef STK_UTIL_DIAG_Timer_hpp #define STK_UTIL_DIAG_Timer_hpp @@ -38,6 +38,7 @@ #include "stk_util/diag/Option.hpp" // for OptionMask, OptionMaskParser, OptionMaskP... #include "stk_util/diag/TimerMetricTraits.hpp" // for MetricTraits, MetricsMask, CPUTime (ptr o... #include "stk_util/environment/FormatTime.hpp" // for TimeFormat +#include "stk_util/util/Marshal.hpp" #include "stk_util/parallel/Parallel.hpp" // for ParallelMachine, ompi_communicator_t #include // for size_t #include // for list @@ -182,6 +183,7 @@ class Timer friend class TimerImpl; friend class TimeBlock; friend class TimeBlockSynchronized; + friend class TimerTester; friend void updateRootTimer(Timer); friend Timer createRootTimer(const std::string &, const TimerSet &); friend void deleteRootTimer(Timer); @@ -469,6 +471,53 @@ class Timer TimerImpl * m_timerImpl; ///< Reference to the actual timer }; +template +Marshal &operator<<(Marshal &mout, const Timer::Metric &t) { + mout << t.getAccumulatedLap(false) << t.getAccumulatedLap(true); + + return mout; +} + +inline Marshal &operator<<(Marshal &mout, const Timer &t) { + mout << t.getName() << t.getTimerMask() << t.getSubtimerLapCount() + << t.getMetric() << t.getMetric() << t.getMetric() + << t.getMetric() << t.getMetric() << t.getMetric(); + + mout << t.getTimerList(); + + return mout; +} + +/** + * @brief Function operator<< writes a timer to the diagnostic stream. + * + * @param dout a Writer reference to the diagnostic writer to print + * to. + * + * @param timer a Timer::Metric const reference to the timer + * to print. + * + * @return a Writer reference to dout. + */ +template +inline Writer &operator<<(Writer &dout, const Timer::Metric &timer) { + return timer.dump(dout); +} + +/** + * Function operator<< writes a timer metric to the diagnostic stream. + * + * @param dout a Writer reference to the diagnostic writer to print + * to. + * + * @param timer a Timer::Metric const reference to the timer + * to print. + * + * @return a Writer reference to dout. + */ +inline Writer &operator<<(Writer &dout, const Timer &timer) { + return timer.dump(dout); +} /** @@ -604,36 +653,6 @@ class TimeBlockSynchronized }; -/** - * @brief Function operator<< writes a timer to the diagnostic stream. - * - * @param dout a Writer reference to the diagnostic writer to print - * to. - * - * @param timer a Timer::Metric const reference to the timer - * to print. - * - * @return a Writer reference to dout. - */ -template -inline Writer &operator<<(Writer &dout, const Timer::Metric &timer) { - return timer.dump(dout); -} - -/** - * Function operator<< writes a timer metric to the diagnostic stream. - * - * @param dout a Writer reference to the diagnostic writer to print - * to. - * - * @param timer a Timer::Metric const reference to the timer - * to print. - * - * @return a Writer reference to dout. - */ -inline Writer &operator<<(Writer &dout, const Timer &timer) { - return timer.dump(dout); -} } // namespace diag } // namespace stk @@ -780,14 +799,14 @@ class TimerParser : public OptionMaskParser * @param arg a std::string const reference to the argument * values. */ - virtual void parseArg(const std::string &name, const std::string &arg) const; + virtual void parseArg(const std::string &name, const std::string &arg) const; mutable stk::diag::MetricsMask m_metricsSetMask; mutable stk::diag::MetricsMask m_metricsMask; }; -class SierraRootTimer +class SierraRootTimer { public: SierraRootTimer(); @@ -795,7 +814,7 @@ class SierraRootTimer stk::diag::Timer & sierraTimer(); private: - stk::diag::Timer m_sierraTimer; + stk::diag::Timer m_sierraTimer; }; } // namespace Diag diff --git a/packages/stk/stk_util/stk_util/diag/TimerImpl.cpp b/packages/stk/stk_util/stk_util/diag/TimerImpl.cpp new file mode 100644 index 000000000000..39181c702ab5 --- /dev/null +++ b/packages/stk/stk_util/stk_util/diag/TimerImpl.cpp @@ -0,0 +1,333 @@ +// Copyright 2002 - 2008, 2010, 2011 National Technology Engineering +// Solutions of Sandia, LLC (NTESS). Under the terms of Contract +// DE-NA0003525 with NTESS, the U.S. Government retains certain rights +// in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// * Neither the name of NTESS nor the names of its contributors +// may be used to endorse or promote products derived from this +// software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// + +#include "stk_util/diag/TimerImpl.hpp" +#include "stk_util/diag/Timer.hpp" + +namespace stk::diag { + +namespace { + +template +typename MetricTraits::Type +value_now() { + if (MetricTraits::METRIC & getEnabledTimerMetricsMask()) + return MetricTraits::value_now(); + else + return 0; +} + +} // namespace + + +TimerImpl::TimerImpl( + const std::string & name, + TimerMask timer_mask, + TimerImpl * parent_timer, + const TimerSet & timer_set) + : m_name(name), + m_timerMask(timer_mask), + m_parentTimer(parent_timer), + m_subtimerLapCount(0.0), + m_lapStartCount(0), + m_activeChildCount(0), + m_childCausedStart(false), + m_subtimerList(), + m_timerSet(timer_set) +{} + + +TimerImpl::~TimerImpl() +{ + try { + for (TimerList::iterator it = m_subtimerList.begin(); it != m_subtimerList.end(); ++it) + delete (*it).m_timerImpl; + } + catch (std::exception &) { + } +} + +bool TimerImpl::shouldRecord() const { + return m_timerSet.shouldRecord(m_timerMask) && getEnabledTimerMetricsMask(); +} + +void +TimerImpl::reset() +{ + m_lapStartCount = 0; + m_childCausedStart = false; + m_activeChildCount = 0; + + m_lapCount.reset(); + m_cpuTime.reset(); + m_wallTime.reset(); + m_MPICount.reset(); + m_MPIByteCount.reset(); + m_heapAlloc.reset(); +} + + +Timer +TimerImpl::getSubtimer( + const std::string & name) +{ + TimerList::iterator it = std::find_if(m_subtimerList.begin(), m_subtimerList.end(), finder(name)); + + if (it == m_subtimerList.end()) + throw std::runtime_error("Timer not found"); + else + return *it; +} + + +TimerImpl * +TimerImpl::addSubtimer( + const std::string & name, + TimerMask timer_mask, + const TimerSet & timer_set) +{ + TimerList::iterator it = std::find_if(m_subtimerList.begin(), m_subtimerList.end(), finder(name)); + + if (it == m_subtimerList.end()) { + TimerImpl *timer_impl = new TimerImpl(name, timer_mask, this, timer_set); + m_subtimerList.push_back(Timer(timer_impl)); + return timer_impl; + } + else + return (*it).m_timerImpl; +} + + +TimerImpl & +TimerImpl::start() +{ + if (shouldRecord()) { + if (m_lapStartCount == 0) { + ++m_lapStartCount; + m_lapCount.m_lapStart = m_lapCount.m_lapStop; + + m_cpuTime.m_lapStop = m_cpuTime.m_lapStart = value_now(); + m_wallTime.m_lapStop = m_wallTime.m_lapStart = value_now(); + m_MPICount.m_lapStop = m_MPICount.m_lapStart = value_now(); + m_MPIByteCount.m_lapStop = m_MPIByteCount.m_lapStart = value_now(); + m_heapAlloc.m_lapStop = m_heapAlloc.m_lapStart = value_now(); + if(m_parentTimer) + m_parentTimer->child_notifies_of_start(); + } + } + + return *this; +} + + +TimerImpl & +TimerImpl::lap() +{ + if (shouldRecord()) { + if (m_lapStartCount > 0) { + m_cpuTime.m_lapStop = value_now(); + m_wallTime.m_lapStop = value_now(); + m_MPICount.m_lapStop = value_now(); + m_MPIByteCount.m_lapStop = value_now(); + m_heapAlloc.m_lapStop = value_now(); + } + } + + return *this; +} + +TimerImpl & TimerImpl::child_notifies_of_start() +{ + //Start only if not already started and this isn't a root timer + if(m_lapStartCount == 0 && m_parentTimer) + { + start(); + m_childCausedStart = true; + } + m_activeChildCount++; + + return *this; +} + +TimerImpl & TimerImpl::child_notifies_of_stop() +{ + m_activeChildCount--; + if(m_activeChildCount == 0 && m_childCausedStart) + { + stop(); + } + return *this; +} + +TimerImpl & +TimerImpl::stop() +{ + if (shouldRecord()) { + if (m_lapStartCount > 0) { + m_lapStartCount = 0; + m_lapCount.m_lapStop++; + m_childCausedStart = false; + m_activeChildCount = 0; + + m_cpuTime.m_lapStop = value_now(); + m_wallTime.m_lapStop = value_now(); + m_MPICount.m_lapStop = value_now(); + m_MPIByteCount.m_lapStop = value_now(); + m_heapAlloc.m_lapStop = value_now(); + + m_lapCount.addLap(); + m_cpuTime.addLap(); + m_wallTime.addLap(); + m_MPICount.addLap(); + m_MPIByteCount.addLap(); + m_heapAlloc.addLap(); + if(m_parentTimer) + m_parentTimer->child_notifies_of_stop(); + } + } + + return *this; +} + + +double +TimerImpl::accumulateSubtimerLapCounts() const +{ + m_subtimerLapCount = m_lapCount.getAccumulatedLap(false); + + for (TimerList::const_iterator it = m_subtimerList.begin(); it != m_subtimerList.end(); ++it) + (*it).m_timerImpl->accumulateSubtimerLapCounts(); + + for (TimerList::const_iterator it = m_subtimerList.begin(); it != m_subtimerList.end(); ++it) + m_subtimerLapCount += (*it).m_timerImpl->m_subtimerLapCount; + + return m_subtimerLapCount; +} + + +void +TimerImpl::checkpoint() const +{ + m_lapCount.checkpoint(); + m_cpuTime.checkpoint(); + m_wallTime.checkpoint(); + m_MPICount.checkpoint(); + m_MPIByteCount.checkpoint(); + m_heapAlloc.checkpoint(); + + for (TimerList::const_iterator it = m_subtimerList.begin(); it != m_subtimerList.end(); ++it) + (*it).m_timerImpl->checkpoint(); +} + + +void +TimerImpl::updateRootTimer(TimerImpl *root_timer) +{ + root_timer->m_lapCount.m_lapStop = value_now(); + root_timer->m_cpuTime.m_lapStop = value_now(); + root_timer->m_wallTime.m_lapStop = value_now(); + root_timer->m_MPICount.m_lapStop = value_now(); + root_timer->m_MPIByteCount.m_lapStop = value_now(); + root_timer->m_heapAlloc.m_lapStop = value_now(); + + root_timer->m_lapCount.m_accumulatedLap = root_timer->m_lapCount.m_lapStop - root_timer->m_lapCount.m_lapStart; + root_timer->m_cpuTime.m_accumulatedLap = root_timer->m_cpuTime.m_lapStop - root_timer->m_cpuTime.m_lapStart; + root_timer->m_wallTime.m_accumulatedLap = root_timer->m_wallTime.m_lapStop - root_timer->m_wallTime.m_lapStart; + root_timer->m_MPICount.m_accumulatedLap = root_timer->m_MPICount.m_lapStop - root_timer->m_MPICount.m_lapStart; + root_timer->m_MPIByteCount.m_accumulatedLap = root_timer->m_MPIByteCount.m_lapStop - root_timer->m_MPIByteCount.m_lapStart; + root_timer->m_heapAlloc.m_accumulatedLap = root_timer->m_heapAlloc.m_lapStop - root_timer->m_heapAlloc.m_lapStart; +} + + + +Timer +TimerImpl::createRootTimer( + const std::string & name, + const TimerSet & timer_set) +{ + TimerImpl *timer_impl = new TimerImpl(name, 0, 0, timer_set); + return Timer(timer_impl); +} + + +void +TimerImpl::deleteRootTimer( + TimerImpl * root_timer) +{ + delete root_timer; +} + + +void +TimerImpl::findTimer( + TimerImpl * timer, + std::vector & path_tail_vector, + std::vector & found_timers) +{ + if (timer->begin() == timer->end()) { // at leaf + } + else + for (TimerList::const_iterator it = timer->begin(); it != timer->end(); ++it) + findTimer((*it).m_timerImpl, path_tail_vector, found_timers); +} + + +Writer & +TimerImpl::dump( + Writer & dout) const +{ + if (dout.shouldPrint()) { + dout << "TimerImpl" << push << dendl; + dout << "m_name, " << m_name << dendl; + dout << "m_timerMask, " << m_timerMask << dendl; + dout << "m_subtimerLapCount, " << m_subtimerLapCount << dendl; + dout << "m_lapStartCount, " << m_lapStartCount << dendl; + + dout << "m_lapCount, " << m_lapCount << dendl; + dout << "m_cpuTime, " << m_cpuTime << dendl; + dout << "m_wallTime, " << m_wallTime << dendl; + dout << "m_MPICount, " << m_MPICount << dendl; + dout << "m_MPIByteCount, " << m_MPIByteCount << dendl; + dout << "m_heapAlloc, " << m_heapAlloc << dendl; + + dout << "m_subtimerList, " << m_subtimerList << dendl; + dout << pop; + } + + return dout; +} + + + +} \ No newline at end of file diff --git a/packages/stk/stk_util/stk_util/diag/TimerImpl.hpp b/packages/stk/stk_util/stk_util/diag/TimerImpl.hpp new file mode 100644 index 000000000000..e17493e51d5f --- /dev/null +++ b/packages/stk/stk_util/stk_util/diag/TimerImpl.hpp @@ -0,0 +1,370 @@ +// Copyright 2002 - 2008, 2010, 2011 National Technology Engineering +// Solutions of Sandia, LLC (NTESS). Under the terms of Contract +// DE-NA0003525 with NTESS, the U.S. Government retains certain rights +// in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// * Neither the name of NTESS nor the names of its contributors +// may be used to endorse or promote products derived from this +// software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// + +#ifndef STK_UTIL_DIAG_TimerImpl_hpp +#define STK_UTIL_DIAG_TimerImpl_hpp + +#include "stk_util/diag/TimerMetricTraits.hpp" +#include "stk_util/util/string_case_compare.hpp" // for equal_case +#include "stk_util/diag/Timer.hpp" +#include "stk_util/util/Writer.hpp" // for operator<<, Writer, dendl, pop, push +#include "stk_util/diag/WriterExt.hpp" // for operator<< + + +namespace stk::diag { + + +/** + * Class TimerImpl is the core timer class. The Timer class is a + * wrapper around TimerImpl so that the buried references can be constructed more easily. + * + * Each timer has a lap counter, cpu timer, wall timer and other metrics. Each time a timer is + * started, the cpu start time, wall start time and other metrics, set to the process' current + * values. When the timer is stopped, the lap counter is incremented, and the cpu, wall, and other + * values are accumulated with the difference between now and the start time. + * + * Each timer may have a list of subordinate timers. The relationship is purely + * hierarchical in that a there is no timing relationship assumed between the timers other + * than the grouping. There is no relation between the starting and stopping of parent + * and subordinate timers. + * + * The subordinate timers are stored as pointers to a new timer on the heap, since the + * calling function will be receiving a reference to this memory which can never change + * location. The subordinate timers are not sorted in the list as they should very + * rarely be created or looked up by name, rather the calling function stores the + * reference via the Timer class. + * + */ +class TimerImpl +{ + friend class Timer; + friend class TimerTester; + +public: + static void updateRootTimer(TimerImpl *root_timer); + + static Timer createRootTimer(const std::string &name, const TimerSet &timer_set); + + static void deleteRootTimer(TimerImpl *root_timer); + + static void findTimer(TimerImpl *timer, std::vector &path_tail_vector, std::vector &found_timers); + +private: + /** + * Static function reg returns a reference to an existing timer or newly + * created timer of the specified name which is subordinate to the + * parent timer. + * + * @return a TimerImpl reference to the timer with the + * specified name that is subordinate to the + * parent timer. + */ + static TimerImpl *reg(const std::string &name, TimerMask timer_mask, TimerImpl *parent_timer, const TimerSet &timer_set) { + return parent_timer->addSubtimer(name, timer_mask, timer_set); + } + + /** + * Creates a new Timer instance. + * + * @param name a std::string const reference to the name of + * the timer. + * + */ + TimerImpl(const std::string &name, TimerMask timer_mask, TimerImpl *parent_timer, const TimerSet &timer_set); + + /** + * Destroys a TimerImpl instance. + * + */ + ~TimerImpl(); + + TimerImpl(const TimerImpl &TimerImpl); + TimerImpl &operator=(const TimerImpl &TimerImpl); + + /** + * Class finder is a binary predicate for finding a subordinate timer. + * + * Note that the subordinate timer is an unsorted list as there are very few timers + * created and should rarely be looked up by name. + */ +#ifdef __INTEL_COMPILER +#pragma warning(push) +#pragma warning(disable: 444) +#endif + class finder + { + public: + explicit finder(const std::string &name) + : m_name(name) + {} + + bool operator()(Timer timer) const { + return equal_case(timer.getName(), m_name); + } + + private: + std::string m_name; + }; +#ifdef __INTEL_COMPILER +#pragma warning(pop) +#endif + +public: + /** + * Member function getName returns the name of the timer. + * + * @return a std::string const reference to the timer's + * name. + */ + const std::string &getName() const { + return m_name; + } + + /** + * Member function getTimerMask returns the timer mask of the timer. + * + * @return a TimerMask value to the timer mask. + */ + TimerMask getTimerMask() const { + return m_timerMask; + } + + /** + * Member function getTimerSet returns the timer set of the timer. + * + * @return a TimerSet const reference to the timer set. + */ + const TimerSet &getTimerSet() const { + return m_timerSet; + } + + /** + * Member function shouldRecord returns true if any of the specified timer + * bit masks are set in the enable timer bit mask. + */ + bool shouldRecord() const; + + /** + * Member function getSubtimerLapCount returns the subtimer lap counter. + * + * @return a Counter value of the subtimer lap counter. + */ + double getSubtimerLapCount() const { + return m_subtimerLapCount; + } + + void setSubtimerLapCount(double value) { + m_subtimerLapCount = value; + } + + /** + * Member function getLapCount returns the lap counter metric. The lap + * count metric is the number of times the stop function has been executed. + * + * @return a CounterMetric const reference of the lap counter + * metric. + */ + template + const Timer::Metric &getMetric() const; + + /** + * Member function getTimerList returns the subtimers associated with + * this timer. + * + * @return a TimerList const reference to the sub + * time list. + */ + const TimerList &getTimerList() const { + return m_subtimerList; + } + + TimerList::iterator begin() { + return m_subtimerList.begin(); + } + + TimerList::const_iterator begin() const { + return m_subtimerList.begin(); + } + + TimerList::iterator end() { + return m_subtimerList.end(); + } + + TimerList::const_iterator end() const { + return m_subtimerList.end(); + } + + /** + * Member function reset resets the accumulated time and lap times. + * + */ + void reset(); + + /** + * Member function checkpoint checkpoints the timer and all subtimers. + * + */ + void checkpoint() const; + + /** + * Member function start sets the start timer. + * + * @return a TimerImpl reference to the timer. + */ + TimerImpl &start(); + + /** + * Member function lap sets the stop timer. + * + * @return a TimerImpl reference to the timer. + */ + TimerImpl &lap(); + + /** + * Member function stop sets the stop timer and sums the just completed lap + * time to the timer. + * + * @return a TimerImpl reference to the timer. + */ + TimerImpl &stop(); + + /** + * Member function accumulateSubtimerLapCounts sums the lap counter of all + * subordinate timers. This is used to determin which timers have been activated at all. + * + * @return an int value of the number of subordinate + * timer laps. + */ + double accumulateSubtimerLapCounts() const; + + Timer getSubtimer(const std::string &name); + +public: + /** + * Member function dump writes the timer to the specified + * diagnostic writer. + * + * @param dout a Writer variable reference to write the timer to. + * + * @return a Writer reference to dout. + */ + Writer &dump(Writer &dout) const; + +private: + /** + * Member function addSubtimer returns a reference to an existing or new + * subtimer with the specified name. + * + * @param name a std::string value of the timer's name. + * + * @param timer_mask a TimerMask value of the class of the timer. + * + * @return a TimerImpl reference to the timer with + * specified name. + */ + TimerImpl *addSubtimer(const std::string &name, TimerMask timer_mask, const TimerSet &timer_set); + TimerImpl & child_notifies_of_start(); + TimerImpl & child_notifies_of_stop(); + +private: + std::string m_name; ///< Name of the timer + TimerMask m_timerMask; ///< Bit mask to enable timer + TimerImpl * m_parentTimer; ///< Parent timer + mutable double m_subtimerLapCount; ///< Sum of subtimer lap counts and m_lapCount + unsigned m_lapStartCount; ///< Number of pending lap stops + unsigned m_activeChildCount; ///< How many children timers have been started + bool m_childCausedStart; ///< Was this timer started because a child was started? + + TimerList m_subtimerList; ///< List of subordinate timers + + const TimerSet & m_timerSet; ///< Timer enabled mask + Timer::Metric m_lapCount; ///< Number of laps accumulated + Timer::Metric m_cpuTime; ///< CPU time + Timer::Metric m_wallTime; ///< Wall time + Timer::Metric m_MPICount; ///< MPI call count + Timer::Metric m_MPIByteCount; ///< MPI byte count + Timer::Metric m_heapAlloc; ///< Heap allocated +}; + +inline Writer &operator<<(Writer &dout, const TimerImpl &timer) { + return timer.dump(dout); +} + + + +template<> +inline const Timer::Metric & +TimerImpl::getMetric() const { + return m_lapCount; +} + + +template<> +inline const Timer::Metric & +TimerImpl::getMetric() const { + return m_cpuTime; +} + + +template<> +inline const Timer::Metric & +TimerImpl::getMetric() const { + return m_wallTime; +} + + +template<> +inline const Timer::Metric & +TimerImpl::getMetric() const { + return m_MPICount; +} + + +template<> +inline const Timer::Metric & +TimerImpl::getMetric() const { + return m_MPIByteCount; +} + + +template<> +inline const Timer::Metric & +TimerImpl::getMetric() const { + return m_heapAlloc; +} + + +} + +#endif \ No newline at end of file diff --git a/packages/stk/stk_util/stk_util/environment/EnvData.cpp b/packages/stk/stk_util/stk_util/environment/EnvData.cpp index b09aff4f1650..6a27223777d0 100644 --- a/packages/stk/stk_util/stk_util/environment/EnvData.cpp +++ b/packages/stk/stk_util/stk_util/environment/EnvData.cpp @@ -63,7 +63,6 @@ namespace stk { m_inputFileRequired(true), m_checkSubCycle(false), m_checkSmRegion(false), - m_isZapotec(false), m_worldComm(MPI_COMM_NULL), m_parallelComm(MPI_COMM_NULL), m_parallelSize(-1), diff --git a/packages/stk/stk_util/stk_util/environment/EnvData.hpp b/packages/stk/stk_util/stk_util/environment/EnvData.hpp index 21ba461baa46..7afce19069a5 100644 --- a/packages/stk/stk_util/stk_util/environment/EnvData.hpp +++ b/packages/stk/stk_util/stk_util/environment/EnvData.hpp @@ -108,7 +108,6 @@ struct EnvData bool m_inputFileRequired; bool m_checkSubCycle; bool m_checkSmRegion; - bool m_isZapotec; MPI_Comm m_worldComm; diff --git a/packages/stk/stk_util/stk_util/environment/Scheduler.cpp b/packages/stk/stk_util/stk_util/environment/Scheduler.cpp index 94c49b1a56df..00d1837b7ea4 100644 --- a/packages/stk/stk_util/stk_util/environment/Scheduler.cpp +++ b/packages/stk/stk_util/stk_util/environment/Scheduler.cpp @@ -196,7 +196,7 @@ bool Scheduler::internal_is_it_time(Time time) // called multiple times with the same argument, it will return the // same response. - assert(time >= lastTime_); + STK_ThrowAssertMsg(time >= lastTime_, "time = " << time << ", lastTime_ = " << lastTime_); // If this is a restart, then calculate what the lastTime_ setting would // have been for this scheduler (based only on start time and deltas). diff --git a/packages/stk/stk_util/stk_util/ngp/NgpSpaces.hpp b/packages/stk/stk_util/stk_util/ngp/NgpSpaces.hpp index c0dc9a2d8f34..12bc0522d186 100644 --- a/packages/stk/stk_util/stk_util/ngp/NgpSpaces.hpp +++ b/packages/stk/stk_util/stk_util/ngp/NgpSpaces.hpp @@ -72,6 +72,8 @@ using MemSpace = Kokkos::HIPSpace; using MemSpace = ExecSpace::memory_space; #endif +using HostMemSpace = HostExecSpace::memory_space; + #ifdef KOKKOS_ENABLE_HIP template using RangePolicy = Kokkos::RangePolicy>; diff --git a/packages/stk/stk_util/stk_util/registry/ProductRegistry.cpp b/packages/stk/stk_util/stk_util/registry/ProductRegistry.cpp index c2b9f9ded275..fe5e52134cdd 100644 --- a/packages/stk/stk_util/stk_util/registry/ProductRegistry.cpp +++ b/packages/stk/stk_util/stk_util/registry/ProductRegistry.cpp @@ -42,7 +42,7 @@ //In Sierra, STK_VERSION_STRING is provided on the compile line by bake. //For Trilinos stk snapshots, the following macro definition gets populated with //the real version string by the trilinos_snapshot.sh script. -#define STK_VERSION_STRING "5.23.1-605-g31b54b7f" +#define STK_VERSION_STRING "5.23.2-429-g07a311ce" #endif namespace stk { diff --git a/packages/stk/stk_util/stk_util/util/FPExceptions.hpp b/packages/stk/stk_util/stk_util/util/FPExceptions.hpp index 3d65d0a6017a..e2f94a533d09 100644 --- a/packages/stk/stk_util/stk_util/util/FPExceptions.hpp +++ b/packages/stk/stk_util/stk_util/util/FPExceptions.hpp @@ -32,13 +32,20 @@ constexpr bool have_errexcept() #endif } +constexpr int FE_EXCEPT_CHECKS = FE_ALL_EXCEPT & ~FE_INEXACT; + std::string get_fe_except_string(int fe_except_bitmask); inline void clear_fp_errors() { if constexpr (have_errexcept()) { - std::feclearexcept(FE_ALL_EXCEPT); + // experimental results show calling std::feclearexcept is *very* + // expensive, so dont call it unless needed. + if (std::fetestexcept(FE_EXCEPT_CHECKS) > 0) + { + std::feclearexcept(FE_EXCEPT_CHECKS); + } } else if constexpr (have_errno()) { errno = 0; @@ -49,7 +56,7 @@ inline void throw_or_warn_on_fp_error(const char* fname = nullptr, bool warn=fal { if constexpr (have_errexcept()) { - int fe_except_bitmask = std::fetestexcept(FE_ALL_EXCEPT & ~FE_INEXACT); + int fe_except_bitmask = std::fetestexcept(FE_EXCEPT_CHECKS); if (fe_except_bitmask != 0) { std::string msg = std::string(fname ? fname : "") + " raised floating point error(s): " + get_fe_except_string(fe_except_bitmask); @@ -76,6 +83,7 @@ inline void throw_or_warn_on_fp_error(const char* fname = nullptr, bool warn=fal } } } + } inline void warn_on_fp_error(const char* fname = nullptr, std::ostream& os = std::cerr) diff --git a/packages/stk/stk_util/stk_util/util/StkNgpVector.hpp b/packages/stk/stk_util/stk_util/util/StkNgpVector.hpp index 567e4f875024..f905bb7f171d 100644 --- a/packages/stk/stk_util/stk_util/util/StkNgpVector.hpp +++ b/packages/stk/stk_util/stk_util/util/StkNgpVector.hpp @@ -44,17 +44,14 @@ class NgpVector { using HostSpace = Kokkos::DefaultHostExecutionSpace; public: - NgpVector(const std::string &n) : NgpVector(n, 0) - { - } - NgpVector() : NgpVector(get_default_name()) - { - } - NgpVector(const std::string &n, size_t s) - : mSize(s), - deviceVals(Kokkos::view_alloc(Kokkos::WithoutInitializing, n), mSize), - hostVals(Kokkos::create_mirror_view(Kokkos::WithoutInitializing, deviceVals)) - { + virtual ~NgpVector() = default; + NgpVector(const std::string &n) : NgpVector(n, 0) {} + NgpVector() : NgpVector(get_default_name()) {} + NgpVector(const std::string &n, size_t s) + : mSize(s), + deviceVals(Kokkos::view_alloc(Kokkos::WithoutInitializing, n), mSize), + hostVals(Kokkos::create_mirror_view(Kokkos::WithoutInitializing, deviceVals)) + { } NgpVector(size_t s) : NgpVector(get_default_name(), s) { diff --git a/packages/stk/stk_util/stk_util/util/StridedArray.hpp b/packages/stk/stk_util/stk_util/util/StridedArray.hpp index 69881b38abeb..0e19d0de940d 100644 --- a/packages/stk/stk_util/stk_util/util/StridedArray.hpp +++ b/packages/stk/stk_util/stk_util/util/StridedArray.hpp @@ -36,7 +36,7 @@ #include #include -#include +#include "Kokkos_Macros.hpp" namespace stk {