From 5eb4c9aba83129628ece064aac765ef3983901ac Mon Sep 17 00:00:00 2001 From: Dmitri Naumov Date: Wed, 17 Jul 2024 13:39:30 +0200 Subject: [PATCH 01/15] [PL/HM] Rename private variables using trailing _ --- .../HydroMechanics/HydroMechanicsProcess.cpp | 106 +++++++++--------- .../HydroMechanics/HydroMechanicsProcess.h | 22 ++-- 2 files changed, 64 insertions(+), 64 deletions(-) diff --git a/ProcessLib/HydroMechanics/HydroMechanicsProcess.cpp b/ProcessLib/HydroMechanics/HydroMechanicsProcess.cpp index d5a123a71ab..8280fd638ce 100644 --- a/ProcessLib/HydroMechanics/HydroMechanicsProcess.cpp +++ b/ProcessLib/HydroMechanics/HydroMechanicsProcess.cpp @@ -41,25 +41,25 @@ HydroMechanicsProcess::HydroMechanicsProcess( : Process(std::move(name), mesh, std::move(jacobian_assembler), parameters, integration_order, std::move(process_variables), std::move(secondary_variables), use_monolithic_scheme), - _process_data(std::move(process_data)) + process_data_(std::move(process_data)) { - _nodal_forces = MeshLib::getOrCreateMeshProperty( + nodal_forces_ = MeshLib::getOrCreateMeshProperty( mesh, "NodalForces", MeshLib::MeshItemType::Node, DisplacementDim); - _hydraulic_flow = MeshLib::getOrCreateMeshProperty( + hydraulic_flow_ = MeshLib::getOrCreateMeshProperty( mesh, "MassFlowRate", MeshLib::MeshItemType::Node, 1); _integration_point_writer.emplace_back( std::make_unique( "sigma_ip", static_cast(mesh.getDimension() == 2 ? 4 : 6) /*n components*/, - integration_order, _local_assemblers, &LocalAssemblerIF::getSigma)); + integration_order, local_assemblers_, &LocalAssemblerIF::getSigma)); _integration_point_writer.emplace_back( std::make_unique( "epsilon_ip", static_cast(mesh.getDimension() == 2 ? 4 : 6) /*n components*/, - integration_order, _local_assemblers, + integration_order, local_assemblers_, &LocalAssemblerIF::getEpsilon)); if (!_use_monolithic_scheme) @@ -67,7 +67,7 @@ HydroMechanicsProcess::HydroMechanicsProcess( _integration_point_writer.emplace_back( std::make_unique( "strain_rate_variable_ip", 1, integration_order, - _local_assemblers, &LocalAssemblerIF::getStrainRateVariable)); + local_assemblers_, &LocalAssemblerIF::getStrainRateVariable)); } } @@ -84,7 +84,7 @@ HydroMechanicsProcess::getMatrixSpecifications( { // For the monolithic scheme or the M process (deformation) in the staggered // scheme. - if (process_id == _process_data.mechanics_related_process_id) + if (process_id == process_data_.mechanics_related_process_id) { auto const& l = *_local_to_global_index_map; return {l.dofSizeWithoutGhosts(), l.dofSizeWithoutGhosts(), @@ -92,9 +92,9 @@ HydroMechanicsProcess::getMatrixSpecifications( } // For staggered scheme and H process (pressure). - auto const& l = *_local_to_global_index_map_with_base_nodes; + auto const& l = *local_to_global_index_map_with_base_nodes_; return {l.dofSizeWithoutGhosts(), l.dofSizeWithoutGhosts(), - &l.getGhostIndices(), &_sparsity_pattern_with_linear_element}; + &l.getGhostIndices(), &sparsity_pattern_with_linear_element_}; } template @@ -102,28 +102,28 @@ void HydroMechanicsProcess::constructDofTable() { // Create single component dof in every of the mesh's nodes. _mesh_subset_all_nodes = std::make_unique( - _mesh, _mesh.getNodes(), _process_data.use_taylor_hood_elements); + _mesh, _mesh.getNodes(), process_data_.use_taylor_hood_elements); // Create single component dof in the mesh's base nodes. - _base_nodes = MeshLib::getBaseNodes(_mesh.getElements()); - _mesh_subset_base_nodes = std::make_unique( - _mesh, _base_nodes, _process_data.use_taylor_hood_elements); + base_nodes_ = MeshLib::getBaseNodes(_mesh.getElements()); + mesh_subset_base_nodes_ = std::make_unique( + _mesh, base_nodes_, process_data_.use_taylor_hood_elements); // TODO move the two data members somewhere else. // for extrapolation of secondary variables of stress or strain std::vector all_mesh_subsets_single_component{ *_mesh_subset_all_nodes}; - _local_to_global_index_map_single_component = + local_to_global_index_map_single_component_ = std::make_unique( std::move(all_mesh_subsets_single_component), // by location order is needed for output NumLib::ComponentOrder::BY_LOCATION); - if (_process_data.isMonolithicSchemeUsed()) + if (process_data_.isMonolithicSchemeUsed()) { // For pressure, which is the first std::vector all_mesh_subsets{ - *_mesh_subset_base_nodes}; + *mesh_subset_base_nodes_}; // For displacement. const int monolithic_process_id = 0; @@ -160,18 +160,18 @@ void HydroMechanicsProcess::constructDofTable() // For pressure equation. // Collect the mesh subsets with base nodes in a vector. std::vector all_mesh_subsets_base_nodes{ - *_mesh_subset_base_nodes}; - _local_to_global_index_map_with_base_nodes = + *mesh_subset_base_nodes_}; + local_to_global_index_map_with_base_nodes_ = std::make_unique( std::move(all_mesh_subsets_base_nodes), // by location order is needed for output NumLib::ComponentOrder::BY_LOCATION); - _sparsity_pattern_with_linear_element = NumLib::computeSparsityPattern( - *_local_to_global_index_map_with_base_nodes, _mesh); + sparsity_pattern_with_linear_element_ = NumLib::computeSparsityPattern( + *local_to_global_index_map_with_base_nodes_, _mesh); assert(_local_to_global_index_map); - assert(_local_to_global_index_map_with_base_nodes); + assert(local_to_global_index_map_with_base_nodes_); } } @@ -183,9 +183,9 @@ void HydroMechanicsProcess::initializeConcreteProcess( { ProcessLib::createLocalAssemblersHM( - mesh.getElements(), dof_table, _local_assemblers, + mesh.getElements(), dof_table, local_assemblers_, NumLib::IntegrationOrder{integration_order}, mesh.isAxiallySymmetric(), - _process_data); + process_data_); auto add_secondary_variable = [&](std::string const& name, int const num_components, @@ -194,7 +194,7 @@ void HydroMechanicsProcess::initializeConcreteProcess( _secondary_variables.addSecondaryVariable( name, makeExtrapolator(num_components, getExtrapolator(), - _local_assemblers, + local_assemblers_, std::move(get_ip_values_function))); }; @@ -215,45 +215,45 @@ void HydroMechanicsProcess::initializeConcreteProcess( // enable output of internal variables defined by material models // ProcessLib::Deformation::solidMaterialInternalToSecondaryVariables< - LocalAssemblerIF>(_process_data.solid_materials, + LocalAssemblerIF>(process_data_.solid_materials, add_secondary_variable); - _process_data.pressure_interpolated = + process_data_.pressure_interpolated = MeshLib::getOrCreateMeshProperty( const_cast(mesh), "pressure_interpolated", MeshLib::MeshItemType::Node, 1); - _process_data.principal_stress_vector[0] = + process_data_.principal_stress_vector[0] = MeshLib::getOrCreateMeshProperty( const_cast(mesh), "principal_stress_vector_1", MeshLib::MeshItemType::Cell, 3); - _process_data.principal_stress_vector[1] = + process_data_.principal_stress_vector[1] = MeshLib::getOrCreateMeshProperty( const_cast(mesh), "principal_stress_vector_2", MeshLib::MeshItemType::Cell, 3); - _process_data.principal_stress_vector[2] = + process_data_.principal_stress_vector[2] = MeshLib::getOrCreateMeshProperty( const_cast(mesh), "principal_stress_vector_3", MeshLib::MeshItemType::Cell, 3); - _process_data.principal_stress_values = + process_data_.principal_stress_values = MeshLib::getOrCreateMeshProperty( const_cast(mesh), "principal_stress_values", MeshLib::MeshItemType::Cell, 3); - _process_data.permeability = MeshLib::getOrCreateMeshProperty( + process_data_.permeability = MeshLib::getOrCreateMeshProperty( const_cast(mesh), "permeability", MeshLib::MeshItemType::Cell, MathLib::KelvinVector::kelvin_vector_dimensions(DisplacementDim)); setIPDataInitialConditions(_integration_point_writer, mesh.getProperties(), - _local_assemblers); + local_assemblers_); // Initialize local assemblers after all variables have been set. GlobalExecutor::executeMemberOnDereferenced(&LocalAssemblerIF::initialize, - _local_assemblers, + local_assemblers_, *_local_to_global_index_map); } @@ -261,7 +261,7 @@ template void HydroMechanicsProcess::initializeBoundaryConditions( std::map> const& media) { - if (_process_data.isMonolithicSchemeUsed()) + if (process_data_.isMonolithicSchemeUsed()) { const int process_id_of_hydromechanics = 0; initializeProcessBoundaryConditionsAndSourceTerms( @@ -273,7 +273,7 @@ void HydroMechanicsProcess::initializeBoundaryConditions( // for the equations of pressure const int hydraulic_process_id = 0; initializeProcessBoundaryConditionsAndSourceTerms( - *_local_to_global_index_map_with_base_nodes, hydraulic_process_id, + *local_to_global_index_map_with_base_nodes_, hydraulic_process_id, media); // for the equations of deformation. @@ -298,7 +298,7 @@ void HydroMechanicsProcess::assembleConcreteProcess( _local_to_global_index_map.get()}; GlobalExecutor::executeSelectedMemberDereferenced( - _global_assembler, &VectorMatrixAssembler::assemble, _local_assemblers, + _global_assembler, &VectorMatrixAssembler::assemble, local_assemblers_, getActiveElementIDs(), dof_table, t, dt, x, x_prev, process_id, &M, &K, &b); } @@ -311,7 +311,7 @@ void HydroMechanicsProcess:: GlobalVector& b, GlobalMatrix& Jac) { // For the monolithic scheme - bool const use_monolithic_scheme = _process_data.isMonolithicSchemeUsed(); + bool const use_monolithic_scheme = process_data_.isMonolithicSchemeUsed(); if (use_monolithic_scheme) { DBUG( @@ -321,7 +321,7 @@ void HydroMechanicsProcess:: else { // For the staggered scheme - if (process_id == _process_data.hydraulic_process_id) + if (process_id == process_data_.hydraulic_process_id) { DBUG( "Assemble the Jacobian equations of liquid fluid process in " @@ -338,7 +338,7 @@ void HydroMechanicsProcess:: auto const dof_tables = getDOFTables(x.size()); GlobalExecutor::executeSelectedMemberDereferenced( _global_assembler, &VectorMatrixAssembler::assembleWithJacobian, - _local_assemblers, getActiveElementIDs(), dof_tables, t, dt, x, x_prev, + local_assemblers_, getActiveElementIDs(), dof_tables, t, dt, x, x_prev, process_id, &b, &Jac); auto copyRhs = [&](int const variable_id, auto& output_vector) @@ -356,13 +356,13 @@ void HydroMechanicsProcess:: std::negate()); } }; - if (process_id == _process_data.hydraulic_process_id) + if (process_id == process_data_.hydraulic_process_id) { - copyRhs(0, *_hydraulic_flow); + copyRhs(0, *hydraulic_flow_); } - if (process_id == _process_data.mechanics_related_process_id) + if (process_id == process_data_.mechanics_related_process_id) { - copyRhs(1, *_nodal_forces); + copyRhs(1, *nodal_forces_); } } @@ -376,7 +376,7 @@ void HydroMechanicsProcess::preTimestepConcreteProcess( if (hasMechanicalProcess(process_id)) { GlobalExecutor::executeSelectedMemberOnDereferenced( - &LocalAssemblerIF::preTimestep, _local_assemblers, + &LocalAssemblerIF::preTimestep, local_assemblers_, getActiveElementIDs(), *_local_to_global_index_map, *x[process_id], t, dt); } @@ -388,7 +388,7 @@ void HydroMechanicsProcess::postTimestepConcreteProcess( std::vector const& x_prev, double const t, double const dt, const int process_id) { - if (process_id != _process_data.hydraulic_process_id) + if (process_id != process_data_.hydraulic_process_id) { return; } @@ -396,7 +396,7 @@ void HydroMechanicsProcess::postTimestepConcreteProcess( DBUG("PostTimestep HydroMechanicsProcess."); GlobalExecutor::executeSelectedMemberOnDereferenced( - &LocalAssemblerIF::postTimestep, _local_assemblers, + &LocalAssemblerIF::postTimestep, local_assemblers_, getActiveElementIDs(), getDOFTables(x.size()), x, x_prev, t, dt, process_id); } @@ -411,7 +411,7 @@ void HydroMechanicsProcess::postNonLinearSolverConcreteProcess( // Calculate strain, stress or other internal variables of mechanics. GlobalExecutor::executeSelectedMemberOnDereferenced( - &LocalAssemblerIF::postNonLinearSolver, _local_assemblers, + &LocalAssemblerIF::postNonLinearSolver, local_assemblers_, getActiveElementIDs(), getDOFTables(x.size()), x, x_prev, t, dt, process_id); } @@ -423,7 +423,7 @@ void HydroMechanicsProcess:: int const process_id) { // So far, this function only sets the initial stress using the input data. - if (process_id != _process_data.mechanics_related_process_id) + if (process_id != process_data_.mechanics_related_process_id) { return; } @@ -431,7 +431,7 @@ void HydroMechanicsProcess:: DBUG("Set initial conditions of HydroMechanicsProcess."); GlobalExecutor::executeSelectedMemberOnDereferenced( - &LocalAssemblerIF::setInitialConditions, _local_assemblers, + &LocalAssemblerIF::setInitialConditions, local_assemblers_, getActiveElementIDs(), getDOFTables(x.size()), x, t, process_id); } @@ -440,7 +440,7 @@ void HydroMechanicsProcess::computeSecondaryVariableConcrete( double const t, double const dt, std::vector const& x, GlobalVector const& x_prev, const int process_id) { - if (process_id != _process_data.hydraulic_process_id) + if (process_id != process_data_.hydraulic_process_id) { return; } @@ -448,7 +448,7 @@ void HydroMechanicsProcess::computeSecondaryVariableConcrete( DBUG("Compute the secondary variables for HydroMechanicsProcess."); GlobalExecutor::executeSelectedMemberOnDereferenced( - &LocalAssemblerIF::computeSecondaryVariable, _local_assemblers, + &LocalAssemblerIF::computeSecondaryVariable, local_assemblers_, getActiveElementIDs(), getDOFTables(x.size()), t, dt, x, x_prev, process_id); } @@ -458,7 +458,7 @@ std::tuple HydroMechanicsProcess::getDOFTableForExtrapolatorData() const { const bool manage_storage = false; - return std::make_tuple(_local_to_global_index_map_single_component.get(), + return std::make_tuple(local_to_global_index_map_single_component_.get(), manage_storage); } @@ -472,7 +472,7 @@ HydroMechanicsProcess::getDOFTable(const int process_id) const } // For the equation of pressure - return *_local_to_global_index_map_with_base_nodes; + return *local_to_global_index_map_with_base_nodes_; } template class HydroMechanicsProcess<2>; diff --git a/ProcessLib/HydroMechanics/HydroMechanicsProcess.h b/ProcessLib/HydroMechanics/HydroMechanicsProcess.h index 775fcd7f49e..8ea50e2fdf9 100644 --- a/ProcessLib/HydroMechanics/HydroMechanicsProcess.h +++ b/ProcessLib/HydroMechanics/HydroMechanicsProcess.h @@ -110,27 +110,27 @@ class HydroMechanicsProcess final : public Process bool isMonolithicSchemeUsed() const override { - return _process_data.isMonolithicSchemeUsed(); + return process_data_.isMonolithicSchemeUsed(); } private: - std::vector _base_nodes; - std::unique_ptr _mesh_subset_base_nodes; - HydroMechanicsProcessData _process_data; + std::vector base_nodes_; + std::unique_ptr mesh_subset_base_nodes_; + HydroMechanicsProcessData process_data_; - std::vector> _local_assemblers; + std::vector> local_assemblers_; std::unique_ptr - _local_to_global_index_map_single_component; + local_to_global_index_map_single_component_; /// Local to global index mapping for base nodes, which is used for linear /// interpolation for pressure in the staggered scheme. std::unique_ptr - _local_to_global_index_map_with_base_nodes; + local_to_global_index_map_with_base_nodes_; /// Sparsity pattern for the flow equation, and it is initialized only if /// the staggered scheme is used. - GlobalSparsityPattern _sparsity_pattern_with_linear_element; + GlobalSparsityPattern sparsity_pattern_with_linear_element_; void computeSecondaryVariableConcrete(double const t, double const dt, std::vector const& x, @@ -147,11 +147,11 @@ class HydroMechanicsProcess final : public Process /// process has process_id == 1 in the staggered scheme. bool hasMechanicalProcess(int const process_id) const { - return process_id == _process_data.mechanics_related_process_id; + return process_id == process_data_.mechanics_related_process_id; } - MeshLib::PropertyVector* _nodal_forces = nullptr; - MeshLib::PropertyVector* _hydraulic_flow = nullptr; + MeshLib::PropertyVector* nodal_forces_ = nullptr; + MeshLib::PropertyVector* hydraulic_flow_ = nullptr; }; extern template class HydroMechanicsProcess<2>; From 724925a45808a57a1e40a6c7c27eff9be21cd8da Mon Sep 17 00:00:00 2001 From: Dmitri Naumov Date: Wed, 17 Jul 2024 14:00:55 +0200 Subject: [PATCH 02/15] [PL/HM] AssemblyMixins for openMP parallelization --- ProcessLib/HydroMechanics/HydroMechanicsProcess.cpp | 10 +++++----- ProcessLib/HydroMechanics/HydroMechanicsProcess.h | 7 ++++++- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/ProcessLib/HydroMechanics/HydroMechanicsProcess.cpp b/ProcessLib/HydroMechanics/HydroMechanicsProcess.cpp index 8280fd638ce..fb1b871093f 100644 --- a/ProcessLib/HydroMechanics/HydroMechanicsProcess.cpp +++ b/ProcessLib/HydroMechanics/HydroMechanicsProcess.cpp @@ -41,6 +41,8 @@ HydroMechanicsProcess::HydroMechanicsProcess( : Process(std::move(name), mesh, std::move(jacobian_assembler), parameters, integration_order, std::move(process_variables), std::move(secondary_variables), use_monolithic_scheme), + AssemblyMixin>{ + *_jacobian_assembler}, process_data_(std::move(process_data)) { nodal_forces_ = MeshLib::getOrCreateMeshProperty( @@ -335,12 +337,10 @@ void HydroMechanicsProcess:: } } - auto const dof_tables = getDOFTables(x.size()); - GlobalExecutor::executeSelectedMemberDereferenced( - _global_assembler, &VectorMatrixAssembler::assembleWithJacobian, - local_assemblers_, getActiveElementIDs(), dof_tables, t, dt, x, x_prev, - process_id, &b, &Jac); + AssemblyMixin>::assembleWithJacobian( + t, dt, x, x_prev, process_id, b, Jac); + auto const dof_tables = getDOFTables(x.size()); auto copyRhs = [&](int const variable_id, auto& output_vector) { if (use_monolithic_scheme) diff --git a/ProcessLib/HydroMechanics/HydroMechanicsProcess.h b/ProcessLib/HydroMechanics/HydroMechanicsProcess.h index 8ea50e2fdf9..b47cbca7dbb 100644 --- a/ProcessLib/HydroMechanics/HydroMechanicsProcess.h +++ b/ProcessLib/HydroMechanics/HydroMechanicsProcess.h @@ -12,6 +12,7 @@ #include "HydroMechanicsProcessData.h" #include "LocalAssemblerInterface.h" +#include "ProcessLib/AssemblyMixin.h" #include "ProcessLib/Process.h" namespace ProcessLib @@ -23,8 +24,12 @@ namespace HydroMechanics /// The mixture momentum balance and the mixture mass balance are solved under /// fully saturated conditions. template -class HydroMechanicsProcess final : public Process +class HydroMechanicsProcess final + : public Process, + private AssemblyMixin> { + friend class AssemblyMixin>; + public: HydroMechanicsProcess( std::string name, From 4afbb7fb03374651ec9607eb3c11a16c2dd18336 Mon Sep 17 00:00:00 2001 From: Dmitri Naumov Date: Thu, 18 Jul 2024 16:37:52 +0200 Subject: [PATCH 03/15] [PL] Fix domain deactivation #3372 for asmMixins Same fix as in https://gitlab.opengeosys.org/ogs/ogs/-/merge_requests/4865 --- ProcessLib/AssemblyMixin.h | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/ProcessLib/AssemblyMixin.h b/ProcessLib/AssemblyMixin.h index 72e4f1a397f..920d9272011 100644 --- a/ProcessLib/AssemblyMixin.h +++ b/ProcessLib/AssemblyMixin.h @@ -190,14 +190,9 @@ class AssemblyMixin : private AssemblyMixinBase } else { - // convention: process variable 0 governs where assembly takes - // place (active element IDs) - ProcessLib::ProcessVariable const& pv = - derived().getProcessVariables(process_id)[0]; - - pvma_.assembleWithJacobian(loc_asms, pv.getActiveElementIDs(), - dof_tables, t, dt, x, x_prev, process_id, - b, Jac); + pvma_.assembleWithJacobian( + loc_asms, derived().getActiveElementIDs(), dof_tables, t, dt, x, + x_prev, process_id, b, Jac); } AssemblyMixinBase::copyResiduumVectorsToBulkMesh( From 92f9d06eab08f1de4dd3cec8bf0db0942e244c80 Mon Sep 17 00:00:00 2001 From: Dmitri Naumov Date: Sun, 21 Jul 2024 12:33:01 +0200 Subject: [PATCH 04/15] [PL] Separate matrix output for M,K,b and J,b Same as 3e9a70cd397af307a97011096814bcc3e4deae2a but for global matrices. A little code duplication in favor not to deal with pointers. --- ProcessLib/Assembly/MatrixOutput.cpp | 47 ++++++++++++++----- ProcessLib/Assembly/MatrixOutput.h | 8 ++-- .../ParallelVectorMatrixAssembler.cpp | 2 +- .../LargeDeformationProcess.cpp | 4 +- .../SmallDeformationProcess.cpp | 4 +- .../ThermalTwoPhaseFlowWithPPProcess.cpp | 4 +- 6 files changed, 47 insertions(+), 22 deletions(-) diff --git a/ProcessLib/Assembly/MatrixOutput.cpp b/ProcessLib/Assembly/MatrixOutput.cpp index 56ed505c69f..5aabd69dc9f 100644 --- a/ProcessLib/Assembly/MatrixOutput.cpp +++ b/ProcessLib/Assembly/MatrixOutput.cpp @@ -175,10 +175,9 @@ GlobalMatrixOutput::GlobalMatrixOutput() } void GlobalMatrixOutput::operator()(double const t, int const process_id, - GlobalMatrix const* M, - GlobalMatrix const* K, - GlobalVector const& b, - GlobalMatrix const* const Jac) + GlobalMatrix const& M, + GlobalMatrix const& K, + GlobalVector const& b) { if (!do_output_) { @@ -188,22 +187,20 @@ void GlobalMatrixOutput::operator()(double const t, int const process_id, #ifndef USE_PETSC ++counter_; - if (M) { auto fh = openGlobalMatrixOutputFile(filenamePrefix_, counter_, t, process_id, "M", "mat"); fh << "M "; - outputGlobalMatrix(*M, fh); + outputGlobalMatrix(M, fh); } - if (K) { auto fh = openGlobalMatrixOutputFile(filenamePrefix_, counter_, t, process_id, "K", "mat"); fh << "K "; - outputGlobalMatrix(*K, fh); + outputGlobalMatrix(K, fh); } { @@ -213,21 +210,47 @@ void GlobalMatrixOutput::operator()(double const t, int const process_id, fh << "b "; outputGlobalVector(b, fh); } +#else + // do nothing, warning message already printed in the constructor + (void)t; + (void)process_id; + (void)M; + (void)K; + (void)b; +#endif +} + +void GlobalMatrixOutput::operator()(double const t, int const process_id, + GlobalVector const& b, + GlobalMatrix const& Jac) +{ + if (!do_output_) + { + return; + } + +#ifndef USE_PETSC + ++counter_; + + { + auto fh = openGlobalMatrixOutputFile(filenamePrefix_, counter_, t, + process_id, "b", "vec"); + + fh << "b "; + outputGlobalVector(b, fh); + } - if (Jac) { auto fh = openGlobalMatrixOutputFile(filenamePrefix_, counter_, t, process_id, "Jac", "mat"); fh << "Jac "; - outputGlobalMatrix(*Jac, fh); + outputGlobalMatrix(Jac, fh); } #else // do nothing, warning message already printed in the constructor (void)t; (void)process_id; - (void)M; - (void)K; (void)b; (void)Jac; #endif diff --git a/ProcessLib/Assembly/MatrixOutput.h b/ProcessLib/Assembly/MatrixOutput.h index 2b3d285f7b1..855dbf33475 100644 --- a/ProcessLib/Assembly/MatrixOutput.h +++ b/ProcessLib/Assembly/MatrixOutput.h @@ -24,9 +24,11 @@ struct GlobalMatrixOutput { GlobalMatrixOutput(); - void operator()(double const t, int const process_id, GlobalMatrix const* M, - GlobalMatrix const* K, GlobalVector const& b, - GlobalMatrix const* const Jac = nullptr); + void operator()(double const t, int const process_id, GlobalMatrix const& M, + GlobalMatrix const& K, GlobalVector const& b); + + void operator()(double const t, int const process_id, GlobalVector const& b, + GlobalMatrix const& Jac); private: std::string filenamePrefix_; diff --git a/ProcessLib/Assembly/ParallelVectorMatrixAssembler.cpp b/ProcessLib/Assembly/ParallelVectorMatrixAssembler.cpp index e799f5153ce..5b070cb55db 100644 --- a/ProcessLib/Assembly/ParallelVectorMatrixAssembler.cpp +++ b/ProcessLib/Assembly/ParallelVectorMatrixAssembler.cpp @@ -249,7 +249,7 @@ void ParallelVectorMatrixAssembler::assembleWithJacobian( stats->print(); - global_matrix_output_(t, process_id, nullptr, nullptr, b, &Jac); + global_matrix_output_(t, process_id, b, Jac); exception.rethrow(); } } // namespace ProcessLib::Assembly diff --git a/ProcessLib/LargeDeformation/LargeDeformationProcess.cpp b/ProcessLib/LargeDeformation/LargeDeformationProcess.cpp index bf5a13101ff..f6c58710518 100644 --- a/ProcessLib/LargeDeformation/LargeDeformationProcess.cpp +++ b/ProcessLib/LargeDeformation/LargeDeformationProcess.cpp @@ -145,7 +145,7 @@ void LargeDeformationProcess::assembleConcreteProcess( getActiveElementIDs(), dof_table, t, dt, x, x_prev, process_id, &M, &K, &b); - _global_output(t, process_id, &M, &K, b); + _global_output(t, process_id, M, K, b); } template @@ -168,7 +168,7 @@ void LargeDeformationProcess:: transformVariableFromGlobalVector(b, 0, *_local_to_global_index_map, *_nodal_forces, std::negate()); - _global_output(t, process_id, nullptr, nullptr, b, &Jac); + _global_output(t, process_id, b, Jac); } template diff --git a/ProcessLib/SmallDeformation/SmallDeformationProcess.cpp b/ProcessLib/SmallDeformation/SmallDeformationProcess.cpp index b588aef7a42..8e9ea109e51 100644 --- a/ProcessLib/SmallDeformation/SmallDeformationProcess.cpp +++ b/ProcessLib/SmallDeformation/SmallDeformationProcess.cpp @@ -147,7 +147,7 @@ void SmallDeformationProcess::assembleConcreteProcess( getActiveElementIDs(), dof_table, t, dt, x, x_prev, process_id, &M, &K, &b); - _global_output(t, process_id, &M, &K, b); + _global_output(t, process_id, M, K, b); } template @@ -170,7 +170,7 @@ void SmallDeformationProcess:: transformVariableFromGlobalVector(b, 0, *_local_to_global_index_map, *_nodal_forces, std::negate()); - _global_output(t, process_id, nullptr, nullptr, b, &Jac); + _global_output(t, process_id, b, Jac); } template diff --git a/ProcessLib/ThermalTwoPhaseFlowWithPP/ThermalTwoPhaseFlowWithPPProcess.cpp b/ProcessLib/ThermalTwoPhaseFlowWithPP/ThermalTwoPhaseFlowWithPPProcess.cpp index 20165029fdd..a7df17686ff 100644 --- a/ProcessLib/ThermalTwoPhaseFlowWithPP/ThermalTwoPhaseFlowWithPPProcess.cpp +++ b/ProcessLib/ThermalTwoPhaseFlowWithPP/ThermalTwoPhaseFlowWithPPProcess.cpp @@ -96,7 +96,7 @@ void ThermalTwoPhaseFlowWithPPProcess::assembleConcreteProcess( getActiveElementIDs(), dof_table, t, dt, x, x_prev, process_id, &M, &K, &b); - _global_output(t, process_id, &M, &K, b); + _global_output(t, process_id, M, K, b); } void ThermalTwoPhaseFlowWithPPProcess::assembleWithJacobianConcreteProcess( @@ -115,7 +115,7 @@ void ThermalTwoPhaseFlowWithPPProcess::assembleWithJacobianConcreteProcess( _local_assemblers, getActiveElementIDs(), dof_table, t, dt, x, x_prev, process_id, &b, &Jac); - _global_output(t, process_id, nullptr, nullptr, b, &Jac); + _global_output(t, process_id, b, Jac); } void ThermalTwoPhaseFlowWithPPProcess::preTimestepConcreteProcess( From 052236c7952d426232dcf0a805a4fe5635de5eea Mon Sep 17 00:00:00 2001 From: Dmitri Naumov Date: Sun, 21 Jul 2024 13:25:23 +0200 Subject: [PATCH 05/15] [T/HM] Enable omp tests for HydroMechanics --- scripts/cmake/test/AddTest.cmake | 4 ++-- scripts/cmake/test/OgsTest.cmake | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/cmake/test/AddTest.cmake b/scripts/cmake/test/AddTest.cmake index 9d35517e9e2..104027437f3 100644 --- a/scripts/cmake/test/AddTest.cmake +++ b/scripts/cmake/test/AddTest.cmake @@ -292,7 +292,7 @@ function(AddTest) # OpenMP tests for specific processes only. TODO (CL) Once all processes can # be assembled OpenMP parallel, the condition should be removed. - if("${labels}" MATCHES "TH2M|ThermoRichards") + if("${labels}" MATCHES "TH2M|ThermoRichards|^HydroMechanics") _add_test(${TEST_NAME}-omp) _set_omp_test_properties() endif() @@ -323,7 +323,7 @@ function(AddTest) # Run the tester _add_test_tester(${TEST_NAME}) - if("${labels}" MATCHES "TH2M|ThermoRichards") + if("${labels}" MATCHES "TH2M|ThermoRichards|^HydroMechanics") _add_test_tester(${TEST_NAME}-omp) endif() diff --git a/scripts/cmake/test/OgsTest.cmake b/scripts/cmake/test/OgsTest.cmake index 900bce22874..6a408e0ba88 100644 --- a/scripts/cmake/test/OgsTest.cmake +++ b/scripts/cmake/test/OgsTest.cmake @@ -84,7 +84,7 @@ function(OgsTest) # OpenMP tests for specific processes only. TODO (CL) Once all processes can # be assembled OpenMP parallel, the condition should be removed. - if("${labels}" MATCHES "TH2M|ThermoRichards") + if("${labels}" MATCHES "TH2M|ThermoRichards|^HydroMechanics") _ogs_add_test(${TEST_NAME}-omp) _set_omp_test_properties() endif() From 5cf9379c59818d94b39c4658100e4aec944efb6c Mon Sep 17 00:00:00 2001 From: Dmitri Naumov Date: Sun, 21 Jul 2024 18:10:43 +0200 Subject: [PATCH 06/15] [PL] pvma; remove single process restriction This is not sufficient for the staggered scheme yet. --- .../ParallelVectorMatrixAssembler.cpp | 24 ++++--------------- ProcessLib/AssemblyMixin.h | 5 ++-- 2 files changed, 7 insertions(+), 22 deletions(-) diff --git a/ProcessLib/Assembly/ParallelVectorMatrixAssembler.cpp b/ProcessLib/Assembly/ParallelVectorMatrixAssembler.cpp index 5b070cb55db..293db233fdc 100644 --- a/ProcessLib/Assembly/ParallelVectorMatrixAssembler.cpp +++ b/ProcessLib/Assembly/ParallelVectorMatrixAssembler.cpp @@ -120,28 +120,14 @@ void ParallelVectorMatrixAssembler::assembleWithJacobian( GlobalVector& b, GlobalMatrix& Jac) { // checks ////////////////////////////////////////////////////////////////// - if (process_id != 0) + if (dof_tables.size() != xs.size()) { - OGS_FATAL("Process id is not 0 but {}", process_id); + OGS_FATAL("Different number of DOF tables and solution vectors."); } - if (dof_tables.size() != 1) - { - OGS_FATAL("More than 1 dof table"); - } - auto const& dof_table = *(dof_tables.front()); - - if (xs.size() != 1) - { - OGS_FATAL("More than 1 solution vector"); - } - auto const& x = *xs.front(); - - if (x_prevs.size() != 1) - { - OGS_FATAL("More than 1 x_prev vector"); - } - auto const& x_prev = *x_prevs.front(); + auto const& dof_table = *dof_tables[process_id]; + auto const& x = *xs[process_id]; + auto const& x_prev = *x_prevs[process_id]; // algorithm /////////////////////////////////////////////////////////////// diff --git a/ProcessLib/AssemblyMixin.h b/ProcessLib/AssemblyMixin.h index 920d9272011..c68fb644e36 100644 --- a/ProcessLib/AssemblyMixin.h +++ b/ProcessLib/AssemblyMixin.h @@ -161,9 +161,8 @@ class AssemblyMixin : private AssemblyMixinBase DBUG("AssemblyMixin assembleWithJacobian(t={}, dt={}, process_id={}).", t, dt, process_id); - // TODO why not getDOFTables(x.size()); ? - std::vector const dof_tables{ - derived()._local_to_global_index_map.get()}; + std::vector const dof_tables = + derived().getDOFTables(x.size()); auto const& loc_asms = derived().local_assemblers_; From cb5cf79858e258dbc07ec5cc4d13dee7c4ba2042 Mon Sep 17 00:00:00 2001 From: Dmitri Naumov Date: Thu, 8 Aug 2024 15:53:11 +0200 Subject: [PATCH 07/15] Add boolean conversion to ThreadException This slightly simplifies code avoiding extra boolean variable. --- BaseLib/ThreadException.h | 7 +++++++ ProcessLib/Assembly/ParallelVectorMatrixAssembler.cpp | 7 ++----- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/BaseLib/ThreadException.h b/BaseLib/ThreadException.h index a259d9a051a..0bc221f8ed3 100644 --- a/BaseLib/ThreadException.h +++ b/BaseLib/ThreadException.h @@ -14,6 +14,11 @@ /// Initial code from /// https://stackoverflow.com/questions/11828539/elegant-exception-handling-in-openmp +/// +/// The implementation does not guarantee, that while the exception is being +/// checked for nullptr in the bool-conversion operator, the capture is not +/// changing the pointer. This might lead to an exception being lost if there +/// are two exceptions thrown simultaneously. class ThreadException { public: @@ -31,6 +36,8 @@ class ThreadException } } + explicit operator bool() const noexcept { return exception_ != nullptr; } + private: std::exception_ptr exception_ = nullptr; std::mutex lock_; diff --git a/ProcessLib/Assembly/ParallelVectorMatrixAssembler.cpp b/ProcessLib/Assembly/ParallelVectorMatrixAssembler.cpp index 293db233fdc..0aa9e28d5c2 100644 --- a/ProcessLib/Assembly/ParallelVectorMatrixAssembler.cpp +++ b/ProcessLib/Assembly/ParallelVectorMatrixAssembler.cpp @@ -136,7 +136,6 @@ void ParallelVectorMatrixAssembler::assembleWithJacobian( ConcurrentMatrixView Jac_view(Jac); ThreadException exception; - bool assembly_error = false; #pragma omp parallel num_threads(num_threads_) { #ifdef _OPENMP @@ -172,7 +171,7 @@ void ParallelVectorMatrixAssembler::assembleWithJacobian( for (std::ptrdiff_t element_id = 0; element_id < n_loc_asm; ++element_id) { - if (assembly_error) + if (exception) { continue; } @@ -187,7 +186,6 @@ void ParallelVectorMatrixAssembler::assembleWithJacobian( catch (...) { exception.capture(); - assembly_error = true; continue; } @@ -206,7 +204,7 @@ void ParallelVectorMatrixAssembler::assembleWithJacobian( #pragma omp for nowait for (std::ptrdiff_t i = 0; i < n_act_elem; ++i) { - if (assembly_error) + if (exception) { continue; } @@ -223,7 +221,6 @@ void ParallelVectorMatrixAssembler::assembleWithJacobian( catch (...) { exception.capture(); - assembly_error = true; continue; } From 1f17d3b31a5947a415f00e09fd05ccc291c6f651 Mon Sep 17 00:00:00 2001 From: Dmitri Naumov Date: Thu, 8 Aug 2024 15:55:09 +0200 Subject: [PATCH 08/15] [PL] Extract common assembly loop in parallel asm This costs some memory (now element ids and local assembler references are stored) but simplifies the code. If local assemblers would return an element id, the code could be simplified further. --- .../ParallelVectorMatrixAssembler.cpp | 155 ++++++++++-------- 1 file changed, 86 insertions(+), 69 deletions(-) diff --git a/ProcessLib/Assembly/ParallelVectorMatrixAssembler.cpp b/ProcessLib/Assembly/ParallelVectorMatrixAssembler.cpp index 0aa9e28d5c2..3944d02f627 100644 --- a/ProcessLib/Assembly/ParallelVectorMatrixAssembler.cpp +++ b/ProcessLib/Assembly/ParallelVectorMatrixAssembler.cpp @@ -12,6 +12,11 @@ #include #include +#include +#include +#include +#include +#include #include "BaseLib/StringTools.h" #include "BaseLib/ThreadException.h" @@ -54,6 +59,77 @@ void assembleWithJacobianOneElement( cache.add(local_b_data, local_Jac_data, indices); } +/// Returns a vector of active element ids with corresponding local assemblers. +std::vector< + std::tuple>> +collectActiveLocalAssemblers( + BaseLib::PolymorphicRandomAccessContainerView< + ProcessLib::LocalAssemblerInterface> const& local_assemblers, + std::vector const& active_elements) +{ + auto id_and_local_asm = [&local_assemblers](std::size_t const id) + -> std::tuple> + { return {id, local_assemblers[id]}; }; + + auto create_ids_asm_pairs = [&](auto const& element_ids) + { + return element_ids | ranges::views::transform(id_and_local_asm) | + ranges::to(); + }; + + if (active_elements.empty()) + { + return create_ids_asm_pairs(ranges::views::iota( + static_cast(0), local_assemblers.size())); + } + return create_ids_asm_pairs(active_elements); +} + +void runAssemblyForEachLocalAssembler( + std::vector>> const& + ids_local_assemblers, + NumLib::LocalToGlobalIndexMap const& dof_table, double const t, + double const dt, GlobalVector const& x, GlobalVector const& x_prev, + std::vector& local_b_data, std::vector& local_Jac_data, + std::vector& indices, + ProcessLib::AbstractJacobianAssembler& jac_asm, ThreadException& exception, + ProcessLib::Assembly::MultiMatrixElementCache& cache, + auto local_matrix_output) +{ + // due to MSVC++ error: + // error C3016: 'element_id': index variable in OpenMP 'for' + // statement must have signed integral type + std::ptrdiff_t n_elements = + static_cast(ids_local_assemblers.size()); +#pragma omp for nowait + for (std::ptrdiff_t i = 0; i < n_elements; ++i) + { + if (exception) + { + continue; + } + auto [element_id, loc_asm] = ids_local_assemblers[i]; + + try + { + assembleWithJacobianOneElement( + element_id, loc_asm, dof_table, t, dt, x, x_prev, local_b_data, + local_Jac_data, indices, jac_asm, cache); + } + catch (...) + { + exception.capture(); + continue; + } + + local_matrix_output(element_id); + } +} + int getNumberOfThreads() { char const* const num_threads_env = std::getenv("OGS_ASM_THREADS"); @@ -158,77 +234,18 @@ void ParallelVectorMatrixAssembler::assembleWithJacobian( MultiMatrixElementCache cache{b_view, Jac_view, stats_this_thread->data}; - // TODO corner case: what if all elements on a submesh are deactivated? - if (active_elements.empty()) + auto local_matrix_output = [&](std::ptrdiff_t element_id) { - // due to MSVC++ error: - // error C3016: 'element_id': index variable in OpenMP 'for' - // statement must have signed integral type - std::ptrdiff_t const n_loc_asm = - static_cast(local_assemblers.size()); + local_matrix_output_(t, process_id, element_id, local_b_data, + local_Jac_data); + }; -#pragma omp for nowait - for (std::ptrdiff_t element_id = 0; element_id < n_loc_asm; - ++element_id) - { - if (exception) - { - continue; - } - auto& loc_asm = local_assemblers[element_id]; - - try - { - assembleWithJacobianOneElement( - element_id, loc_asm, dof_table, t, dt, x, x_prev, - local_b_data, local_Jac_data, indices, *jac_asm, cache); - } - catch (...) - { - exception.capture(); - continue; - } - - local_matrix_output_(t, process_id, element_id, local_b_data, - local_Jac_data); - } - } - else - { - // due to MSVC++ error: - // error C3016: 'i': index variable in OpenMP 'for' statement must - // have signed integral type - std::ptrdiff_t const n_act_elem = - static_cast(active_elements.size()); - -#pragma omp for nowait - for (std::ptrdiff_t i = 0; i < n_act_elem; ++i) - { - if (exception) - { - continue; - } - - auto const element_id = active_elements[i]; - auto& loc_asm = local_assemblers[element_id]; - - try - { - assembleWithJacobianOneElement( - element_id, loc_asm, dof_table, t, dt, x, x_prev, - local_b_data, local_Jac_data, indices, *jac_asm, cache); - } - catch (...) - { - exception.capture(); - continue; - } - - local_matrix_output_(t, process_id, element_id, local_b_data, - local_Jac_data); - } - } - } // OpenMP parallel section + // TODO corner case: what if all elements on a submesh are deactivated? + runAssemblyForEachLocalAssembler( + collectActiveLocalAssemblers(local_assemblers, active_elements), + dof_table, t, dt, x, x_prev, local_b_data, local_Jac_data, indices, + *jac_asm, exception, cache, local_matrix_output); + } stats->print(); From 0a83ab329a14d6d36ca3d8b6ea25d4c528c44730 Mon Sep 17 00:00:00 2001 From: Dmitri Naumov Date: Thu, 8 Aug 2024 17:44:41 +0200 Subject: [PATCH 09/15] [PL] Storing indices locally is measurably faster A little bit :) 0.5% maybe. Measured on TRM/Mockup ctest. --- .../Assembly/ParallelVectorMatrixAssembler.cpp | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/ProcessLib/Assembly/ParallelVectorMatrixAssembler.cpp b/ProcessLib/Assembly/ParallelVectorMatrixAssembler.cpp index 3944d02f627..e46377bb59d 100644 --- a/ProcessLib/Assembly/ParallelVectorMatrixAssembler.cpp +++ b/ProcessLib/Assembly/ParallelVectorMatrixAssembler.cpp @@ -33,11 +33,11 @@ void assembleWithJacobianOneElement( const NumLib::LocalToGlobalIndexMap& dof_table, const double t, const double dt, const GlobalVector& x, const GlobalVector& x_prev, std::vector& local_b_data, std::vector& local_Jac_data, - std::vector& indices, ProcessLib::AbstractJacobianAssembler& jacobian_assembler, ProcessLib::Assembly::MultiMatrixElementCache& cache) { - indices = NumLib::getIndices(mesh_item_id, dof_table); + std::vector const& indices = + NumLib::getIndices(mesh_item_id, dof_table); local_b_data.clear(); local_Jac_data.clear(); @@ -95,7 +95,6 @@ void runAssemblyForEachLocalAssembler( NumLib::LocalToGlobalIndexMap const& dof_table, double const t, double const dt, GlobalVector const& x, GlobalVector const& x_prev, std::vector& local_b_data, std::vector& local_Jac_data, - std::vector& indices, ProcessLib::AbstractJacobianAssembler& jac_asm, ThreadException& exception, ProcessLib::Assembly::MultiMatrixElementCache& cache, auto local_matrix_output) @@ -116,9 +115,9 @@ void runAssemblyForEachLocalAssembler( try { - assembleWithJacobianOneElement( - element_id, loc_asm, dof_table, t, dt, x, x_prev, local_b_data, - local_Jac_data, indices, jac_asm, cache); + assembleWithJacobianOneElement(element_id, loc_asm, dof_table, t, + dt, x, x_prev, local_b_data, + local_Jac_data, jac_asm, cache); } catch (...) { @@ -225,7 +224,6 @@ void ParallelVectorMatrixAssembler::assembleWithJacobian( // reallocations. std::vector local_b_data; std::vector local_Jac_data; - std::vector indices; // copy to avoid concurrent access auto const jac_asm = jacobian_assembler_.copy(); @@ -243,8 +241,8 @@ void ParallelVectorMatrixAssembler::assembleWithJacobian( // TODO corner case: what if all elements on a submesh are deactivated? runAssemblyForEachLocalAssembler( collectActiveLocalAssemblers(local_assemblers, active_elements), - dof_table, t, dt, x, x_prev, local_b_data, local_Jac_data, indices, - *jac_asm, exception, cache, local_matrix_output); + dof_table, t, dt, x, x_prev, local_b_data, local_Jac_data, *jac_asm, + exception, cache, local_matrix_output); } stats->print(); From 1ef741517ff9e39bd1218a142390e845d1dc005f Mon Sep 17 00:00:00 2001 From: Dmitri Naumov Date: Fri, 9 Aug 2024 13:52:07 +0200 Subject: [PATCH 10/15] [PL] Add staggered scheme assembly to parallel asm This is mostly copy from the VectorMatrixAssembler. --- .../ParallelVectorMatrixAssembler.cpp | 121 ++++++++++++++++-- 1 file changed, 113 insertions(+), 8 deletions(-) diff --git a/ProcessLib/Assembly/ParallelVectorMatrixAssembler.cpp b/ProcessLib/Assembly/ParallelVectorMatrixAssembler.cpp index e46377bb59d..14b9d24a6b9 100644 --- a/ProcessLib/Assembly/ParallelVectorMatrixAssembler.cpp +++ b/ProcessLib/Assembly/ParallelVectorMatrixAssembler.cpp @@ -24,6 +24,7 @@ #include "ProcessLib/Assembly/MatrixAssemblyStats.h" #include "ProcessLib/Assembly/MatrixElementCache.h" #include "ProcessLib/Assembly/MatrixOutput.h" +#include "ProcessLib/CoupledSolutionsForStaggeredScheme.h" namespace { @@ -59,6 +60,52 @@ void assembleWithJacobianOneElement( cache.add(local_b_data, local_Jac_data, indices); } +void assembleWithJacobianForStaggeredSchemeOneElement( + const std::size_t mesh_item_id, + ProcessLib::LocalAssemblerInterface& local_assembler, + std::vector const& dof_tables, + const double t, const double dt, std::vector const& x, + std::vector const& x_prev, int const process_id, + std::vector& local_b_data, std::vector& local_Jac_data, + ProcessLib::AbstractJacobianAssembler& jacobian_assembler, + ProcessLib::Assembly::MultiMatrixElementCache& cache) +{ + std::vector> indices_of_processes; + indices_of_processes.reserve(dof_tables.size()); + transform(cbegin(dof_tables), cend(dof_tables), + back_inserter(indices_of_processes), + [&](auto const* dof_table) + { return NumLib::getIndices(mesh_item_id, *dof_table); }); + + auto local_coupled_xs = + ProcessLib::getCoupledLocalSolutions(x, indices_of_processes); + auto const local_x = MathLib::toVector(local_coupled_xs); + + auto local_coupled_x_prevs = + ProcessLib::getCoupledLocalSolutions(x_prev, indices_of_processes); + auto const local_x_prev = MathLib::toVector(local_coupled_x_prevs); + + std::vector const& indices = + indices_of_processes[process_id]; + + local_b_data.clear(); + local_Jac_data.clear(); + + jacobian_assembler.assembleWithJacobianForStaggeredScheme( + local_assembler, t, dt, local_x, local_x_prev, process_id, local_b_data, + local_Jac_data); + + if (local_Jac_data.empty()) + { + OGS_FATAL( + "No Jacobian has been assembled! This might be due to " + "programming errors in the local assembler of the " + "current process."); + } + + cache.add(local_b_data, local_Jac_data, indices); +} + /// Returns a vector of active element ids with corresponding local assemblers. std::vector< std::tuple>> const& + ids_local_assemblers, + std::vector const& dof_tables, + double const t, double const dt, std::vector const& x, + std::vector const& x_prev, int const process_id, + std::vector& local_b_data, std::vector& local_Jac_data, + ProcessLib::AbstractJacobianAssembler& jac_asm, ThreadException& exception, + ProcessLib::Assembly::MultiMatrixElementCache& cache, + auto local_matrix_output) +{ + // due to MSVC++ error: + // error C3016: 'element_id': index variable in OpenMP 'for' + // statement must have signed integral type + std::ptrdiff_t n_elements = + static_cast(ids_local_assemblers.size()); +#pragma omp for nowait + for (std::ptrdiff_t i = 0; i < n_elements; ++i) + { + if (exception) + { + continue; + } + auto [element_id, loc_asm] = ids_local_assemblers[i]; + + try + { + assembleWithJacobianForStaggeredSchemeOneElement( + element_id, loc_asm, dof_tables, t, dt, x, x_prev, process_id, + local_b_data, local_Jac_data, jac_asm, cache); + } + catch (...) + { + exception.capture(); + continue; + } + + local_matrix_output(element_id); + } +} + int getNumberOfThreads() { char const* const num_threads_env = std::getenv("OGS_ASM_THREADS"); @@ -200,10 +290,7 @@ void ParallelVectorMatrixAssembler::assembleWithJacobian( OGS_FATAL("Different number of DOF tables and solution vectors."); } - auto const& dof_table = *dof_tables[process_id]; - auto const& x = *xs[process_id]; - auto const& x_prev = *x_prevs[process_id]; - + std::size_t const number_of_processes = xs.size(); // algorithm /////////////////////////////////////////////////////////////// auto stats = CumulativeStats::create(); @@ -239,10 +326,28 @@ void ParallelVectorMatrixAssembler::assembleWithJacobian( }; // TODO corner case: what if all elements on a submesh are deactivated? - runAssemblyForEachLocalAssembler( - collectActiveLocalAssemblers(local_assemblers, active_elements), - dof_table, t, dt, x, x_prev, local_b_data, local_Jac_data, *jac_asm, - exception, cache, local_matrix_output); + + // Monolithic scheme + if (number_of_processes == 1) + { + assert(process_id == 0); + auto const& dof_table = *dof_tables[0]; + auto const& x = *xs[0]; + auto const& x_prev = *x_prevs[0]; + + runAssemblyForEachLocalAssembler( + collectActiveLocalAssemblers(local_assemblers, active_elements), + dof_table, t, dt, x, x_prev, local_b_data, local_Jac_data, + *jac_asm, exception, cache, local_matrix_output); + } + else // Staggered scheme + { + runStaggeredAssemblyForEachLocalAssembler( + collectActiveLocalAssemblers(local_assemblers, active_elements), + dof_tables, t, dt, xs, x_prevs, process_id, local_b_data, + local_Jac_data, *jac_asm, exception, cache, + local_matrix_output); + } } stats->print(); From 5597890d95a1b1e9569b522843b66d8df7f09b64 Mon Sep 17 00:00:00 2001 From: Dmitri Naumov Date: Mon, 12 Aug 2024 15:02:47 +0200 Subject: [PATCH 11/15] [PL] Extend submesh initialization for staggered ... scheme. Passing a vector of vectors (outer for processes, inner over the process variables) of residuum names for each process variable. --- ProcessLib/AssemblyMixin.cpp | 114 ++++++++++++------ ProcessLib/AssemblyMixin.h | 27 +++-- ProcessLib/CreateTimeLoop.cpp | 3 +- ProcessLib/Process.h | 7 ++ ProcessLib/SubmeshAssemblySupport.h | 6 +- ProcessLib/TH2M/TH2MProcess.cpp | 10 +- ProcessLib/TH2M/TH2MProcess.h | 2 +- .../ThermoRichardsMechanicsProcess.cpp | 9 +- .../ThermoRichardsMechanicsProcess.h | 2 +- 9 files changed, 119 insertions(+), 61 deletions(-) diff --git a/ProcessLib/AssemblyMixin.cpp b/ProcessLib/AssemblyMixin.cpp index 2b955a23644..bca2c4a6859 100644 --- a/ProcessLib/AssemblyMixin.cpp +++ b/ProcessLib/AssemblyMixin.cpp @@ -10,6 +10,10 @@ #include "AssemblyMixin.h" +#include +#include +#include +#include #include #include "MeshLib/Utils/getOrCreateMeshProperty.h" @@ -18,30 +22,79 @@ namespace { -std::vector>> -createResiduumVectors( - MeshLib::Mesh& mesh, - std::vector const& residuum_names, - std::vector> - pvs) -{ - auto const num_residua = residuum_names.size(); - std::vector>> - residuum_vectors; - residuum_vectors.reserve(num_residua); +void checkResiduumNamesVsProcessVariables( + std::vector> const& per_process_residuum_names, + std::vector< + std::vector>> const& + per_process_pvs) +{ + if (per_process_pvs.size() != per_process_residuum_names.size()) + { + OGS_FATAL( + "The number of passed residuum names ({}) does not match the " + "number of processes ({}).", + per_process_residuum_names.size(), per_process_pvs.size()); + } - for (std::size_t i = 0; i < num_residua; ++i) + auto check_sizes = [](std::size_t const process_id, auto const& rns_pvs) { - auto const& residuum_name = residuum_names[i]; - auto const& pv = pvs[i].get(); + auto const& [rns, pvs] = rns_pvs; - residuum_vectors.emplace_back(*MeshLib::getOrCreateMeshProperty( - mesh, residuum_name, MeshLib::MeshItemType::Node, - pv.getNumberOfGlobalComponents())); + if (rns.size() != pvs.size()) + { + OGS_FATAL( + "The number of passed residuum names ({}) does not match the " + "number of process variables ({}) for process {}.", + rns.size(), pvs.size(), process_id); + } + }; + for (auto const& [process_id, rns_pvs] : + ranges::views::zip(per_process_residuum_names, per_process_pvs) | + ranges::views::enumerate) + { + check_sizes(process_id, rns_pvs); } +} - return residuum_vectors; +std::vector< + std::vector>>> +createResiduumVectors( + MeshLib::Mesh& mesh, + std::vector> const& per_process_residuum_names, + std::vector< + std::vector>> const& + per_process_pvs) +{ + checkResiduumNamesVsProcessVariables(per_process_residuum_names, + per_process_pvs); + + auto create_mesh_property_for_residuum = + [&mesh](std::pair const& + residuum_name_process_variable) + -> std::reference_wrapper> + { + auto const& [rn, pv] = residuum_name_process_variable; + return *MeshLib::getOrCreateMeshProperty( + mesh, rn, MeshLib::MeshItemType::Node, + pv.getNumberOfGlobalComponents()); + }; + + auto create_mesh_properties = + [&create_mesh_property_for_residuum](auto const& rns_pvs) + { + auto const& [rns, pvs] = rns_pvs; + return ranges::views::zip(rns, pvs) | + ranges::views::transform(create_mesh_property_for_residuum) | + ranges::to>>>; + }; + + return ranges::views::zip(per_process_residuum_names, per_process_pvs) | + ranges::views::transform(create_mesh_properties) | + ranges::to>>>>; } } // namespace @@ -49,7 +102,8 @@ namespace ProcessLib { SubmeshAssemblyData::SubmeshAssemblyData( MeshLib::Mesh const& mesh, - std::vector>>&& + std::vector< + std::vector>>>&& residuum_vectors) : bulk_element_ids{*MeshLib::bulkElementIDs(mesh)}, bulk_node_ids{*MeshLib::bulkNodeIDs(mesh)}, @@ -58,24 +112,14 @@ SubmeshAssemblyData::SubmeshAssemblyData( } void AssemblyMixinBase::initializeAssemblyOnSubmeshes( - const int process_id, MeshLib::Mesh& bulk_mesh, std::vector> const& submeshes, - std::vector const& residuum_names, - std::vector> const& pvs) + std::vector> const& residuum_names, + std::vector>> const& + pvs) { DBUG("AssemblyMixinBase initializeSubmeshOutput()."); - auto const num_residua = residuum_names.size(); - - if (pvs.size() != num_residua) - { - OGS_FATAL( - "The number of passed residuum names does not match the number " - "of process variables for process id {}: {} != {}", - process_id, num_residua, pvs.size()); - } - submesh_assembly_data_.reserve(submeshes.size()); for (auto& mesh_ref : submeshes) { @@ -184,16 +228,18 @@ void AssemblyMixinBase::copyResiduumVectorsToBulkMesh( } void AssemblyMixinBase::copyResiduumVectorsToSubmesh( + int const process_id, GlobalVector const& rhs, NumLib::LocalToGlobalIndexMap const& local_to_global_index_map, SubmeshAssemblyData const& sad) { - for (std::size_t variable_id = 0; variable_id < sad.residuum_vectors.size(); + auto const& residuum_vectors = sad.residuum_vectors[process_id]; + for (std::size_t variable_id = 0; variable_id < residuum_vectors.size(); ++variable_id) { transformVariableFromGlobalVector( rhs, variable_id, local_to_global_index_map, - sad.residuum_vectors[variable_id].get(), sad.bulk_node_ids, + residuum_vectors[variable_id].get(), sad.bulk_node_ids, std::negate()); } } diff --git a/ProcessLib/AssemblyMixin.h b/ProcessLib/AssemblyMixin.h index c68fb644e36..6b4bc09511f 100644 --- a/ProcessLib/AssemblyMixin.h +++ b/ProcessLib/AssemblyMixin.h @@ -24,13 +24,15 @@ struct SubmeshAssemblyData { explicit SubmeshAssemblyData( MeshLib::Mesh const& mesh, - std::vector>>&& + std::vector>>>&& residuum_vectors); MeshLib::PropertyVector const& bulk_element_ids; MeshLib::PropertyVector const& bulk_node_ids; std::vector active_element_ids; - std::vector>> + std::vector< + std::vector>>> residuum_vectors; }; @@ -50,11 +52,11 @@ class AssemblyMixinBase : pvma_{jacobian_assembler} {}; void initializeAssemblyOnSubmeshes( - const int process_id, MeshLib::Mesh& bulk_mesh, std::vector> const& submeshes, - std::vector const& residuum_names, - std::vector> const& pvs); + std::vector> const& residuum_names, + std::vector>> const& + pvs); void updateActiveElements(ProcessLib::Process const& process); @@ -65,6 +67,7 @@ class AssemblyMixinBase residuum_vectors); static void copyResiduumVectorsToSubmesh( + int const process_id, GlobalVector const& rhs, NumLib::LocalToGlobalIndexMap const& local_to_global_index_map, SubmeshAssemblyData const& sad); @@ -74,7 +77,8 @@ class AssemblyMixinBase protected: std::vector submesh_assembly_data_; - std::vector>> + std::vector< + std::vector>>> residuum_vectors_bulk_; /// ID of the b vector on submeshes, cf. NumLib::VectorProvider. @@ -122,13 +126,12 @@ class AssemblyMixin : private AssemblyMixinBase * and staggered coupling. */ void initializeAssemblyOnSubmeshes( - const int process_id, std::vector> const& submeshes, - std::vector const& residuum_names) + std::vector> const& residuum_names) { AssemblyMixinBase::initializeAssemblyOnSubmeshes( - process_id, derived().getMesh(), submeshes, residuum_names, - derived().getProcessVariables(process_id)); + derived().getMesh(), submeshes, residuum_names, + derived().getProcessVariables()); } void updateActiveElements() @@ -182,7 +185,7 @@ class AssemblyMixin : private AssemblyMixinBase MathLib::LinAlg::axpy(b, 1.0, b_submesh); AssemblyMixinBase::copyResiduumVectorsToSubmesh( - b_submesh, *(dof_tables.front()), sad); + process_id, b_submesh, *(dof_tables[process_id]), sad); } NumLib::GlobalVectorProvider::provider.releaseVector(b_submesh); @@ -195,7 +198,7 @@ class AssemblyMixin : private AssemblyMixinBase } AssemblyMixinBase::copyResiduumVectorsToBulkMesh( - b, *(dof_tables.front()), residuum_vectors_bulk_); + b, *(dof_tables[process_id]), residuum_vectors_bulk_[process_id]); } private: diff --git a/ProcessLib/CreateTimeLoop.cpp b/ProcessLib/CreateTimeLoop.cpp index 0b22415787a..49b5bc64e85 100644 --- a/ProcessLib/CreateTimeLoop.cpp +++ b/ProcessLib/CreateTimeLoop.cpp @@ -12,6 +12,7 @@ #include #include +#include #include #include "BaseLib/ConfigTree.h" @@ -61,7 +62,7 @@ std::unique_ptr createTimeLoop( auto const& residuum_vector_names = process->initializeAssemblyOnSubmeshes(smroc.meshes); - for (auto const& name : residuum_vector_names) + for (auto const& name : residuum_vector_names | ranges::views::join) { smroc.output.doNotProjectFromBulkMeshToSubmeshes( name, MeshLib::MeshItemType::Node); diff --git a/ProcessLib/Process.h b/ProcessLib/Process.h index b8c4fd9ed64..954a71821f0 100644 --- a/ProcessLib/Process.h +++ b/ProcessLib/Process.h @@ -151,6 +151,13 @@ class Process } MeshLib::Mesh& getMesh() const { return _mesh; } + + std::vector>> const& + getProcessVariables() const + { + return _process_variables; + } + std::vector> const& getProcessVariables(const int process_id) const { diff --git a/ProcessLib/SubmeshAssemblySupport.h b/ProcessLib/SubmeshAssemblySupport.h index 669b3029c34..e188674ddf6 100644 --- a/ProcessLib/SubmeshAssemblySupport.h +++ b/ProcessLib/SubmeshAssemblySupport.h @@ -32,8 +32,10 @@ class SubmeshAssemblySupport /// \attention \c meshes must be a must be a non-overlapping cover of the /// entire simulation domain (bulk mesh)! /// - /// \return The names of the residuum vectors that will be assembled. - virtual std::vector initializeAssemblyOnSubmeshes( + /// \return The names of the residuum vectors that will be assembled for + /// each process: outer vector of size 1 for monolithic schemes and greater + /// for staggered schemes. + virtual std::vector> initializeAssemblyOnSubmeshes( std::vector> const& meshes) { DBUG( diff --git a/ProcessLib/TH2M/TH2MProcess.cpp b/ProcessLib/TH2M/TH2MProcess.cpp index c7f66f32522..3f7e384cade 100644 --- a/ProcessLib/TH2M/TH2MProcess.cpp +++ b/ProcessLib/TH2M/TH2MProcess.cpp @@ -316,17 +316,17 @@ void TH2MProcess::computeSecondaryVariableConcrete( } template -std::vector +std::vector> TH2MProcess::initializeAssemblyOnSubmeshes( std::vector> const& meshes) { INFO("TH2M process initializeSubmeshOutput()."); - const int process_id = 0; - std::vector residuum_names{ - "GasMassFlowRate", "LiquidMassFlowRate", "HeatFlowRate", "NodalForces"}; + std::vector> residuum_names{ + {"GasMassFlowRate", "LiquidMassFlowRate", "HeatFlowRate", + "NodalForces"}}; AssemblyMixin>::initializeAssemblyOnSubmeshes( - process_id, meshes, residuum_names); + meshes, residuum_names); return residuum_names; } diff --git a/ProcessLib/TH2M/TH2MProcess.h b/ProcessLib/TH2M/TH2MProcess.h index 6b8942efd44..96ff5ddb1a2 100644 --- a/ProcessLib/TH2M/TH2MProcess.h +++ b/ProcessLib/TH2M/TH2MProcess.h @@ -105,7 +105,7 @@ class TH2MProcess final : public Process, NumLib::LocalToGlobalIndexMap const& getDOFTable( const int process_id) const override; - std::vector initializeAssemblyOnSubmeshes( + std::vector> initializeAssemblyOnSubmeshes( std::vector> const& meshes) override; diff --git a/ProcessLib/ThermoRichardsMechanics/ThermoRichardsMechanicsProcess.cpp b/ProcessLib/ThermoRichardsMechanics/ThermoRichardsMechanicsProcess.cpp index 3813235dd1b..c763ac46da4 100644 --- a/ProcessLib/ThermoRichardsMechanics/ThermoRichardsMechanicsProcess.cpp +++ b/ProcessLib/ThermoRichardsMechanics/ThermoRichardsMechanicsProcess.cpp @@ -239,19 +239,18 @@ void ThermoRichardsMechanicsProcess:: } template -std::vector +std::vector> ThermoRichardsMechanicsProcess:: initializeAssemblyOnSubmeshes( std::vector> const& meshes) { INFO("TRM process initializeSubmeshOutput()."); - const int process_id = 0; - std::vector residuum_names{"HeatFlowRate", "MassFlowRate", - "NodalForces"}; + std::vector> residuum_names{ + {"HeatFlowRate", "MassFlowRate", "NodalForces"}}; AssemblyMixin< ThermoRichardsMechanicsProcess>:: - initializeAssemblyOnSubmeshes(process_id, meshes, residuum_names); + initializeAssemblyOnSubmeshes(meshes, residuum_names); return residuum_names; } diff --git a/ProcessLib/ThermoRichardsMechanics/ThermoRichardsMechanicsProcess.h b/ProcessLib/ThermoRichardsMechanics/ThermoRichardsMechanicsProcess.h index a13c2b0d338..6ae284ce890 100644 --- a/ProcessLib/ThermoRichardsMechanics/ThermoRichardsMechanicsProcess.h +++ b/ProcessLib/ThermoRichardsMechanics/ThermoRichardsMechanicsProcess.h @@ -199,7 +199,7 @@ class ThermoRichardsMechanicsProcess final NumLib::LocalToGlobalIndexMap const& getDOFTable( const int process_id) const override; - std::vector initializeAssemblyOnSubmeshes( + std::vector> initializeAssemblyOnSubmeshes( std::vector> const& meshes) override; From 1ff3c76711ea32978c50f075ffa44612a6af6c89 Mon Sep 17 00:00:00 2001 From: Dmitri Naumov Date: Mon, 12 Aug 2024 17:16:21 +0200 Subject: [PATCH 12/15] [PL/HM] Use assembly mixins residuum output --- .../HydroMechanics/HydroMechanicsProcess.cpp | 53 ++++++++----------- .../HydroMechanics/HydroMechanicsProcess.h | 7 +-- 2 files changed, 26 insertions(+), 34 deletions(-) diff --git a/ProcessLib/HydroMechanics/HydroMechanicsProcess.cpp b/ProcessLib/HydroMechanics/HydroMechanicsProcess.cpp index fb1b871093f..eabd64447c1 100644 --- a/ProcessLib/HydroMechanics/HydroMechanicsProcess.cpp +++ b/ProcessLib/HydroMechanics/HydroMechanicsProcess.cpp @@ -45,12 +45,6 @@ HydroMechanicsProcess::HydroMechanicsProcess( *_jacobian_assembler}, process_data_(std::move(process_data)) { - nodal_forces_ = MeshLib::getOrCreateMeshProperty( - mesh, "NodalForces", MeshLib::MeshItemType::Node, DisplacementDim); - - hydraulic_flow_ = MeshLib::getOrCreateMeshProperty( - mesh, "MassFlowRate", MeshLib::MeshItemType::Node, 1); - _integration_point_writer.emplace_back( std::make_unique( "sigma_ip", @@ -339,31 +333,6 @@ void HydroMechanicsProcess:: AssemblyMixin>::assembleWithJacobian( t, dt, x, x_prev, process_id, b, Jac); - - auto const dof_tables = getDOFTables(x.size()); - auto copyRhs = [&](int const variable_id, auto& output_vector) - { - if (use_monolithic_scheme) - { - transformVariableFromGlobalVector(b, variable_id, *dof_tables[0], - output_vector, - std::negate()); - } - else - { - transformVariableFromGlobalVector(b, 0, *dof_tables[process_id], - output_vector, - std::negate()); - } - }; - if (process_id == process_data_.hydraulic_process_id) - { - copyRhs(0, *hydraulic_flow_); - } - if (process_id == process_data_.mechanics_related_process_id) - { - copyRhs(1, *nodal_forces_); - } } template @@ -382,6 +351,28 @@ void HydroMechanicsProcess::preTimestepConcreteProcess( } } +template +std::vector> +HydroMechanicsProcess::initializeAssemblyOnSubmeshes( + std::vector> const& meshes) +{ + INFO("HydroMechanicsProcess initializeSubmeshOutput()."); + std::vector> per_process_residuum_names; + if (_process_variables.size() == 1) // monolithic + { + per_process_residuum_names = {{"MassFlowRate", "NodalForces"}}; + } + else // staggered + { + per_process_residuum_names = {{"MassFlowRate"}, {"NodalForces"}}; + } + + AssemblyMixin>:: + initializeAssemblyOnSubmeshes(meshes, per_process_residuum_names); + + return per_process_residuum_names; +} + template void HydroMechanicsProcess::postTimestepConcreteProcess( std::vector const& x, diff --git a/ProcessLib/HydroMechanics/HydroMechanicsProcess.h b/ProcessLib/HydroMechanics/HydroMechanicsProcess.h index b47cbca7dbb..033e1f02db5 100644 --- a/ProcessLib/HydroMechanics/HydroMechanicsProcess.h +++ b/ProcessLib/HydroMechanics/HydroMechanicsProcess.h @@ -113,6 +113,10 @@ class HydroMechanicsProcess final NumLib::LocalToGlobalIndexMap const& getDOFTable( const int process_id) const override; + std::vector> initializeAssemblyOnSubmeshes( + std::vector> const& meshes) + override; + bool isMonolithicSchemeUsed() const override { return process_data_.isMonolithicSchemeUsed(); @@ -154,9 +158,6 @@ class HydroMechanicsProcess final { return process_id == process_data_.mechanics_related_process_id; } - - MeshLib::PropertyVector* nodal_forces_ = nullptr; - MeshLib::PropertyVector* hydraulic_flow_ = nullptr; }; extern template class HydroMechanicsProcess<2>; From 403ae53f48fc870577ca27dc9044a23887271193 Mon Sep 17 00:00:00 2001 From: Dmitri Naumov Date: Mon, 12 Aug 2024 17:41:12 +0200 Subject: [PATCH 13/15] [ci] Restrict omp tests to TRM; exclude TRF --- scripts/cmake/test/AddTest.cmake | 4 ++-- scripts/cmake/test/OgsTest.cmake | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/cmake/test/AddTest.cmake b/scripts/cmake/test/AddTest.cmake index 104027437f3..0ab5b41d5d8 100644 --- a/scripts/cmake/test/AddTest.cmake +++ b/scripts/cmake/test/AddTest.cmake @@ -292,7 +292,7 @@ function(AddTest) # OpenMP tests for specific processes only. TODO (CL) Once all processes can # be assembled OpenMP parallel, the condition should be removed. - if("${labels}" MATCHES "TH2M|ThermoRichards|^HydroMechanics") + if("${labels}" MATCHES "TH2M|ThermoRichardsMechanics|^HydroMechanics") _add_test(${TEST_NAME}-omp) _set_omp_test_properties() endif() @@ -323,7 +323,7 @@ function(AddTest) # Run the tester _add_test_tester(${TEST_NAME}) - if("${labels}" MATCHES "TH2M|ThermoRichards|^HydroMechanics") + if("${labels}" MATCHES "TH2M|ThermoRichardsMechanics|^HydroMechanics") _add_test_tester(${TEST_NAME}-omp) endif() diff --git a/scripts/cmake/test/OgsTest.cmake b/scripts/cmake/test/OgsTest.cmake index 6a408e0ba88..8449f9d5b37 100644 --- a/scripts/cmake/test/OgsTest.cmake +++ b/scripts/cmake/test/OgsTest.cmake @@ -84,7 +84,7 @@ function(OgsTest) # OpenMP tests for specific processes only. TODO (CL) Once all processes can # be assembled OpenMP parallel, the condition should be removed. - if("${labels}" MATCHES "TH2M|ThermoRichards|^HydroMechanics") + if("${labels}" MATCHES "TH2M|ThermoRichardsMechanics|^HydroMechanics") _ogs_add_test(${TEST_NAME}-omp) _set_omp_test_properties() endif() From 53321f240ae8c71cbe266468ee2973aac74ff09d Mon Sep 17 00:00:00 2001 From: Dmitri Naumov Date: Tue, 13 Aug 2024 09:43:08 +0200 Subject: [PATCH 14/15] [PL] Revert ranges to for-range loop MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Unfortunately with gcc-13.2.1 with optimizations there is a warning, which I was not able to correct. ... inlined from ‘std::vector > > {anonymous}::collectActiveLocalAssemblers(const BaseLib::PolymorphicRandomAccessContainerView&, const std::vector&)’ at /var/lib/gitlab-runner/builds/F1XUyv4cx/1/ogs/ogs/ProcessLib/Assembly/ParallelVectorMatrixAssembler.cpp:132:62: /usr/include/c++/13.2.1/bits/stl_iterator_base_funcs.h:175:9: warning: iteration 576460752303423487 invokes undefined behavior [-Waggressive-loop-optimizations] 175 | while (__n--) | ^~~~~ /usr/include/c++/13.2.1/bits/stl_iterator_base_funcs.h:175:9: note: within this loop Other compilers gcc-14, clang-17 and 18, msvc had no problems with the former code. --- .../ParallelVectorMatrixAssembler.cpp | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/ProcessLib/Assembly/ParallelVectorMatrixAssembler.cpp b/ProcessLib/Assembly/ParallelVectorMatrixAssembler.cpp index 14b9d24a6b9..c649e5ce3e6 100644 --- a/ProcessLib/Assembly/ParallelVectorMatrixAssembler.cpp +++ b/ProcessLib/Assembly/ParallelVectorMatrixAssembler.cpp @@ -12,10 +12,7 @@ #include #include -#include #include -#include -#include #include #include "BaseLib/StringTools.h" @@ -115,15 +112,18 @@ collectActiveLocalAssemblers( ProcessLib::LocalAssemblerInterface> const& local_assemblers, std::vector const& active_elements) { - auto id_and_local_asm = [&local_assemblers](std::size_t const id) - -> std::tuple> - { return {id, local_assemblers[id]}; }; - auto create_ids_asm_pairs = [&](auto const& element_ids) { - return element_ids | ranges::views::transform(id_and_local_asm) | - ranges::to(); + std::vector>> + result; + result.reserve(static_cast(element_ids.size())); + for (auto const id : element_ids) + { + result.push_back({id, local_assemblers[id]}); + } + return result; }; if (active_elements.empty()) From 86f3b6c694a5269ce3b0aafce71380a74723ebf5 Mon Sep 17 00:00:00 2001 From: Dmitri Naumov Date: Tue, 13 Aug 2024 16:18:52 +0200 Subject: [PATCH 15/15] [PL] Extract runAssembly function --- .../ParallelVectorMatrixAssembler.cpp | 84 +++++-------------- 1 file changed, 22 insertions(+), 62 deletions(-) diff --git a/ProcessLib/Assembly/ParallelVectorMatrixAssembler.cpp b/ProcessLib/Assembly/ParallelVectorMatrixAssembler.cpp index c649e5ce3e6..a78a8e7aa6c 100644 --- a/ProcessLib/Assembly/ParallelVectorMatrixAssembler.cpp +++ b/ProcessLib/Assembly/ParallelVectorMatrixAssembler.cpp @@ -134,66 +134,18 @@ collectActiveLocalAssemblers( return create_ids_asm_pairs(active_elements); } -void runAssemblyForEachLocalAssembler( +void runAssembly( std::vector>> const& ids_local_assemblers, - NumLib::LocalToGlobalIndexMap const& dof_table, double const t, - double const dt, GlobalVector const& x, GlobalVector const& x_prev, - std::vector& local_b_data, std::vector& local_Jac_data, - ProcessLib::AbstractJacobianAssembler& jac_asm, ThreadException& exception, - ProcessLib::Assembly::MultiMatrixElementCache& cache, - auto local_matrix_output) + ThreadException& exception, + auto local_matrix_output, + auto assemble) { - // due to MSVC++ error: - // error C3016: 'element_id': index variable in OpenMP 'for' - // statement must have signed integral type std::ptrdiff_t n_elements = static_cast(ids_local_assemblers.size()); -#pragma omp for nowait - for (std::ptrdiff_t i = 0; i < n_elements; ++i) - { - if (exception) - { - continue; - } - auto [element_id, loc_asm] = ids_local_assemblers[i]; - - try - { - assembleWithJacobianOneElement(element_id, loc_asm, dof_table, t, - dt, x, x_prev, local_b_data, - local_Jac_data, jac_asm, cache); - } - catch (...) - { - exception.capture(); - continue; - } - - local_matrix_output(element_id); - } -} -void runStaggeredAssemblyForEachLocalAssembler( - std::vector>> const& - ids_local_assemblers, - std::vector const& dof_tables, - double const t, double const dt, std::vector const& x, - std::vector const& x_prev, int const process_id, - std::vector& local_b_data, std::vector& local_Jac_data, - ProcessLib::AbstractJacobianAssembler& jac_asm, ThreadException& exception, - ProcessLib::Assembly::MultiMatrixElementCache& cache, - auto local_matrix_output) -{ - // due to MSVC++ error: - // error C3016: 'element_id': index variable in OpenMP 'for' - // statement must have signed integral type - std::ptrdiff_t n_elements = - static_cast(ids_local_assemblers.size()); #pragma omp for nowait for (std::ptrdiff_t i = 0; i < n_elements; ++i) { @@ -205,9 +157,7 @@ void runStaggeredAssemblyForEachLocalAssembler( try { - assembleWithJacobianForStaggeredSchemeOneElement( - element_id, loc_asm, dof_tables, t, dt, x, x_prev, process_id, - local_b_data, local_Jac_data, jac_asm, cache); + assemble(element_id, loc_asm); } catch (...) { @@ -335,18 +285,28 @@ void ParallelVectorMatrixAssembler::assembleWithJacobian( auto const& x = *xs[0]; auto const& x_prev = *x_prevs[0]; - runAssemblyForEachLocalAssembler( + runAssembly( collectActiveLocalAssemblers(local_assemblers, active_elements), - dof_table, t, dt, x, x_prev, local_b_data, local_Jac_data, - *jac_asm, exception, cache, local_matrix_output); + exception, local_matrix_output, + [&](auto element_id, auto& loc_asm) + { + assembleWithJacobianOneElement( + element_id, loc_asm, dof_table, t, dt, x, x_prev, + local_b_data, local_Jac_data, *jac_asm, cache); + }); } else // Staggered scheme { - runStaggeredAssemblyForEachLocalAssembler( + runAssembly( collectActiveLocalAssemblers(local_assemblers, active_elements), - dof_tables, t, dt, xs, x_prevs, process_id, local_b_data, - local_Jac_data, *jac_asm, exception, cache, - local_matrix_output); + exception, local_matrix_output, + [&](auto element_id, auto& loc_asm) + { + assembleWithJacobianForStaggeredSchemeOneElement( + element_id, loc_asm, dof_tables, t, dt, xs, x_prevs, + process_id, local_b_data, local_Jac_data, *jac_asm, + cache); + }); } }