From d7c1f4409a34685d8dcd545a97d161d483d89f66 Mon Sep 17 00:00:00 2001 From: Pierre Jolivet Date: Sun, 29 Sep 2024 16:18:43 +0200 Subject: [PATCH 01/59] Fix typos --- config/BuildSystem/config/package.py | 2 +- include/petscmacros.h | 2 +- include/petscsnes.h | 2 +- include/petscsystypes.h | 2 +- src/dm/dt/interface/dtprob.c | 2 +- src/dm/impls/plex/plexcreate.c | 2 +- src/dm/impls/plex/tests/ex47.c | 2 +- src/dm/interface/dm.c | 2 +- src/dm/interface/dmgeommodel.c | 2 +- src/dm/interface/dmperiodicity.c | 4 ++-- src/ksp/pc/impls/mpi/pcmpi.c | 2 +- src/mat/interface/matproduct.c | 2 +- src/mat/utils/multequal.c | 2 +- src/snes/impls/al/al.c | 2 +- src/snes/utils/dm/dmadapt.c | 4 ++-- src/sys/objects/options.c | 2 +- src/sys/objects/pinit.c | 2 +- src/sys/tests/ex69f.F90 | 2 +- src/sys/utils/server.c | 4 ++-- src/tao/interface/taosolver.c | 4 ++-- src/ts/tests/ex80.c | 2 +- src/ts/tests/ex81.c | 2 +- src/ts/tutorials/hamiltonian/ex2.c | 4 ++-- src/vec/is/sf/interface/sf.c | 4 ++-- 24 files changed, 30 insertions(+), 30 deletions(-) diff --git a/config/BuildSystem/config/package.py b/config/BuildSystem/config/package.py index d294a703e7f..954b1c0aec0 100644 --- a/config/BuildSystem/config/package.py +++ b/config/BuildSystem/config/package.py @@ -326,7 +326,7 @@ def addArgStartsWith(self,args,sw,value): return keep def rmValueArgStartsWith(self,args,sw,value): - '''Remove a value from arguements that start with sw''' + '''Remove a value from arguments that start with sw''' if not isinstance(sw, list): sw = [sw] keep = [] for i in args: diff --git a/include/petscmacros.h b/include/petscmacros.h index bfba17f91fe..021cb3d30c2 100644 --- a/include/petscmacros.h +++ b/include/petscmacros.h @@ -1250,7 +1250,7 @@ static inline constexpr std::size_t PETSC_STATIC_ARRAY_LENGTH(const T &) noexcep Note: The pragma takes effect when PETSc was configured with `--with-openmp`. 
See `PetscPragmaUseOMPKernels()` - for when PETSc was configured to use OpenMP in some of its numerical kernals. + for when PETSc was configured to use OpenMP in some of its numerical kernels. .seealso: `PetscPragmaUseOMPKernels()`, `PetscHasBuiltin()`, `PetscDefined()`, `PetscLikely()`, `PetscUnlikely()`, `PETSC_ATTRIBUTE_FORMAT`, `PETSC_ATTRIBUTE_MAY_ALIAS` diff --git a/include/petscsnes.h b/include/petscsnes.h index 6ff49a7ee7f..75014735c5b 100644 --- a/include/petscsnes.h +++ b/include/petscsnes.h @@ -1306,7 +1306,7 @@ PETSC_EXTERN PetscErrorCode SNESNewtonALGetLoadParameter(SNES, PetscReal *); Values: + `SNES_NEWTONAL_CORRECTION_EXACT` - choose the correction which exactly satisfies the constraint -- `SNES_NEWTONAL_CORRECTION_NORMAL` - choose the correction in the updated normal hyper-surface to the contraint surface +- `SNES_NEWTONAL_CORRECTION_NORMAL` - choose the correction in the updated normal hyper-surface to the constraint surface Options Database Key: . -snes_newtonal_correction_type - select type from diff --git a/include/petscsystypes.h b/include/petscsystypes.h index 5fdb1e9422d..64c59a1b01e 100644 --- a/include/petscsystypes.h +++ b/include/petscsystypes.h @@ -469,7 +469,7 @@ E*/ typedef enum { PETSC_BOOL3_FALSE, PETSC_BOOL3_TRUE, - PETSC_BOOL3_UNKNOWN = -1 /* the value is uknown at the time of query, but might be determined later */ + PETSC_BOOL3_UNKNOWN = -1 /* the value is unknown at the time of query, but might be determined later */ } PetscBool3; #define PetscBool3ToBool(a) ((a) == PETSC_BOOL3_TRUE ? PETSC_TRUE : PETSC_FALSE) diff --git a/src/dm/dt/interface/dtprob.c b/src/dm/dt/interface/dtprob.c index 485a1f36fa8..83477df7fca 100644 --- a/src/dm/dt/interface/dtprob.c +++ b/src/dm/dt/interface/dtprob.c @@ -630,7 +630,7 @@ EXTERN_C_END - cdf - The analytic CDF Output Parameter: -. alpha - The KS statisic +. 
alpha - The KS statistic Level: advanced diff --git a/src/dm/impls/plex/plexcreate.c b/src/dm/impls/plex/plexcreate.c index 80f508da13c..4756d32b293 100644 --- a/src/dm/impls/plex/plexcreate.c +++ b/src/dm/impls/plex/plexcreate.c @@ -854,7 +854,7 @@ static PetscErrorCode DMPlexSetBoxLabel_Internal(DM dm) PetscFunctionBeginUser; PetscCall(DMGetDimension(dm, &dim)); - PetscCheck((dim == 2) || (dim == 3), PetscObjectComm((PetscObject)dm), PETSC_ERR_SUP, "DMPlex box labeling only supports 2D and 3D meshes, recieved DM of dimension %" PetscInt_FMT, dim); + PetscCheck((dim == 2) || (dim == 3), PetscObjectComm((PetscObject)dm), PETSC_ERR_SUP, "DMPlex box labeling only supports 2D and 3D meshes, received DM of dimension %" PetscInt_FMT, dim); // Get Face Sets label PetscCall(DMGetLabel(dm, "Face Sets", &label)); if (label) { diff --git a/src/dm/impls/plex/tests/ex47.c b/src/dm/impls/plex/tests/ex47.c index d427d0b8433..65b509c4f20 100644 --- a/src/dm/impls/plex/tests/ex47.c +++ b/src/dm/impls/plex/tests/ex47.c @@ -405,7 +405,7 @@ int main(int argc, char **argv) const PetscInt NcorMax = 4; const PetscInt dim = 2; - /* Create a PetscSection and taking care to exlude nodes with "-1" into element connectivity: */ + /* Create a PetscSection and taking care to exclude nodes with "-1" into element connectivity: */ PetscSection s; PetscInt vStart = 0, vEnd = Nc; PetscCall(PetscSectionCreate(PETSC_COMM_WORLD, &s)); diff --git a/src/dm/interface/dm.c b/src/dm/interface/dm.c index 736865e4eee..c2007bfc00b 100644 --- a/src/dm/interface/dm.c +++ b/src/dm/interface/dm.c @@ -971,7 +971,7 @@ PetscErrorCode DMViewFromOptions(DM dm, PetscObject obj, const char name[]) `DMPLEX` only represents geometry while most post-processing software expect that a mesh also provides information on the discretization space. This function assumes that the file represents Lagrange finite elements of order 1 or 2. 
The order of the mesh shall be set using `PetscViewerExodusIISetOrder()` - Variable names can be set and querried using `PetscViewerExodusII[Set/Get][Nodal/Zonal]VariableNames[s]`. + Variable names can be set and queried using `PetscViewerExodusII[Set/Get][Nodal/Zonal]VariableNames[s]`. .seealso: [](ch_dmbase), `DM`, `PetscViewer`, `PetscViewerFormat`, `PetscViewerSetFormat()`, `DMDestroy()`, `DMCreateGlobalVector()`, `DMCreateInterpolation()`, `DMCreateColoring()`, `DMCreateMatrix()`, `DMCreateMassMatrix()`, `DMLoad()`, `PetscObjectSetName()` @*/ diff --git a/src/dm/interface/dmgeommodel.c b/src/dm/interface/dmgeommodel.c index 2c018feb815..5116575fb89 100644 --- a/src/dm/interface/dmgeommodel.c +++ b/src/dm/interface/dmgeommodel.c @@ -76,7 +76,7 @@ PetscErrorCode DMGeomModelRegisterAll(void) Not Collective, No Fortran Support Input Parameters: -+ sname - name of a new user-defined gometry model ++ sname - name of a new user-defined geometry model - fnc - geometry model function Example Usage: diff --git a/src/dm/interface/dmperiodicity.c b/src/dm/interface/dmperiodicity.c index 0bdd0a417b7..2feb996b9ea 100644 --- a/src/dm/interface/dmperiodicity.c +++ b/src/dm/interface/dmperiodicity.c @@ -276,7 +276,7 @@ PetscErrorCode DMGetCoordinatesLocalized(DM dm, PetscBool *areLocalized) . dm - The `DM` Output Parameter: -. sparse - `PETSC_TRUE` if ony cells near the periodic boundary are localized +. 
sparse - `PETSC_TRUE` if only cells near the periodic boundary are localized Level: intermediate @@ -298,7 +298,7 @@ PetscErrorCode DMGetSparseLocalize(DM dm, PetscBool *sparse) Input Parameters: + dm - The `DM` -- sparse - `PETSC_TRUE` if ony cells near the periodic boundary are localized +- sparse - `PETSC_TRUE` if only cells near the periodic boundary are localized Level: intermediate diff --git a/src/ksp/pc/impls/mpi/pcmpi.c b/src/ksp/pc/impls/mpi/pcmpi.c index 6fa5a0088aa..6ca6380fda2 100644 --- a/src/ksp/pc/impls/mpi/pcmpi.c +++ b/src/ksp/pc/impls/mpi/pcmpi.c @@ -610,7 +610,7 @@ PetscErrorCode PCMPIServerBegin(void) PetscMPIInt dummy1 = 1, dummy2; #endif - // TODO: can we broadcast the number of active ranks here so only the correct subset of proccesses waits on the later scatters? + // TODO: can we broadcast the number of active ranks here so only the correct subset of processes waits on the later scatters? #if defined(PETSC_HAVE_PTHREAD_MUTEX) if (PCMPIServerUseShmget) pthread_mutex_lock(&PCMPIServerLocks[PetscGlobalRank]); #endif diff --git a/src/mat/interface/matproduct.c b/src/mat/interface/matproduct.c index 70d1192b6fd..4ff91bdd967 100644 --- a/src/mat/interface/matproduct.c +++ b/src/mat/interface/matproduct.c @@ -554,7 +554,7 @@ PetscErrorCode MatProductSetFromOptions(Mat mat) Level: intermediate Developer Note: - Shouldn't this information be printed from an approriate `MatView()` with perhaps certain formats set? + Shouldn't this information be printed from an appropriate `MatView()` with perhaps certain formats set? 
.seealso: [](ch_matrices), `MatProductType`, `Mat`, `MatProductSetFromOptions()`, `MatView()`, `MatProductCreate()`, `MatProductCreateWithMat()` @*/ diff --git a/src/mat/utils/multequal.c b/src/mat/utils/multequal.c index 2f986939b26..ae999d49b1e 100644 --- a/src/mat/utils/multequal.c +++ b/src/mat/utils/multequal.c @@ -1,7 +1,7 @@ #include /*I "petscmat.h" I*/ /* - n; try the MatMult varient n times + n; try the MatMult variant n times flg: return the boolean result, equal or not t: 0 => no transpose; 1 => transpose; 2 => Hermitian transpose add: 0 => no add (e.g., y = Ax); 1 => add third vector (e.g., z = Ax + y); 2 => add update (e.g., y = Ax + y) diff --git a/src/snes/impls/al/al.c b/src/snes/impls/al/al.c index 56624009f52..044a95b0c9b 100644 --- a/src/snes/impls/al/al.c +++ b/src/snes/impls/al/al.c @@ -378,7 +378,7 @@ static PetscErrorCode SNESSolve_NEWTONAL(SNES snes) /* Take a step orthogonal to the current incremental update DeltaX. Note, this approach is cheaper than the exact correction, but may exhibit convergence - issues due to the iterative trial points not being on the quadratic contraint surface. + issues due to the iterative trial points not being on the quadratic constraint surface. On the bright side, we always have a real and unique solution for deltaLambda. */ PetscScalar coefs[2]; diff --git a/src/snes/utils/dm/dmadapt.c b/src/snes/utils/dm/dmadapt.c index c44ceebdd33..4824cc1d255 100644 --- a/src/snes/utils/dm/dmadapt.c +++ b/src/snes/utils/dm/dmadapt.c @@ -484,7 +484,7 @@ PetscErrorCode DMAdaptorSetOptionsPrefix(DMAdaptor adaptor, const char prefix[]) . -adaptor_sequence_num - Number of adaptations to generate an optimal grid . -adaptor_target_num - Set the target number of vertices N_adapt, -1 for automatic determination . 
-adaptor_refinement_factor - Set r such that N_adapt = r^dim N_orig -- -adaptor_mixed_setup_function - Set the fnction func that sets up the mixed problem +- -adaptor_mixed_setup_function - Set the function func that sets up the mixed problem Level: beginner @@ -1179,7 +1179,7 @@ PetscErrorCode DMAdaptorMonitorErrorDraw(DMAdaptor adaptor, PetscInt n, DM odm, } /*@C - DMAdaptorMonitorErrorDrawLGCreate - Creates the context for the erro plotter `DMAdaptorMonitorErrorDrawLG()` + DMAdaptorMonitorErrorDrawLGCreate - Creates the context for the error plotter `DMAdaptorMonitorErrorDrawLG()` Collective diff --git a/src/sys/objects/options.c b/src/sys/objects/options.c index 536fb5c37cf..fb4182241a3 100644 --- a/src/sys/objects/options.c +++ b/src/sys/objects/options.c @@ -2437,7 +2437,7 @@ PetscErrorCode PetscOptionsGetBool(PetscOptions options, const char pre[], const } /*@C - PetscOptionsGetBool3 - Gets the ternary logical (true, false or unkonw) value for a particular + PetscOptionsGetBool3 - Gets the ternary logical (true, false or unknown) value for a particular option in the database. Not Collective diff --git a/src/sys/objects/pinit.c b/src/sys/objects/pinit.c index 938dcfcffe6..c2a1017c944 100644 --- a/src/sys/objects/pinit.c +++ b/src/sys/objects/pinit.c @@ -1254,7 +1254,7 @@ PETSC_INTERN PetscErrorCode PetscInitialize_Common(const char *prog, const char . -shared_tmp - indicates /tmp directory is shared by all processors . -not_shared_tmp - each processor has own /tmp . 
-tmp - alternative name of /tmp directory -- -mpiuni-allow-multiprocess-launch - allow mpiexec to launch multiple indendent MPI-Uni jobs, otherwise a sanity check error is invoked to prevent misuse of MPI-Uni +- -mpiuni-allow-multiprocess-launch - allow mpiexec to launch multiple independent MPI-Uni jobs, otherwise a sanity check error is invoked to prevent misuse of MPI-Uni Options Database Keys for Option Database: + -skip_petscrc - skip the default option files ~/.petscrc, .petscrc, petscrc diff --git a/src/sys/tests/ex69f.F90 b/src/sys/tests/ex69f.F90 index 5bfa92657d7..6609329c83f 100644 --- a/src/sys/tests/ex69f.F90 +++ b/src/sys/tests/ex69f.F90 @@ -13,7 +13,7 @@ program ex69F90 ! mpiexec --bind-to numa -n 1 ./ex69f ! ! You may get very different wall clock times -! It seems some mpiexec implementations change the thred binding/mapping that results with +! It seems some mpiexec implementations change the thread binding/mapping that results with ! OpenMP so all the threads are run on a single core ! ! 
The same differences occur without the PetscInitialize() call indicating diff --git a/src/sys/utils/server.c b/src/sys/utils/server.c index 769caea40de..53e6d435127 100644 --- a/src/sys/utils/server.c +++ b/src/sys/utils/server.c @@ -196,7 +196,7 @@ PetscErrorCode PetscShmgetUnmapAddresses(PetscInt n, void **addres) } /*@C - PetscShmgetAllocateArray - allocates shared memory accessable by all MPI processes in the server + PetscShmgetAllocateArray - allocates shared memory accessible by all MPI processes in the server Not Collective, only called on the first MPI process @@ -293,7 +293,7 @@ PetscErrorCode PetscShmgetAllocateArray(size_t sz, size_t asz, void **addr) } /*@C - PetscShmgetDeallocateArray - deallocates shared memory accessable by all MPI processes in the server + PetscShmgetDeallocateArray - deallocates shared memory accessible by all MPI processes in the server Not Collective, only called on the first MPI process diff --git a/src/tao/interface/taosolver.c b/src/tao/interface/taosolver.c index aeb231b23bb..930e8131362 100644 --- a/src/tao/interface/taosolver.c +++ b/src/tao/interface/taosolver.c @@ -1025,7 +1025,7 @@ PetscErrorCode TaoSetMaximumFunctionEvaluations(Tao tao, PetscInt nfcn) } else if (nfcn == PETSC_UNLIMITED || nfcn < 0) { tao->max_funcs = PETSC_UNLIMITED; } else { - PetscCheck(nfcn >= 0, PetscObjectComm((PetscObject)tao), PETSC_ERR_ARG_OUTOFRANGE, "Maxium number of function evaluations must be positive"); + PetscCheck(nfcn >= 0, PetscObjectComm((PetscObject)tao), PETSC_ERR_ARG_OUTOFRANGE, "Maximum number of function evaluations must be positive"); tao->max_funcs = nfcn; } PetscFunctionReturn(PETSC_SUCCESS); @@ -1111,7 +1111,7 @@ PetscErrorCode TaoSetMaximumIterations(Tao tao, PetscInt maxits) } else if (maxits == PETSC_UNLIMITED) { tao->max_it = PETSC_INT_MAX; } else { - PetscCheck(maxits > 0, PetscObjectComm((PetscObject)tao), PETSC_ERR_ARG_OUTOFRANGE, "Maxium number of iterations must be positive"); + PetscCheck(maxits > 0, 
PetscObjectComm((PetscObject)tao), PETSC_ERR_ARG_OUTOFRANGE, "Maximum number of iterations must be positive"); tao->max_it = maxits; } PetscFunctionReturn(PETSC_SUCCESS); diff --git a/src/ts/tests/ex80.c b/src/ts/tests/ex80.c index ce2615bc1de..d3dd3341f73 100644 --- a/src/ts/tests/ex80.c +++ b/src/ts/tests/ex80.c @@ -3,7 +3,7 @@ static char help[] = "Constant acceleration check with 2nd-order generalized-alp #include typedef struct { - PetscReal a0; /* contant acceleration */ + PetscReal a0; /* constant acceleration */ PetscReal u0, v0; /* initial conditions */ PetscReal radius; /* spectral radius of integrator */ } UserParams; diff --git a/src/ts/tests/ex81.c b/src/ts/tests/ex81.c index 6606c4a7eab..d980264deb5 100644 --- a/src/ts/tests/ex81.c +++ b/src/ts/tests/ex81.c @@ -3,7 +3,7 @@ static char help[] = "Constant velocity check with 1st-order generalized-alpha.\ #include typedef struct { - PetscReal v0; /* contant velocity */ + PetscReal v0; /* constant velocity */ PetscReal u0; /* initial condition */ PetscReal radius; /* spectral radius of integrator */ } UserParams; diff --git a/src/ts/tutorials/hamiltonian/ex2.c b/src/ts/tutorials/hamiltonian/ex2.c index 6376d985936..c1865c12120 100644 --- a/src/ts/tutorials/hamiltonian/ex2.c +++ b/src/ts/tutorials/hamiltonian/ex2.c @@ -1696,7 +1696,7 @@ static PetscErrorCode RHSFunctionV(TS ts, PetscReal t, Vec X, Vec Vres, void *ct } PetscCall(VecRestoreArrayRead(X, &x)); /* - Syncrhonized, ordered output for parallel/sequential test cases. + Synchronized, ordered output for parallel/sequential test cases. In the 1D (on the 2D mesh) case, every y component should be zero. 
*/ if (user->checkVRes) { @@ -1845,7 +1845,7 @@ PetscErrorCode line(PetscInt dim, PetscReal time, const PetscReal dummy[], Petsc Input Parameters: + ts - The TS -- useInitial - Flag to also set the initial conditions to the current coodinates and velocities and setup the problem +- useInitial - Flag to also set the initial conditions to the current coordinates and velocities and setup the problem Output Parameters: . u - The initialized solution vector diff --git a/src/vec/is/sf/interface/sf.c b/src/vec/is/sf/interface/sf.c index e93535da71c..5dde5873fc0 100644 --- a/src/vec/is/sf/interface/sf.c +++ b/src/vec/is/sf/interface/sf.c @@ -2633,8 +2633,8 @@ PetscErrorCode PetscSFConcatenate(MPI_Comm comm, PetscInt nsfs, PetscSF sfs[], P Input Parameters: + sf - star forest . unit - the data type contained within the root and leaf data -. rootdata - root data that will be used for muliple PetscSF communications -- leafdata - leaf data that will be used for muliple PetscSF communications +. rootdata - root data that will be used for multiple PetscSF communications +- leafdata - leaf data that will be used for multiple PetscSF communications Level: advanced From 25057d89e2e49006fdb1be11b3bbc46760ff1393 Mon Sep 17 00:00:00 2001 From: Barry Smith Date: Sun, 29 Sep 2024 20:14:40 +0000 Subject: [PATCH 02/59] Check that the matstash size never grows beyond PETSC_INT_MAX. 
Reported-by: Peng Zhang --- src/mat/utils/matstash.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/mat/utils/matstash.c b/src/mat/utils/matstash.c index 0d8aaf7850d..ec0c90ff3ea 100644 --- a/src/mat/utils/matstash.c +++ b/src/mat/utils/matstash.c @@ -218,18 +218,20 @@ PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash, PetscInt max) */ static PetscErrorCode MatStashExpand_Private(MatStash *stash, PetscInt incr) { - PetscInt newnmax, bs2 = stash->bs * stash->bs; + PetscInt newnmax, bs2 = stash->bs * stash->bs; + PetscCount cnewnmax; PetscFunctionBegin; /* allocate a larger stash */ if (!stash->oldnmax && !stash->nmax) { /* new stash */ - if (stash->umax) newnmax = stash->umax / bs2; - else newnmax = DEFAULT_STASH_SIZE / bs2; + if (stash->umax) cnewnmax = stash->umax / bs2; + else cnewnmax = DEFAULT_STASH_SIZE / bs2; } else if (!stash->nmax) { /* reusing stash */ - if (stash->umax > stash->oldnmax) newnmax = stash->umax / bs2; - else newnmax = stash->oldnmax / bs2; - } else newnmax = stash->nmax * 2; - if (newnmax < (stash->nmax + incr)) newnmax += 2 * incr; + if (stash->umax > stash->oldnmax) cnewnmax = stash->umax / bs2; + else cnewnmax = stash->oldnmax / bs2; + } else cnewnmax = stash->nmax * 2; + if (cnewnmax < (stash->nmax + incr)) cnewnmax += 2 * incr; + PetscCall(PetscIntCast(cnewnmax, &newnmax)); /* Get a MatStashSpace and attach it to stash */ PetscCall(PetscMatStashSpaceGet(bs2, newnmax, &stash->space)); From fef1ebd00dbad4bc32564f727f1229f4db2958ce Mon Sep 17 00:00:00 2001 From: Pierre Jolivet Date: Mon, 30 Sep 2024 08:45:32 +0200 Subject: [PATCH 03/59] Fix -Wshorten-64-to-32 src/vec/is/utils/hdf5/hdf5io.c:120:46: warning: implicit conversion loses integer precision: 'PetscInt' (aka 'long long') to 'int' [-Wshorten-64-to-32] 120 | PetscCallMPI(MPI_Allgatherv(lcind, cmap->n, MPIU_INT, ctx->cind, counts, displs, MPIU_INT, comm)); | 
~~~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ include/petsclog.h:541:123: note: expanded from macro 'MPI_Allgatherv' 541 | (PetscAddLogDouble(&petsc_gather_ct, &petsc_gather_ct_th, PetscMPIParallelComm(comm)) || MPI_Allgatherv((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (displs), (recvtype), (comm))) | ~~~~~~~~~~~~~~ ^~~~~~~~~ include/petscerror.h:784:102: note: expanded from macro 'PetscCallMPI' 784 | #define PetscCallMPI(...) PetscCallMPI_Private(PetscStackPop, SETERRQ, PETSC_COMM_SELF, __VA_ARGS__) | ^~~~~~~~~~~ include/petscerror.h:774:32: note: expanded from macro 'PetscCallMPI_Private' 774 | ierr_petsc_call_mpi_ = __VA_ARGS__; \ | ^~~~~~~~~~~ --- src/vec/is/utils/hdf5/hdf5io.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/vec/is/utils/hdf5/hdf5io.c b/src/vec/is/utils/hdf5/hdf5io.c index 9cb11c7cd3e..463adae93e7 100644 --- a/src/vec/is/utils/hdf5/hdf5io.c +++ b/src/vec/is/utils/hdf5/hdf5io.c @@ -89,11 +89,9 @@ static PetscErrorCode PetscViewerHDF5ReadSizes_Private(PetscViewer viewer, HDF5R if (compressed && uncompress) { hid_t inttype; PetscLayout cmap; - PetscInt *lcind; - PetscMPIInt *counts, *displs; + PetscInt *lcind, N = 0; + PetscMPIInt *counts, *displs, size, n; const PetscInt *range; - PetscInt N = 0; - PetscMPIInt size; MPI_Comm comm; #if defined(PETSC_USE_64BIT_INDICES) @@ -117,7 +115,8 @@ static PetscErrorCode PetscViewerHDF5ReadSizes_Private(PetscViewer viewer, HDF5R PetscCall(PetscMPIIntCast(range[r + 1] - range[r], &counts[r])); PetscCall(PetscMPIIntCast(range[r], &displs[r])); } - PetscCallMPI(MPI_Allgatherv(lcind, cmap->n, MPIU_INT, ctx->cind, counts, displs, MPIU_INT, comm)); + PetscCall(PetscMPIIntCast(cmap->n, &n)); + PetscCallMPI(MPI_Allgatherv(lcind, n, MPIU_INT, ctx->cind, counts, displs, MPIU_INT, comm)); PetscCall(PetscFree2(counts, displs)); PetscCall(PetscFree(lcind)); PetscCall(PetscLayoutDestroy(&cmap)); From 
fac68d15379d6c01297454dabe60e00630fdbba6 Mon Sep 17 00:00:00 2001 From: Pierre Jolivet Date: Mon, 30 Sep 2024 08:48:45 +0200 Subject: [PATCH 04/59] Fix -Wimplicit-fallthrough MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit src/dm/impls/plex/exodusii/plexexodusii2.c: In function ‘DMView_PlexExodusII’: src/dm/impls/plex/exodusii/plexexodusii2.c:889:12: warning: this statement may fall through [-Wimplicit-fallthrough=] 889 | if (closureSize == 2 * dim) { | ^ src/dm/impls/plex/exodusii/plexexodusii2.c:892:7: note: here 892 | case 2: | ^~~~ --- src/dm/impls/plex/exodusii/plexexodusii2.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/dm/impls/plex/exodusii/plexexodusii2.c b/src/dm/impls/plex/exodusii/plexexodusii2.c index 5c087e20f7b..787b4996842 100644 --- a/src/dm/impls/plex/exodusii/plexexodusii2.c +++ b/src/dm/impls/plex/exodusii/plexexodusii2.c @@ -889,6 +889,7 @@ PetscErrorCode DMView_PlexExodusII(DM dm, PetscViewer viewer) if (closureSize == 2 * dim) { type[cs] = SEGMENT; } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of vertices %" PetscInt_FMT " in dimension %" PetscInt_FMT " has no ExodusII type", closureSize / dim, dim); + break; case 2: if (closureSize == 3 * dim) { type[cs] = TRI; From 35d731d1aa9d2b537120feeb4fcc837551e0dcc8 Mon Sep 17 00:00:00 2001 From: Lisandro Dalcin Date: Mon, 30 Sep 2024 10:25:06 +0300 Subject: [PATCH 05/59] Add missing cast to silence -Wsign-conversion --- include/petscsys.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/petscsys.h b/include/petscsys.h index 25fa7c0e77e..112561de906 100644 --- a/include/petscsys.h +++ b/include/petscsys.h @@ -2393,7 +2393,7 @@ static inline PetscErrorCode PetscCitationsRegister(const char cit[], PetscBool PetscFunctionBegin; if (set && *set) PetscFunctionReturn(PETSC_SUCCESS); PetscCall(PetscStrlen(cit, &len)); - PetscCall(PetscSegBufferGet(PetscCitationsList, len, &vstring)); + 
PetscCall(PetscSegBufferGet(PetscCitationsList, (PetscCount)len, &vstring)); PetscCall(PetscArraycpy(vstring, cit, len)); if (set) *set = PETSC_TRUE; PetscFunctionReturn(PETSC_SUCCESS); From 309ed48abb1e78afe3b51cfa4a0bf0feeba42ef9 Mon Sep 17 00:00:00 2001 From: Satish Balay Date: Mon, 30 Sep 2024 08:45:17 -0500 Subject: [PATCH 06/59] slepc: update to v3.22.0 --- config/BuildSystem/config/packages/slepc.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/config/BuildSystem/config/packages/slepc.py b/config/BuildSystem/config/packages/slepc.py index 753f973f23f..d8f67373372 100644 --- a/config/BuildSystem/config/packages/slepc.py +++ b/config/BuildSystem/config/packages/slepc.py @@ -3,8 +3,8 @@ class Configure(config.package.Package): def __init__(self, framework): config.package.Package.__init__(self, framework) - self.gitcommit = 'baf2b4f4b79fbc8f8aee8e2925b3f1912f6904ef' # (pre-3.22.0) main sep-28-2024 - #self.gitcommit = 'v'+self.version + self.version = '3.22.0' + self.gitcommit = 'v'+self.version self.download = ['git://https://gitlab.com/slepc/slepc.git','https://gitlab.com/slepc/slepc/-/archive/'+self.gitcommit+'/slepc-'+self.gitcommit+'.tar.gz'] self.functions = [] self.includes = [] From 7aeb3b909ab6d3a184fb40345bafcd5672c85c38 Mon Sep 17 00:00:00 2001 From: Richard Tran Mills Date: Sun, 29 Sep 2024 16:58:39 -0700 Subject: [PATCH 07/59] Copy PetscLayouts for row and columns of a MATCOMPOSITE from the constituent matrices Co-Authored-By: Pierre Jolivet --- src/mat/impls/composite/mcomposite.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/src/mat/impls/composite/mcomposite.c b/src/mat/impls/composite/mcomposite.c index 77d837c000f..4a354eacaec 100644 --- a/src/mat/impls/composite/mcomposite.c +++ b/src/mat/impls/composite/mcomposite.c @@ -387,20 +387,12 @@ static PetscErrorCode MatSetFromOptions_Composite(Mat A, PetscOptionItems *Petsc @*/ PetscErrorCode MatCreateComposite(MPI_Comm comm, PetscInt nmat, 
const Mat *mats, Mat *mat) { - PetscInt m, n, M, N, i; - PetscFunctionBegin; PetscCheck(nmat >= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Must pass in at least one matrix"); PetscAssertPointer(mat, 4); - - PetscCall(MatGetLocalSize(mats[0], PETSC_IGNORE, &n)); - PetscCall(MatGetLocalSize(mats[nmat - 1], &m, PETSC_IGNORE)); - PetscCall(MatGetSize(mats[0], PETSC_IGNORE, &N)); - PetscCall(MatGetSize(mats[nmat - 1], &M, PETSC_IGNORE)); PetscCall(MatCreate(comm, mat)); - PetscCall(MatSetSizes(*mat, m, n, M, N)); PetscCall(MatSetType(*mat, MATCOMPOSITE)); - for (i = 0; i < nmat; i++) PetscCall(MatCompositeAddMat(*mat, mats[i])); + for (PetscInt i = 0; i < nmat; i++) PetscCall(MatCompositeAddMat(*mat, mats[i])); PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY)); PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY)); PetscFunctionReturn(PETSC_SUCCESS); @@ -445,6 +437,10 @@ static PetscErrorCode MatCompositeAddMat_Composite(Mat mat, Mat smat) PetscCall(PetscRealloc(sizeof(PetscScalar) * shell->nmat, &shell->scalings)); shell->scalings[shell->nmat - 1] = 1.0; } + + /* The composite matrix requires PetscLayouts for its rows and columns; we copy these from the constituent partial matrices. 
*/ + if (shell->nmat == 1) PetscCall(PetscLayoutReference(smat->cmap, &mat->cmap)); + PetscCall(PetscLayoutReference(smat->rmap, &mat->rmap)); PetscFunctionReturn(PETSC_SUCCESS); } From 731341e59ef9e9cfa98efcaf9a5a63ad0406ab95 Mon Sep 17 00:00:00 2001 From: Junchao Zhang Date: Mon, 30 Sep 2024 21:54:32 -0500 Subject: [PATCH 08/59] CUPM: fix hipblasDoubleComplex, which is replaced by hipDoubleComplex in newer hipBLAS --- .gitlab-ci.yml | 10 ++++ config/examples/arch-ci-linux-hip-cmplx.py | 39 +++++++++++++ include/petsc/private/petsclegacycupmblas.h | 55 +++++++++++-------- include/petscdevice_hip.h | 14 +++++ include/petscsys.h | 2 +- .../bjacobi/bjkokkos/bjkokkos.kokkos.cxx | 2 + 6 files changed, 99 insertions(+), 23 deletions(-) create mode 100755 config/examples/arch-ci-linux-hip-cmplx.py diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 1557c99dbe9..380713372cb 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -547,6 +547,16 @@ linux-hip-double: variables: TEST_ARCH: arch-ci-linux-hip-double +linux-hip-cmplx: + extends: + - .stage-3 + - .linux_test_noflags + - .coverage-disable + tags: + - gpu:amd, os:linux + variables: + TEST_ARCH: arch-ci-linux-hip-cmplx + linux-sycl-double: extends: - .stage-3 diff --git a/config/examples/arch-ci-linux-hip-cmplx.py b/config/examples/arch-ci-linux-hip-cmplx.py new file mode 100755 index 00000000000..0f7feca88a1 --- /dev/null +++ b/config/examples/arch-ci-linux-hip-cmplx.py @@ -0,0 +1,39 @@ +#!/usr/bin/env python3 + +import os +petsc_hash_pkgs=os.path.join(os.getenv('HOME'),'petsc-hash-pkgs') + +if __name__ == '__main__': + import sys + import os + sys.path.insert(0, os.path.abspath('config')) + import configure + configure_options = [ + '--package-prefix-hash='+petsc_hash_pkgs, + #'--with-mpi-dir=/home/users/balay/soft/instinct/gcc-10.2.0/mpich-4.1', + #'--with-blaslapack-dir=/home/users/balay/soft/instinct/gcc-10.2.0/fblaslapack', + '--with-make-np=24', + '--with-make-test-np=8', + '--with-hipc=/opt/rocm-5.4.3/bin/hipcc', 
+ '--with-hip-dir=/opt/rocm-5.4.3', + 'COPTFLAGS=-g -O', + 'FOPTFLAGS=-g -O', + 'CXXOPTFLAGS=-g -O', + 'HIPOPTFLAGS=-g -O', + '--with-cuda=0', + '--with-hip=1', + '--with-precision=double', + '--with-clanguage=c', + '--download-kokkos', + '--download-kokkos-kernels', + # '--download-hypre', # does not support complex on the GPU yet + # '--download-mfem', # requires hypre + '--download-magma', + '--with-magma-fortran-bindings=0', + '--download-metis', + '--with-strict-petscerrorcode', + '--with-scalar-type=complex', + #'--with-coverage', + ] + + configure.petsc_configure(configure_options) diff --git a/include/petsc/private/petsclegacycupmblas.h b/include/petsc/private/petsclegacycupmblas.h index bffea08fcb1..55895e7ff8d 100644 --- a/include/petsc/private/petsclegacycupmblas.h +++ b/include/petsc/private/petsclegacycupmblas.h @@ -209,33 +209,44 @@ /* complex single */ #if defined(PETSC_USE_COMPLEX) + #if defined(HIPBLAS_V2) +typedef hipDoubleComplex PetscHipblasDoubleComplex; +typedef hipFloatComplex PetscHipblasFloatComplex; + #else +typedef hipblasComplex PetscHipblasFloatComplex; +typedef hipblasDoubleComplex PetscHipblasDoubleComplex; + #endif + #if defined(PETSC_USE_REAL_SINGLE) - #define hipblasXaxpy(a, b, c, d, e, f, g) hipblasCaxpy((a), (b), (hipblasComplex *)(c), (hipblasComplex *)(d), (e), (hipblasComplex *)(f), (g)) - #define hipblasXscal(a, b, c, d, e) hipblasCscal((a), (b), (hipblasComplex *)(c), (hipblasComplex *)(d), (e)) - #define hipblasXdotu(a, b, c, d, e, f, g) hipblasCdotu((a), (b), (hipblasComplex *)(c), (d), (hipblasComplex *)(e), (f), (hipblasComplex *)(g)) - #define hipblasXdot(a, b, c, d, e, f, g) hipblasCdotc((a), (b), (hipblasComplex *)(c), (d), (hipblasComplex *)(e), (f), (hipblasComplex *)(g)) - #define hipblasXswap(a, b, c, d, e, f) hipblasCswap((a), (b), (hipblasComplex *)(c), (d), (hipblasComplex *)(e), (f)) - #define hipblasXnrm2(a, b, c, d, e) hipblasScnrm2((a), (b), (hipblasComplex *)(c), (d), (e)) - #define hipblasIXamax(a, b, c, 
d, e) hipblasIcamax((a), (b), (hipblasComplex *)(c), (d), (e)) - #define hipblasXasum(a, b, c, d, e) hipblasScasum((a), (b), (hipblasComplex *)(c), (d), (e)) - #define hipblasXgemv(a, b, c, d, e, f, g, h, i, j, k, l) hipblasCgemv((a), (b), (c), (d), (hipblasComplex *)(e), (hipblasComplex *)(f), (g), (hipblasComplex *)(h), (i), (hipblasComplex *)(j), (hipblasComplex *)(k), (l)) - #define hipblasXgemm(a, b, c, d, e, f, g, h, i, j, k, l, m, n) hipblasCgemm((a), (b), (c), (d), (e), (f), (hipblasComplex *)(g), (hipblasComplex *)(h), (i), (hipblasComplex *)(j), (k), (hipblasComplex *)(l), (hipblasComplex *)(m), (n)) - #define hipblasXgeam(a, b, c, d, e, f, g, h, i, j, k, l, m) hipblasCgeam((a), (b), (c), (d), (e), (hipblasComplex *)(f), (hipblasComplex *)(g), (h), (hipblasComplex *)(i), (hipblasComplex *)(j), (k), (hipblasComplex *)(l), (m)) + #define hipblasXaxpy(a, b, c, d, e, f, g) hipblasCaxpy((a), (b), (PetscHipblasFloatComplex *)(c), (PetscHipblasFloatComplex *)(d), (e), (PetscHipblasFloatComplex *)(f), (g)) + #define hipblasXscal(a, b, c, d, e) hipblasCscal((a), (b), (PetscHipblasFloatComplex *)(c), (PetscHipblasFloatComplex *)(d), (e)) + #define hipblasXdotu(a, b, c, d, e, f, g) hipblasCdotu((a), (b), (PetscHipblasFloatComplex *)(c), (d), (PetscHipblasFloatComplex *)(e), (f), (PetscHipblasFloatComplex *)(g)) + #define hipblasXdot(a, b, c, d, e, f, g) hipblasCdotc((a), (b), (PetscHipblasFloatComplex *)(c), (d), (PetscHipblasFloatComplex *)(e), (f), (PetscHipblasFloatComplex *)(g)) + #define hipblasXswap(a, b, c, d, e, f) hipblasCswap((a), (b), (PetscHipblasFloatComplex *)(c), (d), (PetscHipblasFloatComplex *)(e), (f)) + #define hipblasXnrm2(a, b, c, d, e) hipblasScnrm2((a), (b), (PetscHipblasFloatComplex *)(c), (d), (e)) + #define hipblasIXamax(a, b, c, d, e) hipblasIcamax((a), (b), (PetscHipblasFloatComplex *)(c), (d), (e)) + #define hipblasXasum(a, b, c, d, e) hipblasScasum((a), (b), (PetscHipblasFloatComplex *)(c), (d), (e)) + #define hipblasXgemv(a, b, c, d, 
e, f, g, h, i, j, k, l) \ + hipblasCgemv((a), (b), (c), (d), (PetscHipblasFloatComplex *)(e), (PetscHipblasFloatComplex *)(f), (g), (PetscHipblasFloatComplex *)(h), (i), (PetscHipblasFloatComplex *)(j), (PetscHipblasFloatComplex *)(k), (l)) + #define hipblasXgemm(a, b, c, d, e, f, g, h, i, j, k, l, m, n) \ + hipblasCgemm((a), (b), (c), (d), (e), (f), (PetscHipblasFloatComplex *)(g), (PetscHipblasFloatComplex *)(h), (i), (PetscHipblasFloatComplex *)(j), (k), (PetscHipblasFloatComplex *)(l), (PetscHipblasFloatComplex *)(m), (n)) + #define hipblasXgeam(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + hipblasCgeam((a), (b), (c), (d), (e), (PetscHipblasFloatComplex *)(f), (PetscHipblasFloatComplex *)(g), (h), (PetscHipblasFloatComplex *)(i), (PetscHipblasFloatComplex *)(j), (k), (PetscHipblasFloatComplex *)(l), (m)) #else /* complex double */ - #define hipblasXaxpy(a, b, c, d, e, f, g) hipblasZaxpy((a), (b), (hipblasDoubleComplex *)(c), (hipblasDoubleComplex *)(d), (e), (hipblasDoubleComplex *)(f), (g)) - #define hipblasXscal(a, b, c, d, e) hipblasZscal((a), (b), (hipblasDoubleComplex *)(c), (hipblasDoubleComplex *)(d), (e)) - #define hipblasXdotu(a, b, c, d, e, f, g) hipblasZdotu((a), (b), (hipblasDoubleComplex *)(c), (d), (hipblasDoubleComplex *)(e), (f), (hipblasDoubleComplex *)(g)) - #define hipblasXdot(a, b, c, d, e, f, g) hipblasZdotc((a), (b), (hipblasDoubleComplex *)(c), (d), (hipblasDoubleComplex *)(e), (f), (hipblasDoubleComplex *)(g)) - #define hipblasXswap(a, b, c, d, e, f) hipblasZswap((a), (b), (hipblasDoubleComplex *)(c), (d), (hipblasDoubleComplex *)(e), (f)) - #define hipblasXnrm2(a, b, c, d, e) hipblasDznrm2((a), (b), (hipblasDoubleComplex *)(c), (d), (e)) - #define hipblasIXamax(a, b, c, d, e) hipblasIzamax((a), (b), (hipblasDoubleComplex *)(c), (d), (e)) - #define hipblasXasum(a, b, c, d, e) hipblasDzasum((a), (b), (hipblasDoubleComplex *)(c), (d), (e)) + #define hipblasXaxpy(a, b, c, d, e, f, g) hipblasZaxpy((a), (b), (PetscHipblasDoubleComplex *)(c), 
(PetscHipblasDoubleComplex *)(d), (e), (PetscHipblasDoubleComplex *)(f), (g)) + #define hipblasXscal(a, b, c, d, e) hipblasZscal((a), (b), (PetscHipblasDoubleComplex *)(c), (PetscHipblasDoubleComplex *)(d), (e)) + #define hipblasXdotu(a, b, c, d, e, f, g) hipblasZdotu((a), (b), (PetscHipblasDoubleComplex *)(c), (d), (PetscHipblasDoubleComplex *)(e), (f), (PetscHipblasDoubleComplex *)(g)) + #define hipblasXdot(a, b, c, d, e, f, g) hipblasZdotc((a), (b), (PetscHipblasDoubleComplex *)(c), (d), (PetscHipblasDoubleComplex *)(e), (f), (PetscHipblasDoubleComplex *)(g)) + #define hipblasXswap(a, b, c, d, e, f) hipblasZswap((a), (b), (PetscHipblasDoubleComplex *)(c), (d), (PetscHipblasDoubleComplex *)(e), (f)) + #define hipblasXnrm2(a, b, c, d, e) hipblasDznrm2((a), (b), (PetscHipblasDoubleComplex *)(c), (d), (e)) + #define hipblasIXamax(a, b, c, d, e) hipblasIzamax((a), (b), (PetscHipblasDoubleComplex *)(c), (d), (e)) + #define hipblasXasum(a, b, c, d, e) hipblasDzasum((a), (b), (PetscHipblasDoubleComplex *)(c), (d), (e)) #define hipblasXgemv(a, b, c, d, e, f, g, h, i, j, k, l) \ - hipblasZgemv((a), (b), (c), (d), (hipblasDoubleComplex *)(e), (hipblasDoubleComplex *)(f), (g), (hipblasDoubleComplex *)(h), (i), (hipblasDoubleComplex *)(j), (hipblasDoubleComplex *)(k), (l)) + hipblasZgemv((a), (b), (c), (d), (PetscHipblasDoubleComplex *)(e), (PetscHipblasDoubleComplex *)(f), (g), (PetscHipblasDoubleComplex *)(h), (i), (PetscHipblasDoubleComplex *)(j), (PetscHipblasDoubleComplex *)(k), (l)) #define hipblasXgemm(a, b, c, d, e, f, g, h, i, j, k, l, m, n) \ - hipblasZgemm((a), (b), (c), (d), (e), (f), (hipblasDoubleComplex *)(g), (hipblasDoubleComplex *)(h), (i), (hipblasDoubleComplex *)(j), (k), (hipblasDoubleComplex *)(l), (hipblasDoubleComplex *)(m), (n)) + hipblasZgemm((a), (b), (c), (d), (e), (f), (PetscHipblasDoubleComplex *)(g), (PetscHipblasDoubleComplex *)(h), (i), (PetscHipblasDoubleComplex *)(j), (k), (PetscHipblasDoubleComplex *)(l), (PetscHipblasDoubleComplex *)(m), 
(n)) #define hipblasXgeam(a, b, c, d, e, f, g, h, i, j, k, l, m) \ - hipblasZgeam((a), (b), (c), (d), (e), (hipblasDoubleComplex *)(f), (hipblasDoubleComplex *)(g), (h), (hipblasDoubleComplex *)(i), (hipblasDoubleComplex *)(j), (k), (hipblasDoubleComplex *)(l), (m)) + hipblasZgeam((a), (b), (c), (d), (e), (PetscHipblasDoubleComplex *)(f), (PetscHipblasDoubleComplex *)(g), (h), (PetscHipblasDoubleComplex *)(i), (PetscHipblasDoubleComplex *)(j), (k), (PetscHipblasDoubleComplex *)(l), (m)) #endif #else /* real single */ #if defined(PETSC_USE_REAL_SINGLE) diff --git a/include/petscdevice_hip.h b/include/petscdevice_hip.h index 0e5a8478d42..2aeb9a19745 100644 --- a/include/petscdevice_hip.h +++ b/include/petscdevice_hip.h @@ -11,6 +11,20 @@ #include #if PETSC_PKG_HIP_VERSION_GE(5, 2, 0) + + // cupmScalarPtrCast() returns hip{Float,Double}Complex while hipBLAS uses hipBlas{Float,Double}Complex, causing many VecCUPM errors like + // error: no matching function for call to 'cupmBlasXdot'. + // Before rocm-6.0, one can define ROCM_MATHLIBS_API_USE_HIP_COMPLEX to force rocm to 'typedef hipDoubleComplex hipBlasDoubleComplex' for example. + // Since then, ROCM_MATHLIBS_API_USE_HIP_COMPLEX is deprecated, and one can define HIPBLAS_V2 to use version 2 of hipBLAS that directly use hipDoubleComplex etc. + // Per AMD, HIPBLAS_V2 will be removed in the future so that hipBLAS only provides updated APIs (but not yet in 6.2.2 as of Sep. 27, 2024). 
+ // + // see https://rocm.docs.amd.com/projects/hipBLAS/en/docs-6.0.0/functions.html#complex-datatypes + // and https://rocm.docs.amd.com/projects/hipBLAS/en/docs-6.2.2/functions.html#hipblas-v2-and-deprecations + #if PETSC_PKG_HIP_VERSION_GE(6, 0, 0) + #define HIPBLAS_V2 + #else + #define ROCM_MATHLIBS_API_USE_HIP_COMPLEX + #endif #include #include #else diff --git a/include/petscsys.h b/include/petscsys.h index 112561de906..5b4c724b07a 100644 --- a/include/petscsys.h +++ b/include/petscsys.h @@ -179,7 +179,7 @@ PETSC_EXTERN FILE *PETSC_STDERR; Handle inclusion when using clang compiler with CUDA support __float128 is not available for the device */ -#if defined(__clang__) && defined(__CUDA_ARCH__) +#if defined(__clang__) && (defined(__CUDA_ARCH__) || defined(__HIPCC__)) #define PETSC_SKIP_REAL___FLOAT128 #endif diff --git a/src/ksp/pc/impls/bjacobi/bjkokkos/bjkokkos.kokkos.cxx b/src/ksp/pc/impls/bjacobi/bjkokkos/bjkokkos.kokkos.cxx index 39532091e36..9815d61cc0d 100644 --- a/src/ksp/pc/impls/bjacobi/bjkokkos/bjkokkos.kokkos.cxx +++ b/src/ksp/pc/impls/bjacobi/bjkokkos/bjkokkos.kokkos.cxx @@ -1,3 +1,5 @@ +#define PETSC_SKIP_CXX_COMPLEX_FIX // Kokkos::complex does not need the petsc complex fix + #include #include From b67aa78df2e2af026ff671766a9ab427f2c30caa Mon Sep 17 00:00:00 2001 From: Satish Balay Date: Wed, 2 Oct 2024 09:30:21 -0500 Subject: [PATCH 09/59] Revert "sys: Add parens around PetscMalloc/PetscCalloc parameters" This reverts commit 6de9ecaf8d994c58966dbd11bcd19baffdbd4580. 
--- include/petscsys.h | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/include/petscsys.h b/include/petscsys.h index 5b4c724b07a..7992b80a362 100644 --- a/include/petscsys.h +++ b/include/petscsys.h @@ -484,7 +484,7 @@ M*/ .seealso: `PetscFree()`, `PetscNew()` M*/ -#define PetscCalloc(m, result) PetscMallocA(1, PETSC_TRUE, __LINE__, PETSC_FUNCTION_NAME, __FILE__, (size_t)(m), (result)) +#define PetscCalloc(m, result) PetscMallocA(1, PETSC_TRUE, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ((size_t)m), (result)) /*MC PetscMalloc1 - Allocates an array of memory aligned to `PETSC_MEMALIGN` @@ -520,7 +520,7 @@ M*/ .seealso: `PetscFree()`, `PetscNew()`, `PetscMalloc()`, `PetscCalloc1()`, `PetscMalloc2()` M*/ -#define PetscMalloc1(m1, r1) PetscMallocA(1, PETSC_FALSE, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ((size_t)((size_t)(m1)) * sizeof(**(r1))), (r1)) +#define PetscMalloc1(m1, r1) PetscMallocA(1, PETSC_FALSE, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ((size_t)((size_t)m1) * sizeof(**(r1))), (r1)) /*MC PetscCalloc1 - Allocates a cleared (zeroed) array of memory aligned to `PETSC_MEMALIGN` @@ -544,7 +544,7 @@ M*/ .seealso: `PetscFree()`, `PetscNew()`, `PetscMalloc()`, `PetscMalloc1()`, `PetscCalloc2()` M*/ -#define PetscCalloc1(m1, r1) PetscMallocA(1, PETSC_TRUE, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ((size_t)((size_t)(m1)) * sizeof(**(r1))), (r1)) +#define PetscCalloc1(m1, r1) PetscMallocA(1, PETSC_TRUE, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ((size_t)((size_t)m1) * sizeof(**(r1))), (r1)) /*MC PetscMalloc2 - Allocates 2 arrays of memory both aligned to `PETSC_MEMALIGN` @@ -567,7 +567,7 @@ M*/ .seealso: `PetscFree()`, `PetscNew()`, `PetscMalloc()`, `PetscMalloc1()`, `PetscCalloc2()` M*/ -#define PetscMalloc2(m1, r1, m2, r2) PetscMallocA(2, PETSC_FALSE, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ((size_t)((size_t)(m1)) * sizeof(**(r1))), (r1), ((size_t)((size_t)(m2)) * sizeof(**(r2))), (r2)) +#define PetscMalloc2(m1, r1, 
m2, r2) PetscMallocA(2, PETSC_FALSE, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ((size_t)((size_t)m1) * sizeof(**(r1))), (r1), ((size_t)((size_t)m2) * sizeof(**(r2))), (r2)) /*MC PetscCalloc2 - Allocates 2 cleared (zeroed) arrays of memory both aligned to `PETSC_MEMALIGN` @@ -590,7 +590,7 @@ M*/ .seealso: `PetscFree()`, `PetscNew()`, `PetscMalloc()`, `PetscCalloc1()`, `PetscMalloc2()` M*/ -#define PetscCalloc2(m1, r1, m2, r2) PetscMallocA(2, PETSC_TRUE, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ((size_t)((size_t)(m1)) * sizeof(**(r1))), (r1), ((size_t)((size_t)(m2)) * sizeof(**(r2))), (r2)) +#define PetscCalloc2(m1, r1, m2, r2) PetscMallocA(2, PETSC_TRUE, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ((size_t)((size_t)m1) * sizeof(**(r1))), (r1), ((size_t)((size_t)m2) * sizeof(**(r2))), (r2)) /*MC PetscMalloc3 - Allocates 3 arrays of memory, all aligned to `PETSC_MEMALIGN` @@ -616,7 +616,7 @@ M*/ .seealso: `PetscFree()`, `PetscNew()`, `PetscMalloc()`, `PetscMalloc2()`, `PetscCalloc3()`, `PetscFree3()` M*/ #define PetscMalloc3(m1, r1, m2, r2, m3, r3) \ - PetscMallocA(3, PETSC_FALSE, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ((size_t)((size_t)(m1)) * sizeof(**(r1))), (r1), ((size_t)((size_t)(m2)) * sizeof(**(r2))), (r2), ((size_t)((size_t)(m3)) * sizeof(**(r3))), (r3)) + PetscMallocA(3, PETSC_FALSE, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ((size_t)((size_t)m1) * sizeof(**(r1))), (r1), ((size_t)((size_t)m2) * sizeof(**(r2))), (r2), ((size_t)((size_t)m3) * sizeof(**(r3))), (r3)) /*MC PetscCalloc3 - Allocates 3 cleared (zeroed) arrays of memory, all aligned to `PETSC_MEMALIGN` @@ -642,7 +642,7 @@ M*/ .seealso: `PetscFree()`, `PetscNew()`, `PetscMalloc()`, `PetscCalloc2()`, `PetscMalloc3()`, `PetscFree3()` M*/ #define PetscCalloc3(m1, r1, m2, r2, m3, r3) \ - PetscMallocA(3, PETSC_TRUE, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ((size_t)((size_t)(m1)) * sizeof(**(r1))), (r1), ((size_t)((size_t)(m2)) * sizeof(**(r2))), (r2), ((size_t)((size_t)(m3)) * sizeof(**(r3))), (r3)) + 
PetscMallocA(3, PETSC_TRUE, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ((size_t)((size_t)m1) * sizeof(**(r1))), (r1), ((size_t)((size_t)m2) * sizeof(**(r2))), (r2), ((size_t)((size_t)m3) * sizeof(**(r3))), (r3)) /*MC PetscMalloc4 - Allocates 4 arrays of memory, all aligned to `PETSC_MEMALIGN` @@ -670,7 +670,7 @@ M*/ .seealso: `PetscFree()`, `PetscNew()`, `PetscMalloc()`, `PetscMalloc2()`, `PetscCalloc4()`, `PetscFree4()` M*/ #define PetscMalloc4(m1, r1, m2, r2, m3, r3, m4, r4) \ - PetscMallocA(4, PETSC_FALSE, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ((size_t)((size_t)(m1)) * sizeof(**(r1))), (r1), ((size_t)((size_t)(m2)) * sizeof(**(r2))), (r2), ((size_t)((size_t)(m3)) * sizeof(**(r3))), (r3), ((size_t)((size_t)(m4)) * sizeof(**(r4))), (r4)) + PetscMallocA(4, PETSC_FALSE, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ((size_t)((size_t)m1) * sizeof(**(r1))), (r1), ((size_t)((size_t)m2) * sizeof(**(r2))), (r2), ((size_t)((size_t)m3) * sizeof(**(r3))), (r3), ((size_t)((size_t)m4) * sizeof(**(r4))), (r4)) /*MC PetscCalloc4 - Allocates 4 cleared (zeroed) arrays of memory, all aligned to `PETSC_MEMALIGN` @@ -698,7 +698,7 @@ M*/ .seealso: `PetscFree()`, `PetscNew()`, `PetscMalloc()`, `PetscMalloc2()`, `PetscCalloc4()`, `PetscFree4()` M*/ #define PetscCalloc4(m1, r1, m2, r2, m3, r3, m4, r4) \ - PetscMallocA(4, PETSC_TRUE, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ((size_t)((size_t)(m1)) * sizeof(**(r1))), (r1), ((size_t)((size_t)(m2)) * sizeof(**(r2))), (r2), ((size_t)((size_t)(m3)) * sizeof(**(r3))), (r3), ((size_t)((size_t)(m4)) * sizeof(**(r4))), (r4)) + PetscMallocA(4, PETSC_TRUE, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ((size_t)((size_t)m1) * sizeof(**(r1))), (r1), ((size_t)((size_t)m2) * sizeof(**(r2))), (r2), ((size_t)((size_t)m3) * sizeof(**(r3))), (r3), ((size_t)((size_t)m4) * sizeof(**(r4))), (r4)) /*MC PetscMalloc5 - Allocates 5 arrays of memory, all aligned to `PETSC_MEMALIGN` @@ -728,7 +728,7 @@ M*/ .seealso: `PetscFree()`, `PetscNew()`, `PetscMalloc()`, 
`PetscMalloc2()`, `PetscCalloc5()`, `PetscFree5()` M*/ #define PetscMalloc5(m1, r1, m2, r2, m3, r3, m4, r4, m5, r5) \ - PetscMallocA(5, PETSC_FALSE, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ((size_t)((size_t)(m1)) * sizeof(**(r1))), (r1), ((size_t)((size_t)(m2)) * sizeof(**(r2))), (r2), ((size_t)((size_t)(m3)) * sizeof(**(r3))), (r3), ((size_t)((size_t)(m4)) * sizeof(**(r4))), (r4), ((size_t)((size_t)(m5)) * sizeof(**(r5))), (r5)) + PetscMallocA(5, PETSC_FALSE, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ((size_t)((size_t)m1) * sizeof(**(r1))), (r1), ((size_t)((size_t)m2) * sizeof(**(r2))), (r2), ((size_t)((size_t)m3) * sizeof(**(r3))), (r3), ((size_t)((size_t)m4) * sizeof(**(r4))), (r4), ((size_t)((size_t)m5) * sizeof(**(r5))), (r5)) /*MC PetscCalloc5 - Allocates 5 cleared (zeroed) arrays of memory, all aligned to `PETSC_MEMALIGN` @@ -758,7 +758,7 @@ M*/ .seealso: `PetscFree()`, `PetscNew()`, `PetscMalloc()`, `PetscMalloc5()`, `PetscFree5()` M*/ #define PetscCalloc5(m1, r1, m2, r2, m3, r3, m4, r4, m5, r5) \ - PetscMallocA(5, PETSC_TRUE, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ((size_t)((size_t)(m1)) * sizeof(**(r1))), (r1), ((size_t)((size_t)(m2)) * sizeof(**(r2))), (r2), ((size_t)((size_t)(m3)) * sizeof(**(r3))), (r3), ((size_t)((size_t)(m4)) * sizeof(**(r4))), (r4), ((size_t)((size_t)(m5)) * sizeof(**(r5))), (r5)) + PetscMallocA(5, PETSC_TRUE, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ((size_t)((size_t)m1) * sizeof(**(r1))), (r1), ((size_t)((size_t)m2) * sizeof(**(r2))), (r2), ((size_t)((size_t)m3) * sizeof(**(r3))), (r3), ((size_t)((size_t)m4) * sizeof(**(r4))), (r4), ((size_t)((size_t)m5) * sizeof(**(r5))), (r5)) /*MC PetscMalloc6 - Allocates 6 arrays of memory, all aligned to `PETSC_MEMALIGN` @@ -790,7 +790,7 @@ M*/ .seealso: `PetscFree()`, `PetscNew()`, `PetscMalloc()`, `PetscMalloc2()`, `PetscCalloc6()`, `PetscFree3()`, `PetscFree4()`, `PetscFree5()`, `PetscFree6()` M*/ #define PetscMalloc6(m1, r1, m2, r2, m3, r3, m4, r4, m5, r5, m6, r6) \ - PetscMallocA(6, 
PETSC_FALSE, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ((size_t)((size_t)(m1)) * sizeof(**(r1))), (r1), ((size_t)((size_t)(m2)) * sizeof(**(r2))), (r2), ((size_t)((size_t)(m3)) * sizeof(**(r3))), (r3), ((size_t)((size_t)(m4)) * sizeof(**(r4))), (r4), ((size_t)((size_t)(m5)) * sizeof(**(r5))), (r5), ((size_t)((size_t)(m6)) * sizeof(**(r6))), (r6)) + PetscMallocA(6, PETSC_FALSE, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ((size_t)((size_t)m1) * sizeof(**(r1))), (r1), ((size_t)((size_t)m2) * sizeof(**(r2))), (r2), ((size_t)((size_t)m3) * sizeof(**(r3))), (r3), ((size_t)((size_t)m4) * sizeof(**(r4))), (r4), ((size_t)((size_t)m5) * sizeof(**(r5))), (r5), ((size_t)((size_t)m6) * sizeof(**(r6))), (r6)) /*MC PetscCalloc6 - Allocates 6 cleared (zeroed) arrays of memory, all aligned to `PETSC_MEMALIGN` @@ -822,7 +822,7 @@ M*/ .seealso: `PetscFree()`, `PetscNew()`, `PetscMalloc()`, `PetscMalloc2()`, `PetscMalloc6()`, `PetscFree6()` M*/ #define PetscCalloc6(m1, r1, m2, r2, m3, r3, m4, r4, m5, r5, m6, r6) \ - PetscMallocA(6, PETSC_TRUE, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ((size_t)((size_t)(m1)) * sizeof(**(r1))), (r1), ((size_t)((size_t)(m2)) * sizeof(**(r2))), (r2), ((size_t)((size_t)(m3)) * sizeof(**(r3))), (r3), ((size_t)((size_t)(m4)) * sizeof(**(r4))), (r4), ((size_t)((size_t)(m5)) * sizeof(**(r5))), (r5), ((size_t)((size_t)(m6)) * sizeof(**(r6))), (r6)) + PetscMallocA(6, PETSC_TRUE, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ((size_t)((size_t)m1) * sizeof(**(r1))), (r1), ((size_t)((size_t)m2) * sizeof(**(r2))), (r2), ((size_t)((size_t)m3) * sizeof(**(r3))), (r3), ((size_t)((size_t)m4) * sizeof(**(r4))), (r4), ((size_t)((size_t)m5) * sizeof(**(r5))), (r5), ((size_t)((size_t)m6) * sizeof(**(r6))), (r6)) /*MC PetscMalloc7 - Allocates 7 arrays of memory, all aligned to `PETSC_MEMALIGN` @@ -856,7 +856,7 @@ M*/ .seealso: `PetscFree()`, `PetscNew()`, `PetscMalloc()`, `PetscMalloc2()`, `PetscCalloc7()`, `PetscFree7()` M*/ #define PetscMalloc7(m1, r1, m2, r2, m3, r3, m4, r4, m5, 
r5, m6, r6, m7, r7) \ - PetscMallocA(7, PETSC_FALSE, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ((size_t)((size_t)(m1)) * sizeof(**(r1))), (r1), ((size_t)((size_t)(m2)) * sizeof(**(r2))), (r2), ((size_t)((size_t)(m3)) * sizeof(**(r3))), (r3), ((size_t)((size_t)(m4)) * sizeof(**(r4))), (r4), ((size_t)((size_t)(m5)) * sizeof(**(r5))), (r5), ((size_t)((size_t)(m6)) * sizeof(**(r6))), (r6), ((size_t)((size_t)(m7)) * sizeof(**(r7))), (r7)) + PetscMallocA(7, PETSC_FALSE, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ((size_t)((size_t)m1) * sizeof(**(r1))), (r1), ((size_t)((size_t)m2) * sizeof(**(r2))), (r2), ((size_t)((size_t)m3) * sizeof(**(r3))), (r3), ((size_t)((size_t)m4) * sizeof(**(r4))), (r4), ((size_t)((size_t)m5) * sizeof(**(r5))), (r5), ((size_t)((size_t)m6) * sizeof(**(r6))), (r6), ((size_t)((size_t)m7) * sizeof(**(r7))), (r7)) /*MC PetscCalloc7 - Allocates 7 cleared (zeroed) arrays of memory, all aligned to `PETSC_MEMALIGN` @@ -890,7 +890,7 @@ M*/ .seealso: `PetscFree()`, `PetscNew()`, `PetscMalloc()`, `PetscMalloc2()`, `PetscMalloc7()`, `PetscFree7()` M*/ #define PetscCalloc7(m1, r1, m2, r2, m3, r3, m4, r4, m5, r5, m6, r6, m7, r7) \ - PetscMallocA(7, PETSC_TRUE, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ((size_t)((size_t)(m1)) * sizeof(**(r1))), (r1), ((size_t)((size_t)(m2)) * sizeof(**(r2))), (r2), ((size_t)((size_t)(m3)) * sizeof(**(r3))), (r3), ((size_t)((size_t)(m4)) * sizeof(**(r4))), (r4), ((size_t)((size_t)(m5)) * sizeof(**(r5))), (r5), ((size_t)((size_t)(m6)) * sizeof(**(r6))), (r6), ((size_t)((size_t)(m7)) * sizeof(**(r7))), (r7)) + PetscMallocA(7, PETSC_TRUE, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ((size_t)((size_t)m1) * sizeof(**(r1))), (r1), ((size_t)((size_t)m2) * sizeof(**(r2))), (r2), ((size_t)((size_t)m3) * sizeof(**(r3))), (r3), ((size_t)((size_t)m4) * sizeof(**(r4))), (r4), ((size_t)((size_t)m5) * sizeof(**(r5))), (r5), ((size_t)((size_t)m6) * sizeof(**(r6))), (r6), ((size_t)((size_t)m7) * sizeof(**(r7))), (r7)) /*MC PetscNew - Allocates memory 
of a particular type, zeros the memory! Aligned to `PETSC_MEMALIGN` From cf27e480737f7f40fb2f5c38a85b346ae95c4aac Mon Sep 17 00:00:00 2001 From: Pierre Jolivet Date: Tue, 1 Oct 2024 21:30:50 +0200 Subject: [PATCH 10/59] Fix more -Wshorten-64-to-32 --- src/mat/impls/aij/seq/umfpack/umfpack.c | 9 ++++++--- src/sys/utils/mpishm.c | 11 ++++------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/mat/impls/aij/seq/umfpack/umfpack.c b/src/mat/impls/aij/seq/umfpack/umfpack.c index f63f54ff3f2..a44adb90fd4 100644 --- a/src/mat/impls/aij/seq/umfpack/umfpack.c +++ b/src/mat/impls/aij/seq/umfpack/umfpack.c @@ -112,7 +112,8 @@ static PetscErrorCode MatSolve_UMFPACK_Private(Mat A, Vec b, Vec x, int uflag) Mat_SeqAIJ *a = (Mat_SeqAIJ *)lu->A->data; PetscScalar *av = a->a, *xa; const PetscScalar *ba; - PetscInt *ai = a->i, *aj = a->j, status; + PetscInt *ai = a->i, *aj = a->j; + int status; static PetscBool cite = PETSC_FALSE; PetscFunctionBegin; @@ -165,7 +166,8 @@ static PetscErrorCode MatLUFactorNumeric_UMFPACK(Mat F, Mat A, const MatFactorIn { Mat_UMFPACK *lu = (Mat_UMFPACK *)F->data; Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; - PetscInt *ai = a->i, *aj = a->j, status; + PetscInt *ai = a->i, *aj = a->j; + int status; PetscScalar *av = a->a; PetscFunctionBegin; @@ -200,7 +202,8 @@ static PetscErrorCode MatLUFactorSymbolic_UMFPACK(Mat F, Mat A, IS r, IS c, cons { Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; Mat_UMFPACK *lu = (Mat_UMFPACK *)F->data; - PetscInt i, *ai = a->i, *aj = a->j, m = A->rmap->n, n = A->cmap->n, status, idx; + PetscInt i, *ai = a->i, *aj = a->j, m = A->rmap->n, n = A->cmap->n, idx; + int status; #if !defined(PETSC_USE_COMPLEX) PetscScalar *av = a->a; #endif diff --git a/src/sys/utils/mpishm.c b/src/sys/utils/mpishm.c index 6b96c0b623e..99997e328d9 100644 --- a/src/sys/utils/mpishm.c +++ b/src/sys/utils/mpishm.c @@ -39,10 +39,8 @@ static PetscInt num_dupped_comms = 0; static MPI_Comm shmcomm_dupped_comms[MAX_SHMCOMM_DUPPED_COMMS]; static 
PetscErrorCode PetscShmCommDestroyDuppedComms(void) { - PetscInt i; - PetscFunctionBegin; - for (i = 0; i < num_dupped_comms; i++) PetscCall(PetscCommDestroy(&shmcomm_dupped_comms[i])); + for (PetscInt i = 0; i < num_dupped_comms; i++) PetscCall(PetscCommDestroy(&shmcomm_dupped_comms[i])); num_dupped_comms = 0; /* reset so that PETSc can be reinitialized */ PetscFunctionReturn(PETSC_SUCCESS); } @@ -357,10 +355,9 @@ PetscErrorCode PetscOmpCtrlCreate(MPI_Comm petsc_comm, PetscInt nthreads, PetscO { PetscOmpCtrl ctrl; unsigned long *cpu_ulongs = NULL; - PetscInt i, nr_cpu_ulongs; PetscShmComm pshmcomm; MPI_Comm shm_comm; - PetscMPIInt shm_rank, shm_comm_size, omp_rank, color; + PetscMPIInt shm_rank, shm_comm_size, omp_rank, color, nr_cpu_ulongs; PetscInt num_packages, num_cores; PetscFunctionBegin; @@ -444,7 +441,7 @@ PetscErrorCode PetscOmpCtrlCreate(MPI_Comm petsc_comm, PetscInt nthreads, PetscO if (nr_cpu_ulongs == 1) { cpu_ulongs[0] = hwloc_bitmap_to_ulong(ctrl->cpuset); } else { - for (i = 0; i < nr_cpu_ulongs; i++) cpu_ulongs[i] = hwloc_bitmap_to_ith_ulong(ctrl->cpuset, (unsigned)i); + for (PetscInt i = 0; i < nr_cpu_ulongs; i++) cpu_ulongs[i] = hwloc_bitmap_to_ith_ulong(ctrl->cpuset, (unsigned)i); } PetscCallMPI(MPI_Reduce(ctrl->is_omp_master ? 
MPI_IN_PLACE : cpu_ulongs, cpu_ulongs, nr_cpu_ulongs, MPI_UNSIGNED_LONG, MPI_BOR, 0, ctrl->omp_comm)); @@ -459,7 +456,7 @@ PetscErrorCode PetscOmpCtrlCreate(MPI_Comm petsc_comm, PetscInt nthreads, PetscO hwloc_bitmap_from_ulong(ctrl->omp_cpuset, cpu_ulongs[0]); #endif } else { - for (i = 0; i < nr_cpu_ulongs; i++) { + for (PetscInt i = 0; i < nr_cpu_ulongs; i++) { #if HWLOC_API_VERSION >= 0x00020000 PetscCallExternal(hwloc_bitmap_set_ith_ulong, ctrl->omp_cpuset, (unsigned)i, cpu_ulongs[i]); #else From 146c4c42f159455cc0177a6a416d1765f9fbd6f2 Mon Sep 17 00:00:00 2001 From: Pierre Jolivet Date: Thu, 3 Oct 2024 07:38:00 +0200 Subject: [PATCH 11/59] configure: MPICH 4.2.3 --- config/BuildSystem/config/packages/MPICH.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/BuildSystem/config/packages/MPICH.py b/config/BuildSystem/config/packages/MPICH.py index e6e1da7c869..c261110ce3f 100644 --- a/config/BuildSystem/config/packages/MPICH.py +++ b/config/BuildSystem/config/packages/MPICH.py @@ -4,7 +4,7 @@ class Configure(config.package.GNUPackage): def __init__(self, framework): config.package.GNUPackage.__init__(self, framework) - self.version = '4.2.3rc1' + self.version = '4.2.3' self.download = ['https://github.com/pmodels/mpich/releases/download/v'+self.version+'/mpich-'+self.version+'.tar.gz', 'https://www.mpich.org/static/downloads/'+self.version+'/mpich-'+self.version+'.tar.gz', # does not always work from Python? So add in web.cels URL below 'https://web.cels.anl.gov/projects/petsc/download/externalpackages'+'/mpich-'+self.version+'.tar.gz'] From 2711dd098a3b519998ac812d116dc90a3bf569db Mon Sep 17 00:00:00 2001 From: Lisandro Dalcin Date: Thu, 3 Oct 2024 09:51:23 +0300 Subject: [PATCH 12/59] Fix MPIU_Allreduce overflow check The reduction can be done in int64 only if the Op handle is built-in, otherwise a user-defined Op may not be able to handle dtype=MPIU_INT64. From all the built-in Ops, only MPI_SUM and MPI_PROD can overflow. 
--- src/sys/objects/pinit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sys/objects/pinit.c b/src/sys/objects/pinit.c index c2a1017c944..7c05fd973c7 100644 --- a/src/sys/objects/pinit.c +++ b/src/sys/objects/pinit.c @@ -1845,7 +1845,7 @@ static inline PetscMPIInt MPIU_Allreduce_Count(const void *inbuf, void *outbuf, PetscMPIInt MPIU_Allreduce_Private(const void *inbuf, void *outbuf, MPIU_Count count, MPI_Datatype dtype, MPI_Op op, MPI_Comm comm) { PetscMPIInt err; - if (!PetscDefined(USE_64BIT_INDICES) && count == 1 && dtype == MPIU_INT) { + if (!PetscDefined(USE_64BIT_INDICES) && count == 1 && dtype == MPIU_INT && (op == MPI_SUM || op == MPI_PROD)) { PetscInt64 incnt, outcnt; void *inbufd, *outbufd; From ea0465ef73aeebb3b4bd4bc920c0a220d2549b0b Mon Sep 17 00:00:00 2001 From: Stefano Zampini Date: Mon, 30 Sep 2024 12:54:49 +0300 Subject: [PATCH 13/59] MatSetValues_BlockMat: disable buggy reallocation --- src/mat/impls/blockmat/seq/blockmat.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/mat/impls/blockmat/seq/blockmat.c b/src/mat/impls/blockmat/seq/blockmat.c index 5d592b14a61..80aa1ee4a63 100644 --- a/src/mat/impls/blockmat/seq/blockmat.c +++ b/src/mat/impls/blockmat/seq/blockmat.c @@ -206,8 +206,8 @@ static PetscErrorCode MatSOR_BlockMat(Mat A, Vec bb, PetscReal omega, MatSORType static PetscErrorCode MatSetValues_BlockMat(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is) { Mat_BlockMat *a = (Mat_BlockMat *)A->data; - PetscInt *rp, k, low, high, t, ii, row, nrow, i, col, l, rmax, N, lastcol = -1; - PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen; + PetscInt *rp, k, low, high, t, row, nrow, i, col, l, lastcol = -1; + PetscInt *ai = a->i, *ailen = a->ilen; PetscInt *aj = a->j, nonew = a->nonew, bs = A->rmap->bs, brow, bcol; PetscInt ridx, cidx; PetscBool roworiented = a->roworiented; @@ -222,7 +222,6 @@ static PetscErrorCode 
MatSetValues_BlockMat(Mat A, PetscInt m, const PetscInt im PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, row, A->rmap->N - 1); rp = aj + ai[brow]; ap = aa + ai[brow]; - rmax = imax[brow]; nrow = ailen[brow]; low = 0; high = nrow; @@ -250,7 +249,8 @@ static PetscErrorCode MatSetValues_BlockMat(Mat A, PetscInt m, const PetscInt im if (rp[i] == bcol) goto noinsert1; } if (nonew == 1) goto noinsert1; - PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col); + SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") in the block matrix", row, col); +#if 0 MatSeqXAIJReallocateAIJ(A, a->mbs, 1, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, imax, nonew, Mat); N = nrow++ - 1; high++; @@ -262,6 +262,7 @@ static PetscErrorCode MatSetValues_BlockMat(Mat A, PetscInt m, const PetscInt im if (N >= i) ap[i] = NULL; rp[i] = bcol; a->nz++; +#endif noinsert1:; if (!*(ap + i)) PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, bs, bs, 0, NULL, ap + i)); PetscCall(MatSetValues(ap[i], 1, &ridx, 1, &cidx, &value, is)); From 4f2d7745dd2e7eaa6936f5c4a18e5312786dd2f5 Mon Sep 17 00:00:00 2001 From: Stefano Zampini Date: Sun, 29 Sep 2024 12:51:54 +0300 Subject: [PATCH 14/59] GarbageCollector: fix bug --- src/sys/objects/garbage.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/sys/objects/garbage.c b/src/sys/objects/garbage.c index 814e3d0b7e2..3d316853995 100644 --- a/src/sys/objects/garbage.c +++ b/src/sys/objects/garbage.c @@ -120,11 +120,15 @@ static PetscErrorCode GarbageKeySortedIntersect_Private(PetscInt64 seta[], Petsc void PetscGarbageKeySortedIntersect(void *inset, void *inoutset, PetscMPIInt *length, MPI_Datatype *dtype) { PetscInt64 *seta, *setb; + PetscInt lena = 0, lenb = 0; seta = (PetscInt64 *)inoutset; setb = (PetscInt64 
*)inset; - PetscCallAbort(PETSC_COMM_SELF, GarbageKeySortedIntersect_Private(&seta[1], (PetscInt *)&seta[0], &setb[1], (PetscInt)setb[0])); + PetscCallAbort(PETSC_COMM_SELF, PetscIntCast(seta[0], &lena)); + PetscCallAbort(PETSC_COMM_SELF, PetscIntCast(setb[0], &lenb)); + PetscCallAbort(PETSC_COMM_SELF, GarbageKeySortedIntersect_Private(seta + 1, &lena, setb + 1, lenb)); + seta[0] = lena; } /* Performs a collective allreduce intersection of one array per rank */ @@ -143,7 +147,7 @@ PetscErrorCode GarbageKeyAllReduceIntersect_Private(MPI_Comm comm, PetscInt64 *s PetscCallMPI(MPIU_Allreduce(entries, &max_entries, 1, MPIU_INT, MPI_MAX, comm)); PetscCall(PetscMalloc1(max_entries + 1, &sendset)); PetscCall(PetscMalloc1(max_entries + 1, &recvset)); - sendset[0] = (PetscInt64)*entries; + sendset[0] = *entries; for (ii = 1; ii < *entries + 1; ii++) sendset[ii] = set[ii - 1]; /* Create a custom data type to hold the set */ @@ -157,7 +161,7 @@ PetscErrorCode GarbageKeyAllReduceIntersect_Private(MPI_Comm comm, PetscInt64 *s PetscCallMPI(MPI_Type_free(&keyset_type)); - *entries = (PetscInt)recvset[0]; + PetscCall(PetscIntCast(recvset[0], entries)); for (ii = 0; ii < *entries; ii++) set[ii] = recvset[ii + 1]; PetscCall(PetscFree(sendset)); From d070fe2ee8a4063be0149d3cd65900cce2072341 Mon Sep 17 00:00:00 2001 From: Stefano Zampini Date: Thu, 3 Oct 2024 13:52:06 +0300 Subject: [PATCH 15/59] Fix enumerated and non-enumerated type in conditional expression [-Werror=extra] --- src/mat/impls/lrc/lrc.c | 4 +++- src/ts/interface/tshistory.c | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/mat/impls/lrc/lrc.c b/src/mat/impls/lrc/lrc.c index 6b7ab8c3689..9911136f5a6 100644 --- a/src/mat/impls/lrc/lrc.c +++ b/src/mat/impls/lrc/lrc.c @@ -258,11 +258,13 @@ static PetscErrorCode MatSetUp_LRC(Mat N) Vec c = Na->c; Mat Uloc; PetscMPIInt size, csize = 0; + PetscBool sym = (PetscBool)(U == V), dummy; PetscFunctionBegin; PetscCall(MatSetVecType(N, U->defaultvectype)); // 
Flag matrix as symmetric if A is symmetric and U == V - PetscCall(MatSetOption(N, MAT_SYMMETRIC, (PetscBool)((A ? A->symmetric == PETSC_BOOL3_TRUE : PETSC_TRUE) && U == V))); + if (A && sym) PetscCall(MatIsSymmetricKnown(A, &dummy, &sym)); + PetscCall(MatSetOption(N, MAT_SYMMETRIC, sym)); PetscCall(MatDenseGetLocalMatrix(Na->U, &Uloc)); PetscCall(MatCreateVecs(Uloc, &Na->work1, NULL)); diff --git a/src/ts/interface/tshistory.c b/src/ts/interface/tshistory.c index 9583041e3eb..87fd10f763b 100644 --- a/src/ts/interface/tshistory.c +++ b/src/ts/interface/tshistory.c @@ -75,7 +75,7 @@ PetscErrorCode TSHistoryUpdate(TSHistory tsh, PetscInt id, PetscReal time) PetscCall(PetscRealloc(tsh->c * sizeof(*tsh->hist), &tsh->hist)); PetscCall(PetscRealloc(tsh->c * sizeof(*tsh->hist_id), &tsh->hist_id)); } - tsh->sorted = (PetscBool)(tsh->sorted && (tsh->n ? time >= tsh->hist[tsh->n - 1] : PETSC_TRUE)); + tsh->sorted = (PetscBool)(tsh->sorted && (tsh->n ? (PetscBool)(time >= tsh->hist[tsh->n - 1]) : PETSC_TRUE)); #if defined(PETSC_USE_DEBUG) if (tsh->n) { /* id should be unique */ PetscInt loc, *ids; From 8a40300834377a32ebc3f9206ebc8531bcb9209f Mon Sep 17 00:00:00 2001 From: Stefano Zampini Date: Thu, 3 Oct 2024 13:53:27 +0300 Subject: [PATCH 16/59] Fix missing initializer for member errors [-Werror=missing-field-initializers] --- src/sys/classes/random/impls/rand/rand.c | 4 + src/sys/classes/random/impls/rand48/rand48.c | 4 + .../classes/random/impls/rander48/rander48.c | 1 + src/vec/is/ao/impls/mapping/aomapping.c | 4 + .../is/ao/impls/memscalable/aomemscalable.c | 4 + src/vec/vec/impls/mpi/pbvec.c | 183 +++++++++--------- src/vec/vec/impls/seq/bvec2.c | 1 + 7 files changed, 111 insertions(+), 90 deletions(-) diff --git a/src/sys/classes/random/impls/rand/rand.c b/src/sys/classes/random/impls/rand/rand.c index bd6b6ccde6a..005ab9bd6e6 100644 --- a/src/sys/classes/random/impls/rand/rand.c +++ b/src/sys/classes/random/impls/rand/rand.c @@ -38,6 +38,10 @@ static struct 
_PetscRandomOps PetscRandomOps_Values = { PetscDesignatedInitializer(seed, PetscRandomSeed_Rand), PetscDesignatedInitializer(getvalue, PetscRandomGetValue_Rand), PetscDesignatedInitializer(getvaluereal, PetscRandomGetValueReal_Rand), + PetscDesignatedInitializer(getvalues, NULL), + PetscDesignatedInitializer(getvaluesreal, NULL), + PetscDesignatedInitializer(destroy, NULL), + PetscDesignatedInitializer(setfromoptions, NULL), }; /*MC diff --git a/src/sys/classes/random/impls/rand48/rand48.c b/src/sys/classes/random/impls/rand48/rand48.c index db317a44f77..d5a106362c7 100644 --- a/src/sys/classes/random/impls/rand48/rand48.c +++ b/src/sys/classes/random/impls/rand48/rand48.c @@ -41,6 +41,10 @@ static struct _PetscRandomOps PetscRandomOps_Values = { PetscDesignatedInitializer(seed, PetscRandomSeed_Rand48), PetscDesignatedInitializer(getvalue, PetscRandomGetValue_Rand48), PetscDesignatedInitializer(getvaluereal, PetscRandomGetValueReal_Rand48), + PetscDesignatedInitializer(getvalues, NULL), + PetscDesignatedInitializer(getvaluesreal, NULL), + PetscDesignatedInitializer(destroy, NULL), + PetscDesignatedInitializer(setfromoptions, NULL), }; /*MC diff --git a/src/sys/classes/random/impls/rander48/rander48.c b/src/sys/classes/random/impls/rander48/rander48.c index 9f9ca001e06..e8a551ffa30 100644 --- a/src/sys/classes/random/impls/rander48/rander48.c +++ b/src/sys/classes/random/impls/rander48/rander48.c @@ -96,6 +96,7 @@ static struct _PetscRandomOps PetscRandomOps_Values = { PetscDesignatedInitializer(getvalues, NULL), PetscDesignatedInitializer(getvaluesreal, NULL), PetscDesignatedInitializer(destroy, PetscRandomDestroy_Rander48), + PetscDesignatedInitializer(setfromoptions, NULL), }; /*MC diff --git a/src/vec/is/ao/impls/mapping/aomapping.c b/src/vec/is/ao/impls/mapping/aomapping.c index 4c0bd7aa96b..6a3466cc611 100644 --- a/src/vec/is/ao/impls/mapping/aomapping.c +++ b/src/vec/is/ao/impls/mapping/aomapping.c @@ -118,6 +118,10 @@ static const struct _AOOps AOps = { 
PetscDesignatedInitializer(destroy, AODestroy_Mapping), PetscDesignatedInitializer(petsctoapplication, AOPetscToApplication_Mapping), PetscDesignatedInitializer(applicationtopetsc, AOApplicationToPetsc_Mapping), + PetscDesignatedInitializer(petsctoapplicationpermuteint, NULL), + PetscDesignatedInitializer(applicationtopetscpermuteint, NULL), + PetscDesignatedInitializer(petsctoapplicationpermutereal, NULL), + PetscDesignatedInitializer(applicationtopetscpermutereal, NULL), }; /*@ diff --git a/src/vec/is/ao/impls/memscalable/aomemscalable.c b/src/vec/is/ao/impls/memscalable/aomemscalable.c index 9453fc460aa..91b852e5e94 100644 --- a/src/vec/is/ao/impls/memscalable/aomemscalable.c +++ b/src/vec/is/ao/impls/memscalable/aomemscalable.c @@ -257,6 +257,10 @@ static const struct _AOOps AOOps_MemoryScalable = { PetscDesignatedInitializer(destroy, AODestroy_MemoryScalable), PetscDesignatedInitializer(petsctoapplication, AOPetscToApplication_MemoryScalable), PetscDesignatedInitializer(applicationtopetsc, AOApplicationToPetsc_MemoryScalable), + PetscDesignatedInitializer(petsctoapplicationpermuteint, NULL), + PetscDesignatedInitializer(applicationtopetscpermuteint, NULL), + PetscDesignatedInitializer(petsctoapplicationpermutereal, NULL), + PetscDesignatedInitializer(applicationtopetscpermutereal, NULL), }; static PetscErrorCode AOCreateMemoryScalable_private(MPI_Comm comm, PetscInt napp, const PetscInt from_array[], const PetscInt to_array[], AO ao, PetscInt *aomap_loc) diff --git a/src/vec/vec/impls/mpi/pbvec.c b/src/vec/vec/impls/mpi/pbvec.c index 5e535711a6a..22e786936e6 100644 --- a/src/vec/vec/impls/mpi/pbvec.c +++ b/src/vec/vec/impls/mpi/pbvec.c @@ -444,96 +444,99 @@ static PetscErrorCode VecGetLocalToGlobalMapping_MPI_VecGhost(Vec X, ISLocalToGl PetscFunctionReturn(PETSC_SUCCESS); } -static struct _VecOps DvOps = {PetscDesignatedInitializer(duplicate, VecDuplicate_MPI), /* 1 */ - PetscDesignatedInitializer(duplicatevecs, VecDuplicateVecs_Default), - 
PetscDesignatedInitializer(destroyvecs, VecDestroyVecs_Default), - PetscDesignatedInitializer(dot, VecDot_MPI), - PetscDesignatedInitializer(mdot, VecMDot_MPI), - PetscDesignatedInitializer(norm, VecNorm_MPI), - PetscDesignatedInitializer(tdot, VecTDot_MPI), - PetscDesignatedInitializer(mtdot, VecMTDot_MPI), - PetscDesignatedInitializer(scale, VecScale_Seq), - PetscDesignatedInitializer(copy, VecCopy_Seq), /* 10 */ - PetscDesignatedInitializer(set, VecSet_Seq), - PetscDesignatedInitializer(swap, VecSwap_Seq), - PetscDesignatedInitializer(axpy, VecAXPY_Seq), - PetscDesignatedInitializer(axpby, VecAXPBY_Seq), - PetscDesignatedInitializer(maxpy, VecMAXPY_Seq), - PetscDesignatedInitializer(aypx, VecAYPX_Seq), - PetscDesignatedInitializer(waxpy, VecWAXPY_Seq), - PetscDesignatedInitializer(axpbypcz, VecAXPBYPCZ_Seq), - PetscDesignatedInitializer(pointwisemult, VecPointwiseMult_Seq), - PetscDesignatedInitializer(pointwisedivide, VecPointwiseDivide_Seq), - PetscDesignatedInitializer(setvalues, VecSetValues_MPI), /* 20 */ - PetscDesignatedInitializer(assemblybegin, VecAssemblyBegin_MPI_BTS), - PetscDesignatedInitializer(assemblyend, VecAssemblyEnd_MPI_BTS), - PetscDesignatedInitializer(getarray, NULL), - PetscDesignatedInitializer(getsize, VecGetSize_MPI), - PetscDesignatedInitializer(getlocalsize, VecGetSize_Seq), - PetscDesignatedInitializer(restorearray, NULL), - PetscDesignatedInitializer(max, VecMax_MPI), - PetscDesignatedInitializer(min, VecMin_MPI), - PetscDesignatedInitializer(setrandom, VecSetRandom_Seq), - PetscDesignatedInitializer(setoption, VecSetOption_MPI), - PetscDesignatedInitializer(setvaluesblocked, VecSetValuesBlocked_MPI), - PetscDesignatedInitializer(destroy, VecDestroy_MPI), - PetscDesignatedInitializer(view, VecView_MPI), - PetscDesignatedInitializer(placearray, VecPlaceArray_MPI), - PetscDesignatedInitializer(replacearray, VecReplaceArray_Seq), - PetscDesignatedInitializer(dot_local, VecDot_Seq), - PetscDesignatedInitializer(tdot_local, 
VecTDot_Seq), - PetscDesignatedInitializer(norm_local, VecNorm_Seq), - PetscDesignatedInitializer(mdot_local, VecMDot_Seq), - PetscDesignatedInitializer(mtdot_local, VecMTDot_Seq), - PetscDesignatedInitializer(load, VecLoad_Default), - PetscDesignatedInitializer(reciprocal, VecReciprocal_Default), - PetscDesignatedInitializer(conjugate, VecConjugate_Seq), - PetscDesignatedInitializer(setlocaltoglobalmapping, NULL), - PetscDesignatedInitializer(getlocaltoglobalmapping, VecGetLocalToGlobalMapping_MPI_VecGhost), - PetscDesignatedInitializer(setvalueslocal, NULL), - PetscDesignatedInitializer(resetarray, VecResetArray_MPI), - PetscDesignatedInitializer(setfromoptions, VecSetFromOptions_MPI), /*set from options */ - PetscDesignatedInitializer(maxpointwisedivide, VecMaxPointwiseDivide_Seq), - PetscDesignatedInitializer(pointwisemax, VecPointwiseMax_Seq), - PetscDesignatedInitializer(pointwisemaxabs, VecPointwiseMaxAbs_Seq), - PetscDesignatedInitializer(pointwisemin, VecPointwiseMin_Seq), - PetscDesignatedInitializer(getvalues, VecGetValues_MPI), - PetscDesignatedInitializer(sqrt, NULL), - PetscDesignatedInitializer(abs, NULL), - PetscDesignatedInitializer(exp, NULL), - PetscDesignatedInitializer(log, NULL), - PetscDesignatedInitializer(shift, NULL), - PetscDesignatedInitializer(create, NULL), /* really? 
*/ - PetscDesignatedInitializer(stridegather, VecStrideGather_Default), - PetscDesignatedInitializer(stridescatter, VecStrideScatter_Default), - PetscDesignatedInitializer(dotnorm2, NULL), - PetscDesignatedInitializer(getsubvector, NULL), - PetscDesignatedInitializer(restoresubvector, NULL), - PetscDesignatedInitializer(getarrayread, NULL), - PetscDesignatedInitializer(restorearrayread, NULL), - PetscDesignatedInitializer(stridesubsetgather, VecStrideSubSetGather_Default), - PetscDesignatedInitializer(stridesubsetscatter, VecStrideSubSetScatter_Default), - PetscDesignatedInitializer(viewnative, VecView_MPI), - PetscDesignatedInitializer(loadnative, NULL), - PetscDesignatedInitializer(createlocalvector, NULL), - PetscDesignatedInitializer(getlocalvector, NULL), - PetscDesignatedInitializer(restorelocalvector, NULL), - PetscDesignatedInitializer(getlocalvectorread, NULL), - PetscDesignatedInitializer(restorelocalvectorread, NULL), - PetscDesignatedInitializer(bindtocpu, NULL), - PetscDesignatedInitializer(getarraywrite, NULL), - PetscDesignatedInitializer(restorearraywrite, NULL), - PetscDesignatedInitializer(getarrayandmemtype, NULL), - PetscDesignatedInitializer(restorearrayandmemtype, NULL), - PetscDesignatedInitializer(getarrayreadandmemtype, NULL), - PetscDesignatedInitializer(restorearrayreadandmemtype, NULL), - PetscDesignatedInitializer(getarraywriteandmemtype, NULL), - PetscDesignatedInitializer(restorearraywriteandmemtype, NULL), - PetscDesignatedInitializer(concatenate, NULL), - PetscDesignatedInitializer(sum, NULL), - PetscDesignatedInitializer(setpreallocationcoo, VecSetPreallocationCOO_MPI), - PetscDesignatedInitializer(setvaluescoo, VecSetValuesCOO_MPI), - PetscDesignatedInitializer(errorwnorm, NULL)}; +static struct _VecOps DvOps = { + PetscDesignatedInitializer(duplicate, VecDuplicate_MPI), /* 1 */ + PetscDesignatedInitializer(duplicatevecs, VecDuplicateVecs_Default), + PetscDesignatedInitializer(destroyvecs, VecDestroyVecs_Default), + 
PetscDesignatedInitializer(dot, VecDot_MPI), + PetscDesignatedInitializer(mdot, VecMDot_MPI), + PetscDesignatedInitializer(norm, VecNorm_MPI), + PetscDesignatedInitializer(tdot, VecTDot_MPI), + PetscDesignatedInitializer(mtdot, VecMTDot_MPI), + PetscDesignatedInitializer(scale, VecScale_Seq), + PetscDesignatedInitializer(copy, VecCopy_Seq), /* 10 */ + PetscDesignatedInitializer(set, VecSet_Seq), + PetscDesignatedInitializer(swap, VecSwap_Seq), + PetscDesignatedInitializer(axpy, VecAXPY_Seq), + PetscDesignatedInitializer(axpby, VecAXPBY_Seq), + PetscDesignatedInitializer(maxpy, VecMAXPY_Seq), + PetscDesignatedInitializer(aypx, VecAYPX_Seq), + PetscDesignatedInitializer(waxpy, VecWAXPY_Seq), + PetscDesignatedInitializer(axpbypcz, VecAXPBYPCZ_Seq), + PetscDesignatedInitializer(pointwisemult, VecPointwiseMult_Seq), + PetscDesignatedInitializer(pointwisedivide, VecPointwiseDivide_Seq), + PetscDesignatedInitializer(setvalues, VecSetValues_MPI), /* 20 */ + PetscDesignatedInitializer(assemblybegin, VecAssemblyBegin_MPI_BTS), + PetscDesignatedInitializer(assemblyend, VecAssemblyEnd_MPI_BTS), + PetscDesignatedInitializer(getarray, NULL), + PetscDesignatedInitializer(getsize, VecGetSize_MPI), + PetscDesignatedInitializer(getlocalsize, VecGetSize_Seq), + PetscDesignatedInitializer(restorearray, NULL), + PetscDesignatedInitializer(max, VecMax_MPI), + PetscDesignatedInitializer(min, VecMin_MPI), + PetscDesignatedInitializer(setrandom, VecSetRandom_Seq), + PetscDesignatedInitializer(setoption, VecSetOption_MPI), + PetscDesignatedInitializer(setvaluesblocked, VecSetValuesBlocked_MPI), + PetscDesignatedInitializer(destroy, VecDestroy_MPI), + PetscDesignatedInitializer(view, VecView_MPI), + PetscDesignatedInitializer(placearray, VecPlaceArray_MPI), + PetscDesignatedInitializer(replacearray, VecReplaceArray_Seq), + PetscDesignatedInitializer(dot_local, VecDot_Seq), + PetscDesignatedInitializer(tdot_local, VecTDot_Seq), + PetscDesignatedInitializer(norm_local, VecNorm_Seq), + 
PetscDesignatedInitializer(mdot_local, VecMDot_Seq), + PetscDesignatedInitializer(mtdot_local, VecMTDot_Seq), + PetscDesignatedInitializer(load, VecLoad_Default), + PetscDesignatedInitializer(reciprocal, VecReciprocal_Default), + PetscDesignatedInitializer(conjugate, VecConjugate_Seq), + PetscDesignatedInitializer(setlocaltoglobalmapping, NULL), + PetscDesignatedInitializer(getlocaltoglobalmapping, VecGetLocalToGlobalMapping_MPI_VecGhost), + PetscDesignatedInitializer(setvalueslocal, NULL), + PetscDesignatedInitializer(resetarray, VecResetArray_MPI), + PetscDesignatedInitializer(setfromoptions, VecSetFromOptions_MPI), /*set from options */ + PetscDesignatedInitializer(maxpointwisedivide, VecMaxPointwiseDivide_Seq), + PetscDesignatedInitializer(pointwisemax, VecPointwiseMax_Seq), + PetscDesignatedInitializer(pointwisemaxabs, VecPointwiseMaxAbs_Seq), + PetscDesignatedInitializer(pointwisemin, VecPointwiseMin_Seq), + PetscDesignatedInitializer(getvalues, VecGetValues_MPI), + PetscDesignatedInitializer(sqrt, NULL), + PetscDesignatedInitializer(abs, NULL), + PetscDesignatedInitializer(exp, NULL), + PetscDesignatedInitializer(log, NULL), + PetscDesignatedInitializer(shift, NULL), + PetscDesignatedInitializer(create, NULL), /* really? 
*/ + PetscDesignatedInitializer(stridegather, VecStrideGather_Default), + PetscDesignatedInitializer(stridescatter, VecStrideScatter_Default), + PetscDesignatedInitializer(dotnorm2, NULL), + PetscDesignatedInitializer(getsubvector, NULL), + PetscDesignatedInitializer(restoresubvector, NULL), + PetscDesignatedInitializer(getarrayread, NULL), + PetscDesignatedInitializer(restorearrayread, NULL), + PetscDesignatedInitializer(stridesubsetgather, VecStrideSubSetGather_Default), + PetscDesignatedInitializer(stridesubsetscatter, VecStrideSubSetScatter_Default), + PetscDesignatedInitializer(viewnative, VecView_MPI), + PetscDesignatedInitializer(loadnative, NULL), + PetscDesignatedInitializer(createlocalvector, NULL), + PetscDesignatedInitializer(getlocalvector, NULL), + PetscDesignatedInitializer(restorelocalvector, NULL), + PetscDesignatedInitializer(getlocalvectorread, NULL), + PetscDesignatedInitializer(restorelocalvectorread, NULL), + PetscDesignatedInitializer(bindtocpu, NULL), + PetscDesignatedInitializer(getarraywrite, NULL), + PetscDesignatedInitializer(restorearraywrite, NULL), + PetscDesignatedInitializer(getarrayandmemtype, NULL), + PetscDesignatedInitializer(restorearrayandmemtype, NULL), + PetscDesignatedInitializer(getarrayreadandmemtype, NULL), + PetscDesignatedInitializer(restorearrayreadandmemtype, NULL), + PetscDesignatedInitializer(getarraywriteandmemtype, NULL), + PetscDesignatedInitializer(restorearraywriteandmemtype, NULL), + PetscDesignatedInitializer(concatenate, NULL), + PetscDesignatedInitializer(sum, NULL), + PetscDesignatedInitializer(setpreallocationcoo, VecSetPreallocationCOO_MPI), + PetscDesignatedInitializer(setvaluescoo, VecSetValuesCOO_MPI), + PetscDesignatedInitializer(errorwnorm, NULL), + PetscDesignatedInitializer(maxpby, NULL), +}; /* VecCreate_MPI_Private - Basic create routine called by VecCreate_MPI() (i.e. 
VecCreateMPI()), diff --git a/src/vec/vec/impls/seq/bvec2.c b/src/vec/vec/impls/seq/bvec2.c index 4801eb8c38b..49fd5163fd0 100644 --- a/src/vec/vec/impls/seq/bvec2.c +++ b/src/vec/vec/impls/seq/bvec2.c @@ -883,6 +883,7 @@ static struct _VecOps DvOps = { PetscDesignatedInitializer(setpreallocationcoo, VecSetPreallocationCOO_Seq), PetscDesignatedInitializer(setvaluescoo, VecSetValuesCOO_Seq), PetscDesignatedInitializer(errorwnorm, NULL), + PetscDesignatedInitializer(maxpby, NULL), }; /* From 0a82db20497c038dce1fde5c16605eafc5923bcf Mon Sep 17 00:00:00 2001 From: Junchao Zhang Date: Thu, 3 Oct 2024 14:23:22 +0000 Subject: [PATCH 17/59] Log: fix another nan in log view output --- src/sys/logging/handler/impls/default/logdefault.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/sys/logging/handler/impls/default/logdefault.c b/src/sys/logging/handler/impls/default/logdefault.c index 3870cd39925..56c7dfc4731 100644 --- a/src/sys/logging/handler/impls/default/logdefault.c +++ b/src/sys/logging/handler/impls/default/logdefault.c @@ -1662,10 +1662,13 @@ static PetscErrorCode PetscLogHandlerView_Default_Info(PetscLogHandler handler, else totml = 0.0; if (maxt != 0.0) flopr = totf / maxt; else flopr = 0.0; - if (fracStageTime > 1.0 || fracStageFlops > 1.0 || fracStageMess > 1.0 || fracStageMessLen > 1.0 || fracStageRed > 1.0) - PetscCall(PetscViewerASCIIPrintf(viewer, "%-16s %7d %3.1f %5.4e %3.1f %3.2e %3.1f %2.1e %2.1e %2.1e %2.0f %2.0f %2.0f %2.0f %2.0f Multiple stages %5.0f", event_name, maxC, ratC, maxt, ratt, maxf, ratf, totm, totml, totr, 100.0 * fracTime, 100.0 * fracFlops, 100.0 * fracMess, 100.0 * fracMessLen, 100.0 * fracRed, PetscAbs(flopr) / 1.0e6)); - else { - if (PetscIsNanReal((PetscReal)maxt)) { // when maxt, ratt, flopr are NaN (i.e., run with GPUs but without -log_view_gpu_time), replace the confusing "nan" with "n/a" + if (fracStageTime > 1.0 || fracStageFlops > 1.0 || fracStageMess > 1.0 || fracStageMessLen > 1.0 || 
fracStageRed > 1.0) { + if (PetscIsNanReal(maxt)) + PetscCall(PetscViewerASCIIPrintf(viewer, "%-16s %7d %3.1f n/a n/a %3.2e %3.1f %2.1e %2.1e %2.1e %2.0f %2.0f %2.0f %2.0f %2.0f Multiple stages n/a", event_name, maxC, ratC, maxf, ratf, totm, totml, totr, 100.0 * fracTime, 100.0 * fracFlops, 100.0 * fracMess, 100.0 * fracMessLen, 100.0 * fracRed)); + else + PetscCall(PetscViewerASCIIPrintf(viewer, "%-16s %7d %3.1f %5.4e %3.1f %3.2e %3.1f %2.1e %2.1e %2.1e %2.0f %2.0f %2.0f %2.0f %2.0f Multiple stages %5.0f", event_name, maxC, ratC, maxt, ratt, maxf, ratf, totm, totml, totr, 100.0 * fracTime, 100.0 * fracFlops, 100.0 * fracMess, 100.0 * fracMessLen, 100.0 * fracRed, PetscAbs(flopr) / 1.0e6)); + } else { + if (PetscIsNanReal(maxt)) { // when maxt, ratt, flopr are NaN (i.e., run with GPUs but without -log_view_gpu_time), replace the confusing "nan" with "n/a" PetscCall(PetscViewerASCIIPrintf(viewer, "%-16s %7d %3.1f n/a n/a %3.2e %3.1f %2.1e %2.1e %2.1e %2.0f %2.0f %2.0f %2.0f %2.0f %3.0f %2.0f %2.0f %2.0f %2.0f n/a", event_name, maxC, ratC, maxf, ratf, totm, totml, totr, 100.0 * fracTime, 100.0 * fracFlops, 100.0 * fracMess, 100.0 * fracMessLen, 100.0 * fracRed, 100.0 * fracStageTime, 100.0 * fracStageFlops, 100.0 * fracStageMess, 100.0 * fracStageMessLen, 100.0 * fracStageRed)); } else { PetscCall(PetscViewerASCIIPrintf(viewer, "%-16s %7d %3.1f %5.4e %3.1f %3.2e %3.1f %2.1e %2.1e %2.1e %2.0f %2.0f %2.0f %2.0f %2.0f %3.0f %2.0f %2.0f %2.0f %2.0f %5.0f", event_name, maxC, ratC, maxt, ratt, maxf, ratf, totm, totml, totr, 100.0 * fracTime, 100.0 * fracFlops, 100.0 * fracMess, 100.0 * fracMessLen, 100.0 * fracRed, 100.0 * fracStageTime, 100.0 * fracStageFlops, 100.0 * fracStageMess, 100.0 * fracStageMessLen, 100.0 * fracStageRed, PetscAbs(flopr) / 1.0e6)); @@ -1677,7 +1680,7 @@ static PetscErrorCode PetscLogHandlerView_Default_Info(PetscLogHandler handler, else fracgflops = 0.0; if (gmaxt != 0.0) gflopr = gflops / gmaxt; else gflopr = 0.0; - if 
(PetscIsNanReal((PetscReal)gflopr)) { + if (PetscIsNanReal(gflopr)) { PetscCall(PetscViewerASCIIPrintf(viewer, " n/a %4.0f %3.2e %4.0f %3.2e % 2.0f", cct / size, csz / (1.0e6 * size), gct / size, gsz / (1.0e6 * size), 100.0 * fracgflops)); } else { PetscCall(PetscViewerASCIIPrintf(viewer, " %5.0f %4.0f %3.2e %4.0f %3.2e % 2.0f", PetscAbs(gflopr) / 1.0e6, cct / size, csz / (1.0e6 * size), gct / size, gsz / (1.0e6 * size), 100.0 * fracgflops)); From 840d638a4172bd160de4227f851bcc373051da07 Mon Sep 17 00:00:00 2001 From: Junchao Zhang Date: Thu, 3 Oct 2024 14:24:50 +0000 Subject: [PATCH 18/59] Sys: do not use thrust complex when petsc is not configured with GPU Reported-by: langtian.liu@icloud.com --- include/petscsystypes.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/petscsystypes.h b/include/petscsystypes.h index 64c59a1b01e..0f595de66ea 100644 --- a/include/petscsystypes.h +++ b/include/petscsystypes.h @@ -560,7 +560,7 @@ M*/ #if defined(PETSC_DESIRE_KOKKOS_COMPLEX) /* Defined in petscvec_kokkos.hpp for *.kokkos.cxx files */ #define petsccomplexlib Kokkos #include - #elif defined(__CUDACC__) || defined(__HIPCC__) + #elif (defined(__CUDACC__) && defined(PETSC_HAVE_CUDA)) || (defined(__HIPCC__) && defined(PETSC_HAVE_HIP)) #define petsccomplexlib thrust #include #elif defined(PETSC_USE_REAL___FLOAT128) From 45a61cde458fc79300a6523be9c221dabbc4da6c Mon Sep 17 00:00:00 2001 From: Junchao Zhang Date: Thu, 3 Oct 2024 12:48:56 -0500 Subject: [PATCH 19/59] SYCL: use sycl/sycl.hpp since CL/sycl.hpp is deprecated --- config/BuildSystem/config/setCompilers.py | 2 +- src/sys/objects/device/impls/sycl/syclcontext.sycl.cxx | 2 +- src/sys/objects/device/impls/sycl/sycldevice.sycl.cxx | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/config/BuildSystem/config/setCompilers.py b/config/BuildSystem/config/setCompilers.py index 4c386de1a28..86a7a29ade8 100644 --- a/config/BuildSystem/config/setCompilers.py +++ 
b/config/BuildSystem/config/setCompilers.py @@ -1692,7 +1692,7 @@ def checkSYCLPreprocessor(self): for compiler in self.generateSYCLPreprocessorGuesses(): try: if self.getExecutable(compiler, resultName = 'SYCLPP'): - if not self.checkPreprocess('#include <CL/sycl.hpp>\n void testFunction() {return;};'): + if not self.checkPreprocess('#include <sycl/sycl.hpp>\n void testFunction() {return;};'): raise RuntimeError('Cannot preprocess SYCL with '+self.SYCLPP+'.') return except RuntimeError as e: diff --git a/src/sys/objects/device/impls/sycl/syclcontext.sycl.cxx b/src/sys/objects/device/impls/sycl/syclcontext.sycl.cxx index 6d7d20ca77b..f4a5a8d2f5a 100644 --- a/src/sys/objects/device/impls/sycl/syclcontext.sycl.cxx +++ b/src/sys/objects/device/impls/sycl/syclcontext.sycl.cxx @@ -1,5 +1,5 @@ #include "sycldevice.hpp" -#include <CL/sycl.hpp> +#include <sycl/sycl.hpp> #include namespace Petsc diff --git a/src/sys/objects/device/impls/sycl/sycldevice.sycl.cxx b/src/sys/objects/device/impls/sycl/sycldevice.sycl.cxx index d72782a5756..451bff52531 100644 --- a/src/sys/objects/device/impls/sycl/sycldevice.sycl.cxx +++ b/src/sys/objects/device/impls/sycl/sycldevice.sycl.cxx @@ -3,7 +3,7 @@ #include // for MPI sycl device awareness #include // SIGSEGV #include -#include <CL/sycl.hpp> +#include <sycl/sycl.hpp> namespace Petsc { From 5341eb2e3b20bbedb83026cfad75e70f80bee940 Mon Sep 17 00:00:00 2001 From: Stefano Zampini Date: Fri, 4 Oct 2024 15:39:45 +0300 Subject: [PATCH 20/59] PetscViewerDestroy_ExodusII: add missing PetscCalls --- src/dm/impls/plex/exodusii/plexexodusii2.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/dm/impls/plex/exodusii/plexexodusii2.c b/src/dm/impls/plex/exodusii/plexexodusii2.c index 787b4996842..bb2a877ee49 100644 --- a/src/dm/impls/plex/exodusii/plexexodusii2.c +++ b/src/dm/impls/plex/exodusii/plexexodusii2.c @@ -79,9 +79,9 @@ static PetscErrorCode PetscViewerDestroy_ExodusII(PetscViewer viewer) PetscFunctionBegin; if (exo->exoid >= 0) PetscCallExternal(ex_close, exo->exoid); - for (PetscInt i = 0; i <
exo->numZonalVariables; i++) PetscFree(exo->zonalVariableNames[i]); + for (PetscInt i = 0; i < exo->numZonalVariables; i++) PetscCall(PetscFree(exo->zonalVariableNames[i])); PetscCall(PetscFree(exo->zonalVariableNames)); - for (PetscInt i = 0; i < exo->numNodalVariables; i++) PetscFree(exo->nodalVariableNames[i]); + for (PetscInt i = 0; i < exo->numNodalVariables; i++) PetscCall(PetscFree(exo->nodalVariableNames[i])); PetscCall(PetscFree(exo->nodalVariableNames)); PetscCall(PetscFree(exo->filename)); PetscCall(PetscFree(exo)); From 4bf1a0e8ff80104a73c22b827149902ed229e956 Mon Sep 17 00:00:00 2001 From: Hansol Suh Date: Fri, 4 Oct 2024 17:11:37 +0000 Subject: [PATCH 21/59] added missing MATCONSTANTDIAGONAL documentation entry --- src/mat/impls/cdiagonal/cdiagonal.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/mat/impls/cdiagonal/cdiagonal.c b/src/mat/impls/cdiagonal/cdiagonal.c index 2cd19b67af5..995bec4b3df 100644 --- a/src/mat/impls/cdiagonal/cdiagonal.c +++ b/src/mat/impls/cdiagonal/cdiagonal.c @@ -290,6 +290,14 @@ PetscErrorCode MatCreateConstantDiagonal(MPI_Comm comm, PetscInt m, PetscInt n, PetscFunctionReturn(PETSC_SUCCESS); } +/*MC + MATCONSTANTDIAGONAL - "constant-diagonal" - A diagonal matrix type with a uniform value + along the diagonal. 
+ + Level: advanced + +.seealso: [](ch_matrices), `Mat`, `MatCreateConstantDiagonal()` +M*/ PETSC_EXTERN PetscErrorCode MatCreate_ConstantDiagonal(Mat A) { Mat_ConstantDiagonal *ctx; From 6dd40b4cf7b5157c903e9c956e892487577bdb5b Mon Sep 17 00:00:00 2001 From: Junchao Zhang Date: Fri, 4 Oct 2024 14:26:29 -0500 Subject: [PATCH 22/59] VecKokkos: fix logging for gpu/cpu memory copy and VecMDot gpu timing PetscLogGpuTime is only used for Kokkos kernel execution, excluding time for moving data to GPU --- .../vec/impls/seq/kokkos/veckok.kokkos.cxx | 40 ++++++++++++++----- 1 file changed, 30 insertions(+), 10 deletions(-) diff --git a/src/vec/vec/impls/seq/kokkos/veckok.kokkos.cxx b/src/vec/vec/impls/seq/kokkos/veckok.kokkos.cxx index 78470c29e91..02212a49c19 100644 --- a/src/vec/vec/impls/seq/kokkos/veckok.kokkos.cxx +++ b/src/vec/vec/impls/seq/kokkos/veckok.kokkos.cxx @@ -23,9 +23,15 @@ static PetscErrorCode VecGetKokkosView_Private(Vec v, PetscScalarKokkosViewType< PetscFunctionBegin; VecErrorIfNotKokkos(v); if (!overwrite) { /* If overwrite=true, no need to sync the space, since caller will overwrite the data */ - auto &exec = PetscGetKokkosExecutionSpace(); - veckok->v_dual.sync(exec); // async call - if (std::is_same_v) exec.fence(); // make sure one can access the host copy immediately + auto &exec = PetscGetKokkosExecutionSpace(); + constexpr bool hostspace = std::is_same_v; + if (hostspace) { + if (veckok->v_dual.need_sync_host()) PetscCall(PetscLogGpuToCpu(veckok->v_dual.extent(0) * sizeof(PetscScalar))); + } else { + if (veckok->v_dual.need_sync_device()) PetscCall(PetscLogCpuToGpu(veckok->v_dual.extent(0) * sizeof(PetscScalar))); + } + veckok->v_dual.sync(exec); // async call + if (hostspace) exec.fence(); // make sure one can access the host copy immediately } *kv = veckok->v_dual.view(); PetscFunctionReturn(PETSC_SUCCESS); @@ -47,13 +53,19 @@ static PetscErrorCode VecRestoreKokkosView_Private(Vec v, PetscScalarKokkosViewT template PetscErrorCode 
VecGetKokkosView(Vec v, ConstPetscScalarKokkosViewType *kv) { - Vec_Kokkos *veckok = static_cast(v->spptr); - auto &exec = PetscGetKokkosExecutionSpace(); + Vec_Kokkos *veckok = static_cast(v->spptr); + auto &exec = PetscGetKokkosExecutionSpace(); + constexpr bool hostspace = std::is_same_v; PetscFunctionBegin; VecErrorIfNotKokkos(v); + if (hostspace) { + if (veckok->v_dual.need_sync_host()) PetscCall(PetscLogGpuToCpu(veckok->v_dual.extent(0) * sizeof(PetscScalar))); + } else { + if (veckok->v_dual.need_sync_device()) PetscCall(PetscLogCpuToGpu(veckok->v_dual.extent(0) * sizeof(PetscScalar))); + } veckok->v_dual.sync(exec); - if (std::is_same_v) exec.fence(); // make sure one can access the host copy immediately + if (hostspace) exec.fence(); // make sure one can access the host copy immediately *kv = veckok->v_dual.view(); PetscFunctionReturn(PETSC_SUCCESS); } @@ -354,7 +366,9 @@ PetscErrorCode VecMultiDot_Private(Vec xin, PetscInt nv, const Vec yin[], PetscS for (i = 0; i < ngroup; i++) { /* 8 y's per group */ for (j = 0; j < 8; j++) PetscCall(VecGetKokkosView(yin[cur + j], &yv[j])); MDotFunctor<8> mdot(xv, yv[0], yv[1], yv[2], yv[3], yv[4], yv[5], yv[6], yv[7]); /* Hope Kokkos make it asynchronous */ + PetscCall(PetscLogGpuTimeBegin()); PetscCallCXX(Kokkos::parallel_reduce(Kokkos::RangePolicy(exec, 0, N), mdot, Kokkos::subview(zv, Kokkos::pair(cur, cur + 8)))); + PetscCall(PetscLogGpuTimeEnd()); for (j = 0; j < 8; j++) PetscCall(VecRestoreKokkosView(yin[cur + j], &yv[j])); cur += 8; } @@ -364,6 +378,7 @@ PetscErrorCode VecMultiDot_Private(Vec xin, PetscInt nv, const Vec yin[], PetscS Kokkos::RangePolicy policy(exec, 0, N); auto results = Kokkos::subview(zv, Kokkos::pair(cur, cur + rem)); // clang-format off + PetscCall(PetscLogGpuTimeBegin()); switch (rem) { case 1: PetscCallCXX(Kokkos::parallel_reduce(policy, MDotFunctor<1>(xv, yv[0], yv[1], yv[2], yv[3], yv[4], yv[5], yv[6], yv[7]), results)); break; case 2: PetscCallCXX(Kokkos::parallel_reduce(policy, 
MDotFunctor<2>(xv, yv[0], yv[1], yv[2], yv[3], yv[4], yv[5], yv[6], yv[7]), results)); break; @@ -373,6 +388,7 @@ PetscErrorCode VecMultiDot_Private(Vec xin, PetscInt nv, const Vec yin[], PetscS case 6: PetscCallCXX(Kokkos::parallel_reduce(policy, MDotFunctor<6>(xv, yv[0], yv[1], yv[2], yv[3], yv[4], yv[5], yv[6], yv[7]), results)); break; case 7: PetscCallCXX(Kokkos::parallel_reduce(policy, MDotFunctor<7>(xv, yv[0], yv[1], yv[2], yv[3], yv[4], yv[5], yv[6], yv[7]), results)); break; } + PetscCall(PetscLogGpuTimeEnd()); // clang-format on for (j = 0; j < rem; j++) PetscCall(VecRestoreKokkosView(yin[cur + j], &yv[j])); } @@ -401,12 +417,14 @@ static PetscErrorCode VecMultiDot_Verbose(Vec xin, PetscInt nv, const Vec yin[], PetscCall(VecGetKokkosView(yp[5], &y5)); PetscCall(VecGetKokkosView(yp[6], &y6)); PetscCall(VecGetKokkosView(yp[7], &y7)); + PetscCall(PetscLogGpuTimeBegin()); // only for GPU kernel execution Kokkos::parallel_reduce( "VecMDot8", policy, KOKKOS_LAMBDA(const PetscInt &i, PetscScalar &lsum0, PetscScalar &lsum1, PetscScalar &lsum2, PetscScalar &lsum3, PetscScalar &lsum4, PetscScalar &lsum5, PetscScalar &lsum6, PetscScalar &lsum7) { lsum0 += xv(i) * PetscConj(y0(i)); lsum1 += xv(i) * PetscConj(y1(i)); lsum2 += xv(i) * PetscConj(y2(i)); lsum3 += xv(i) * PetscConj(y3(i)); lsum4 += xv(i) * PetscConj(y4(i)); lsum5 += xv(i) * PetscConj(y5(i)); lsum6 += xv(i) * PetscConj(y6(i)); lsum7 += xv(i) * PetscConj(y7(i)); }, zp[0], zp[1], zp[2], zp[3], zp[4], zp[5], zp[6], zp[7]); + PetscCall(PetscLogGpuTimeEnd()); PetscCall(VecRestoreKokkosView(yp[0], &y0)); PetscCall(VecRestoreKokkosView(yp[1], &y1)); PetscCall(VecRestoreKokkosView(yp[2], &y2)); @@ -427,6 +445,7 @@ static PetscErrorCode VecMultiDot_Verbose(Vec xin, PetscInt nv, const Vec yin[], if (rem > 4) PetscCall(VecGetKokkosView(yp[4], &y4)); if (rem > 5) PetscCall(VecGetKokkosView(yp[5], &y5)); if (rem > 6) PetscCall(VecGetKokkosView(yp[6], &y6)); + PetscCall(PetscLogGpuTimeBegin()); switch (rem) { case 7: 
Kokkos::parallel_reduce( @@ -481,6 +500,7 @@ static PetscErrorCode VecMultiDot_Verbose(Vec xin, PetscInt nv, const Vec yin[], }, zp[0]); break; } + PetscCall(PetscLogGpuTimeEnd()); if (rem > 0) PetscCall(VecRestoreKokkosView(yp[0], &y0)); if (rem > 1) PetscCall(VecRestoreKokkosView(yp[1], &y1)); if (rem > 2) PetscCall(VecRestoreKokkosView(yp[2], &y2)); @@ -498,7 +518,6 @@ static PetscErrorCode VecMultiDot_Verbose(Vec xin, PetscInt nv, const Vec yin[], PetscErrorCode VecMDot_SeqKokkos(Vec xin, PetscInt nv, const Vec yin[], PetscScalar *z) { PetscFunctionBegin; - PetscCall(PetscLogGpuTimeBegin()); // With no good reason, VecMultiDot_Private() performs much worse than VecMultiDot_Verbose() with HIP, // but they are on par with CUDA. Kokkos team is investigating this problem. #if 0 @@ -506,7 +525,7 @@ PetscErrorCode VecMDot_SeqKokkos(Vec xin, PetscInt nv, const Vec yin[], PetscSca #else PetscCall(VecMultiDot_Verbose(xin, nv, yin, z)); #endif - PetscCall(PetscLogGpuTimeEnd()); + PetscCall(PetscLogGpuToCpu(nv * sizeof(PetscScalar))); // for copying to z[] on host PetscCall(PetscLogGpuFlops(PetscMax(nv * (2.0 * xin->map->n - 1), 0.0))); PetscFunctionReturn(PETSC_SUCCESS); } @@ -515,9 +534,8 @@ PetscErrorCode VecMDot_SeqKokkos(Vec xin, PetscInt nv, const Vec yin[], PetscSca PetscErrorCode VecMTDot_SeqKokkos(Vec xin, PetscInt nv, const Vec yin[], PetscScalar *z) { PetscFunctionBegin; - PetscCall(PetscLogGpuTimeBegin()); PetscCall(VecMultiDot_Private(xin, nv, yin, z)); - PetscCall(PetscLogGpuTimeEnd()); + PetscCall(PetscLogGpuToCpu(nv * sizeof(PetscScalar))); // for copying to z[] on host PetscCall(PetscLogGpuFlops(PetscMax(nv * (2.0 * xin->map->n - 1), 0.0))); PetscFunctionReturn(PETSC_SUCCESS); } @@ -598,6 +616,7 @@ PetscErrorCode VecMDot_SeqKokkos_GEMV(Vec xin, PetscInt nv, const Vec yin[], Pet { PetscFunctionBegin; PetscCall(VecMultiDot_SeqKokkos_GEMV(PETSC_TRUE, xin, nv, yin, z)); // conjugate + PetscCall(PetscLogGpuToCpu(nv * sizeof(PetscScalar))); // for copying to 
z[] on host PetscFunctionReturn(PETSC_SUCCESS); } @@ -605,6 +624,7 @@ PetscErrorCode VecMTDot_SeqKokkos_GEMV(Vec xin, PetscInt nv, const Vec yin[], Pe { PetscFunctionBegin; PetscCall(VecMultiDot_SeqKokkos_GEMV(PETSC_FALSE, xin, nv, yin, z)); // transpose + PetscCall(PetscLogGpuToCpu(nv * sizeof(PetscScalar))); // for copying to z[] on host PetscFunctionReturn(PETSC_SUCCESS); } From 2f54a4926a740b813b3b24497281ee2c56d556f7 Mon Sep 17 00:00:00 2001 From: Stefano Zampini Date: Sat, 5 Oct 2024 15:44:25 +0300 Subject: [PATCH 23/59] makefile: reverse linking order for static libraries --- gmakefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/gmakefile b/gmakefile index 75679d127ba..6b9e23fef20 100644 --- a/gmakefile +++ b/gmakefile @@ -9,6 +9,7 @@ OBJDIR := $(PETSC_ARCH)/obj LIBDIR := $(PETSC_ARCH)/lib pkgs := sys vec mat dm ksp snes ts tao +pkgs_reverse := tao ts snes ksp dm mat vec sys # $(call SONAME_FUNCTION,libfoo,abiversion) SONAME_FUNCTION ?= $(1).$(SL_LINKER_SUFFIX).$(2) @@ -34,7 +35,7 @@ libpetsc_static := $(LIBDIR)/libpetsc.$(AR_LIB_SUFFIX) libpetscpkgs_shared := $(foreach pkg, $(pkgs), $(LIBDIR)/libpetsc$(pkg).$(SL_LINKER_SUFFIX)) libpetscpkgs_soname := $(foreach pkg, $(pkgs), $(call soname_function,$(LIBDIR)/libpetsc$(pkg))) libpetscpkgs_libname := $(foreach pkg, $(pkgs), $(call libname_function,$(LIBDIR)/libpetsc$(pkg))) -libpetscpkgs_static := $(foreach pkg, $(pkgs), $(LIBDIR)/libpetsc$(pkg).$(AR_LIB_SUFFIX)) +libpetscpkgs_static := $(foreach pkg, $(pkgs_reverse), $(LIBDIR)/libpetsc$(pkg).$(AR_LIB_SUFFIX)) ifeq ($(PETSC_WITH_EXTERNAL_LIB),) libpetscall_shared := $(libpetscpkgs_shared) From 0ef292d34d72d2286c645fabc9b92dc1f6c2aa8c Mon Sep 17 00:00:00 2001 From: Stefano Zampini Date: Sat, 5 Oct 2024 20:28:53 +0300 Subject: [PATCH 24/59] configure: detect GNU address sanitizer disable PetscCheckPointer when using a sanitizer --- config/PETSc/Configure.py | 2 ++ src/ksp/ksp/tests/ex86f.F90 | 8 ++++---- src/snes/tutorials/ex19.c | 2 +- 
src/sys/error/checkptr.c | 7 +++++-- src/sys/tests/ex1.c | 2 +- src/sys/tests/ex66.c | 2 +- src/sys/tests/ex77.c | 2 +- src/ts/tests/ex26.c | 2 +- 8 files changed, 16 insertions(+), 11 deletions(-) diff --git a/config/PETSc/Configure.py b/config/PETSc/Configure.py index 2d4fad7fcd8..b0251447bbb 100644 --- a/config/PETSc/Configure.py +++ b/config/PETSc/Configure.py @@ -617,6 +617,8 @@ def configureSanitize(self): '''Checks if fsanitize is supported''' if self.checkLink('#if defined(__has_feature)\n#if !__has_feature(address_sanitizer)\nGarbage\n#endif\n#else\nGarbage\n#endif\n'): self.addDefine('HAVE_SANITIZER', '1') + elif self.checkLink('#if !defined(__SANITIZE_ADDRESS__)\nGarbage\n#endif\n'): + self.addDefine('HAVE_SANITIZER', '1') def configureUnused(self): '''Sees if __attribute((unused)) is supported''' diff --git a/src/ksp/ksp/tests/ex86f.F90 b/src/ksp/ksp/tests/ex86f.F90 index 3a72605f8e7..32ffc64ceb3 100644 --- a/src/ksp/ksp/tests/ex86f.F90 +++ b/src/ksp/ksp/tests/ex86f.F90 @@ -40,26 +40,26 @@ program main !/*TEST ! ! test: -! requires: defined(PETSC_USE_DEBUG) !defined(PETSCTEST_VALGRIND) defined(PETSC_HAVE_FORTRAN_FREE_LINE_LENGTH_NONE) +! requires: defined(PETSC_USE_DEBUG) !defined(PETSCTEST_VALGRIND) defined(PETSC_HAVE_FORTRAN_FREE_LINE_LENGTH_NONE) !defined(PETSC_HAVE_SANITIZER) ! args: -petsc_ci_portable_error_output -error_output_stdout -test 1 ! filter: grep -E "(PETSC ERROR)" | sed s"?KSPCREATE?kspcreate_?" ! ! test: ! suffix: 2 -! requires: !defined(PETSCTEST_VALGRIND) defined(PETSC_HAVE_FORTRAN_FREE_LINE_LENGTH_NONE) +! requires: !defined(PETSCTEST_VALGRIND) defined(PETSC_HAVE_FORTRAN_FREE_LINE_LENGTH_NONE) !defined(PETSC_HAVE_SANITIZER) ! args: -petsc_ci_portable_error_output -error_output_stdout -test 2 ! filter: grep -E "(PETSC ERROR)"| sed s"?KSPCREATE?kspcreate_?" ! ! test: ! suffix: 3 -! requires: !defined(PETSCTEST_VALGRIND) defined(PETSC_HAVE_FORTRAN_FREE_LINE_LENGTH_NONE) +! 
requires: !defined(PETSCTEST_VALGRIND) defined(PETSC_HAVE_FORTRAN_FREE_LINE_LENGTH_NONE) !defined(PETSC_HAVE_SANITIZER) ! args: -petsc_ci_portable_error_output -error_output_stdout -test 3 ! filter: grep -E "(PETSC ERROR)" | sed s"?KSPCREATE?kspcreate_?" ! ! ! test: ! suffix: 4 -! requires: !defined(PETSCTEST_VALGRIND) defined(PETSC_HAVE_FORTRAN_FREE_LINE_LENGTH_NONE) +! requires: !defined(PETSCTEST_VALGRIND) defined(PETSC_HAVE_FORTRAN_FREE_LINE_LENGTH_NONE) !defined(PETSC_HAVE_SANITIZER) ! args: -petsc_ci_portable_error_output -error_output_stdout -test 4 ! filter: grep -E "(PETSC ERROR)" | sed s"?KSPDESTROY?kspdestroy_?" ! diff --git a/src/snes/tutorials/ex19.c b/src/snes/tutorials/ex19.c index afc1739e605..ff30bafd7d6 100644 --- a/src/snes/tutorials/ex19.c +++ b/src/snes/tutorials/ex19.c @@ -1226,7 +1226,7 @@ PetscErrorCode NonlinearGS(SNES snes, Vec X, Vec B, void *ctx) test: suffix: failure_size nsize: 1 - requires: !defined(PETSC_USE_64BIT_INDICES) !defined(PETSCTEST_VALGRIND) + requires: !defined(PETSC_USE_64BIT_INDICES) !defined(PETSCTEST_VALGRIND) !defined(PETSC_HAVE_SANITIZER) args: -da_refine 100 -petsc_ci_portable_error_output -error_output_stdout filter: grep -E -v "(memory block|leaked context|not freed before MPI_Finalize|Could be the program crashed)" diff --git a/src/sys/error/checkptr.c b/src/sys/error/checkptr.c index c736d6d4363..ac23a60d856 100644 --- a/src/sys/error/checkptr.c +++ b/src/sys/error/checkptr.c @@ -67,8 +67,10 @@ void PetscSignalSegvCheckPointerOrMpi(void) Level: developer - Note: - This is a non-standard PETSc function in that it returns the result and does not return an error code + Notes: + This is a non-standard PETSc function in that it returns the result and does not return an error code. + + This function always returns true when running under Valgrind, or when compiled with asan options. 
.seealso: `PetscCheckPointerSetIntensity()` @*/ @@ -77,6 +79,7 @@ PetscBool PetscCheckPointer(const void *ptr, PetscDataType dtype) if (PETSC_RUNNING_ON_VALGRIND) return PETSC_TRUE; if (!ptr) return PETSC_FALSE; if (petsc_checkpointer_intensity < 1) return PETSC_TRUE; + if (PetscDefined(HAVE_SANITIZER)) return PETSC_TRUE; #if PetscDefined(USE_DEBUG) && !PetscDefined(HAVE_THREADSAFETY) /* Skip the verbose check if we are inside a hot function. */ diff --git a/src/sys/tests/ex1.c b/src/sys/tests/ex1.c index a8d3dd3288a..cdc5984c304 100644 --- a/src/sys/tests/ex1.c +++ b/src/sys/tests/ex1.c @@ -25,7 +25,7 @@ int main(int argc, char **argv) # Testing errors so only look for errors test: - requires: !defined(PETSCTEST_VALGRIND) + requires: !defined(PETSCTEST_VALGRIND) !defined(PETSC_HAVE_SANITIZER) args: -petsc_ci_portable_error_output -error_output_stdout nsize: {{1 2 3}} filter: grep -E "(PETSC ERROR)" | egrep "(Error Created|CreateError\(\)|main\(\))" diff --git a/src/sys/tests/ex66.c b/src/sys/tests/ex66.c index 16a6dfb55be..9c61386a8cf 100644 --- a/src/sys/tests/ex66.c +++ b/src/sys/tests/ex66.c @@ -30,7 +30,7 @@ int main(int argc, char **argv) /*TEST test: - requires: !defined(PETSCTEST_VALGRIND) + requires: !defined(PETSCTEST_VALGRIND) !defined(PETSC_HAVE_SANITIZER) args: -petsc_ci_portable_error_output -error_output_stdout filter: grep -E -v "(memory block|leaked context|not freed before MPI_Finalize|Could be the program crashed|PETSc Option Table entries|source: environment)" diff --git a/src/sys/tests/ex77.c b/src/sys/tests/ex77.c index 6531d11e538..e13cf47c4dc 100644 --- a/src/sys/tests/ex77.c +++ b/src/sys/tests/ex77.c @@ -18,7 +18,7 @@ int main(int argc, char **args) /*TEST test: - requires: defined(PETSC_USE_DEBUG) !defined(PETSCTEST_VALGRIND) + requires: defined(PETSC_USE_DEBUG) !defined(PETSCTEST_VALGRIND) !defined(PETSC_HAVE_SANITIZER) args: -petsc_ci_portable_error_output -error_output_stdout nsize: 2 filter: grep -E "(PETSC ERROR)" | egrep "(Error 
Created|CreateError\(\)|main\(\))" diff --git a/src/ts/tests/ex26.c b/src/ts/tests/ex26.c index f891547728d..5195167e69b 100644 --- a/src/ts/tests/ex26.c +++ b/src/ts/tests/ex26.c @@ -93,7 +93,7 @@ PetscErrorCode IJacobian(TS ts, PetscReal t, Vec x, Vec xdot, PetscReal shift, M test: suffix: arkimex_explicit_stage - requires: !defined(PETSCTEST_VALGRIND) defined(PETSC_USE_DEBUG) + requires: !defined(PETSCTEST_VALGRIND) defined(PETSC_USE_DEBUG) !defined(PETSC_HAVE_SANITIZER) args: -ts_type arkimex -petsc_ci_portable_error_output -error_output_stdout -set_implicit filter: grep -E -v "(memory block|leaked context|not freed before MPI_Finalize|Could be the program crashed)" From b7de911b5697711da9ce58b0d8e3cc5532320f7f Mon Sep 17 00:00:00 2001 From: Stefano Zampini Date: Sat, 5 Oct 2024 22:16:47 +0300 Subject: [PATCH 25/59] PetscGlobalMinMaxInt: fix asan warning runtime error: negation of -2147483648 cannot be represented in type 'PetscInt' (aka 'int'); cast to an unsigned type to negate this value to itself --- share/petsc/suppressions/ubsan | 1 - src/sys/utils/mpiu.c | 7 ++++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/share/petsc/suppressions/ubsan b/share/petsc/suppressions/ubsan index a11cc71bc14..ecd22b9d4c2 100644 --- a/share/petsc/suppressions/ubsan +++ b/share/petsc/suppressions/ubsan @@ -42,7 +42,6 @@ shift-base:MCJPMinColor_Private pointer-overflow:DMDAGetArray -signed-integer-overflow:PetscGlobalMinMaxInt signed-integer-overflow:*HMapIJKLRemote* alignment:DMNetworkGetIndex diff --git a/src/sys/utils/mpiu.c b/src/sys/utils/mpiu.c index efe4e166bad..82e6dc5198e 100644 --- a/src/sys/utils/mpiu.c +++ b/src/sys/utils/mpiu.c @@ -148,12 +148,13 @@ PetscErrorCode PetscSequentialPhaseEnd(MPI_Comm comm, int ng) @*/ PetscErrorCode PetscGlobalMinMaxInt(MPI_Comm comm, const PetscInt minMaxVal[2], PetscInt minMaxValGlobal[2]) { - PetscInt sendbuf[3], recvbuf[3]; + PetscInt sendbuf[3], recvbuf[3]; + PetscBool hasminint = (PetscBool)(minMaxVal[0] == 
PETSC_MIN_INT); PetscFunctionBegin; - sendbuf[0] = -minMaxVal[0]; /* Note that -PETSC_INT_MIN = PETSC_INT_MIN */ + sendbuf[0] = hasminint ? PETSC_MIN_INT : -minMaxVal[0]; /* Note that -PETSC_INT_MIN = PETSC_INT_MIN: ternary to suppress sanitizer warnings */ sendbuf[1] = minMaxVal[1]; - sendbuf[2] = (minMaxVal[0] == PETSC_INT_MIN) ? 1 : 0; /* Are there PETSC_INT_MIN in minMaxVal[0]? */ + sendbuf[2] = hasminint ? 1 : 0; /* Are there PETSC_INT_MIN in minMaxVal[0]? */ PetscCallMPI(MPIU_Allreduce(sendbuf, recvbuf, 3, MPIU_INT, MPI_MAX, comm)); minMaxValGlobal[0] = recvbuf[2] ? PETSC_INT_MIN : -recvbuf[0]; minMaxValGlobal[1] = recvbuf[1]; From d9dd0fdc23c100de1e8d76f38a53292936f51a28 Mon Sep 17 00:00:00 2001 From: Stefano Zampini Date: Sat, 5 Oct 2024 22:15:36 +0300 Subject: [PATCH 26/59] DMPlexInterpolatePointSF: fix asan warnings runtime error: signed integer overflow: 2147483647 + 2147483647 cannot be represented in type 'int' --- share/petsc/suppressions/ubsan | 2 -- src/dm/impls/plex/plexinterpolate.c | 4 ++-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/share/petsc/suppressions/ubsan b/share/petsc/suppressions/ubsan index ecd22b9d4c2..71a95947eef 100644 --- a/share/petsc/suppressions/ubsan +++ b/share/petsc/suppressions/ubsan @@ -42,8 +42,6 @@ shift-base:MCJPMinColor_Private pointer-overflow:DMDAGetArray -signed-integer-overflow:*HMapIJKLRemote* - alignment:DMNetworkGetIndex alignment:DMNetworkGetNumComponents alignment:DMNetworkGetLocalVecOffset diff --git a/src/dm/impls/plex/plexinterpolate.c b/src/dm/impls/plex/plexinterpolate.c index 8ea3199ce2f..496b27f3111 100644 --- a/src/dm/impls/plex/plexinterpolate.c +++ b/src/dm/impls/plex/plexinterpolate.c @@ -1309,7 +1309,7 @@ PetscErrorCode DMPlexInterpolatePointSF(DM dm, PetscSF pointSF) const PetscSFNode rface = candidatesRemote[hidx + 1]; const PetscSFNode *fcone = &candidatesRemote[hidx + 2]; PetscSFNode fcp0; - const PetscSFNode pmax = {PETSC_INT_MAX, PETSC_MPI_INT_MAX}; + const PetscSFNode pmax 
= {-1, -1}; const PetscInt *join = NULL; PetscHMapIJKLRemoteKey key; PetscHashIter iter; @@ -1379,7 +1379,7 @@ PetscErrorCode DMPlexInterpolatePointSF(DM dm, PetscSF pointSF) const PetscInt Np = candidatesRemote[hidx].index + 1; const PetscSFNode *fcone = &candidatesRemote[hidx + 2]; PetscSFNode fcp0; - const PetscSFNode pmax = {PETSC_INT_MAX, PETSC_MPI_INT_MAX}; + const PetscSFNode pmax = {-1, -1}; PetscHMapIJKLRemoteKey key; PetscHashIter iter; PetscBool missing; From a680e639d71d642419255e23f49c4610963f3538 Mon Sep 17 00:00:00 2001 From: Stefano Zampini Date: Sat, 5 Oct 2024 22:27:59 +0300 Subject: [PATCH 27/59] DMDAVecGetArray: fix asan warning src/dm/impls/da/dalocal.c:401:35: runtime error: subtraction of unsigned offset from 0x61d00006a0d0 overflowed to 0x61d00006a0e0 --- share/petsc/suppressions/ubsan | 2 -- src/dm/impls/da/dalocal.c | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/share/petsc/suppressions/ubsan b/share/petsc/suppressions/ubsan index 71a95947eef..4ffbca83bff 100644 --- a/share/petsc/suppressions/ubsan +++ b/share/petsc/suppressions/ubsan @@ -40,8 +40,6 @@ function:PFApply shift-base:MCJPMinColor_Private -pointer-overflow:DMDAGetArray - alignment:DMNetworkGetIndex alignment:DMNetworkGetNumComponents alignment:DMNetworkGetLocalVecOffset diff --git a/src/dm/impls/da/dalocal.c b/src/dm/impls/da/dalocal.c index 011c3a9adf5..f0f5a6afd2d 100644 --- a/src/dm/impls/da/dalocal.c +++ b/src/dm/impls/da/dalocal.c @@ -398,7 +398,7 @@ PetscErrorCode DMDAGetArray(DM da, PetscBool ghosted, void *vptr) PetscCall(PetscMalloc(xm * sizeof(PetscScalar), &iarray_start)); - ptr = (void *)(iarray_start - xs * sizeof(PetscScalar)); + ptr = (void *)((PetscScalar *)iarray_start - xs); *iptr = (void *)ptr; break; } From db68c2149926a04787c623231ab8716159e04ec6 Mon Sep 17 00:00:00 2001 From: Stefano Zampini Date: Sat, 5 Oct 2024 22:42:54 +0300 Subject: [PATCH 28/59] SPARSEPACKgenrcm: fix asan warnings # > 
/home/szampini/Devel/petsc/src/mat/graphops/order/genrcm.c:50:3: runtime error: applying non-zero offset 18446744073709551612 to null pointer # > SUMMARY: UndefinedBehaviorSanitizer: undefined-behavior /home/szampini/Devel/petsc/src/mat/graphops/order/genrcm.c:50:3 in # > /home/szampini/Devel/petsc/src/mat/graphops/order/genrcm.c:66:5: runtime error: applying non-zero offset to non-null pointer 0xfffffffffffffffc produced null pointer # > SUMMARY: UndefinedBehaviorSanitizer: undefined-behavior /home/szampini/Devel/petsc/src/mat/graphops/order/genrcm.c:66:5 in # > /home/szampini/Devel/petsc/src/mat/graphops/order/fnroot.c:46:3: runtime error: applying non-zero offset 18446744073709551612 to null pointer # > SUMMARY: UndefinedBehaviorSanitizer: undefined-behavior /home/szampini/Devel/petsc/src/mat/graphops/order/fnroot.c:46:3 in # > /home/szampini/Devel/petsc/src/mat/graphops/order/fnroot.c:49:3: runtime error: applying non-zero offset to non-null pointer 0xfffffffffffffffc produced null pointer # > SUMMARY: UndefinedBehaviorSanitizer: undefined-behavior /home/szampini/Devel/petsc/src/mat/graphops/order/fnroot.c:49:3 in # > /home/szampini/Devel/petsc/src/mat/graphops/order/rootls.c:39:3: runtime error: applying non-zero offset 18446744073709551612 to null pointer # > SUMMARY: UndefinedBehaviorSanitizer: undefined-behavior /home/szampini/Devel/petsc/src/mat/graphops/order/rootls.c:39:3 in # > /home/szampini/Devel/petsc/src/mat/graphops/order/genrcm.c:67:5: runtime error: applying non-zero offset to non-null pointer 0xfffffffffffffffc produced null pointer # > SUMMARY: UndefinedBehaviorSanitizer: undefined-behavior /home/szampini/Devel/petsc/src/mat/graphops/order/genrcm.c:67:5 in # > /home/szampini/Devel/petsc/src/mat/graphops/order/rcm.c:58:3: runtime error: applying non-zero offset 18446744073709551612 to null pointer # > SUMMARY: UndefinedBehaviorSanitizer: undefined-behavior /home/szampini/Devel/petsc/src/mat/graphops/order/rcm.c:58:3 in # > 
/home/szampini/Devel/petsc/src/mat/graphops/order/rcm.c:61:3: runtime error: applying non-zero offset to non-null pointer 0xfffffffffffffffc produced null pointer # > SUMMARY: UndefinedBehaviorSanitizer: undefined-behavior /home/szampini/Devel/petsc/src/mat/graphops/order/rcm.c:61:3 in # > /home/szampini/Devel/petsc/src/mat/graphops/order/degree.c:43:3: runtime error: applying non-zero offset 18446744073709551612 to null pointer # > SUMMARY: UndefinedBehaviorSanitizer: undefined-behavior /home/szampini/Devel/petsc/src/mat/graphops/order/degree.c:43:3 in --- share/petsc/suppressions/ubsan | 5 ----- src/mat/graphops/order/genrcm.c | 8 ++++++++ 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/share/petsc/suppressions/ubsan b/share/petsc/suppressions/ubsan index 4ffbca83bff..517d94103b1 100644 --- a/share/petsc/suppressions/ubsan +++ b/share/petsc/suppressions/ubsan @@ -62,11 +62,6 @@ function:PCTFS_giop function:PCTFS_giop_hc function:PCTFS_grop_hc function:do_matvec -pointer-overflow:SPARSEPACKgenrcm -pointer-overflow:SPARSEPACKfnroot -pointer-overflow:SPARSEPACKrootls -pointer-overflow:SPARSEPACKrcm -pointer-overflow:SPARSEPACKdegree # files for MPICH and Open MPI function:src/mpi/* diff --git a/src/mat/graphops/order/genrcm.c b/src/mat/graphops/order/genrcm.c index 712f6353b1b..cde4ed492c3 100644 --- a/src/mat/graphops/order/genrcm.c +++ b/src/mat/graphops/order/genrcm.c @@ -43,6 +43,14 @@ PetscErrorCode SPARSEPACKgenrcm(const PetscInt *neqns, const PetscInt *xadj, con PetscInt num; PetscFunctionBegin; + if (!*neqns) PetscFunctionReturn(PETSC_SUCCESS); + if (*neqns == 1) { + perm[0] = 1; + mask[0] = 1; + xls[0] = 1; + PetscFunctionReturn(PETSC_SUCCESS); + } + /* Parameter adjustments */ --xls; --mask; From 887451b318e06dde835ccb78746a4a1895b5db21 Mon Sep 17 00:00:00 2001 From: Stefano Zampini Date: Sun, 6 Oct 2024 00:26:38 +0300 Subject: [PATCH 29/59] PFString: fix asan warnings --- share/petsc/suppressions/ubsan | 1 - src/vec/pf/impls/string/cstring.c 
| 2 +- src/vec/pf/impls/string/cstringbase.template | 9 +++------ 3 files changed, 4 insertions(+), 8 deletions(-) diff --git a/share/petsc/suppressions/ubsan b/share/petsc/suppressions/ubsan index 517d94103b1..5274d70a0af 100644 --- a/share/petsc/suppressions/ubsan +++ b/share/petsc/suppressions/ubsan @@ -36,7 +36,6 @@ function:TSAdjointMonitorCancel function:TaoMonitorCancel function:PetscFVIntegrateRHSFunction_Upwind function:PetscFVIntegrateRHSFunction_LeastSquares -function:PFApply shift-base:MCJPMinColor_Private diff --git a/src/vec/pf/impls/string/cstring.c b/src/vec/pf/impls/string/cstring.c index 12411e8a0d0..5ca6f1f70d2 100644 --- a/src/vec/pf/impls/string/cstring.c +++ b/src/vec/pf/impls/string/cstring.c @@ -79,7 +79,7 @@ PetscErrorCode PFStringSetFunction(PF pf, const char string[]) PetscCall(PetscObjectGetComm((PetscObject)pf, &comm)); } PetscCall(PetscOptionsGetBool(((PetscObject)pf)->options, ((PetscObject)pf)->prefix, "-pf_string_keep_files", &keeptmpfiles, NULL)); - PetscCall(PetscSNPrintf(task, PETSC_STATIC_ARRAY_LENGTH(task), "cd %s ; if [ ! -d ${USERNAME} ]; then mkdir ${USERNAME}; fi ; cd ${USERNAME} ; rm -f makefile petscdlib.* ; cp -f ${PETSC_DIR}/src/vec/pf/impls/string/makefile ./makefile ; ${PETSC_MAKE} NIN=%" PetscInt_FMT " NOUT=%" PetscInt_FMT " -f makefile libpetscdlib STRINGFUNCTION=\"%s\" %s ; sync\n", tmp, pf->dimin, pf->dimout, string, keeptmpfiles ? "; rm -f makefile petscdlib.c" : "")); + PetscCall(PetscSNPrintf(task, PETSC_STATIC_ARRAY_LENGTH(task), "cd %s ; if [ ! -d ${USERNAME} ]; then mkdir ${USERNAME}; fi ; cd ${USERNAME} ; rm -f makefile petscdlib.* ; cp -f ${PETSC_DIR}/src/vec/pf/impls/string/makefile ./makefile ; ${PETSC_MAKE} NIN=%" PetscInt_FMT " NOUT=%" PetscInt_FMT " -f makefile libpetscdlib STRINGFUNCTION=\"%s\" %s ; sync\n", tmp, pf->dimin, pf->dimout, string, !keeptmpfiles ? 
"; rm -f makefile petscdlib.c" : "")); PetscCall(PetscPOpen(comm, NULL, task, "r", &fd)); PetscCall(PetscPClose(comm, fd)); diff --git a/src/vec/pf/impls/string/cstringbase.template b/src/vec/pf/impls/string/cstringbase.template index 6197cd35378..223653030f6 100644 --- a/src/vec/pf/impls/string/cstringbase.template +++ b/src/vec/pf/impls/string/cstringbase.template @@ -1,10 +1,8 @@ -#define PETSCVEC_DLL #include -#define NOTLIKELY -1.234567890123 +#define NOTLIKELY PETSC_INFINITY -EXTERN_C_BEGIN -int PETSC_VISIBILITY_PUBLIC PFApply_String(void *value,PetscInt n,const PetscScalar *in,PetscScalar *out) +PETSC_EXTERN PetscErrorCode PFApply_String(void *value,PetscInt n,const PetscScalar *in,PetscScalar *out) { PetscInt i; PetscScalar x,y,z,f = NOTLIKELY,x1 = 0,x2 = 0,x3 = 0,x4 = 0,x5 = 0; @@ -57,6 +55,5 @@ int PETSC_VISIBILITY_PUBLIC PFApply_String(void *value,PetscInt n,const PetscSca out[_NOUT_*i+4] = f5; #endif } - return(0); + return(PETSC_SUCCESS); } -EXTERN_C_END From 9578c1cc5da6bf6ab08d91dc6659c159c237eb9b Mon Sep 17 00:00:00 2001 From: Stefano Zampini Date: Sun, 6 Oct 2024 11:16:04 +0300 Subject: [PATCH 30/59] SNES ex58: fix asan warnings --- src/snes/tutorials/ex58.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/snes/tutorials/ex58.c b/src/snes/tutorials/ex58.c index 64752095821..1f30f5ef6c9 100644 --- a/src/snes/tutorials/ex58.c +++ b/src/snes/tutorials/ex58.c @@ -45,8 +45,8 @@ typedef struct { /* -------- User-defined Routines --------- */ -extern PetscErrorCode FormBoundaryConditions(SNES, AppCtx **); -extern PetscErrorCode DestroyBoundaryConditions(AppCtx **); +extern PetscErrorCode FormBoundaryConditions(SNES, void **); +extern PetscErrorCode DestroyBoundaryConditions(void **); extern PetscErrorCode ComputeInitialGuess(SNES, Vec, void *); extern PetscErrorCode FormGradient(SNES, Vec, Vec, void *); extern PetscErrorCode FormJacobian(SNES, Vec, Mat, Mat, void *); @@ -82,7 +82,7 @@ int main(int argc, char **argv) 
PetscCall(SNESSetFunction(snes, r, FormGradient, NULL)); PetscCall(SNESSetJacobian(snes, J, J, FormJacobian, NULL)); - PetscCall(SNESSetComputeApplicationContext(snes, (PetscErrorCode (*)(SNES, void **))FormBoundaryConditions, (PetscErrorCode (*)(void **))DestroyBoundaryConditions)); + PetscCall(SNESSetComputeApplicationContext(snes, FormBoundaryConditions, DestroyBoundaryConditions)); PetscCall(SNESSetComputeInitialGuess(snes, ComputeInitialGuess, NULL)); @@ -457,7 +457,7 @@ PetscErrorCode FormJacobian(SNES snes, Vec X, Mat H, Mat tHPre, void *ptr) Output Parameter: . user - user-defined application context */ -PetscErrorCode FormBoundaryConditions(SNES snes, AppCtx **ouser) +PetscErrorCode FormBoundaryConditions(SNES snes, void **inctx) { PetscInt i, j, k, limit = 0, maxits = 5; PetscInt mx, my; @@ -468,7 +468,7 @@ PetscErrorCode FormBoundaryConditions(SNES snes, AppCtx **ouser) PetscScalar u1, u2, nf1, nf2, njac11, njac12, njac21, njac22; PetscScalar b = -0.5, t = 0.5, l = -0.5, r = 0.5; PetscScalar *boundary; - AppCtx *user; + AppCtx *user, **ouser = (AppCtx **)inctx; DM da; PetscFunctionBeginUser; @@ -543,9 +543,9 @@ PetscErrorCode FormBoundaryConditions(SNES snes, AppCtx **ouser) PetscFunctionReturn(PETSC_SUCCESS); } -PetscErrorCode DestroyBoundaryConditions(AppCtx **ouser) +PetscErrorCode DestroyBoundaryConditions(void **ouser) { - AppCtx *user = *ouser; + AppCtx *user = (AppCtx *)*ouser; PetscFunctionBeginUser; PetscCall(PetscFree(user->bottom)); From ea17275a231699c97e382a1ac143297a4bc2ae59 Mon Sep 17 00:00:00 2001 From: "Jose E. Roman" Date: Sat, 5 Oct 2024 09:39:34 +0200 Subject: [PATCH 31/59] Fix compiler warnings from NVIDIA nvc 24.7 "src/vec/is/section/interface/section.c", line 1268: warning: pointless comparison of unsigned integer with zero [unsigned_compare_with_zero] PetscCall(PetscMalloc1(last >= 0 ? 
s->bc->atlasOff[last] + s->bc->atlasDof[last] : 0, &s->bcIndices)); ^ "src/mat/impls/aij/mpi/mumps/mumps.c", line 2101: warning: loop is not reachable [loop_not_reachable] PetscCheck(!schur, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use -%smat_mumps_use_omp_threads with the Schur complement feature", ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : ""); ^ "src/dm/impls/swarm/swarm.c", line 217: warning: variable "flg" was set but never used [set_but_not_used] PetscBool flg; ^ "src/ts/utils/dmplexlandau/plexland.c", line 1848: warning: loop is not reachable [loop_not_reachable] PetscCall(PetscFree4(ww, xx, yy, invJ_a)); ^ --- src/dm/impls/swarm/swarm.c | 1 + src/mat/impls/aij/mpi/mumps/mumps.c | 5 +++-- src/ts/utils/dmplexlandau/plexland.c | 6 +++--- src/vec/is/section/interface/section.c | 3 ++- 4 files changed, 9 insertions(+), 6 deletions(-) diff --git a/src/dm/impls/swarm/swarm.c b/src/dm/impls/swarm/swarm.c index edfd2dbebc7..942b59611de 100644 --- a/src/dm/impls/swarm/swarm.c +++ b/src/dm/impls/swarm/swarm.c @@ -220,6 +220,7 @@ static PetscErrorCode DMSwarmDestroyVectorFromField_Private(DM dm, const char fi /* check vector is an inplace array */ PetscCall(DMSwarmDataBucketGetDMSwarmDataFieldIdByName(swarm->db, fieldname, &fid)); PetscCall(PetscObjectComposedDataGetInt((PetscObject)*vec, SwarmDataFieldId, cfid, flg)); + (void)flg; /* avoid compiler warning */ PetscCheck(cfid == fid, PetscObjectComm((PetscObject)dm), PETSC_ERR_USER, "Vector being destroyed was not created from DMSwarm field(%s)! 
%" PetscInt_FMT " != %" PetscInt_FMT, fieldname, cfid, fid); PetscCall(VecGetLocalSize(*vec, &nlocal)); PetscCall(VecGetBlockSize(*vec, &bs)); diff --git a/src/mat/impls/aij/mpi/mumps/mumps.c b/src/mat/impls/aij/mpi/mumps/mumps.c index c1ffaa4dfee..27c19482b24 100644 --- a/src/mat/impls/aij/mpi/mumps/mumps.c +++ b/src/mat/impls/aij/mpi/mumps/mumps.c @@ -2096,12 +2096,13 @@ static PetscErrorCode MatSetFromOptions_MUMPS(Mat F, Mat A) /* do not use PetscOptionsInt() so that the option -mat_mumps_use_omp_threads is not displayed twice in the help */ PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)F)->prefix, "-mat_mumps_use_omp_threads", &nthreads, NULL)); if (mumps->use_petsc_omp_support) { - PetscCheck(PetscDefined(HAVE_OPENMP_SUPPORT), PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "The system does not have PETSc OpenMP support but you added the -%smat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual", - ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : ""); PetscCheck(!schur, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use -%smat_mumps_use_omp_threads with the Schur complement feature", ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : ""); #if defined(PETSC_HAVE_OPENMP_SUPPORT) PetscCall(PetscOmpCtrlCreate(mumps->petsc_comm, nthreads, &mumps->omp_ctrl)); PetscCall(PetscOmpCtrlGetOmpComms(mumps->omp_ctrl, &mumps->omp_comm, &mumps->mumps_comm, &mumps->is_omp_master)); +#else + SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "The system does not have PETSc OpenMP support but you added the -%smat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual", + ((PetscObject)F)->prefix ? 
((PetscObject)F)->prefix : ""); #endif } else { mumps->omp_comm = PETSC_COMM_SELF; diff --git a/src/ts/utils/dmplexlandau/plexland.c b/src/ts/utils/dmplexlandau/plexland.c index 2e61e08f716..52229227527 100644 --- a/src/ts/utils/dmplexlandau/plexland.c +++ b/src/ts/utils/dmplexlandau/plexland.c @@ -1841,12 +1841,12 @@ static PetscErrorCode CreateStaticData(PetscInt dim, IS grid_batch_is_inv[], Lan if (ctx->deviceType == LANDAU_KOKKOS) { #if defined(PETSC_HAVE_KOKKOS) PetscCall(LandauKokkosStaticDataSet(ctx->plex[0], Nq, Nb, ctx->batch_sz, ctx->num_grids, numCells, ctx->species_offset, ctx->mat_offset, nu_alpha, nu_beta, invMass, (PetscReal *)ctx->lambdas, invJ_a, xx, yy, zz, ww, &ctx->SData_d)); -#else - SETERRQ(ctx->comm, PETSC_ERR_ARG_WRONG, "-landau_device_type kokkos not built"); -#endif /* free */ PetscCall(PetscFree4(ww, xx, yy, invJ_a)); if (dim == 3) PetscCall(PetscFree(zz)); +#else + SETERRQ(ctx->comm, PETSC_ERR_ARG_WRONG, "-landau_device_type kokkos not built"); +#endif } else { /* CPU version, just copy in, only use part */ PetscReal *nu_alpha_p = (PetscReal *)ctx->SData_d.alpha, *nu_beta_p = (PetscReal *)ctx->SData_d.beta, *invMass_p = (PetscReal *)ctx->SData_d.invMass, *lambdas_p = NULL; // why set these ? ctx->SData_d.w = (void *)ww; diff --git a/src/vec/is/section/interface/section.c b/src/vec/is/section/interface/section.c index 18ecb230740..4dd833d5fe1 100644 --- a/src/vec/is/section/interface/section.c +++ b/src/vec/is/section/interface/section.c @@ -1265,7 +1265,8 @@ PetscErrorCode PetscSectionSetUpBC(PetscSection s) const PetscInt last = (s->bc->pEnd - s->bc->pStart) - 1; PetscCall(PetscSectionSetUp(s->bc)); - PetscCall(PetscMalloc1(last >= 0 ? 
s->bc->atlasOff[last] + s->bc->atlasDof[last] : 0, &s->bcIndices)); + if (last >= 0) PetscCall(PetscMalloc1(s->bc->atlasOff[last] + s->bc->atlasDof[last], &s->bcIndices)); + else s->bcIndices = NULL; } PetscFunctionReturn(PETSC_SUCCESS); } From 5f7675d981db2054d9c40dec7852a4ff93513c0e Mon Sep 17 00:00:00 2001 From: "Jose E. Roman" Date: Sat, 5 Oct 2024 09:43:48 +0200 Subject: [PATCH 32/59] Fix integer conversion warning with MKL_PARDISO with 64-bit ints src/mat/impls/aij/seq/mkl_pardiso/mkl_pardiso.c:45:16: warning: implicit conversion loses integer precision: 'long long' to 'int' [-Wshorten-64-to-32] 45 | mtype_copy = *mtype; | ~ ^~~~~~ --- src/mat/impls/aij/seq/mkl_pardiso/mkl_pardiso.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/mat/impls/aij/seq/mkl_pardiso/mkl_pardiso.c b/src/mat/impls/aij/seq/mkl_pardiso/mkl_pardiso.c index 7c86a6781f4..e45ca073e48 100644 --- a/src/mat/impls/aij/seq/mkl_pardiso/mkl_pardiso.c +++ b/src/mat/impls/aij/seq/mkl_pardiso/mkl_pardiso.c @@ -40,11 +40,11 @@ PETSC_EXTERN void PetscSetMKL_PARDISOThreads(int); #define MKL_PARDISO_INIT pardiso_64init void pardiso_64init(void *pt, INT_TYPE *mtype, INT_TYPE iparm[]) { - int iparm_copy[IPARM_SIZE], mtype_copy, i; + PetscBLASInt iparm_copy[IPARM_SIZE], mtype_copy; - mtype_copy = *mtype; + PetscCallVoid(PetscBLASIntCast(*mtype, &mtype_copy)); pardisoinit(pt, &mtype_copy, iparm_copy); - for (i = 0; i < IPARM_SIZE; i++) iparm[i] = iparm_copy[i]; + for (PetscInt i = 0; i < IPARM_SIZE; i++) iparm[i] = iparm_copy[i]; } #endif #else From 07c83e99bc22b531fe50602e7d003b6c4ab90fa3 Mon Sep 17 00:00:00 2001 From: "Jose E. 
Roman" Date: Sun, 6 Oct 2024 11:55:01 +0200 Subject: [PATCH 33/59] Fix more int conversion warnings in complex builds --- src/ksp/ksp/interface/eige.c | 4 ++-- src/mat/impls/dense/seq/dense.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/ksp/ksp/interface/eige.c b/src/ksp/ksp/interface/eige.c index dc4813baaeb..7eaa4b15e5a 100644 --- a/src/ksp/ksp/interface/eige.c +++ b/src/ksp/ksp/interface/eige.c @@ -192,8 +192,8 @@ PetscErrorCode KSPComputeEigenvaluesExplicitly(KSP ksp, PetscInt nmax, PetscReal PetscBLASInt idummy, lwork; PetscInt *perm; - idummy = n; - lwork = 5 * n; + PetscCall(PetscBLASIntCast(n, &idummy)); + PetscCall(PetscBLASIntCast(5 * n, &lwork)); PetscCall(PetscMalloc1(5 * n, &work)); PetscCall(PetscMalloc1(2 * n, &rwork)); PetscCall(PetscMalloc1(n, &eigs)); diff --git a/src/mat/impls/dense/seq/dense.c b/src/mat/impls/dense/seq/dense.c index b529b81747c..8309336c771 100644 --- a/src/mat/impls/dense/seq/dense.c +++ b/src/mat/impls/dense/seq/dense.c @@ -861,7 +861,7 @@ PetscErrorCode MatCholeskyFactor_SeqDense(Mat A, IS perm, const MatFactorInfo *f PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); PetscCallBLAS("LAPACKhetrf", LAPACKhetrf_("L", &n, mat->v, &mat->lda, mat->pivots, &dummy, &mat->lfwork, &info)); PetscCall(PetscFPTrapPop()); - mat->lfwork = (PetscInt)PetscRealPart(dummy); + PetscCall(PetscBLASIntCast((PetscCount)(PetscRealPart(dummy)), &mat->lfwork)); PetscCall(PetscMalloc1(mat->lfwork, &mat->fwork)); } PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); From 11a05d332b9598f5416223f8553ed3c1cae94217 Mon Sep 17 00:00:00 2001 From: Stefano Zampini Date: Sat, 5 Oct 2024 22:43:53 +0300 Subject: [PATCH 34/59] Reorganize update ASAN suppression file remove unneeded suppressions --- share/petsc/suppressions/ubsan | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/share/petsc/suppressions/ubsan b/share/petsc/suppressions/ubsan index 5274d70a0af..b3c7de4adf0 100644 --- 
a/share/petsc/suppressions/ubsan +++ b/share/petsc/suppressions/ubsan @@ -1,27 +1,31 @@ +# PetscObject methods function:PetscObjectDereference function:PetscObjectDestroy -function:PetscContainerDestroy -function:DMAdaptMonitor -function:DMAdaptMonitorCancel -function:DMTSView -function:DMDestroy function:PetscObjectView -function:PetscMonitorCompare + +# FD (using typed object instead of void *) function:MatMult_MFFD function:MatGetDiagonal_MFFD function:MatFDColoringApply_AIJ function:MatFDColoringApply_BAIJ -function:KSPMonitorCancel + +# Monitor callbacks using typed contexts +function:DMAdaptMonitor function:KSPMonitor function:SNESMonitor -function:SNESMonitorCancel -function:SNESSetUp -function:SNESReset +function:TSMonitor +function:TSAdjointMonitor + +# DMDA array access with void* function:SNESComputeFunction_DMDA function:SNESComputeJacobian_DMDA function:SNESComputeObjective_DMDA function:SNESComputePicard_DMDA function:SNESComputePicardJacobian_DMDA +function:TSComputeIFunction_DMDA +function:TSComputeIJacobian_DMDA + +# examples using typed contexts function:TSComputeIFunction function:TSComputeRHSFunction function:TSComputeSolutionFunction @@ -36,9 +40,16 @@ function:TSAdjointMonitorCancel function:TaoMonitorCancel function:PetscFVIntegrateRHSFunction_Upwind function:PetscFVIntegrateRHSFunction_LeastSquares +function:DMTSView + +# viewer contexts destroys +function:TaoMonitorCancel +function:PetscMonitorCompare +# misc shift-base:MCJPMinColor_Private +# DMNetwork misalignments alignment:DMNetworkGetIndex alignment:DMNetworkGetNumComponents alignment:DMNetworkGetLocalVecOffset From 6b8451b42eddd297d543d75fe6ce8620927ef80d Mon Sep 17 00:00:00 2001 From: Stefano Zampini Date: Tue, 8 Oct 2024 16:24:11 +0300 Subject: [PATCH 35/59] DMPlexVTKWriteAll_VTU: do not assume the DM has fields attached --- src/dm/impls/plex/plexvtu.c | 60 +++++++++++++++++++++---------------- 1 file changed, 34 insertions(+), 26 deletions(-) diff --git 
a/src/dm/impls/plex/plexvtu.c b/src/dm/impls/plex/plexvtu.c index fb3e6cb94c0..37f0192d79c 100644 --- a/src/dm/impls/plex/plexvtu.c +++ b/src/dm/impls/plex/plexvtu.c @@ -143,6 +143,23 @@ PETSC_INTERN PetscErrorCode DMPlexGetNonEmptyComm_Private(DM dm, MPI_Comm *comm) PetscFunctionReturn(PETSC_SUCCESS); } +static PetscErrorCode DMGetFieldIfFV_Private(DM dm, PetscInt field, PetscFV *fv) +{ + PetscObject f = NULL; + PetscClassId fClass = PETSC_SMALLEST_CLASSID; + PetscInt nf; + + PetscFunctionBegin; + *fv = NULL; + PetscCall(DMGetNumFields(dm, &nf)); + if (nf > 0) { + PetscCall(DMGetField(dm, field, NULL, &f)); + PetscCall(PetscObjectGetClassId(f, &fClass)); + if (fClass == PETSCFV_CLASSID) *fv = (PetscFV)f; + } + PetscFunctionReturn(PETSC_SUCCESS); +} + /* Write all fields that have been provided to the viewer Multi-block XML format with binary appended data. @@ -275,20 +292,17 @@ PetscErrorCode DMPlexVTKWriteAll_VTU(DM dm, PetscViewer viewer) nfields = field + 1; } for (i = 0; field < (nfields ? 
nfields : 1); field++) { - PetscInt fbs, j; - PetscFV fv = NULL; - PetscObject f; - PetscClassId fClass; - const char *fieldname = NULL; - char buf[256]; - PetscBool vector; + PetscInt fbs, j; + PetscFV fv = NULL; + const char *fieldname = NULL; + char buf[256]; + PetscBool vector; + if (nfields) { /* We have user-defined fields/components */ PetscCall(PetscSectionGetFieldDof(section, cStart, field, &fbs)); PetscCall(PetscSectionGetFieldName(section, field, &fieldname)); } else fbs = bs; /* Say we have one field with 'bs' components */ - PetscCall(DMGetField(dmX, field, NULL, &f)); - PetscCall(PetscObjectGetClassId(f, &fClass)); - if (fClass == PETSCFV_CLASSID) fv = (PetscFV)f; + PetscCall(DMGetFieldIfFV_Private(dmX, field, &fv)); if (nfields && !fieldname) { PetscCall(PetscSNPrintf(buf, sizeof(buf), "CellField%" PetscInt_FMT, field)); fieldname = buf; @@ -551,17 +565,14 @@ PetscErrorCode DMPlexVTKWriteAll_VTU(DM dm, PetscViewer viewer) PetscCall(VecGetArrayRead(X, &x)); PetscCall(PetscMalloc1(piece.ncells * 3, &y)); for (; field < (nfields ? nfields : 1); field++) { - PetscInt fbs, j; - PetscFV fv = NULL; - PetscObject f; - PetscClassId fClass; - PetscBool vector; + PetscInt fbs, j; + PetscFV fv = NULL; + PetscBool vector; + if (nfields && cEnd > cStart) { /* We have user-defined fields/components */ PetscCall(PetscSectionGetFieldDof(section, cStart, field, &fbs)); } else fbs = bs; /* Say we have one field with 'bs' components */ - PetscCall(DMGetField(dmX, field, NULL, &f)); - PetscCall(PetscObjectGetClassId(f, &fClass)); - if (fClass == PETSCFV_CLASSID) fv = (PetscFV)f; + PetscCall(DMGetFieldIfFV_Private(dmX, field, &fv)); vector = PETSC_FALSE; if (link->ft == PETSC_VTK_CELL_VECTOR_FIELD) { vector = PETSC_TRUE; @@ -779,17 +790,14 @@ PetscErrorCode DMPlexVTKWriteAll_VTU(DM dm, PetscViewer viewer) nfields = field + 1; } for (; field < (nfields ? 
nfields : 1); field++) { - PetscInt fbs, j; - PetscFV fv = NULL; - PetscObject f; - PetscClassId fClass; - PetscBool vector; + PetscInt fbs, j; + PetscFV fv = NULL; + PetscBool vector; + if (nfields && cEnd > cStart) { /* We have user-defined fields/components */ PetscCall(PetscSectionGetFieldDof(section, cStart, field, &fbs)); } else fbs = bs; /* Say we have one field with 'bs' components */ - PetscCall(DMGetField(dmX, field, NULL, &f)); - PetscCall(PetscObjectGetClassId(f, &fClass)); - if (fClass == PETSCFV_CLASSID) fv = (PetscFV)f; + PetscCall(DMGetFieldIfFV_Private(dmX, field, &fv)); vector = PETSC_FALSE; if (link->ft == PETSC_VTK_CELL_VECTOR_FIELD) { vector = PETSC_TRUE; From de2016142308fb5ef549aca9096b5ad78329032b Mon Sep 17 00:00:00 2001 From: "Matthew G. Knepley" Date: Tue, 8 Oct 2024 18:32:00 -0400 Subject: [PATCH 36/59] IS: Change default for I/O compression to PETSC_FALSE --- src/vec/is/is/impls/general/general.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/vec/is/is/impls/general/general.c b/src/vec/is/is/impls/general/general.c index 03fba1872e8..2ad1022de08 100644 --- a/src/vec/is/is/impls/general/general.c +++ b/src/vec/is/is/impls/general/general.c @@ -221,7 +221,7 @@ static PetscErrorCode ISFindRun_Private(const PetscInt indices[], PetscInt len, static PetscErrorCode ISGeneralCheckCompress(IS is, PetscBool *compress) { const PetscInt minRun = 8; - PetscBool lcompress = PETSC_TRUE, test = PETSC_TRUE; + PetscBool lcompress = PETSC_TRUE, test = PETSC_FALSE; const PetscInt *idx; PetscInt n, off = 0; From f5c5fea7bafc091eaee9c368492c6543d24ddb33 Mon Sep 17 00:00:00 2001 From: Stefano Zampini Date: Sat, 5 Oct 2024 15:43:40 +0300 Subject: [PATCH 37/59] CI: enable compilation of examples in some stage-1 jobs --- .gitlab-ci.yml | 6 +++++- src/dm/dt/tests/ex3.c | 4 +--- src/ksp/ksp/tutorials/ex42.c | 12 ++++++------ src/mat/tests/ex18.c | 2 +- src/mat/tests/ex65.c | 2 -- src/snes/tutorials/ex27.c | 6 +++--- 
src/sys/objects/device/tests/ex11.cxx | 16 +++++++++++++++- src/sys/tests/ex64.cxx | 4 ++-- src/sys/tests/output/ex52_small.out | 1 - src/ts/tutorials/ex11.c | 2 +- src/vec/is/ao/tests/ex1.c | 8 ++++---- .../tests/ex52.c => vec/vec/tests/ex32.c} | 3 ++- .../vec/tests/output/ex32.out} | 0 src/vec/vec/tests/output/ex32f_1.out | 19 ------------------- 14 files changed, 40 insertions(+), 45 deletions(-) delete mode 100644 src/sys/tests/output/ex52_small.out rename src/{sys/tests/ex52.c => vec/vec/tests/ex32.c} (98%) rename src/{sys/tests/output/ex52_large.out => vec/vec/tests/output/ex32.out} (100%) delete mode 100644 src/vec/vec/tests/output/ex32f_1.out diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 380713372cb..80e8c1824e7 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -84,6 +84,7 @@ check-ci-settings: - make CFLAGS="${MAKE_CFLAGS}" CXXFLAGS="${MAKE_CXXFLAGS}" FFLAGS="${MAKE_FFLAGS}" CUDAFLAGS="${MAKE_CUDAFLAGS}" - if [ ! -z ${ENABLE_CHECK+x} ]; then make CFLAGS="${MAKE_CFLAGS}" CXXFLAGS="${MAKE_CXXFLAGS}" FFLAGS="${MAKE_FFLAGS}" CUDAFLAGS="${MAKE_CUDAFLAGS}" check; fi - make CFLAGS="${MAKE_TEST_CFLAGS}" CXXFLAGS="${MAKE_TEST_CXXFLAGS}" FFLAGS="${MAKE_TEST_FFLAGS}" CUDAFLAGS="${MAKE_CUDAFLAGS}" allgtests-tap gmakesearch="${TEST_SEARCH}" TIMEOUT=${TIMEOUT} + - if [ ! 
-z ${ENABLE_COMPILETESTS+x} ]; then make -j 16 CFLAGS="${MAKE_TEST_CFLAGS}" CXXFLAGS="${MAKE_TEST_CXXFLAGS}" FFLAGS="${MAKE_TEST_FFLAGS}" test PRINTONLY=1; fi variables: PYTHON: python3 MAKE_CFLAGS: -Werror -Wmissing-field-initializers @@ -142,7 +143,7 @@ uni-complex-float-int64: variables: CONFIG_OPTS: --with-mpi=0 --with-scalar-type=complex --with-precision=single --with-64-bit-indices TEST_SEARCH: ts_tutorials-ex11_adv_2d_quad_% - LOAD_MODULES: gcc/12.1.0 mpich/4.1-gcc-12.1.0 + LOAD_MODULES: gcc/12.1.0 c99-mlib-static-py: extends: .stage-1 @@ -159,7 +160,10 @@ clang-uni: CONFIG_OPTS: --with-cc=clang --with-cxx=clang++ --with-mpi=0 --with-strict-petscerrorcode MAKE_CFLAGS: -Werror -Wmissing-field-initializers -Wundef -Wextra-semi-stmt MAKE_CXXFLAGS: -Werror -Wzero-as-null-pointer-constant -Wundef -Wextra-semi-stmt -Wextra-semi + MAKE_TEST_CFLAGS: -Werror -Wmissing-field-initializers -Wundef -Wextra-semi-stmt + MAKE_TEST_CXXFLAGS: -Werror -Wzero-as-null-pointer-constant -Wundef -Wextra-semi-stmt -Wextra-semi TEST_SEARCH: snes_tutorials-ex48% + ENABLE_COMPILETESTS: 1 gcc-lto: extends: .stage-1 diff --git a/src/dm/dt/tests/ex3.c b/src/dm/dt/tests/ex3.c index 05896660e00..23bccc974cc 100644 --- a/src/dm/dt/tests/ex3.c +++ b/src/dm/dt/tests/ex3.c @@ -99,10 +99,8 @@ static void func14(const PetscReal a[], void *dummy, PetscReal *val) int main(int argc, char **argv) { -#if PETSC_SCALAR_SIZE == 32 +#if defined(PETSC_USE_REAL_SINGLE) PetscInt digits = 7; -#elif PETSC_SCALAR_SIZE == 64 - PetscInt digits = 14; #else PetscInt digits = 14; #endif diff --git a/src/ksp/ksp/tutorials/ex42.c b/src/ksp/ksp/tutorials/ex42.c index 70aca64eb57..7a8e7c1f092 100644 --- a/src/ksp/ksp/tutorials/ex42.c +++ b/src/ksp/ksp/tutorials/ex42.c @@ -873,25 +873,25 @@ static void FormContinuityRhsQ13D(PetscScalar Fe[], PetscScalar coords[], PetscS } #define _ZERO_ROWCOL_i(A, i) \ - { \ + do { \ PetscInt KK; \ PetscScalar tmp = A[24 * (i) + (i)]; \ for (KK = 0; KK < 24; KK++) A[24 * (i) + KK] = 0.0; 
\ for (KK = 0; KK < 24; KK++) A[24 * KK + (i)] = 0.0; \ A[24 * (i) + (i)] = tmp; \ - } + } while (0) #define _ZERO_ROW_i(A, i) \ - { \ + do { \ PetscInt KK; \ for (KK = 0; KK < 8; KK++) A[8 * (i) + KK] = 0.0; \ - } + } while (0) #define _ZERO_COL_i(A, i) \ - { \ + do { \ PetscInt KK; \ for (KK = 0; KK < 8; KK++) A[24 * KK + (i)] = 0.0; \ - } + } while (0) static PetscErrorCode AssembleA_Stokes(Mat A, DM stokes_da, CellProperties cell_properties) { diff --git a/src/mat/tests/ex18.c b/src/mat/tests/ex18.c index 4f64190eeab..af434d2bac3 100644 --- a/src/mat/tests/ex18.c +++ b/src/mat/tests/ex18.c @@ -74,7 +74,7 @@ int main(int argc, char **args) /* fall back to 1st order upwind */ v1 = -1.0 * a; v0 = 1.0 * a; - }; + } if (j > 1) { J = Ii - 1 * bs; PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v1, ADD_VALUES)); diff --git a/src/mat/tests/ex65.c b/src/mat/tests/ex65.c index 8425a07db83..5c60bc1dd06 100644 --- a/src/mat/tests/ex65.c +++ b/src/mat/tests/ex65.c @@ -19,8 +19,6 @@ int main(int argc, char **args) cnt = 0; if (i % 2) { for (j = 0; j < n; j += 2) js[cnt++] = j; - } else { - ; } PetscCall(MatSetValues(A, 1, &i, cnt, js, values, INSERT_VALUES)); } diff --git a/src/snes/tutorials/ex27.c b/src/snes/tutorials/ex27.c index d565b4fa971..8c45af20529 100644 --- a/src/snes/tutorials/ex27.c +++ b/src/snes/tutorials/ex27.c @@ -238,10 +238,10 @@ static void f0_trig_bd_primal(PetscInt dim, PetscInt Nf, PetscInt NfAux, const P static void f0_sensor_bd_primal(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, const PetscReal x[], const PetscReal n[], PetscInt numConstants, const PetscScalar constants[], PetscScalar f0[]) { const PetscReal k = PetscRealPart(constants[0]); - PetscScalar flux; + PetscScalar flux[2]; - 
PetscCallAbort(PETSC_COMM_SELF, sensor_q(dim, t, x, dim, &flux, NULL)); - for (PetscInt d = 0; d < dim; ++d) f0[d] = -k * flux * n[d]; + PetscCallAbort(PETSC_COMM_SELF, sensor_q(dim, t, x, dim, flux, NULL)); + for (PetscInt d = 0; d < dim; ++d) f0[d] = -k * flux[d] * n[d]; } static void g3_primal_uu(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, PetscReal u_tShift, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar g3[]) diff --git a/src/sys/objects/device/tests/ex11.cxx b/src/sys/objects/device/tests/ex11.cxx index f2e06ed71df..99aa4858a0d 100644 --- a/src/sys/objects/device/tests/ex11.cxx +++ b/src/sys/objects/device/tests/ex11.cxx @@ -12,6 +12,9 @@ static const char help[] = "Tests PetscDeviceContextMarkIntentFromID().\n\n"; #include // std::find #include // std::distance, std::next +#include // PETSC_CPP_VERSION + +#if PETSC_CPP_VERSION > 14 struct Marker { PetscMemoryAccessMode mode{}; @@ -88,7 +91,8 @@ PETSC_ATTRIBUTE_FORMAT(10, 11) static PetscErrorCode CheckMarkedObjectMap_Privat } PetscFunctionReturn(PETSC_SUCCESS); } -#define CheckMarkedObjectMap(__cond__, ...) CheckMarkedObjectMap_Private((PetscBool)(!!(__cond__)), PetscStringize(__cond__), PETSC_COMM_SELF, dctx, nkeys, keys, modes, ndeps, const_cast(dependencies), __VA_ARGS__); + + #define CheckMarkedObjectMap(__cond__, ...) 
CheckMarkedObjectMap_Private((PetscBool)(!!(__cond__)), PetscStringize(__cond__), PETSC_COMM_SELF, dctx, nkeys, keys, modes, ndeps, const_cast(dependencies), __VA_ARGS__); static PetscErrorCode TestAllCombinations(PetscDeviceContext dctx, const std::vector &cont) { @@ -413,6 +417,16 @@ int main(int argc, char *argv[]) PetscCall(PetscFinalize()); return 0; } +#else // PETSC_CPP_VERSION > 11 +int main(int argc, char *argv[]) +{ + PetscFunctionBeginUser; + PetscCall(PetscInitialize(&argc, &argv, nullptr, help)); + PetscCall(PetscPrintf(PETSC_COMM_WORLD, "EXIT_SUCCESS\n")); + PetscCall(PetscFinalize()); + return 0; +} +#endif /*TEST diff --git a/src/sys/tests/ex64.cxx b/src/sys/tests/ex64.cxx index a0256dad287..d69642ddc07 100644 --- a/src/sys/tests/ex64.cxx +++ b/src/sys/tests/ex64.cxx @@ -38,7 +38,7 @@ static inline void hash_combine(std::size_t &seed, const T &v, Rest &&...rest) n } using pair_type = std::pair; -MAKE_HASHABLE(pair_type, t.first, t.second); +MAKE_HASHABLE(pair_type, t.first, t.second) using namespace Petsc::util; @@ -71,7 +71,7 @@ struct Foo { } }; -MAKE_HASHABLE(Foo, t.x, t.y); +MAKE_HASHABLE(Foo, t.x, t.y) struct Bar { std::vector x{}; diff --git a/src/sys/tests/output/ex52_small.out b/src/sys/tests/output/ex52_small.out deleted file mode 100644 index b023c33d659..00000000000 --- a/src/sys/tests/output/ex52_small.out +++ /dev/null @@ -1 +0,0 @@ -SUCCEEDED diff --git a/src/ts/tutorials/ex11.c b/src/ts/tutorials/ex11.c index 27e9760e04b..8768df15fad 100644 --- a/src/ts/tutorials/ex11.c +++ b/src/ts/tutorials/ex11.c @@ -1331,7 +1331,7 @@ int main(int argc, char **argv) for (i = 0; i < DIM; i++) { mod->bounds[2 * i] = 0.; mod->bounds[2 * i + 1] = 1.; - }; + } dim = DIM; { /* a null name means just do a hex box */ PetscInt cells[3] = {1, 1, 1}, n = 3; diff --git a/src/vec/is/ao/tests/ex1.c b/src/vec/is/ao/tests/ex1.c index de40426f6a1..78a120aa092 100644 --- a/src/vec/is/ao/tests/ex1.c +++ b/src/vec/is/ao/tests/ex1.c @@ -48,7 +48,7 @@ int main(int argc, 
char **argv) PetscCall(AOPetscToApplication(ao, 4, getapp1)); PetscCall(AOApplicationToPetsc(ao, 3, getpetsc1)); - /* Check accuracy */; + /* Check accuracy */ for (i = 0; i < 4; i++) PetscCheck(getapp1[i] == getapp[i], PETSC_COMM_SELF, PETSC_ERR_USER, "getapp1 %" PetscInt_FMT " != getapp %" PetscInt_FMT, getapp1[i], getapp[i]); for (i = 0; i < 3; i++) PetscCheck(getpetsc1[i] == getpetsc[i], PETSC_COMM_SELF, PETSC_ERR_USER, "getpetsc1 %" PetscInt_FMT " != getpetsc %" PetscInt_FMT, getpetsc1[i], getpetsc[i]); @@ -64,7 +64,7 @@ int main(int argc, char **argv) PetscCall(AOPetscToApplication(ao, 4, getapp2)); PetscCall(AOApplicationToPetsc(ao, 3, getpetsc2)); - /* Check accuracy */; + /* Check accuracy */ for (i = 0; i < 4; i++) PetscCheck(getapp2[i] == getapp[i], PETSC_COMM_SELF, PETSC_ERR_USER, "getapp2 %" PetscInt_FMT " != getapp %" PetscInt_FMT, getapp2[i], getapp[i]); for (i = 0; i < 3; i++) PetscCheck(getpetsc2[i] == getpetsc[i], PETSC_COMM_SELF, PETSC_ERR_USER, "getpetsc2 %" PetscInt_FMT " != getpetsc %" PetscInt_FMT, getpetsc2[i], getpetsc[i]); PetscCall(AODestroy(&ao)); @@ -77,7 +77,7 @@ int main(int argc, char **argv) PetscCall(AOPetscToApplication(ao, 4, getapp4)); PetscCall(AOApplicationToPetsc(ao, 3, getpetsc4)); - /* Check accuracy */; + /* Check accuracy */ for (i = 0; i < 4; i++) PetscCheck(getapp4[i] == getapp[i], PETSC_COMM_SELF, PETSC_ERR_USER, "getapp4 %" PetscInt_FMT " != getapp %" PetscInt_FMT, getapp4[i], getapp[i]); for (i = 0; i < 3; i++) PetscCheck(getpetsc4[i] == getpetsc[i], PETSC_COMM_SELF, PETSC_ERR_USER, "getpetsc4 %" PetscInt_FMT " != getpetsc %" PetscInt_FMT, getpetsc4[i], getpetsc[i]); PetscCall(AODestroy(&ao)); @@ -101,7 +101,7 @@ int main(int argc, char **argv) PetscCall(PetscSynchronizedPrintf(PETSC_COMM_WORLD, "[%d] 0,3,4 ApplicationToPetsc %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT "\n", rank, getpetsc3[0], getpetsc3[1], getpetsc3[2])); PetscCall(PetscSynchronizedFlush(PETSC_COMM_WORLD, PETSC_STDOUT)); - /* Check accuracy 
*/; + /* Check accuracy */ for (i = 0; i < 4; i++) PetscCheck(getapp3[i] == getapp[i], PETSC_COMM_SELF, PETSC_ERR_USER, "getapp3 %" PetscInt_FMT " != getapp %" PetscInt_FMT, getapp3[i], getapp[i]); for (i = 0; i < 3; i++) PetscCheck(getpetsc3[i] == getpetsc[i], PETSC_COMM_SELF, PETSC_ERR_USER, "getpetsc3 %" PetscInt_FMT " != getpetsc %" PetscInt_FMT, getpetsc3[i], getpetsc[i]); diff --git a/src/sys/tests/ex52.c b/src/vec/vec/tests/ex32.c similarity index 98% rename from src/sys/tests/ex52.c rename to src/vec/vec/tests/ex32.c index 498bebceb8e..9dc73a08ac4 100644 --- a/src/sys/tests/ex52.c +++ b/src/vec/vec/tests/ex32.c @@ -1,7 +1,7 @@ static char help[] = "A benchmark for testing PetscSortInt(), PetscSortIntSemiOrdered(), PetscSortIntWithArrayPair(), PetscIntSortSemiOrderedWithArray(), and PetscSortIntWithArray()\n\ The array is filled with random numbers, but one can control average duplicates for each unique integer with the -d option.\n\ Usage:\n\ - mpirun -n 1 ./ex52 -n , default=100 \n\ + mpirun -n 1 ./ex32 -n , default=100 \n\ -r , default=10 \n\ -d , default=1, i.e., no duplicates \n\n"; @@ -162,6 +162,7 @@ int main(int argc, char **argv) testset: filter: grep -vE "per unique value took|Speedup of " + output_file: output/ex32.out test: suffix: small diff --git a/src/sys/tests/output/ex52_large.out b/src/vec/vec/tests/output/ex32.out similarity index 100% rename from src/sys/tests/output/ex52_large.out rename to src/vec/vec/tests/output/ex32.out diff --git a/src/vec/vec/tests/output/ex32f_1.out b/src/vec/vec/tests/output/ex32f_1.out deleted file mode 100644 index b0786782371..00000000000 --- a/src/vec/vec/tests/output/ex32f_1.out +++ /dev/null @@ -1,19 +0,0 @@ -Vec Object: 1 MPI process - type: mpi -Process [0] -1. -0. -0. -0. -0. -0. -0. -0. 
- 1 1.0000000000000000 - 2 0.0000000000000000 - 3 0.0000000000000000 - 4 0.0000000000000000 - 5 0.0000000000000000 - 6 0.0000000000000000 - 7 0.0000000000000000 - 8 0.0000000000000000 From 5b2c77db7596be0e602c801c2aa948432bca0f91 Mon Sep 17 00:00:00 2001 From: Lisandro Dalcin Date: Wed, 9 Oct 2024 19:59:42 +0300 Subject: [PATCH 38/59] petsc4py: Exception chaining across Python/C boundaries --- .../petsc4py/src/petsc4py/PETSc/PETSc.pyx | 67 +++++++++++++------ 1 file changed, 46 insertions(+), 21 deletions(-) diff --git a/src/binding/petsc4py/src/petsc4py/PETSc/PETSc.pyx b/src/binding/petsc4py/src/petsc4py/PETSc/PETSc.pyx index 0ca0007c308..690b5c39852 100644 --- a/src/binding/petsc4py/src/petsc4py/PETSc/PETSc.pyx +++ b/src/binding/petsc4py/src/petsc4py/PETSc/PETSc.pyx @@ -53,29 +53,57 @@ cdef inline object S_(const char p[]): # -------------------------------------------------------------------- -# Vile hack for raising an exception and not contaminating traceback +# SETERR Support +# -------------- cdef extern from *: + """ +#if PY_VERSION_HEX < 0X30C0000 +static PyObject *PyErr_GetRaisedException() +{ + PyObject *t, *v, *tb; + PyErr_Fetch(&t, &v, &tb); + PyErr_NormalizeException(&t, &v, &tb); + if (tb != NULL) PyException_SetTraceback(v, tb); + Py_XDECREF(t); + Py_XDECREF(tb); + return v; +} +static void PyErr_SetRaisedException(PyObject *v) +{ + PyObject *t = (PyObject *)Py_TYPE(v); + PyObject *tb = PyException_GetTraceback(v); + Py_XINCREF(t); + Py_XINCREF(tb); + PyErr_Restore(t, v, tb); +} +#endif + """ void PyErr_SetObject(object, object) - void *PyExc_RuntimeError - -# SETERR Support -# -------------- + PyObject *PyExc_RuntimeError + PyObject *PyErr_GetRaisedException() + void PyErr_SetRaisedException(PyObject*) + void PyException_SetCause(PyObject*, PyObject*) cdef object PetscError = PyExc_RuntimeError -cdef inline int SETERR(PetscErrorCode ierr) except -1 nogil: - if (PetscError) != NULL: - with gil: PyErr_SetObject(PetscError, ierr) - else: - with gil: 
PyErr_SetObject(PyExc_RuntimeError, ierr) +cdef inline int SETERR(PetscErrorCode ierr) noexcept nogil: + cdef PyObject *exception = NULL, *cause = NULL + with gil: + cause = PyErr_GetRaisedException() + if (PetscError) != NULL: + PyErr_SetObject(PetscError, ierr) + else: + PyErr_SetObject(PyExc_RuntimeError, ierr) + if cause != NULL: + exception = PyErr_GetRaisedException() + PyException_SetCause(exception, cause) + PyErr_SetRaisedException(exception) return 0 cdef inline PetscErrorCode CHKERR(PetscErrorCode ierr) except PETSC_ERR_PYTHON nogil: if ierr == PETSC_SUCCESS: - return ierr # no error - if ierr == PETSC_ERR_PYTHON: - return ierr # error in python code + return PETSC_SUCCESS # no error SETERR(ierr) return PETSC_ERR_PYTHON @@ -83,26 +111,23 @@ cdef inline PetscErrorCode CHKERR(PetscErrorCode ierr) except PETSC_ERR_PYTHON n # ----------------- cdef extern from * nogil: - enum: MPI_SUCCESS = 0 + enum: MPI_SUCCESS enum: MPI_MAX_ERROR_STRING int MPI_Error_string(int, char[], int*) PetscErrorCode PetscSNPrintf(char[], size_t, const char[], ...) 
PetscErrorCode PetscERROR(MPI_Comm, char[], PetscErrorCode, int, char[], char[]) -cdef inline int SETERRMPI(int ierr) except -1 nogil: +cdef inline int SETERRMPI(int ierr) noexcept nogil: cdef char mpi_err_str[MPI_MAX_ERROR_STRING] cdef int result_len = sizeof(mpi_err_str) - - memset(mpi_err_str, 0, result_len) + memset(mpi_err_str, 0, result_len) MPI_Error_string(ierr, mpi_err_str, &result_len) - result_len - + result_len # unused-but-set-variable cdef char error_str[MPI_MAX_ERROR_STRING+64] PetscSNPrintf(error_str, sizeof(error_str), b"MPI Error %s %d", mpi_err_str, ierr) - PetscERROR(PETSC_COMM_SELF, "Unknown Python Function", PETSC_ERR_MPI, PETSC_ERROR_INITIAL, "%s", error_str) SETERR(PETSC_ERR_MPI) - return ierr + return 0 cdef inline PetscErrorCode CHKERRMPI(int ierr) except PETSC_ERR_PYTHON nogil: if ierr == MPI_SUCCESS: From bdcd51b8bdc94e86e7a58beb875baca349bf13e4 Mon Sep 17 00:00:00 2001 From: Pierre Jolivet Date: Sun, 13 Oct 2024 17:17:21 +0200 Subject: [PATCH 39/59] PCHPDDM: stop the setup if there is an error during the factorization Reported-by: Alexander Thanks-to: Jose Roman --- src/ksp/pc/impls/hpddm/pchpddm.cxx | 1 + src/mat/impls/aij/mpi/mumps/mumps.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/ksp/pc/impls/hpddm/pchpddm.cxx b/src/ksp/pc/impls/hpddm/pchpddm.cxx index 659149baf1b..aec3f1fcddc 100644 --- a/src/ksp/pc/impls/hpddm/pchpddm.cxx +++ b/src/ksp/pc/impls/hpddm/pchpddm.cxx @@ -2226,6 +2226,7 @@ static PetscErrorCode PCSetUp_HPDDM(PC pc) if (uaux) PetscCall(MatDestroy(&uaux)); } else PetscCall(MatDestroy(&A0)); PetscCall(MatCreateShell(PETSC_COMM_SELF, P->rmap->n, n[1] - n[0], P->rmap->n, n[1] - n[0], h, &data->aux)); + PetscCall(KSPSetErrorIfNotConverged(h->ksp, PETSC_TRUE)); /* bail out as early as possible to avoid (apparently) unrelated error messages */ PetscCall(MatCreateVecs(h->ksp->pc->pmat, &h->v, nullptr)); PetscCall(MatShellSetOperation(data->aux, MATOP_MULT, (void (*)(void))MatMult_Harmonic)); 
PetscCall(MatShellSetOperation(data->aux, MATOP_MULT_TRANSPOSE, (void (*)(void))MatMultTranspose_Harmonic)); diff --git a/src/mat/impls/aij/mpi/mumps/mumps.c b/src/mat/impls/aij/mpi/mumps/mumps.c index 27c19482b24..ec4a6ba5128 100644 --- a/src/mat/impls/aij/mpi/mumps/mumps.c +++ b/src/mat/impls/aij/mpi/mumps/mumps.c @@ -2023,7 +2023,7 @@ static PetscErrorCode MatFactorNumeric_MUMPS(Mat F, Mat A, PETSC_UNUSED const Ma PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, cannot allocate required memory %d megabytes\n", mumps->id.INFOG(1), mumps->id.INFO(2))); F->factorerrortype = MAT_FACTOR_OUTMEMORY; } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) { - PetscCall(PetscInfo(F, "MUMPS error in numerical factorizatione: INFOG(1)=%d, INFO(2)=%d, problem with work array\n", mumps->id.INFOG(1), mumps->id.INFO(2))); + PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d, problem with work array\n", mumps->id.INFOG(1), mumps->id.INFO(2))); F->factorerrortype = MAT_FACTOR_OUTMEMORY; } else { PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); From 42e65cf20546ef3d3540361cdbee4937af774a4e Mon Sep 17 00:00:00 2001 From: Barry Smith Date: Sat, 12 Oct 2024 14:21:30 -0500 Subject: [PATCH 40/59] Fixing -da_refine for DA Q0 elements and associated example, some minor docs --- src/dm/impls/da/dacreate.c | 8 ++++---- src/ksp/ksp/tutorials/ex32.c | 19 ++++++++++++++++--- src/ksp/ksp/tutorials/output/ex32_1.out | 6 ------ src/ksp/ksp/tutorials/output/ex32_2.out | 6 ++++++ src/ksp/ksp/tutorials/output/ex32_3.out | 5 +++++ src/ksp/ksp/tutorials/output/ex32_4.out | 6 ++++++ 6 files changed, 37 insertions(+), 13 deletions(-) delete mode 100644 src/ksp/ksp/tutorials/output/ex32_1.out create mode 100644 src/ksp/ksp/tutorials/output/ex32_2.out create mode 100644 
src/ksp/ksp/tutorials/output/ex32_3.out create mode 100644 src/ksp/ksp/tutorials/output/ex32_4.out diff --git a/src/dm/impls/da/dacreate.c b/src/dm/impls/da/dacreate.c index f400e698f5a..40391fa271f 100644 --- a/src/dm/impls/da/dacreate.c +++ b/src/dm/impls/da/dacreate.c @@ -91,15 +91,15 @@ static PetscErrorCode DMSetFromOptions_DA(DM da, PetscOptionItems *PetscOptionsO PetscCall(PetscIntMultError(dd->refine_x, dd->M - 1, &dd->M)); dd->M += 1; } - if (dd->by == DM_BOUNDARY_PERIODIC || dd->interptype == DMDA_Q0) { + if (dim > 1 && (dd->by == DM_BOUNDARY_PERIODIC || dd->interptype == DMDA_Q0)) { PetscCall(PetscIntMultError(dd->refine_y, dd->N, &dd->N)); - } else { + } else if (dim > 1) { PetscCall(PetscIntMultError(dd->refine_y, dd->N - 1, &dd->N)); dd->N += 1; } - if (dd->bz == DM_BOUNDARY_PERIODIC || dd->interptype == DMDA_Q0) { + if (dim > 2 && (dd->bz == DM_BOUNDARY_PERIODIC || dd->interptype == DMDA_Q0)) { PetscCall(PetscIntMultError(dd->refine_z, dd->P, &dd->P)); - } else { + } else if (dim > 2) { PetscCall(PetscIntMultError(dd->refine_z, dd->P - 1, &dd->P)); dd->P += 1; } diff --git a/src/ksp/ksp/tutorials/ex32.c b/src/ksp/ksp/tutorials/ex32.c index 0f183831686..98a17663556 100644 --- a/src/ksp/ksp/tutorials/ex32.c +++ b/src/ksp/ksp/tutorials/ex32.c @@ -54,10 +54,10 @@ int main(int argc, char **argv) PetscFunctionBeginUser; PetscCall(PetscInitialize(&argc, &argv, NULL, help)); PetscCall(KSPCreate(PETSC_COMM_WORLD, &ksp)); - PetscCall(DMDACreate2d(PETSC_COMM_WORLD, DM_BOUNDARY_NONE, DM_BOUNDARY_NONE, DMDA_STENCIL_STAR, 12, 12, PETSC_DECIDE, PETSC_DECIDE, 1, 1, 0, 0, &da)); + PetscCall(DMDACreate2d(PETSC_COMM_WORLD, DM_BOUNDARY_NONE, DM_BOUNDARY_NONE, DMDA_STENCIL_STAR, 2, 2, PETSC_DECIDE, PETSC_DECIDE, 1, 1, 0, 0, &da)); + PetscCall(DMDASetInterpolationType(da, DMDA_Q0)); PetscCall(DMSetFromOptions(da)); PetscCall(DMSetUp(da)); - PetscCall(DMDASetInterpolationType(da, DMDA_Q0)); PetscCall(KSPSetDM(ksp, da)); @@ -72,6 +72,7 @@ int main(int argc, char **argv) 
PetscCall(KSPSetComputeRHS(ksp, ComputeRHS, &user)); PetscCall(KSPSetComputeOperators(ksp, ComputeMatrix, &user)); PetscCall(KSPSetFromOptions(ksp)); + PetscCall(KSPSetUp(ksp)); PetscCall(KSPSolve(ksp, NULL, NULL)); PetscCall(KSPDestroy(&ksp)); PetscCall(DMDestroy(&da)); @@ -211,6 +212,18 @@ PetscErrorCode ComputeMatrix(KSP ksp, Mat J, Mat jac, void *ctx) /*TEST test: - args: -pc_type mg -pc_mg_type full -ksp_type fgmres -ksp_monitor_short -pc_mg_levels 3 -mg_coarse_pc_factor_shift_type nonzero + suffix: 2 + requires: !single + args: -pc_type mg -pc_mg_levels 5 -ksp_monitor_true_residual -ksp_rtol 1.e-10 -ksp_type cg -mg_levels_pc_type sor -mg_levels_ksp_type richardson -mg_levels_ksp_max_it 2 -mg_coarse_pc_type svd -da_refine 5 + + test: + suffix: 3 + requires: !single + args: -pc_type mg -pc_mg_levels 2 -ksp_monitor_true_residual -ksp_rtol 1.e-10 -ksp_type cg -mg_levels_pc_type sor -mg_levels_ksp_type richardson -mg_levels_ksp_max_it 2 -mg_coarse_pc_type svd -da_refine 5 + + test: + suffix: 4 + requires: !single + args: -pc_type mg -pc_mg_levels 2 -ksp_monitor_true_residual -ksp_rtol 1.e-10 -ksp_type cg -mg_levels_pc_type sor -mg_levels_ksp_type richardson -mg_levels_ksp_max_it 2 -mg_coarse_pc_type svd -da_refine 4 TEST*/ diff --git a/src/ksp/ksp/tutorials/output/ex32_1.out b/src/ksp/ksp/tutorials/output/ex32_1.out deleted file mode 100644 index 28aad0dcf8c..00000000000 --- a/src/ksp/ksp/tutorials/output/ex32_1.out +++ /dev/null @@ -1,6 +0,0 @@ - 0 KSP Residual norm 0.0151615 - 1 KSP Residual norm 0.00160545 - 2 KSP Residual norm 9.02058e-05 - 3 KSP Residual norm 5.83173e-06 - 4 KSP Residual norm 4.38682e-07 - 5 KSP Residual norm 2.62912e-08 diff --git a/src/ksp/ksp/tutorials/output/ex32_2.out b/src/ksp/ksp/tutorials/output/ex32_2.out new file mode 100644 index 00000000000..3891a372b27 --- /dev/null +++ b/src/ksp/ksp/tutorials/output/ex32_2.out @@ -0,0 +1,6 @@ + 0 KSP preconditioned resid norm 8.707575268782e-01 true resid norm 2.842788112864e-03 ||r(i)||/||b|| 
1.000000000000e+00 + 1 KSP preconditioned resid norm 5.067478574947e-03 true resid norm 8.807970167024e-05 ||r(i)||/||b|| 3.098356197273e-02 + 2 KSP preconditioned resid norm 5.566700402730e-05 true resid norm 2.014981529883e-06 ||r(i)||/||b|| 7.088046839529e-04 + 3 KSP preconditioned resid norm 2.698851702073e-07 true resid norm 3.013479554557e-08 ||r(i)||/||b|| 1.060043673646e-05 + 4 KSP preconditioned resid norm 2.689594854129e-09 true resid norm 2.536151839771e-10 ||r(i)||/||b|| 8.921353752308e-08 + 5 KSP preconditioned resid norm 2.232993540537e-11 true resid norm 2.360948191859e-12 ||r(i)||/||b|| 8.305044548259e-10 diff --git a/src/ksp/ksp/tutorials/output/ex32_3.out b/src/ksp/ksp/tutorials/output/ex32_3.out new file mode 100644 index 00000000000..83a95d6e4d4 --- /dev/null +++ b/src/ksp/ksp/tutorials/output/ex32_3.out @@ -0,0 +1,5 @@ + 0 KSP preconditioned resid norm 8.643260847300e-01 true resid norm 2.842788112864e-03 ||r(i)||/||b|| 1.000000000000e+00 + 1 KSP preconditioned resid norm 2.210826548741e-04 true resid norm 5.270675562941e-05 ||r(i)||/||b|| 1.854051499333e-02 + 2 KSP preconditioned resid norm 2.402868659554e-06 true resid norm 4.129338708325e-07 ||r(i)||/||b|| 1.452566475018e-04 + 3 KSP preconditioned resid norm 3.487514886207e-09 true resid norm 6.765746390082e-10 ||r(i)||/||b|| 2.379968580658e-07 + 4 KSP preconditioned resid norm 2.563322328222e-11 true resid norm 4.648796382632e-12 ||r(i)||/||b|| 1.635294717041e-09 diff --git a/src/ksp/ksp/tutorials/output/ex32_4.out b/src/ksp/ksp/tutorials/output/ex32_4.out new file mode 100644 index 00000000000..0ec4555e153 --- /dev/null +++ b/src/ksp/ksp/tutorials/output/ex32_4.out @@ -0,0 +1,6 @@ + 0 KSP preconditioned resid norm 4.325165543060e-01 true resid norm 5.685576018812e-03 ||r(i)||/||b|| 1.000000000000e+00 + 1 KSP preconditioned resid norm 4.127330138987e-04 true resid norm 1.252467835245e-04 ||r(i)||/||b|| 2.202886446511e-02 + 2 KSP preconditioned resid norm 4.385965563394e-06 true resid norm 
7.248323471860e-07 ||r(i)||/||b|| 1.274861763852e-04 + 3 KSP preconditioned resid norm 9.837913040926e-09 true resid norm 3.688522921235e-09 ||r(i)||/||b|| 6.487509636720e-07 + 4 KSP preconditioned resid norm 1.128201049459e-10 true resid norm 2.475011001680e-11 ||r(i)||/||b|| 4.353140286034e-09 + 5 KSP preconditioned resid norm 3.137718115366e-13 true resid norm 2.053235909174e-13 ||r(i)||/||b|| 3.611306756572e-11 From ffbd2f08570576d12bd1d65cc9cc8322c1a13f28 Mon Sep 17 00:00:00 2001 From: Barry Smith Date: Sat, 12 Oct 2024 14:22:00 -0500 Subject: [PATCH 41/59] Minor manual pages --- src/ksp/pc/impls/mg/mgfunc.c | 6 +++--- src/sys/logging/plog.c | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/ksp/pc/impls/mg/mgfunc.c b/src/ksp/pc/impls/mg/mgfunc.c index e734a4cb90a..9fd853b3e7b 100644 --- a/src/ksp/pc/impls/mg/mgfunc.c +++ b/src/ksp/pc/impls/mg/mgfunc.c @@ -333,7 +333,7 @@ PetscErrorCode PCMGSetRestriction(PC pc, PetscInt l, Mat mat) } /*@ - PCMGGetRestriction - Gets the function to be used to restrict dual vectors + PCMGGetRestriction - Gets the function to be used to restrict dual (i.e. residual) vectors from level l to l-1. Logically Collective @@ -452,7 +452,7 @@ PetscErrorCode PCMGGetRScale(PC pc, PetscInt l, Vec *rscale) } /*@ - PCMGSetInjection - Sets the function to be used to inject primal vectors + PCMGSetInjection - Sets the function to be used to inject primal (i.e. solution) vectors from level l to l-1. Logically Collective @@ -484,7 +484,7 @@ PetscErrorCode PCMGSetInjection(PC pc, PetscInt l, Mat mat) } /*@ - PCMGGetInjection - Gets the function to be used to inject primal vectors + PCMGGetInjection - Gets the function to be used to inject primal vectors (i.e. solutions) from level l to l-1. 
Logically Collective diff --git a/src/sys/logging/plog.c b/src/sys/logging/plog.c index 68d397ba2ac..53c56db9cc7 100644 --- a/src/sys/logging/plog.c +++ b/src/sys/logging/plog.c @@ -1068,14 +1068,14 @@ PetscErrorCode PetscLogEventRegister(const char name[], PetscClassId classid, Pe Input Parameters: + event - The event id -- collective - Boolean flag indicating whether a particular event is collective +- collective - `PetscBool` indicating whether a particular event is collective Level: developer Notes: New events returned from `PetscLogEventRegister()` are collective by default. - Collective events are handled specially if the command line option -log_sync is used. In that case the logging saves information about + Collective events are handled specially if the command line option `-log_sync` is used. In that case the logging saves information about two parts of the event; the time for all the MPI ranks to synchronize and then the time for the actual computation/communication to be performed. This option is useful to debug imbalance within the computations or communications. 
From 3e5b5ee7d680a929eb164e6e52ea45e97ded171f Mon Sep 17 00:00:00 2001 From: Pierre Jolivet Date: Tue, 15 Oct 2024 08:22:01 +0200 Subject: [PATCH 42/59] configure: mpi4py 4.0.1 --- config/BuildSystem/config/packages/mpi4py.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/BuildSystem/config/packages/mpi4py.py b/config/BuildSystem/config/packages/mpi4py.py index cce4cd6c0d8..b90ff276ea4 100644 --- a/config/BuildSystem/config/packages/mpi4py.py +++ b/config/BuildSystem/config/packages/mpi4py.py @@ -5,7 +5,7 @@ class Configure(config.package.Package): def __init__(self, framework): config.package.Package.__init__(self, framework) - self.download = ['https://github.com/mpi4py/mpi4py/releases/download/4.0.0/mpi4py-4.0.0.tar.gz'] + self.download = ['https://github.com/mpi4py/mpi4py/releases/download/4.0.1/mpi4py-4.0.1.tar.gz'] self.functions = [] self.includes = [] self.useddirectly = 0 From 7aa118f732033ae94ce14f27b5b9ad3fe640197c Mon Sep 17 00:00:00 2001 From: Pierre Jolivet Date: Tue, 15 Oct 2024 08:22:18 +0200 Subject: [PATCH 43/59] KSPPREONLY: fix -ksp_monitor_true_residual --- src/ksp/ksp/impls/preonly/preonly.c | 21 ++++++++++----------- src/ksp/ksp/tutorials/output/ex84_1.out | 6 +++--- 2 files changed, 13 insertions(+), 14 deletions(-) diff --git a/src/ksp/ksp/impls/preonly/preonly.c b/src/ksp/ksp/impls/preonly/preonly.c index a16f2bbac51..7280cd47cec 100644 --- a/src/ksp/ksp/impls/preonly/preonly.c +++ b/src/ksp/ksp/impls/preonly/preonly.c @@ -8,20 +8,23 @@ static PetscErrorCode KSPSetUp_PREONLY(KSP ksp) static PetscErrorCode KSPSolve_PREONLY(KSP ksp) { - PetscBool diagonalscale; + PetscReal norm; + PetscBool flg; PCFailedReason pcreason; PetscFunctionBegin; - PetscCall(PCGetDiagonalScale(ksp->pc, &diagonalscale)); - PetscCheck(!diagonalscale, PetscObjectComm((PetscObject)ksp), PETSC_ERR_SUP, "Krylov method %s does not support diagonal scaling", ((PetscObject)ksp)->type_name); + PetscCall(PCGetDiagonalScale(ksp->pc, &flg)); + 
PetscCheck(!flg, PetscObjectComm((PetscObject)ksp), PETSC_ERR_SUP, "Krylov method %s does not support diagonal scaling", ((PetscObject)ksp)->type_name); if (!ksp->guess_zero) { - PetscBool flg; PetscCall(PetscObjectTypeCompareAny((PetscObject)ksp->pc, &flg, PCREDISTRIBUTE, PCMPI, "")); PetscCheck(flg, PetscObjectComm((PetscObject)ksp), PETSC_ERR_USER, "KSP of type preonly (application of preconditioner only) doesn't make sense with nonzero initial guess you probably want a KSP of type Richardson"); } ksp->its = 0; + if (ksp->numbermonitors) { + PetscCall(VecNorm(ksp->vec_rhs, NORM_2, &norm)); + PetscCall(KSPMonitor(ksp, 0, norm)); + } PetscCall(KSP_PCApply(ksp, ksp->vec_rhs, ksp->vec_sol)); - PetscCall(PCReduceFailedReason(ksp->pc)); PetscCall(PCGetFailedReason(ksp->pc, &pcreason)); PetscCall(VecFlag(ksp->vec_sol, pcreason)); @@ -32,14 +35,10 @@ static PetscErrorCode KSPSolve_PREONLY(KSP ksp) ksp->its = 1; ksp->reason = KSP_CONVERGED_ITS; } - if (ksp->numbermonitors) { - Vec v; - PetscReal norm; - Mat A; + Vec v; + Mat A; - PetscCall(VecNorm(ksp->vec_rhs, NORM_2, &norm)); - PetscCall(KSPMonitor(ksp, 0, norm)); PetscCall(VecDuplicate(ksp->vec_rhs, &v)); PetscCall(PCGetOperators(ksp->pc, &A, NULL)); PetscCall(KSP_MatMult(ksp, A, ksp->vec_sol, v)); diff --git a/src/ksp/ksp/tutorials/output/ex84_1.out b/src/ksp/ksp/tutorials/output/ex84_1.out index f255902cfea..7c492462d9a 100644 --- a/src/ksp/ksp/tutorials/output/ex84_1.out +++ b/src/ksp/ksp/tutorials/output/ex84_1.out @@ -34,11 +34,11 @@ Mat Object: 2 MPI processes row 3: (3, 1.) (4, 0.) row 4: (4, 6.) (5, -1.) row 5: (4, -0.5) (5, 7.) + 0 KSP Residual norm 2.449489742783e+00 Residual norms for redistribute_ solve. 
0 KSP Residual norm 4.747000485090e-01 1 KSP Residual norm 2.445422843562e-02 2 KSP Residual norm 2.021518719930e-17 - 0 KSP Residual norm 2.449489742783e+00 1 KSP Residual norm 3.330669073875e-16 KSP Object: 2 MPI processes type: preonly @@ -168,9 +168,9 @@ PC Object: 2 MPI processes total: nonzeros=12, allocated nonzeros=12 total number of mallocs used during MatSetValues calls=0 not using I-node (on process 0) routines + 0 KSP Residual norm 2.449489742783e+00 Residual norms for redistribute_ solve. 0 KSP Residual norm 8.479468414379e-17 - 0 KSP Residual norm 2.449489742783e+00 1 KSP Residual norm 3.330669073875e-16 KSP Object: 2 MPI processes type: preonly @@ -300,9 +300,9 @@ PC Object: 2 MPI processes total: nonzeros=12, allocated nonzeros=12 total number of mallocs used during MatSetValues calls=0 not using I-node (on process 0) routines + 0 KSP Residual norm 2.449489742783e+00 Residual norms for redistribute_ solve. 0 KSP Residual norm 8.529808416651e-02 1 KSP Residual norm 1.682101381914e-03 2 KSP Residual norm 2.122130848081e-17 - 0 KSP Residual norm 2.449489742783e+00 1 KSP Residual norm 2.482534153247e-16 From b50c806f64fc3e40a985079f05cbb15ab20e2e4e Mon Sep 17 00:00:00 2001 From: Barry Smith Date: Tue, 15 Oct 2024 17:36:31 -0400 Subject: [PATCH 44/59] Fix handling of resetting SNES parameters in corner case If someone sets a SNES parameter to zero before setting the SNESType their value would be overwritten by SNESParametersInitialize. To prevent this call SNESParametersInitialize also in SNESCreate(). 
Reported-by: Alex Lindsay: --- src/snes/interface/snes.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/snes/interface/snes.c b/src/snes/interface/snes.c index 73cafb4c6aa..0dd71689b1b 100644 --- a/src/snes/interface/snes.c +++ b/src/snes/interface/snes.c @@ -1928,6 +1928,7 @@ PetscErrorCode SNESCreate(MPI_Comm comm, SNES *outsnes) kctx->v4_m3 = 0.1; kctx->v4_m4 = 0.5; + PetscCall(SNESParametersInitialize(snes)); *outsnes = snes; PetscFunctionReturn(PETSC_SUCCESS); } From 0058e31da5457bf1c5638b1334e0281da385ec13 Mon Sep 17 00:00:00 2001 From: Barry Smith Date: Tue, 15 Oct 2024 17:36:40 -0400 Subject: [PATCH 45/59] Improve manual page formatting --- src/snes/impls/ls/ls.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/snes/impls/ls/ls.c b/src/snes/impls/ls/ls.c index 588ebd8b308..c193b697599 100644 --- a/src/snes/impls/ls/ls.c +++ b/src/snes/impls/ls/ls.c @@ -352,14 +352,14 @@ static PetscErrorCode SNESSetFromOptions_NEWTONLS(SNES snes, PetscOptionItems *P SNESNEWTONLS - Newton based nonlinear solver that uses a line search Options Database Keys: -+ -snes_linesearch_type - bt,basic. Select line search type -. -snes_linesearch_order <3> - 2, 3. Selects the order of the line search for bt -. -snes_linesearch_norms - Turns on/off computation of the norms for basic linesearch (`SNESLineSearchSetComputeNorms()`) -. -snes_linesearch_alpha - Sets alpha used in determining if reduction in function norm is sufficient -. -snes_linesearch_maxstep - Sets the maximum stepsize the line search will use (if the 2-norm(y) > maxstep then scale y to be y = (maxstep/2-norm(y)) *y) ++ -snes_linesearch_type - basic (or equivalently none), bt, l2, cp, nleqerr, shell. Select line search type, see `SNESLineSearchSetType()` +. -snes_linesearch_order <3> - 2, 3. Selects the order of the line search for bt, see `SNESLineSearchSetOrder()` +. 
-snes_linesearch_norms - Turns on/off computation of the norms for basic linesearch (`SNESLineSearchSetComputeNorms()`) +. -snes_linesearch_alpha - Sets alpha used in determining if reduction in function norm is sufficient +. -snes_linesearch_maxstep - Sets the maximum stepsize the line search will use (if the 2-norm(y) > maxstep then scale y to be y = (maxstep/2-norm(y)) *y) . -snes_linesearch_minlambda - Sets the minimum lambda the line search will tolerate -. -snes_linesearch_monitor - print information about progress of line searches -- -snes_linesearch_damping - damping factor used for basic line search +. -snes_linesearch_monitor - print information about the progress of line searches +- -snes_linesearch_damping - damping factor used for basic line search Level: beginner @@ -367,7 +367,7 @@ static PetscErrorCode SNESSetFromOptions_NEWTONLS(SNES snes, PetscOptionItems *P This is the default nonlinear solver in `SNES` .seealso: [](ch_snes), `SNESCreate()`, `SNES`, `SNESSetType()`, `SNESNEWTONTR`, `SNESQN`, `SNESLineSearchSetType()`, `SNESLineSearchSetOrder()` - `SNESLineSearchSetPostCheck()`, `SNESLineSearchSetPreCheck()` `SNESLineSearchSetComputeNorms()`, `SNESGetLineSearch()` + `SNESLineSearchSetPostCheck()`, `SNESLineSearchSetPreCheck()` `SNESLineSearchSetComputeNorms()`, `SNESGetLineSearch()`, `SNESLineSearchSetType()` M*/ PETSC_EXTERN PetscErrorCode SNESCreate_NEWTONLS(SNES snes) { From 1f2d607bfb32d48f34baff2a9b0d789e764af593 Mon Sep 17 00:00:00 2001 From: Barry Smith Date: Tue, 15 Oct 2024 21:33:48 -0400 Subject: [PATCH 46/59] Update PETSc developers list and news about Hong's retirement --- doc/community/petsc_team.rst | 40 ++++++++++++++++++------------------ doc/index.rst | 5 +++++ 2 files changed, 25 insertions(+), 20 deletions(-) diff --git a/doc/community/petsc_team.rst b/doc/community/petsc_team.rst index 860ef42b15f..9eb3fea6f1d 100644 --- a/doc/community/petsc_team.rst +++ b/doc/community/petsc_team.rst @@ -50,14 +50,6 @@ The full list of 
contributors can be found `on GitLab `, :any:`requests ` and :any:`contributions ` are welcome. +.. admonition:: News: Mrs Hong Zhang, who has been a PETSc developer for twenty-five years and mentored many students and future PETSc developers, has retired. + + .. image:: /images/community/HongZhangDinner.jpg + :align: center + .. admonition:: News: PETSc is associated with `NumFOCUS `__, a 501(c)(3) nonprofit supporting open code and reproducible science, through which you can help support PETSc. From 2d56c195196ca5cc59e93282610af6ff95b9946d Mon Sep 17 00:00:00 2001 From: Lisandro Dalcin Date: Thu, 17 Oct 2024 17:02:32 +0300 Subject: [PATCH 47/59] petsc4py: Fix Mat.zeroRowsColumnsStencil() --- src/binding/petsc4py/src/petsc4py/PETSc/Mat.pyx | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/binding/petsc4py/src/petsc4py/PETSc/Mat.pyx b/src/binding/petsc4py/src/petsc4py/PETSc/Mat.pyx index 201eb4fb46a..8fb1bcef54a 100644 --- a/src/binding/petsc4py/src/petsc4py/PETSc/Mat.pyx +++ b/src/binding/petsc4py/src/petsc4py/PETSc/Mat.pyx @@ -3242,12 +3242,10 @@ cdef class Mat(Object): """ cdef PetscScalar sval = asScalar(diag) cdef PetscInt nrows = asInt(len(rows)) - cdef MatStencil r = 0 cdef PetscMatStencil *crows = NULL CHKERR(PetscMalloc((nrows+1)*sizeof(PetscMatStencil), &crows)) for i in range(nrows): - r = rows[i] - crows[i] = r.stencil + crows[i] = (rows[i]).stencil cdef PetscVec xvec = NULL, bvec = NULL if x is not None: xvec = x.vec if b is not None: bvec = b.vec From 32fc4d459f43070c84a59467dd4c327a7a0cd12e Mon Sep 17 00:00:00 2001 From: Barry Smith Date: Thu, 17 Oct 2024 15:54:37 -0400 Subject: [PATCH 48/59] Add documents from PETSc 2024 to petsc-annual-meetings website --- doc/community/meetings/2024/index.rst | 50 +++++++++++++++++++++++++++ doc/community/meetings/meeting.rst | 4 +-- 2 files changed, 52 insertions(+), 2 deletions(-) create mode 100644 doc/community/meetings/2024/index.rst diff --git a/doc/community/meetings/2024/index.rst 
b/doc/community/meetings/2024/index.rst new file mode 100644 index 00000000000..6ca0cc4bbf2 --- /dev/null +++ b/doc/community/meetings/2024/index.rst @@ -0,0 +1,50 @@ +:orphan: + +.. _2024_meeting: + + +2024 Annual PETSc Meeting +************************* + +Cologne, Germany; May 23-24, 2024 + +`Original meeting website `__ + +.. image:: https://petsc.gitlab.io/annual-meetings/2024/GroupPhoto.jpg + :width: 800 + :alt: PETSc User Meeting 2024 group photo + +Scientific committee: +- Dr. Pierre Jolivet Sorbonne Université, CNRS + +- Prof. Dr. Axel Klawonn University of Cologne + +- Prof. Dr. Matthew Knepley University at Buffalo + +- Dr. Martin Lanser University of Cologne + +- Prof. Dr. Oliver Rheinbach TU Bergakademie Freiberg + +- Dr. Janine Weber University of Cologne + +- Dr. Stefano Zampini King Abdullah University of Science and Technology + +Local Organizing Committee +- Prof. Dr. Axel Klawonn + +- Dr. Jascha Knepper + +- Dr. Martin Lanser + +- Dr. Janine Weber + +.. image:: https://petsc.gitlab.io/annual-meetings/2024/banner_PETSc_2024.jpg + :width: 800 + +.. image:: https://petsc.gitlab.io/annual-meetings/2024/csm_PETSc_2024_Schedule_Overview.jpg + :width: 800 + + +- `Abstracts `__ + +- `Detailed schedule `__ diff --git a/doc/community/meetings/meeting.rst b/doc/community/meetings/meeting.rst index ab758170aa7..aeb25082a0b 100644 --- a/doc/community/meetings/meeting.rst +++ b/doc/community/meetings/meeting.rst @@ -28,12 +28,12 @@ simulations by scientists and engineers. Upcoming Meetings ================= -- The 2025 user meeting will take place in Buffalo, New York, USA. +- The 2025 user meeting will take place May 21-22, 2025 in Buffalo, New York, USA.
Previous Meetings ================= -- `Cologne, Germany; May 23-24, 2024 `__ +- :any:`Cologne, Germany; May 23-24, 2024 <2024_meeting>` - :any:`Chicago, Illinois, USA; June 5-7, 2023 <2023_meeting>` - `Atlanta, Georgia, USA; June 5-7, 2019 `__ - `London, UK; June 4-6, 2018 `__ From c4a4d5b1c7d85f8a77640315f9201f0dd97b0f89 Mon Sep 17 00:00:00 2001 From: Barry Smith Date: Thu, 17 Oct 2024 22:42:57 -0400 Subject: [PATCH 49/59] Have better boxed errors for some messages --- config/configure.py | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/config/configure.py b/config/configure.py index d8f94b20744..4a31f3085be 100755 --- a/config/configure.py +++ b/config/configure.py @@ -46,7 +46,7 @@ def check_for_option_mistakes(opts): def check_for_unsupported_combinations(opts): if '--with-precision=single' in opts and '--with-clanguage=cxx' in opts and '--with-scalar-type=complex' in opts: - sys.exit(ValueError('PETSc does not support single precision complex with C++ clanguage, run with --with-clanguage=c')) + raise ValueError('PETSc does not support single precision complex with C++ clanguage, run with --with-clanguage=c') def check_for_option_changed(opts): # Document changes in command line options here. (matlab-engine is deprecated, no longer needed but still allowed) @@ -97,10 +97,10 @@ def chkenable(): for l in range(0,len(sys.argv)): name = sys.argv[l] if name.find(no_break_space) >= 0: - sys.exit(ValueError('Unicode NO-BREAK SPACE char found in arguments! Please rerun configure using regular space chars: %s' % [name])) + raise ValueError('Unicode NO-BREAK SPACE char found in arguments! Please rerun configure using regular space chars: %s' % [name]) name = name.replace(en_dash,'-') if hasattr(name,'isprintable') and not name.isprintable(): - sys.exit(ValueError('Non-printable characters or control characters found in arguments! 
Please rerun configure using only printable character arguments: %s' % [name])) + raise ValueError('Non-printable characters or control characters found in arguments! Please rerun configure using only printable character arguments: %s' % [name]) if name.lstrip('-').startswith('enable-cxx'): if name.find('=') == -1: name = name.replace('enable-cxx','with-clanguage=C++',1) @@ -180,7 +180,7 @@ def chksynonyms(): elif int(tail)==64: name = '--with-64-bit-indices=1' else: - raise RuntimeError('--with-index-size= must be 32 or 64') + raise ValueError('--with-index-size= must be 32 or 64') if name.find('with-precision=') >=0: head,tail = name.split('=',1) @@ -416,19 +416,18 @@ def petsc_configure(configure_options): sys.argv = sys.argv[:1] + configure_options + sys.argv[1:] check_for_option_mistakes(sys.argv) check_for_option_changed(sys.argv) + check_for_unsupported_combinations(sys.argv) + + check_petsc_arch(sys.argv) + check_broken_configure_log_links() + + #rename '--enable-' to '--with-' + chkenable() + # support a few standard configure option types + chksynonyms() except (TypeError, ValueError) as e: msg = logger.build_multiline_error_message('ERROR in COMMAND LINE ARGUMENT to ./configure', str(e)) sys.exit(msg) - # check PETSC_ARCH - check_for_unsupported_combinations(sys.argv) - check_petsc_arch(sys.argv) - check_broken_configure_log_links() - - #rename '--enable-' to '--with-' - chkenable() - # support a few standard configure option types - chksynonyms() - # Check for broken cygwin chkbrokencygwin() # Disable threads on RHL9 chkrhl9() From c6b850fd0e87c272917932306164348f5edce4ae Mon Sep 17 00:00:00 2001 From: Satish Balay Date: Fri, 18 Oct 2024 10:40:12 -0500 Subject: [PATCH 50/59] python-3.12 removed distutils, and suggests using it from setuptools - so add in this check for petsc4py --- config/BuildSystem/config/packages/petsc4py.py | 3 +++ config/BuildSystem/config/packages/python.py | 7 +++++++ 2 files changed, 10 insertions(+) diff --git 
a/config/BuildSystem/config/packages/petsc4py.py b/config/BuildSystem/config/packages/petsc4py.py index 4a582430808..8831db1e5b5 100644 --- a/config/BuildSystem/config/packages/petsc4py.py +++ b/config/BuildSystem/config/packages/petsc4py.py @@ -121,9 +121,12 @@ def Install(self): return self.installDir def configureLibrary(self): + import sys if not self.sharedLibraries.useShared and not self.setCompilers.isCygwin(self.log): raise RuntimeError('petsc4py requires PETSc be built with shared libraries; rerun with --with-shared-libraries') chkpkgs = ['numpy'] + if sys.version_info >= (3, 12): + chkpkgs.append('setuptools') npkgs = [] for pkg in chkpkgs: if not getattr(self.python,pkg): npkgs.append(pkg) diff --git a/config/BuildSystem/config/packages/python.py b/config/BuildSystem/config/packages/python.py index e02ba2cd777..acb080b13fe 100644 --- a/config/BuildSystem/config/packages/python.py +++ b/config/BuildSystem/config/packages/python.py @@ -5,6 +5,7 @@ def __init__(self, framework): config.package.Package.__init__(self, framework) self.pyver = None self.cyver = None + self.setuptools = 0 self.cython = 0 self.numpy = 0 return @@ -32,6 +33,12 @@ def configure(self): except: self.logPrint('Unable to determine version of',self.pyexe) + try: + output,err1,ret1 = config.package.Package.executeShellCommand([self.pyexe,'-c','import setuptools;print(setuptools.__version__)'],timeout=60, log = self.log) + self.setuptools = 1 + except: + self.logPrint('Python being used '+self.pyexe+' does not have the setuptools package') + try: self.cyver,err1,ret1 = config.package.Package.executeShellCommand([self.pyexe,'-c','import cython;print(cython.__version__)'],timeout=60, log = self.log) self.cython = 1 From 2e1d0745e031e9eda42a183866b5a94ccb2b4e44 Mon Sep 17 00:00:00 2001 From: "Jose E. 
Roman" Date: Sat, 19 Oct 2024 12:11:25 +0200 Subject: [PATCH 51/59] Fix ranlib warnings 'file has no symbols' These warnings appear in macOS when configured --with-shared-libraries=0 /Library/Developer/CommandLineTools/usr/bin/ranlib: file: arch-darwin-c-debug-noshared/lib/libpetsc.a(sectionhdf5.o) has no symbols /Library/Developer/CommandLineTools/usr/bin/ranlib: file: arch-darwin-c-debug-noshared/lib/libpetsc.a(zhdf5io.o) has no symbols /Library/Developer/CommandLineTools/usr/bin/ranlib: file: arch-darwin-c-debug-noshared/lib/libpetsc.a(hdf5io.o) has no symbols /Library/Developer/CommandLineTools/usr/bin/ranlib: file: arch-darwin-c-debug-noshared/lib/libpetsc.a(aijhdf5.o) has no symbols /Library/Developer/CommandLineTools/usr/bin/ranlib: file: arch-darwin-c-debug-noshared/lib/libpetsc.a(baijfact81.o) has no symbols /Library/Developer/CommandLineTools/usr/bin/ranlib: file: arch-darwin-c-debug-noshared/lib/libpetsc.a(densehdf5.o) has no symbols /Library/Developer/CommandLineTools/usr/bin/ranlib: file: arch-darwin-c-debug-noshared/lib/libpetsc.a(feceed.o) has no symbols /Library/Developer/CommandLineTools/usr/bin/ranlib: file: arch-darwin-c-debug-noshared/lib/libpetsc.a(fvceed.o) has no symbols /Library/Developer/CommandLineTools/usr/bin/ranlib: file: arch-darwin-c-debug-noshared/lib/libpetsc.a(plexhdf5xdmf.o) has no symbols Includes the removal of unnecessary custom fortran stubs. 
--- src/dm/dt/fe/interface/{ => ceed}/feceed.c | 5 +-- src/dm/dt/fe/interface/ceed/makefile | 8 +++++ src/dm/dt/fv/interface/{ => ceed}/fvceed.c | 5 +-- src/dm/dt/fv/interface/ceed/makefile | 8 +++++ src/dm/impls/plex/hdf5/makefile | 9 ++++++ src/dm/impls/plex/{ => hdf5}/plexhdf5.c | 20 ++++++------ src/dm/impls/plex/{ => hdf5}/plexhdf5xdmf.c | 2 -- src/dm/interface/dlregisdmdm.c | 4 ++- src/mat/impls/aij/seq/{ => hdf5}/aijhdf5.c | 2 -- src/mat/impls/aij/seq/hdf5/makefile | 7 +++++ .../impls/dense/seq/{ => hdf5}/densehdf5.c | 16 +++++----- src/mat/impls/dense/seq/hdf5/makefile | 7 +++++ src/vec/is/section/interface/hdf5/makefile | 8 +++++ .../interface/{ => hdf5}/sectionhdf5.c | 31 +++++++++---------- src/vec/is/utils/ftn-custom/zhdf5io.c | 22 ------------- src/vec/is/utils/hdf5/hdf5io.c | 21 +++++-------- src/vec/is/utils/hdf5/makefile | 1 + 17 files changed, 91 insertions(+), 85 deletions(-) rename src/dm/dt/fe/interface/{ => ceed}/feceed.c (96%) create mode 100644 src/dm/dt/fe/interface/ceed/makefile rename src/dm/dt/fv/interface/{ => ceed}/fvceed.c (96%) create mode 100644 src/dm/dt/fv/interface/ceed/makefile create mode 100644 src/dm/impls/plex/hdf5/makefile rename src/dm/impls/plex/{ => hdf5}/plexhdf5.c (99%) rename src/dm/impls/plex/{ => hdf5}/plexhdf5xdmf.c (99%) rename src/mat/impls/aij/seq/{ => hdf5}/aijhdf5.c (99%) create mode 100644 src/mat/impls/aij/seq/hdf5/makefile rename src/mat/impls/dense/seq/{ => hdf5}/densehdf5.c (92%) create mode 100644 src/mat/impls/dense/seq/hdf5/makefile create mode 100644 src/vec/is/section/interface/hdf5/makefile rename src/vec/is/section/interface/{ => hdf5}/sectionhdf5.c (97%) delete mode 100644 src/vec/is/utils/ftn-custom/zhdf5io.c diff --git a/src/dm/dt/fe/interface/feceed.c b/src/dm/dt/fe/interface/ceed/feceed.c similarity index 96% rename from src/dm/dt/fe/interface/feceed.c rename to src/dm/dt/fe/interface/ceed/feceed.c index c7e57197396..797a365983f 100644 --- a/src/dm/dt/fe/interface/feceed.c +++ 
b/src/dm/dt/fe/interface/ceed/feceed.c @@ -1,7 +1,6 @@ #include /*I "petscfe.h" I*/ -#ifdef PETSC_HAVE_LIBCEED - #include +#include /*@C PetscFESetCeed - Set the `Ceed` object to a `PetscFE` @@ -64,5 +63,3 @@ PetscErrorCode PetscFEGetCeedBasis(PetscFE fe, CeedBasis *basis) *basis = fe->ceedBasis; PetscFunctionReturn(PETSC_SUCCESS); } - -#endif diff --git a/src/dm/dt/fe/interface/ceed/makefile b/src/dm/dt/fe/interface/ceed/makefile new file mode 100644 index 00000000000..bf62208533c --- /dev/null +++ b/src/dm/dt/fe/interface/ceed/makefile @@ -0,0 +1,8 @@ +-include ../../../../../../petscdir.mk +#requiresdefine 'PETSC_HAVE_LIBCEED' + +MANSEC = DM +SUBMANSEC = FE + +include ${PETSC_DIR}/lib/petsc/conf/variables +include ${PETSC_DIR}/lib/petsc/conf/rules_doc.mk diff --git a/src/dm/dt/fv/interface/fvceed.c b/src/dm/dt/fv/interface/ceed/fvceed.c similarity index 96% rename from src/dm/dt/fv/interface/fvceed.c rename to src/dm/dt/fv/interface/ceed/fvceed.c index 96573a688fe..dc37c070dd4 100644 --- a/src/dm/dt/fv/interface/fvceed.c +++ b/src/dm/dt/fv/interface/ceed/fvceed.c @@ -1,7 +1,6 @@ #include /*I "petscfv.h" I*/ -#ifdef PETSC_HAVE_LIBCEED - #include +#include /*@C PetscFVSetCeed - Set the `Ceed` object to a `PetscFV` @@ -61,5 +60,3 @@ PetscErrorCode PetscFVGetCeedBasis(PetscFV fv, CeedBasis *basis) *basis = fv->ceedBasis; PetscFunctionReturn(PETSC_SUCCESS); } - -#endif diff --git a/src/dm/dt/fv/interface/ceed/makefile b/src/dm/dt/fv/interface/ceed/makefile new file mode 100644 index 00000000000..cad1ad81e08 --- /dev/null +++ b/src/dm/dt/fv/interface/ceed/makefile @@ -0,0 +1,8 @@ +-include ../../../../../../petscdir.mk +#requiresdefine 'PETSC_HAVE_LIBCEED' + +MANSEC = DM +SUBMANSEC = FV + +include ${PETSC_DIR}/lib/petsc/conf/variables +include ${PETSC_DIR}/lib/petsc/conf/rules_doc.mk diff --git a/src/dm/impls/plex/hdf5/makefile b/src/dm/impls/plex/hdf5/makefile new file mode 100644 index 00000000000..1def181b1c1 --- /dev/null +++ b/src/dm/impls/plex/hdf5/makefile @@ 
-0,0 +1,9 @@ +-include ../../../../../petscdir.mk +#requiresdefine 'PETSC_HAVE_HDF5' + +CPPFLAGS = ${NETCDF_INCLUDE} ${PNETCDF_INCLUDE} ${EXODUSII_INCLUDE} +MANSEC = DM +SUBMANSEC = DMPlex + +include ${PETSC_DIR}/lib/petsc/conf/variables +include ${PETSC_DIR}/lib/petsc/conf/rules_doc.mk diff --git a/src/dm/impls/plex/plexhdf5.c b/src/dm/impls/plex/hdf5/plexhdf5.c similarity index 99% rename from src/dm/impls/plex/plexhdf5.c rename to src/dm/impls/plex/hdf5/plexhdf5.c index 5391e0d3509..361340e2e51 100644 --- a/src/dm/impls/plex/plexhdf5.c +++ b/src/dm/impls/plex/hdf5/plexhdf5.c @@ -7,7 +7,6 @@ /* Logging support */ PetscLogEvent DMPLEX_DistributionView, DMPLEX_DistributionLoad; -#if defined(PETSC_HAVE_HDF5) static PetscErrorCode PetscViewerParseVersion_Private(PetscViewer, const char[], DMPlexStorageVersion *); static PetscErrorCode PetscViewerCheckVersion_Private(PetscViewer, DMPlexStorageVersion); static PetscErrorCode PetscViewerAttachVersion_Private(PetscViewer, const char[], DMPlexStorageVersion); @@ -1360,7 +1359,7 @@ static PetscErrorCode DMPlexCoordinatesView_HDF5_XDMF_Private(DM dm, PetscViewer ncoords[coordSize++] = coords[off + 0]; ncoords[coordSize++] = PetscSinReal(2.0 * PETSC_PI * PetscRealPart(coords[off + 1]) / L[1]) * (L[1] / (2.0 * PETSC_PI)); ncoords[coordSize++] = -PetscCosReal(2.0 * PETSC_PI * PetscRealPart(coords[off + 1]) / L[1]) * (L[1] / (2.0 * PETSC_PI)); - #if 0 +#if 0 } else if ((bd[0] == DM_BOUNDARY_TWIST)) { PetscReal phi, r, R; /* Mobius strip */ @@ -1374,7 +1373,7 @@ static PetscErrorCode DMPlexCoordinatesView_HDF5_XDMF_Private(DM dm, PetscViewer ncoords[coordSize++] = -PetscCosReal(phi) * (R + r * PetscCosReal(phi/2.0)); ncoords[coordSize++] = PetscSinReal(phi/2.0) * r; ncoords[coordSize++] = PetscSinReal(phi) * (R + r * PetscCosReal(phi/2.0)); - #endif +#endif } else SETERRQ(PetscObjectComm((PetscObject)dm), PETSC_ERR_SUP, "Cannot handle periodicity in this domain"); } else { for (d = 0; d < dof; ++d, ++coordSize) 
ncoords[coordSize] = coords[off + d]; @@ -2323,8 +2322,8 @@ static PetscErrorCode PlexLayerDistribute_Private(PlexLayer layer, PetscSF cellL PetscFunctionReturn(PETSC_SUCCESS); } - //TODO share code with DMPlexBuildFromCellListParallel() - #include +//TODO share code with DMPlexBuildFromCellListParallel() +#include static PetscErrorCode PlexLayerCreateSFs_Private(PlexLayer layer, PetscSF *vertexOverlapSF, PetscSF *sfXC) { PetscLayout vertexLayout = layer->vertexLayout; @@ -2905,8 +2904,8 @@ PetscErrorCode DMPlexSectionLoad_HDF5_Internal(DM dm, PetscViewer viewer, DM sec PetscCall(PetscSectionSetUp(sectionA)); } PetscCall(PetscSectionGetChart(sectionA, NULL, &n)); - /* Create sfAB: A -> B */ - #if defined(PETSC_USE_DEBUG) +/* Create sfAB: A -> B */ +#if defined(PETSC_USE_DEBUG) { PetscInt N, N1; @@ -2914,7 +2913,7 @@ PetscErrorCode DMPlexSectionLoad_HDF5_Internal(DM dm, PetscViewer viewer, DM sec PetscCallMPI(MPIU_Allreduce(&n, &N, 1, MPIU_INT, MPI_SUM, comm)); PetscCheck(N1 == N, comm, PETSC_ERR_ARG_SIZ, "Mismatching sizes: on-disk order array size (%" PetscInt_FMT ") != number of loaded section points (%" PetscInt_FMT ")", N1, N); } - #endif +#endif { IS orderIS; const PetscInt *gpoints; @@ -3057,7 +3056,7 @@ PetscErrorCode DMPlexVecLoad_HDF5_Internal(DM dm, PetscViewer viewer, DM section PetscCall(DMGetPointSF(dm, &pointsf)); PetscCall(DMGetPointSF(sectiondm, &pointsf1)); PetscCheck(pointsf1 == pointsf, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Mismatching point SFs for dm and sectiondm"); - #if defined(PETSC_USE_DEBUG) +#if defined(PETSC_USE_DEBUG) { PetscInt MA, MA1; @@ -3065,7 +3064,7 @@ PetscErrorCode DMPlexVecLoad_HDF5_Internal(DM dm, PetscViewer viewer, DM section PetscCall(PetscViewerHDF5ReadSizes(viewer, vec_name, NULL, &MA1)); PetscCheck(MA1 == MA, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Total SF root size (%" PetscInt_FMT ") != On-disk vector data size (%" PetscInt_FMT ")", MA, MA1); } - #endif +#endif PetscCall(VecGetLocalSize(vec, &m1)); PetscCheck(m1 >= m, 
PETSC_COMM_SELF, PETSC_ERR_PLIB, "Target vector size (%" PetscInt_FMT ") < SF leaf size (%" PetscInt_FMT ")", m1, m); for (i = 0; i < m; ++i) { @@ -3092,4 +3091,3 @@ PetscErrorCode DMPlexVecLoad_HDF5_Internal(DM dm, PetscViewer viewer, DM section PetscCall(PetscViewerHDF5PopGroup(viewer)); PetscFunctionReturn(PETSC_SUCCESS); } -#endif diff --git a/src/dm/impls/plex/plexhdf5xdmf.c b/src/dm/impls/plex/hdf5/plexhdf5xdmf.c similarity index 99% rename from src/dm/impls/plex/plexhdf5xdmf.c rename to src/dm/impls/plex/hdf5/plexhdf5xdmf.c index b176b3cd4ad..c8cd57da942 100644 --- a/src/dm/impls/plex/plexhdf5xdmf.c +++ b/src/dm/impls/plex/hdf5/plexhdf5xdmf.c @@ -3,7 +3,6 @@ #include #include -#if defined(PETSC_HAVE_HDF5) static PetscErrorCode SplitPath_Private(char path[], char name[]) { char *tmp; @@ -169,4 +168,3 @@ PetscErrorCode DMPlexLoad_HDF5_Xdmf_Internal(DM dm, PetscViewer viewer) /* PetscCall(DMPlexLabelsLoad_HDF5_Internal(dm, viewer)); */ PetscFunctionReturn(PETSC_SUCCESS); } -#endif diff --git a/src/dm/interface/dlregisdmdm.c b/src/dm/interface/dlregisdmdm.c index db6b0fef880..8bb99130b30 100644 --- a/src/dm/interface/dlregisdmdm.c +++ b/src/dm/interface/dlregisdmdm.c @@ -116,14 +116,16 @@ PetscErrorCode DMInitializePackage(void) PetscCall(PetscLogEventRegister("DMPlexRebalance", DM_CLASSID, &DMPLEX_RebalanceSharedPoints)); PetscCall(PetscLogEventRegister("DMPlexLocatePoints", DM_CLASSID, &DMPLEX_LocatePoints)); PetscCall(PetscLogEventRegister("DMPlexTopologyView", DM_CLASSID, &DMPLEX_TopologyView)); - PetscCall(PetscLogEventRegister("DMPlexDistributionView", DM_CLASSID, &DMPLEX_DistributionView)); PetscCall(PetscLogEventRegister("DMPlexLabelsView", DM_CLASSID, &DMPLEX_LabelsView)); PetscCall(PetscLogEventRegister("DMPlexCoordinatesView", DM_CLASSID, &DMPLEX_CoordinatesView)); PetscCall(PetscLogEventRegister("DMPlexSectionView", DM_CLASSID, &DMPLEX_SectionView)); PetscCall(PetscLogEventRegister("DMPlexGlobalVectorView", DM_CLASSID, &DMPLEX_GlobalVectorView)); 
PetscCall(PetscLogEventRegister("DMPlexLocalVectorView", DM_CLASSID, &DMPLEX_LocalVectorView)); PetscCall(PetscLogEventRegister("DMPlexTopologyLoad", DM_CLASSID, &DMPLEX_TopologyLoad)); +#if defined(PETSC_HAVE_HDF5) + PetscCall(PetscLogEventRegister("DMPlexDistributionView", DM_CLASSID, &DMPLEX_DistributionView)); PetscCall(PetscLogEventRegister("DMPlexDistributionLoad", DM_CLASSID, &DMPLEX_DistributionLoad)); +#endif PetscCall(PetscLogEventRegister("DMPlexLabelsLoad", DM_CLASSID, &DMPLEX_LabelsLoad)); PetscCall(PetscLogEventRegister("DMPlexCoordinatesLoad", DM_CLASSID, &DMPLEX_CoordinatesLoad)); PetscCall(PetscLogEventRegister("DMPlexSectionLoad", DM_CLASSID, &DMPLEX_SectionLoad)); diff --git a/src/mat/impls/aij/seq/aijhdf5.c b/src/mat/impls/aij/seq/hdf5/aijhdf5.c similarity index 99% rename from src/mat/impls/aij/seq/aijhdf5.c rename to src/mat/impls/aij/seq/hdf5/aijhdf5.c index 933575f6b33..1d34aa2ec12 100644 --- a/src/mat/impls/aij/seq/aijhdf5.c +++ b/src/mat/impls/aij/seq/hdf5/aijhdf5.c @@ -3,7 +3,6 @@ #include #include -#if defined(PETSC_HAVE_HDF5) PetscErrorCode MatLoad_AIJ_HDF5(Mat mat, PetscViewer viewer) { PetscViewerFormat format; @@ -161,4 +160,3 @@ PetscErrorCode MatLoad_AIJ_HDF5(Mat mat, PetscViewer viewer) PetscCall(VecDestroy(&vec_a)); PetscFunctionReturn(PETSC_SUCCESS); } -#endif diff --git a/src/mat/impls/aij/seq/hdf5/makefile b/src/mat/impls/aij/seq/hdf5/makefile new file mode 100644 index 00000000000..b8c37d9bbd5 --- /dev/null +++ b/src/mat/impls/aij/seq/hdf5/makefile @@ -0,0 +1,7 @@ +-include ../../../../../../petscdir.mk +#requiresdefine 'PETSC_HAVE_HDF5' + +MANSEC = Mat + +include ${PETSC_DIR}/lib/petsc/conf/variables +include ${PETSC_DIR}/lib/petsc/conf/rules_doc.mk diff --git a/src/mat/impls/dense/seq/densehdf5.c b/src/mat/impls/dense/seq/hdf5/densehdf5.c similarity index 92% rename from src/mat/impls/dense/seq/densehdf5.c rename to src/mat/impls/dense/seq/hdf5/densehdf5.c index 2e1067246a4..e1f46979b89 100644 --- 
a/src/mat/impls/dense/seq/densehdf5.c +++ b/src/mat/impls/dense/seq/hdf5/densehdf5.c @@ -7,7 +7,6 @@ #include #include -#if defined(PETSC_HAVE_HDF5) PetscErrorCode MatLoad_Dense_HDF5(Mat mat, PetscViewer viewer) { PetscViewer_HDF5 *hdf5; @@ -35,15 +34,15 @@ PetscErrorCode MatLoad_Dense_HDF5(Mat mat, PetscViewer viewer) hdf5->horizontal = PETSC_TRUE; PetscCheck(((PetscObject)mat)->name, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Mat name must be set with PetscObjectSetName() before MatLoad()"); - #if defined(PETSC_USE_REAL_SINGLE) +#if defined(PETSC_USE_REAL_SINGLE) scalartype = H5T_NATIVE_FLOAT; - #elif defined(PETSC_USE_REAL___FLOAT128) - #error "HDF5 output with 128 bit floats not supported." - #elif defined(PETSC_USE_REAL___FP16) - #error "HDF5 output with 16 bit floats not supported." - #else +#elif defined(PETSC_USE_REAL___FLOAT128) + #error "HDF5 output with 128 bit floats not supported." +#elif defined(PETSC_USE_REAL___FP16) + #error "HDF5 output with 16 bit floats not supported." 
+#else scalartype = H5T_NATIVE_DOUBLE; - #endif +#endif PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); PetscCallMPI(MPI_Comm_rank(comm, &rank)); @@ -92,4 +91,3 @@ PetscErrorCode MatLoad_Dense_HDF5(Mat mat, PetscViewer viewer) PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); PetscFunctionReturn(PETSC_SUCCESS); } -#endif diff --git a/src/mat/impls/dense/seq/hdf5/makefile b/src/mat/impls/dense/seq/hdf5/makefile new file mode 100644 index 00000000000..b8c37d9bbd5 --- /dev/null +++ b/src/mat/impls/dense/seq/hdf5/makefile @@ -0,0 +1,7 @@ +-include ../../../../../../petscdir.mk +#requiresdefine 'PETSC_HAVE_HDF5' + +MANSEC = Mat + +include ${PETSC_DIR}/lib/petsc/conf/variables +include ${PETSC_DIR}/lib/petsc/conf/rules_doc.mk diff --git a/src/vec/is/section/interface/hdf5/makefile b/src/vec/is/section/interface/hdf5/makefile new file mode 100644 index 00000000000..e168832dd44 --- /dev/null +++ b/src/vec/is/section/interface/hdf5/makefile @@ -0,0 +1,8 @@ +-include ../../../../../../petscdir.mk +#requiresdefine 'PETSC_HAVE_HDF5' + +MANSEC = Vec +SUBMANSEC = PetscSection + +include ${PETSC_DIR}/lib/petsc/conf/variables +include ${PETSC_DIR}/lib/petsc/conf/rules_doc.mk diff --git a/src/vec/is/section/interface/sectionhdf5.c b/src/vec/is/section/interface/hdf5/sectionhdf5.c similarity index 97% rename from src/vec/is/section/interface/sectionhdf5.c rename to src/vec/is/section/interface/hdf5/sectionhdf5.c index f5924087a35..ac3cd42cd04 100644 --- a/src/vec/is/section/interface/sectionhdf5.c +++ b/src/vec/is/section/interface/hdf5/sectionhdf5.c @@ -4,7 +4,6 @@ #include #include -#if defined(PETSC_HAVE_HDF5) static PetscErrorCode PetscSectionView_HDF5_SingleField(PetscSection s, PetscViewer viewer) { MPI_Comm comm; @@ -176,9 +175,9 @@ static PetscErrorCode PetscSectionLoad_HDF5_SingleField(PetscSection s, PetscVie { MPI_Comm comm; PetscInt pStart, pEnd, p, N, n, M, m; - #if defined(PETSC_USE_DEBUG) +#if defined(PETSC_USE_DEBUG) PetscInt N1, M1; - #endif +#endif 
PetscBool hasConstraints, includesConstraints; IS dofIS, offIS, cdofIS, coffIS, cindIS; const PetscInt *dofs, *offs, *cdofs; @@ -190,15 +189,15 @@ static PetscErrorCode PetscSectionLoad_HDF5_SingleField(PetscSection s, PetscVie PetscCall(PetscSectionSetIncludesConstraints(s, includesConstraints)); PetscCall(PetscSectionGetChart(s, &pStart, &pEnd)); n = pEnd - pStart; - #if defined(PETSC_USE_DEBUG) +#if defined(PETSC_USE_DEBUG) PetscCallMPI(MPIU_Allreduce(&n, &N1, 1, MPIU_INT, MPI_SUM, comm)); - #endif +#endif PetscCall(ISCreate(comm, &dofIS)); PetscCall(PetscObjectSetName((PetscObject)dofIS, "atlasDof")); PetscCall(PetscViewerHDF5ReadSizes(viewer, "atlasDof", NULL, &N)); - #if defined(PETSC_USE_DEBUG) +#if defined(PETSC_USE_DEBUG) PetscCheck(N1 == N, comm, PETSC_ERR_ARG_SIZ, "Unable to load s->atlasDof: sum of local sizes (%" PetscInt_FMT ") != global size (%" PetscInt_FMT "): local size on this process is %" PetscInt_FMT, N1, N, n); - #endif +#endif PetscCall(ISGetLayout(dofIS, &map)); PetscCall(PetscLayoutSetSize(map, N)); PetscCall(PetscLayoutSetLocalSize(map, n)); @@ -206,9 +205,9 @@ static PetscErrorCode PetscSectionLoad_HDF5_SingleField(PetscSection s, PetscVie PetscCall(ISCreate(comm, &offIS)); PetscCall(PetscObjectSetName((PetscObject)offIS, "atlasOff")); PetscCall(PetscViewerHDF5ReadSizes(viewer, "atlasOff", NULL, &N)); - #if defined(PETSC_USE_DEBUG) +#if defined(PETSC_USE_DEBUG) PetscCheck(N1 == N, comm, PETSC_ERR_ARG_SIZ, "Unable to load s->atlasOff: sum of local sizes (%" PetscInt_FMT ") != global size (%" PetscInt_FMT "): local size on this process is %" PetscInt_FMT, N1, N, n); - #endif +#endif PetscCall(ISGetLayout(offIS, &map)); PetscCall(PetscLayoutSetSize(map, N)); PetscCall(PetscLayoutSetLocalSize(map, n)); @@ -229,9 +228,9 @@ static PetscErrorCode PetscSectionLoad_HDF5_SingleField(PetscSection s, PetscVie PetscCall(ISCreate(comm, &cdofIS)); PetscCall(PetscObjectSetName((PetscObject)cdofIS, "atlasDof")); PetscCall(PetscViewerHDF5ReadSizes(viewer, 
"atlasDof", NULL, &N)); - #if defined(PETSC_USE_DEBUG) +#if defined(PETSC_USE_DEBUG) PetscCheck(N1 == N, comm, PETSC_ERR_ARG_SIZ, "Unable to load s->bc->atlasDof: sum of local sizes (%" PetscInt_FMT ") != global size (%" PetscInt_FMT "): local size on this process is %" PetscInt_FMT, N1, N, n); - #endif +#endif PetscCall(ISGetLayout(cdofIS, &map)); PetscCall(PetscLayoutSetSize(map, N)); PetscCall(PetscLayoutSetLocalSize(map, n)); @@ -243,9 +242,9 @@ static PetscErrorCode PetscSectionLoad_HDF5_SingleField(PetscSection s, PetscVie PetscCall(ISCreate(comm, &coffIS)); PetscCall(PetscObjectSetName((PetscObject)coffIS, "atlasOff")); PetscCall(PetscViewerHDF5ReadSizes(viewer, "atlasOff", NULL, &N)); - #if defined(PETSC_USE_DEBUG) +#if defined(PETSC_USE_DEBUG) PetscCheck(N1 == N, comm, PETSC_ERR_ARG_SIZ, "Unable to load s->bc->atlasOff: sum of local sizes (%" PetscInt_FMT ") != global size (%" PetscInt_FMT "): local size on this process is %" PetscInt_FMT, N1, N, n); - #endif +#endif PetscCall(ISGetLayout(coffIS, &map)); PetscCall(PetscLayoutSetSize(map, N)); PetscCall(PetscLayoutSetLocalSize(map, n)); @@ -256,10 +255,10 @@ static PetscErrorCode PetscSectionLoad_HDF5_SingleField(PetscSection s, PetscVie PetscCall(PetscViewerHDF5ReadSizes(viewer, "bcIndices", NULL, &M)); if (!s->bc) m = 0; else PetscCall(PetscSectionGetStorageSize(s->bc, &m)); - #if defined(PETSC_USE_DEBUG) +#if defined(PETSC_USE_DEBUG) PetscCallMPI(MPIU_Allreduce(&m, &M1, 1, MPIU_INT, MPI_SUM, comm)); PetscCheck(M1 == M, comm, PETSC_ERR_ARG_SIZ, "Unable to load s->bcIndices: sum of local sizes (%" PetscInt_FMT ") != global size (%" PetscInt_FMT "): local size on this process is %" PetscInt_FMT, M1, M, m); - #endif +#endif PetscCall(ISGetLayout(cindIS, &map)); PetscCall(PetscLayoutSetSize(map, M)); PetscCall(PetscLayoutSetLocalSize(map, m)); @@ -320,5 +319,3 @@ PetscErrorCode PetscSectionLoad_HDF5_Internal(PetscSection s, PetscViewer viewer PetscCall(PetscViewerHDF5PopGroup(viewer)); 
PetscFunctionReturn(PETSC_SUCCESS); } - -#endif diff --git a/src/vec/is/utils/ftn-custom/zhdf5io.c b/src/vec/is/utils/ftn-custom/zhdf5io.c deleted file mode 100644 index 1c90520d359..00000000000 --- a/src/vec/is/utils/ftn-custom/zhdf5io.c +++ /dev/null @@ -1,22 +0,0 @@ -#include -#include -#include - -#if defined(PETSC_HAVE_HDF5) - - #if defined(PETSC_HAVE_FORTRAN_CAPS) - #define petscviewerhdf5readsizes_ PETSCVIEWERHDF5READSIZES - #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) - #define petscviewerhdf5readsizes_ petscviewerhdf5readsizes - #endif - -PETSC_EXTERN void petscviewerhdf5readsizes_(PetscViewer *viewer, char *name, PetscInt *bs, PetscInt *N, PetscErrorCode *ierr, PETSC_FORTRAN_CHARLEN_T len) -{ - char *c1; - - FIXCHAR(name, len, c1); - *ierr = PetscViewerHDF5ReadSizes(*viewer, c1, bs, N); - FREECHAR(name, c1); -} - -#endif /* defined(PETSC_HAVE_HDF5) */ diff --git a/src/vec/is/utils/hdf5/hdf5io.c b/src/vec/is/utils/hdf5/hdf5io.c index 463adae93e7..3fc0195850b 100644 --- a/src/vec/is/utils/hdf5/hdf5io.c +++ b/src/vec/is/utils/hdf5/hdf5io.c @@ -1,8 +1,5 @@ #include -#include /*I "petsclayoutdf5.h" I*/ -#include /*I "petscis.h" I*/ - -#if defined(PETSC_HAVE_HDF5) +#include /*I "petsclayouthdf5.h" I*/ struct _n_HDF5ReadCtx { const char *name; @@ -94,11 +91,11 @@ static PetscErrorCode PetscViewerHDF5ReadSizes_Private(PetscViewer viewer, HDF5R const PetscInt *range; MPI_Comm comm; - #if defined(PETSC_USE_64BIT_INDICES) +#if defined(PETSC_USE_64BIT_INDICES) inttype = H5T_NATIVE_LLONG; - #else +#else inttype = H5T_NATIVE_INT; - #endif +#endif PetscCall(PetscObjectGetComm((PetscObject)viewer, &comm)); PetscCall(PetscLayoutCreate(PetscObjectComm((PetscObject)viewer), &cmap)); cmap->bs = 3; @@ -250,14 +247,14 @@ static PetscErrorCode PetscViewerHDF5Load_Internal(PetscViewer viewer, const cha PetscCall(PetscViewerHDF5HasDataset(viewer, name, &has)); PetscCheck(has, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Object (dataset) \"%s\" not stored in 
group %s", name, group); PetscCall(PetscViewerHDF5ReadInitialize_Private(viewer, name, &h)); - #if defined(PETSC_USE_COMPLEX) +#if defined(PETSC_USE_COMPLEX) if (!h->complexVal) { H5T_class_t clazz = H5Tget_class(datatype); PetscCheck(clazz != H5T_FLOAT, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Dataset %s/%s is marked as real but PETSc is configured for complex scalars. The conversion is not yet implemented. Configure with --with-scalar-type=real to read this dataset", group ? group : "", name); } - #else +#else PetscCheck(!h->complexVal, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Dataset %s/%s is marked as complex but PETSc is configured for real scalars. Configure with --with-scalar-type=complex to read this dataset", group, name); - #endif +#endif PetscCall(PetscViewerHDF5ReadSizes_Private(viewer, h, uncompress, PETSC_TRUE, &map)); PetscCall(PetscViewerHDF5ReadSelectHyperslab_Private(viewer, h, map, &memspace)); @@ -333,7 +330,7 @@ PetscErrorCode PetscViewerHDF5Load(PetscViewer viewer, const char name[], PetscL PetscFunctionReturn(PETSC_SUCCESS); } -/*@C +/*@ PetscViewerHDF5ReadSizes - Read block size and global size of a `Vec` or `IS` stored in an HDF5 file. Input Parameters: @@ -369,5 +366,3 @@ PetscErrorCode PetscViewerHDF5ReadSizes(PetscViewer viewer, const char name[], P PetscCall(PetscLayoutDestroy(&map)); PetscFunctionReturn(PETSC_SUCCESS); } - -#endif /* defined(PETSC_HAVE_HDF5) */ diff --git a/src/vec/is/utils/hdf5/makefile b/src/vec/is/utils/hdf5/makefile index 857989296ae..f1bdc354030 100644 --- a/src/vec/is/utils/hdf5/makefile +++ b/src/vec/is/utils/hdf5/makefile @@ -1,4 +1,5 @@ -include ../../../../../petscdir.mk +#requiresdefine 'PETSC_HAVE_HDF5' MANSEC = Sys SUBMANSEC = Viewer From c3034d77915de22bc594e3395799a7fd5c39e0a7 Mon Sep 17 00:00:00 2001 From: "Jose E. 
Roman" Date: Sat, 19 Oct 2024 18:36:29 +0200 Subject: [PATCH 52/59] MATBAIJ: merge two files to avoid generating an object file with no symbols --- src/mat/impls/baij/seq/baijfact81.c | 417 ---------------------------- src/mat/impls/baij/seq/baijfact9.c | 411 +++++++++++++++++++++++++++ 2 files changed, 411 insertions(+), 417 deletions(-) delete mode 100644 src/mat/impls/baij/seq/baijfact81.c diff --git a/src/mat/impls/baij/seq/baijfact81.c b/src/mat/impls/baij/seq/baijfact81.c deleted file mode 100644 index 8a620c8752f..00000000000 --- a/src/mat/impls/baij/seq/baijfact81.c +++ /dev/null @@ -1,417 +0,0 @@ -/* - Factorization code for BAIJ format. - */ -#include <../src/mat/impls/baij/seq/baij.h> -#include -#if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) - #include -#endif -/* - Version for when blocks are 9 by 9 - */ -#if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) -PetscErrorCode MatLUFactorNumeric_SeqBAIJ_9_NaturalOrdering(Mat B, Mat A, const MatFactorInfo *info) -{ - Mat C = B; - Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)C->data; - PetscInt i, j, k, nz, nzL, row; - const PetscInt n = a->mbs, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j; - const PetscInt *ajtmp, *bjtmp, *bdiag = b->diag, *pj, bs2 = a->bs2; - MatScalar *rtmp, *pc, *mwork, *v, *pv, *aa = a->a; - PetscInt flg; - PetscReal shift = info->shiftamount; - PetscBool allowzeropivot, zeropivotdetected; - - PetscFunctionBegin; - allowzeropivot = PetscNot(A->erroriffailure); - - /* generate work space needed by the factorization */ - PetscCall(PetscMalloc2(bs2 * n, &rtmp, bs2, &mwork)); - PetscCall(PetscArrayzero(rtmp, bs2 * n)); - - for (i = 0; i < n; i++) { - /* zero rtmp */ - /* L part */ - nz = bi[i + 1] - bi[i]; - bjtmp = bj + 
bi[i]; - for (j = 0; j < nz; j++) PetscCall(PetscArrayzero(rtmp + bs2 * bjtmp[j], bs2)); - - /* U part */ - nz = bdiag[i] - bdiag[i + 1]; - bjtmp = bj + bdiag[i + 1] + 1; - for (j = 0; j < nz; j++) PetscCall(PetscArrayzero(rtmp + bs2 * bjtmp[j], bs2)); - - /* load in initial (unfactored row) */ - nz = ai[i + 1] - ai[i]; - ajtmp = aj + ai[i]; - v = aa + bs2 * ai[i]; - for (j = 0; j < nz; j++) PetscCall(PetscArraycpy(rtmp + bs2 * ajtmp[j], v + bs2 * j, bs2)); - - /* elimination */ - bjtmp = bj + bi[i]; - nzL = bi[i + 1] - bi[i]; - for (k = 0; k < nzL; k++) { - row = bjtmp[k]; - pc = rtmp + bs2 * row; - for (flg = 0, j = 0; j < bs2; j++) { - if (pc[j] != 0.0) { - flg = 1; - break; - } - } - if (flg) { - pv = b->a + bs2 * bdiag[row]; - /* PetscKernel_A_gets_A_times_B(bs,pc,pv,mwork); *pc = *pc * (*pv); */ - PetscCall(PetscKernel_A_gets_A_times_B_9(pc, pv, mwork)); - - pj = b->j + bdiag[row + 1] + 1; /* beginning of U(row,:) */ - pv = b->a + bs2 * (bdiag[row + 1] + 1); - nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries inU(row,:), excluding diag */ - for (j = 0; j < nz; j++) { - /* PetscKernel_A_gets_A_minus_B_times_C(bs,rtmp+bs2*pj[j],pc,pv+bs2*j); */ - /* rtmp+bs2*pj[j] = rtmp+bs2*pj[j] - (*pc)*(pv+bs2*j) */ - v = rtmp + bs2 * pj[j]; - PetscCall(PetscKernel_A_gets_A_minus_B_times_C_9(v, pc, pv + 81 * j)); - /* pv incremented in PetscKernel_A_gets_A_minus_B_times_C_9 */ - } - PetscCall(PetscLogFlops(1458 * nz + 1377)); /* flops = 2*bs^3*nz + 2*bs^3 - bs2) */ - } - } - - /* finished row so stick it into b->a */ - /* L part */ - pv = b->a + bs2 * bi[i]; - pj = b->j + bi[i]; - nz = bi[i + 1] - bi[i]; - for (j = 0; j < nz; j++) PetscCall(PetscArraycpy(pv + bs2 * j, rtmp + bs2 * pj[j], bs2)); - - /* Mark diagonal and invert diagonal for simpler triangular solves */ - pv = b->a + bs2 * bdiag[i]; - pj = b->j + bdiag[i]; - PetscCall(PetscArraycpy(pv, rtmp + bs2 * pj[0], bs2)); - PetscCall(PetscKernel_A_gets_inverse_A_9(pv, shift, allowzeropivot, &zeropivotdetected)); - 
if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; - - /* U part */ - pv = b->a + bs2 * (bdiag[i + 1] + 1); - pj = b->j + bdiag[i + 1] + 1; - nz = bdiag[i] - bdiag[i + 1] - 1; - for (j = 0; j < nz; j++) PetscCall(PetscArraycpy(pv + bs2 * j, rtmp + bs2 * pj[j], bs2)); - } - PetscCall(PetscFree2(rtmp, mwork)); - - C->ops->solve = MatSolve_SeqBAIJ_9_NaturalOrdering; - C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_N; - C->assembled = PETSC_TRUE; - - PetscCall(PetscLogFlops(1.333333333333 * 9 * 9 * 9 * n)); /* from inverting diagonal blocks */ - PetscFunctionReturn(PETSC_SUCCESS); -} - -PetscErrorCode MatSolve_SeqBAIJ_9_NaturalOrdering(Mat A, Vec bb, Vec xx) -{ - Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; - const PetscInt *ai = a->i, *aj = a->j, *adiag = a->diag, *vi; - PetscInt i, k, n = a->mbs; - PetscInt nz, bs = A->rmap->bs, bs2 = a->bs2; - const MatScalar *aa = a->a, *v; - PetscScalar *x, *s, *t, *ls; - const PetscScalar *b; - __m256d a0, a1, a2, a3, a4, a5, w0, w1, w2, w3, s0, s1, s2, v0, v1, v2, v3; - - PetscFunctionBegin; - PetscCall(VecGetArrayRead(bb, &b)); - PetscCall(VecGetArray(xx, &x)); - t = a->solve_work; - - /* forward solve the lower triangular */ - PetscCall(PetscArraycpy(t, b, bs)); /* copy 1st block of b to t */ - - for (i = 1; i < n; i++) { - v = aa + bs2 * ai[i]; - vi = aj + ai[i]; - nz = ai[i + 1] - ai[i]; - s = t + bs * i; - PetscCall(PetscArraycpy(s, b + bs * i, bs)); /* copy i_th block of b to t */ - - __m256d s0, s1, s2; - s0 = _mm256_loadu_pd(s + 0); - s1 = _mm256_loadu_pd(s + 4); - s2 = _mm256_maskload_pd(s + 8, _mm256_set_epi64x(0LL, 0LL, 0LL, 1LL << 63)); - - for (k = 0; k < nz; k++) { - w0 = _mm256_set1_pd((t + bs * vi[k])[0]); - a0 = _mm256_loadu_pd(&v[0]); - s0 = _mm256_fnmadd_pd(a0, w0, s0); - a1 = _mm256_loadu_pd(&v[4]); - s1 = _mm256_fnmadd_pd(a1, w0, s1); - a2 = _mm256_loadu_pd(&v[8]); - s2 = _mm256_fnmadd_pd(a2, w0, s2); - - w1 = _mm256_set1_pd((t + bs * vi[k])[1]); - a3 = _mm256_loadu_pd(&v[9]); - s0 = 
_mm256_fnmadd_pd(a3, w1, s0); - a4 = _mm256_loadu_pd(&v[13]); - s1 = _mm256_fnmadd_pd(a4, w1, s1); - a5 = _mm256_loadu_pd(&v[17]); - s2 = _mm256_fnmadd_pd(a5, w1, s2); - - w2 = _mm256_set1_pd((t + bs * vi[k])[2]); - a0 = _mm256_loadu_pd(&v[18]); - s0 = _mm256_fnmadd_pd(a0, w2, s0); - a1 = _mm256_loadu_pd(&v[22]); - s1 = _mm256_fnmadd_pd(a1, w2, s1); - a2 = _mm256_loadu_pd(&v[26]); - s2 = _mm256_fnmadd_pd(a2, w2, s2); - - w3 = _mm256_set1_pd((t + bs * vi[k])[3]); - a3 = _mm256_loadu_pd(&v[27]); - s0 = _mm256_fnmadd_pd(a3, w3, s0); - a4 = _mm256_loadu_pd(&v[31]); - s1 = _mm256_fnmadd_pd(a4, w3, s1); - a5 = _mm256_loadu_pd(&v[35]); - s2 = _mm256_fnmadd_pd(a5, w3, s2); - - w0 = _mm256_set1_pd((t + bs * vi[k])[4]); - a0 = _mm256_loadu_pd(&v[36]); - s0 = _mm256_fnmadd_pd(a0, w0, s0); - a1 = _mm256_loadu_pd(&v[40]); - s1 = _mm256_fnmadd_pd(a1, w0, s1); - a2 = _mm256_loadu_pd(&v[44]); - s2 = _mm256_fnmadd_pd(a2, w0, s2); - - w1 = _mm256_set1_pd((t + bs * vi[k])[5]); - a3 = _mm256_loadu_pd(&v[45]); - s0 = _mm256_fnmadd_pd(a3, w1, s0); - a4 = _mm256_loadu_pd(&v[49]); - s1 = _mm256_fnmadd_pd(a4, w1, s1); - a5 = _mm256_loadu_pd(&v[53]); - s2 = _mm256_fnmadd_pd(a5, w1, s2); - - w2 = _mm256_set1_pd((t + bs * vi[k])[6]); - a0 = _mm256_loadu_pd(&v[54]); - s0 = _mm256_fnmadd_pd(a0, w2, s0); - a1 = _mm256_loadu_pd(&v[58]); - s1 = _mm256_fnmadd_pd(a1, w2, s1); - a2 = _mm256_loadu_pd(&v[62]); - s2 = _mm256_fnmadd_pd(a2, w2, s2); - - w3 = _mm256_set1_pd((t + bs * vi[k])[7]); - a3 = _mm256_loadu_pd(&v[63]); - s0 = _mm256_fnmadd_pd(a3, w3, s0); - a4 = _mm256_loadu_pd(&v[67]); - s1 = _mm256_fnmadd_pd(a4, w3, s1); - a5 = _mm256_loadu_pd(&v[71]); - s2 = _mm256_fnmadd_pd(a5, w3, s2); - - w0 = _mm256_set1_pd((t + bs * vi[k])[8]); - a0 = _mm256_loadu_pd(&v[72]); - s0 = _mm256_fnmadd_pd(a0, w0, s0); - a1 = _mm256_loadu_pd(&v[76]); - s1 = _mm256_fnmadd_pd(a1, w0, s1); - a2 = _mm256_maskload_pd(v + 80, _mm256_set_epi64x(0LL, 0LL, 0LL, 1LL << 63)); - s2 = _mm256_fnmadd_pd(a2, w0, s2); - v += bs2; 
- } - _mm256_storeu_pd(&s[0], s0); - _mm256_storeu_pd(&s[4], s1); - _mm256_maskstore_pd(&s[8], _mm256_set_epi64x(0LL, 0LL, 0LL, 1LL << 63), s2); - } - - /* backward solve the upper triangular */ - ls = a->solve_work + A->cmap->n; - for (i = n - 1; i >= 0; i--) { - v = aa + bs2 * (adiag[i + 1] + 1); - vi = aj + adiag[i + 1] + 1; - nz = adiag[i] - adiag[i + 1] - 1; - PetscCall(PetscArraycpy(ls, t + i * bs, bs)); - - s0 = _mm256_loadu_pd(ls + 0); - s1 = _mm256_loadu_pd(ls + 4); - s2 = _mm256_maskload_pd(ls + 8, _mm256_set_epi64x(0LL, 0LL, 0LL, 1LL << 63)); - - for (k = 0; k < nz; k++) { - w0 = _mm256_set1_pd((t + bs * vi[k])[0]); - a0 = _mm256_loadu_pd(&v[0]); - s0 = _mm256_fnmadd_pd(a0, w0, s0); - a1 = _mm256_loadu_pd(&v[4]); - s1 = _mm256_fnmadd_pd(a1, w0, s1); - a2 = _mm256_loadu_pd(&v[8]); - s2 = _mm256_fnmadd_pd(a2, w0, s2); - - /* v += 9; */ - w1 = _mm256_set1_pd((t + bs * vi[k])[1]); - a3 = _mm256_loadu_pd(&v[9]); - s0 = _mm256_fnmadd_pd(a3, w1, s0); - a4 = _mm256_loadu_pd(&v[13]); - s1 = _mm256_fnmadd_pd(a4, w1, s1); - a5 = _mm256_loadu_pd(&v[17]); - s2 = _mm256_fnmadd_pd(a5, w1, s2); - - /* v += 9; */ - w2 = _mm256_set1_pd((t + bs * vi[k])[2]); - a0 = _mm256_loadu_pd(&v[18]); - s0 = _mm256_fnmadd_pd(a0, w2, s0); - a1 = _mm256_loadu_pd(&v[22]); - s1 = _mm256_fnmadd_pd(a1, w2, s1); - a2 = _mm256_loadu_pd(&v[26]); - s2 = _mm256_fnmadd_pd(a2, w2, s2); - - /* v += 9; */ - w3 = _mm256_set1_pd((t + bs * vi[k])[3]); - a3 = _mm256_loadu_pd(&v[27]); - s0 = _mm256_fnmadd_pd(a3, w3, s0); - a4 = _mm256_loadu_pd(&v[31]); - s1 = _mm256_fnmadd_pd(a4, w3, s1); - a5 = _mm256_loadu_pd(&v[35]); - s2 = _mm256_fnmadd_pd(a5, w3, s2); - - /* v += 9; */ - w0 = _mm256_set1_pd((t + bs * vi[k])[4]); - a0 = _mm256_loadu_pd(&v[36]); - s0 = _mm256_fnmadd_pd(a0, w0, s0); - a1 = _mm256_loadu_pd(&v[40]); - s1 = _mm256_fnmadd_pd(a1, w0, s1); - a2 = _mm256_loadu_pd(&v[44]); - s2 = _mm256_fnmadd_pd(a2, w0, s2); - - /* v += 9; */ - w1 = _mm256_set1_pd((t + bs * vi[k])[5]); - a3 = 
_mm256_loadu_pd(&v[45]); - s0 = _mm256_fnmadd_pd(a3, w1, s0); - a4 = _mm256_loadu_pd(&v[49]); - s1 = _mm256_fnmadd_pd(a4, w1, s1); - a5 = _mm256_loadu_pd(&v[53]); - s2 = _mm256_fnmadd_pd(a5, w1, s2); - - /* v += 9; */ - w2 = _mm256_set1_pd((t + bs * vi[k])[6]); - a0 = _mm256_loadu_pd(&v[54]); - s0 = _mm256_fnmadd_pd(a0, w2, s0); - a1 = _mm256_loadu_pd(&v[58]); - s1 = _mm256_fnmadd_pd(a1, w2, s1); - a2 = _mm256_loadu_pd(&v[62]); - s2 = _mm256_fnmadd_pd(a2, w2, s2); - - /* v += 9; */ - w3 = _mm256_set1_pd((t + bs * vi[k])[7]); - a3 = _mm256_loadu_pd(&v[63]); - s0 = _mm256_fnmadd_pd(a3, w3, s0); - a4 = _mm256_loadu_pd(&v[67]); - s1 = _mm256_fnmadd_pd(a4, w3, s1); - a5 = _mm256_loadu_pd(&v[71]); - s2 = _mm256_fnmadd_pd(a5, w3, s2); - - /* v += 9; */ - w0 = _mm256_set1_pd((t + bs * vi[k])[8]); - a0 = _mm256_loadu_pd(&v[72]); - s0 = _mm256_fnmadd_pd(a0, w0, s0); - a1 = _mm256_loadu_pd(&v[76]); - s1 = _mm256_fnmadd_pd(a1, w0, s1); - a2 = _mm256_maskload_pd(v + 80, _mm256_set_epi64x(0LL, 0LL, 0LL, 1LL << 63)); - s2 = _mm256_fnmadd_pd(a2, w0, s2); - v += bs2; - } - - _mm256_storeu_pd(&ls[0], s0); - _mm256_storeu_pd(&ls[4], s1); - _mm256_maskstore_pd(&ls[8], _mm256_set_epi64x(0LL, 0LL, 0LL, 1LL << 63), s2); - - w0 = _mm256_setzero_pd(); - w1 = _mm256_setzero_pd(); - w2 = _mm256_setzero_pd(); - - /* first row */ - v0 = _mm256_set1_pd(ls[0]); - a0 = _mm256_loadu_pd(&(aa + bs2 * adiag[i])[0]); - w0 = _mm256_fmadd_pd(a0, v0, w0); - a1 = _mm256_loadu_pd(&(aa + bs2 * adiag[i])[4]); - w1 = _mm256_fmadd_pd(a1, v0, w1); - a2 = _mm256_loadu_pd(&(aa + bs2 * adiag[i])[8]); - w2 = _mm256_fmadd_pd(a2, v0, w2); - - /* second row */ - v1 = _mm256_set1_pd(ls[1]); - a3 = _mm256_loadu_pd(&(aa + bs2 * adiag[i])[9]); - w0 = _mm256_fmadd_pd(a3, v1, w0); - a4 = _mm256_loadu_pd(&(aa + bs2 * adiag[i])[13]); - w1 = _mm256_fmadd_pd(a4, v1, w1); - a5 = _mm256_loadu_pd(&(aa + bs2 * adiag[i])[17]); - w2 = _mm256_fmadd_pd(a5, v1, w2); - - /* third row */ - v2 = _mm256_set1_pd(ls[2]); - a0 = 
_mm256_loadu_pd(&(aa + bs2 * adiag[i])[18]); - w0 = _mm256_fmadd_pd(a0, v2, w0); - a1 = _mm256_loadu_pd(&(aa + bs2 * adiag[i])[22]); - w1 = _mm256_fmadd_pd(a1, v2, w1); - a2 = _mm256_loadu_pd(&(aa + bs2 * adiag[i])[26]); - w2 = _mm256_fmadd_pd(a2, v2, w2); - - /* fourth row */ - v3 = _mm256_set1_pd(ls[3]); - a3 = _mm256_loadu_pd(&(aa + bs2 * adiag[i])[27]); - w0 = _mm256_fmadd_pd(a3, v3, w0); - a4 = _mm256_loadu_pd(&(aa + bs2 * adiag[i])[31]); - w1 = _mm256_fmadd_pd(a4, v3, w1); - a5 = _mm256_loadu_pd(&(aa + bs2 * adiag[i])[35]); - w2 = _mm256_fmadd_pd(a5, v3, w2); - - /* fifth row */ - v0 = _mm256_set1_pd(ls[4]); - a0 = _mm256_loadu_pd(&(aa + bs2 * adiag[i])[36]); - w0 = _mm256_fmadd_pd(a0, v0, w0); - a1 = _mm256_loadu_pd(&(aa + bs2 * adiag[i])[40]); - w1 = _mm256_fmadd_pd(a1, v0, w1); - a2 = _mm256_loadu_pd(&(aa + bs2 * adiag[i])[44]); - w2 = _mm256_fmadd_pd(a2, v0, w2); - - /* sixth row */ - v1 = _mm256_set1_pd(ls[5]); - a3 = _mm256_loadu_pd(&(aa + bs2 * adiag[i])[45]); - w0 = _mm256_fmadd_pd(a3, v1, w0); - a4 = _mm256_loadu_pd(&(aa + bs2 * adiag[i])[49]); - w1 = _mm256_fmadd_pd(a4, v1, w1); - a5 = _mm256_loadu_pd(&(aa + bs2 * adiag[i])[53]); - w2 = _mm256_fmadd_pd(a5, v1, w2); - - /* seventh row */ - v2 = _mm256_set1_pd(ls[6]); - a0 = _mm256_loadu_pd(&(aa + bs2 * adiag[i])[54]); - w0 = _mm256_fmadd_pd(a0, v2, w0); - a1 = _mm256_loadu_pd(&(aa + bs2 * adiag[i])[58]); - w1 = _mm256_fmadd_pd(a1, v2, w1); - a2 = _mm256_loadu_pd(&(aa + bs2 * adiag[i])[62]); - w2 = _mm256_fmadd_pd(a2, v2, w2); - - /* eighth row */ - v3 = _mm256_set1_pd(ls[7]); - a3 = _mm256_loadu_pd(&(aa + bs2 * adiag[i])[63]); - w0 = _mm256_fmadd_pd(a3, v3, w0); - a4 = _mm256_loadu_pd(&(aa + bs2 * adiag[i])[67]); - w1 = _mm256_fmadd_pd(a4, v3, w1); - a5 = _mm256_loadu_pd(&(aa + bs2 * adiag[i])[71]); - w2 = _mm256_fmadd_pd(a5, v3, w2); - - /* ninth row */ - v0 = _mm256_set1_pd(ls[8]); - a3 = _mm256_loadu_pd(&(aa + bs2 * adiag[i])[72]); - w0 = _mm256_fmadd_pd(a3, v0, w0); - a4 = _mm256_loadu_pd(&(aa + 
bs2 * adiag[i])[76]); - w1 = _mm256_fmadd_pd(a4, v0, w1); - a2 = _mm256_maskload_pd(&(aa + bs2 * adiag[i])[80], _mm256_set_epi64x(0LL, 0LL, 0LL, 1LL << 63)); - w2 = _mm256_fmadd_pd(a2, v0, w2); - - _mm256_storeu_pd(&(t + i * bs)[0], w0); - _mm256_storeu_pd(&(t + i * bs)[4], w1); - _mm256_maskstore_pd(&(t + i * bs)[8], _mm256_set_epi64x(0LL, 0LL, 0LL, 1LL << 63), w2); - - PetscCall(PetscArraycpy(x + i * bs, t + i * bs, bs)); - } - - PetscCall(VecRestoreArrayRead(bb, &b)); - PetscCall(VecRestoreArray(xx, &x)); - PetscCall(PetscLogFlops(2.0 * (a->bs2) * (a->nz) - A->rmap->bs * A->cmap->n)); - PetscFunctionReturn(PETSC_SUCCESS); -} -#endif diff --git a/src/mat/impls/baij/seq/baijfact9.c b/src/mat/impls/baij/seq/baijfact9.c index 9b944c84095..66dde5c5982 100644 --- a/src/mat/impls/baij/seq/baijfact9.c +++ b/src/mat/impls/baij/seq/baijfact9.c @@ -764,3 +764,414 @@ PetscErrorCode MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering(Mat B, Mat A, const PetscCall(PetscLogFlops(1.333333333333 * 5 * 5 * 5 * n)); /* from inverting diagonal blocks */ PetscFunctionReturn(PETSC_SUCCESS); } + +/* + Version for when blocks are 9 by 9 + */ +#if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) + #include +PetscErrorCode MatLUFactorNumeric_SeqBAIJ_9_NaturalOrdering(Mat B, Mat A, const MatFactorInfo *info) +{ + Mat C = B; + Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)C->data; + PetscInt i, j, k, nz, nzL, row; + const PetscInt n = a->mbs, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j; + const PetscInt *ajtmp, *bjtmp, *bdiag = b->diag, *pj, bs2 = a->bs2; + MatScalar *rtmp, *pc, *mwork, *v, *pv, *aa = a->a; + PetscInt flg; + PetscReal shift = info->shiftamount; + PetscBool allowzeropivot, zeropivotdetected; + + PetscFunctionBegin; + allowzeropivot = PetscNot(A->erroriffailure); + + /* generate work space needed by the factorization */ + 
PetscCall(PetscMalloc2(bs2 * n, &rtmp, bs2, &mwork)); + PetscCall(PetscArrayzero(rtmp, bs2 * n)); + + for (i = 0; i < n; i++) { + /* zero rtmp */ + /* L part */ + nz = bi[i + 1] - bi[i]; + bjtmp = bj + bi[i]; + for (j = 0; j < nz; j++) PetscCall(PetscArrayzero(rtmp + bs2 * bjtmp[j], bs2)); + + /* U part */ + nz = bdiag[i] - bdiag[i + 1]; + bjtmp = bj + bdiag[i + 1] + 1; + for (j = 0; j < nz; j++) PetscCall(PetscArrayzero(rtmp + bs2 * bjtmp[j], bs2)); + + /* load in initial (unfactored row) */ + nz = ai[i + 1] - ai[i]; + ajtmp = aj + ai[i]; + v = aa + bs2 * ai[i]; + for (j = 0; j < nz; j++) PetscCall(PetscArraycpy(rtmp + bs2 * ajtmp[j], v + bs2 * j, bs2)); + + /* elimination */ + bjtmp = bj + bi[i]; + nzL = bi[i + 1] - bi[i]; + for (k = 0; k < nzL; k++) { + row = bjtmp[k]; + pc = rtmp + bs2 * row; + for (flg = 0, j = 0; j < bs2; j++) { + if (pc[j] != 0.0) { + flg = 1; + break; + } + } + if (flg) { + pv = b->a + bs2 * bdiag[row]; + /* PetscKernel_A_gets_A_times_B(bs,pc,pv,mwork); *pc = *pc * (*pv); */ + PetscCall(PetscKernel_A_gets_A_times_B_9(pc, pv, mwork)); + + pj = b->j + bdiag[row + 1] + 1; /* beginning of U(row,:) */ + pv = b->a + bs2 * (bdiag[row + 1] + 1); + nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries inU(row,:), excluding diag */ + for (j = 0; j < nz; j++) { + /* PetscKernel_A_gets_A_minus_B_times_C(bs,rtmp+bs2*pj[j],pc,pv+bs2*j); */ + /* rtmp+bs2*pj[j] = rtmp+bs2*pj[j] - (*pc)*(pv+bs2*j) */ + v = rtmp + bs2 * pj[j]; + PetscCall(PetscKernel_A_gets_A_minus_B_times_C_9(v, pc, pv + 81 * j)); + /* pv incremented in PetscKernel_A_gets_A_minus_B_times_C_9 */ + } + PetscCall(PetscLogFlops(1458 * nz + 1377)); /* flops = 2*bs^3*nz + 2*bs^3 - bs2) */ + } + } + + /* finished row so stick it into b->a */ + /* L part */ + pv = b->a + bs2 * bi[i]; + pj = b->j + bi[i]; + nz = bi[i + 1] - bi[i]; + for (j = 0; j < nz; j++) PetscCall(PetscArraycpy(pv + bs2 * j, rtmp + bs2 * pj[j], bs2)); + + /* Mark diagonal and invert diagonal for simpler triangular solves */ + 
pv = b->a + bs2 * bdiag[i]; + pj = b->j + bdiag[i]; + PetscCall(PetscArraycpy(pv, rtmp + bs2 * pj[0], bs2)); + PetscCall(PetscKernel_A_gets_inverse_A_9(pv, shift, allowzeropivot, &zeropivotdetected)); + if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; + + /* U part */ + pv = b->a + bs2 * (bdiag[i + 1] + 1); + pj = b->j + bdiag[i + 1] + 1; + nz = bdiag[i] - bdiag[i + 1] - 1; + for (j = 0; j < nz; j++) PetscCall(PetscArraycpy(pv + bs2 * j, rtmp + bs2 * pj[j], bs2)); + } + PetscCall(PetscFree2(rtmp, mwork)); + + C->ops->solve = MatSolve_SeqBAIJ_9_NaturalOrdering; + C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_N; + C->assembled = PETSC_TRUE; + + PetscCall(PetscLogFlops(1.333333333333 * 9 * 9 * 9 * n)); /* from inverting diagonal blocks */ + PetscFunctionReturn(PETSC_SUCCESS); +} + +PetscErrorCode MatSolve_SeqBAIJ_9_NaturalOrdering(Mat A, Vec bb, Vec xx) +{ + Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; + const PetscInt *ai = a->i, *aj = a->j, *adiag = a->diag, *vi; + PetscInt i, k, n = a->mbs; + PetscInt nz, bs = A->rmap->bs, bs2 = a->bs2; + const MatScalar *aa = a->a, *v; + PetscScalar *x, *s, *t, *ls; + const PetscScalar *b; + __m256d a0, a1, a2, a3, a4, a5, w0, w1, w2, w3, s0, s1, s2, v0, v1, v2, v3; + + PetscFunctionBegin; + PetscCall(VecGetArrayRead(bb, &b)); + PetscCall(VecGetArray(xx, &x)); + t = a->solve_work; + + /* forward solve the lower triangular */ + PetscCall(PetscArraycpy(t, b, bs)); /* copy 1st block of b to t */ + + for (i = 1; i < n; i++) { + v = aa + bs2 * ai[i]; + vi = aj + ai[i]; + nz = ai[i + 1] - ai[i]; + s = t + bs * i; + PetscCall(PetscArraycpy(s, b + bs * i, bs)); /* copy i_th block of b to t */ + + __m256d s0, s1, s2; + s0 = _mm256_loadu_pd(s + 0); + s1 = _mm256_loadu_pd(s + 4); + s2 = _mm256_maskload_pd(s + 8, _mm256_set_epi64x(0LL, 0LL, 0LL, 1LL << 63)); + + for (k = 0; k < nz; k++) { + w0 = _mm256_set1_pd((t + bs * vi[k])[0]); + a0 = _mm256_loadu_pd(&v[0]); + s0 = _mm256_fnmadd_pd(a0, w0, s0); + a1 = 
_mm256_loadu_pd(&v[4]); + s1 = _mm256_fnmadd_pd(a1, w0, s1); + a2 = _mm256_loadu_pd(&v[8]); + s2 = _mm256_fnmadd_pd(a2, w0, s2); + + w1 = _mm256_set1_pd((t + bs * vi[k])[1]); + a3 = _mm256_loadu_pd(&v[9]); + s0 = _mm256_fnmadd_pd(a3, w1, s0); + a4 = _mm256_loadu_pd(&v[13]); + s1 = _mm256_fnmadd_pd(a4, w1, s1); + a5 = _mm256_loadu_pd(&v[17]); + s2 = _mm256_fnmadd_pd(a5, w1, s2); + + w2 = _mm256_set1_pd((t + bs * vi[k])[2]); + a0 = _mm256_loadu_pd(&v[18]); + s0 = _mm256_fnmadd_pd(a0, w2, s0); + a1 = _mm256_loadu_pd(&v[22]); + s1 = _mm256_fnmadd_pd(a1, w2, s1); + a2 = _mm256_loadu_pd(&v[26]); + s2 = _mm256_fnmadd_pd(a2, w2, s2); + + w3 = _mm256_set1_pd((t + bs * vi[k])[3]); + a3 = _mm256_loadu_pd(&v[27]); + s0 = _mm256_fnmadd_pd(a3, w3, s0); + a4 = _mm256_loadu_pd(&v[31]); + s1 = _mm256_fnmadd_pd(a4, w3, s1); + a5 = _mm256_loadu_pd(&v[35]); + s2 = _mm256_fnmadd_pd(a5, w3, s2); + + w0 = _mm256_set1_pd((t + bs * vi[k])[4]); + a0 = _mm256_loadu_pd(&v[36]); + s0 = _mm256_fnmadd_pd(a0, w0, s0); + a1 = _mm256_loadu_pd(&v[40]); + s1 = _mm256_fnmadd_pd(a1, w0, s1); + a2 = _mm256_loadu_pd(&v[44]); + s2 = _mm256_fnmadd_pd(a2, w0, s2); + + w1 = _mm256_set1_pd((t + bs * vi[k])[5]); + a3 = _mm256_loadu_pd(&v[45]); + s0 = _mm256_fnmadd_pd(a3, w1, s0); + a4 = _mm256_loadu_pd(&v[49]); + s1 = _mm256_fnmadd_pd(a4, w1, s1); + a5 = _mm256_loadu_pd(&v[53]); + s2 = _mm256_fnmadd_pd(a5, w1, s2); + + w2 = _mm256_set1_pd((t + bs * vi[k])[6]); + a0 = _mm256_loadu_pd(&v[54]); + s0 = _mm256_fnmadd_pd(a0, w2, s0); + a1 = _mm256_loadu_pd(&v[58]); + s1 = _mm256_fnmadd_pd(a1, w2, s1); + a2 = _mm256_loadu_pd(&v[62]); + s2 = _mm256_fnmadd_pd(a2, w2, s2); + + w3 = _mm256_set1_pd((t + bs * vi[k])[7]); + a3 = _mm256_loadu_pd(&v[63]); + s0 = _mm256_fnmadd_pd(a3, w3, s0); + a4 = _mm256_loadu_pd(&v[67]); + s1 = _mm256_fnmadd_pd(a4, w3, s1); + a5 = _mm256_loadu_pd(&v[71]); + s2 = _mm256_fnmadd_pd(a5, w3, s2); + + w0 = _mm256_set1_pd((t + bs * vi[k])[8]); + a0 = _mm256_loadu_pd(&v[72]); + s0 = 
_mm256_fnmadd_pd(a0, w0, s0); + a1 = _mm256_loadu_pd(&v[76]); + s1 = _mm256_fnmadd_pd(a1, w0, s1); + a2 = _mm256_maskload_pd(v + 80, _mm256_set_epi64x(0LL, 0LL, 0LL, 1LL << 63)); + s2 = _mm256_fnmadd_pd(a2, w0, s2); + v += bs2; + } + _mm256_storeu_pd(&s[0], s0); + _mm256_storeu_pd(&s[4], s1); + _mm256_maskstore_pd(&s[8], _mm256_set_epi64x(0LL, 0LL, 0LL, 1LL << 63), s2); + } + + /* backward solve the upper triangular */ + ls = a->solve_work + A->cmap->n; + for (i = n - 1; i >= 0; i--) { + v = aa + bs2 * (adiag[i + 1] + 1); + vi = aj + adiag[i + 1] + 1; + nz = adiag[i] - adiag[i + 1] - 1; + PetscCall(PetscArraycpy(ls, t + i * bs, bs)); + + s0 = _mm256_loadu_pd(ls + 0); + s1 = _mm256_loadu_pd(ls + 4); + s2 = _mm256_maskload_pd(ls + 8, _mm256_set_epi64x(0LL, 0LL, 0LL, 1LL << 63)); + + for (k = 0; k < nz; k++) { + w0 = _mm256_set1_pd((t + bs * vi[k])[0]); + a0 = _mm256_loadu_pd(&v[0]); + s0 = _mm256_fnmadd_pd(a0, w0, s0); + a1 = _mm256_loadu_pd(&v[4]); + s1 = _mm256_fnmadd_pd(a1, w0, s1); + a2 = _mm256_loadu_pd(&v[8]); + s2 = _mm256_fnmadd_pd(a2, w0, s2); + + /* v += 9; */ + w1 = _mm256_set1_pd((t + bs * vi[k])[1]); + a3 = _mm256_loadu_pd(&v[9]); + s0 = _mm256_fnmadd_pd(a3, w1, s0); + a4 = _mm256_loadu_pd(&v[13]); + s1 = _mm256_fnmadd_pd(a4, w1, s1); + a5 = _mm256_loadu_pd(&v[17]); + s2 = _mm256_fnmadd_pd(a5, w1, s2); + + /* v += 9; */ + w2 = _mm256_set1_pd((t + bs * vi[k])[2]); + a0 = _mm256_loadu_pd(&v[18]); + s0 = _mm256_fnmadd_pd(a0, w2, s0); + a1 = _mm256_loadu_pd(&v[22]); + s1 = _mm256_fnmadd_pd(a1, w2, s1); + a2 = _mm256_loadu_pd(&v[26]); + s2 = _mm256_fnmadd_pd(a2, w2, s2); + + /* v += 9; */ + w3 = _mm256_set1_pd((t + bs * vi[k])[3]); + a3 = _mm256_loadu_pd(&v[27]); + s0 = _mm256_fnmadd_pd(a3, w3, s0); + a4 = _mm256_loadu_pd(&v[31]); + s1 = _mm256_fnmadd_pd(a4, w3, s1); + a5 = _mm256_loadu_pd(&v[35]); + s2 = _mm256_fnmadd_pd(a5, w3, s2); + + /* v += 9; */ + w0 = _mm256_set1_pd((t + bs * vi[k])[4]); + a0 = _mm256_loadu_pd(&v[36]); + s0 = _mm256_fnmadd_pd(a0, w0, 
s0); + a1 = _mm256_loadu_pd(&v[40]); + s1 = _mm256_fnmadd_pd(a1, w0, s1); + a2 = _mm256_loadu_pd(&v[44]); + s2 = _mm256_fnmadd_pd(a2, w0, s2); + + /* v += 9; */ + w1 = _mm256_set1_pd((t + bs * vi[k])[5]); + a3 = _mm256_loadu_pd(&v[45]); + s0 = _mm256_fnmadd_pd(a3, w1, s0); + a4 = _mm256_loadu_pd(&v[49]); + s1 = _mm256_fnmadd_pd(a4, w1, s1); + a5 = _mm256_loadu_pd(&v[53]); + s2 = _mm256_fnmadd_pd(a5, w1, s2); + + /* v += 9; */ + w2 = _mm256_set1_pd((t + bs * vi[k])[6]); + a0 = _mm256_loadu_pd(&v[54]); + s0 = _mm256_fnmadd_pd(a0, w2, s0); + a1 = _mm256_loadu_pd(&v[58]); + s1 = _mm256_fnmadd_pd(a1, w2, s1); + a2 = _mm256_loadu_pd(&v[62]); + s2 = _mm256_fnmadd_pd(a2, w2, s2); + + /* v += 9; */ + w3 = _mm256_set1_pd((t + bs * vi[k])[7]); + a3 = _mm256_loadu_pd(&v[63]); + s0 = _mm256_fnmadd_pd(a3, w3, s0); + a4 = _mm256_loadu_pd(&v[67]); + s1 = _mm256_fnmadd_pd(a4, w3, s1); + a5 = _mm256_loadu_pd(&v[71]); + s2 = _mm256_fnmadd_pd(a5, w3, s2); + + /* v += 9; */ + w0 = _mm256_set1_pd((t + bs * vi[k])[8]); + a0 = _mm256_loadu_pd(&v[72]); + s0 = _mm256_fnmadd_pd(a0, w0, s0); + a1 = _mm256_loadu_pd(&v[76]); + s1 = _mm256_fnmadd_pd(a1, w0, s1); + a2 = _mm256_maskload_pd(v + 80, _mm256_set_epi64x(0LL, 0LL, 0LL, 1LL << 63)); + s2 = _mm256_fnmadd_pd(a2, w0, s2); + v += bs2; + } + + _mm256_storeu_pd(&ls[0], s0); + _mm256_storeu_pd(&ls[4], s1); + _mm256_maskstore_pd(&ls[8], _mm256_set_epi64x(0LL, 0LL, 0LL, 1LL << 63), s2); + + w0 = _mm256_setzero_pd(); + w1 = _mm256_setzero_pd(); + w2 = _mm256_setzero_pd(); + + /* first row */ + v0 = _mm256_set1_pd(ls[0]); + a0 = _mm256_loadu_pd(&(aa + bs2 * adiag[i])[0]); + w0 = _mm256_fmadd_pd(a0, v0, w0); + a1 = _mm256_loadu_pd(&(aa + bs2 * adiag[i])[4]); + w1 = _mm256_fmadd_pd(a1, v0, w1); + a2 = _mm256_loadu_pd(&(aa + bs2 * adiag[i])[8]); + w2 = _mm256_fmadd_pd(a2, v0, w2); + + /* second row */ + v1 = _mm256_set1_pd(ls[1]); + a3 = _mm256_loadu_pd(&(aa + bs2 * adiag[i])[9]); + w0 = _mm256_fmadd_pd(a3, v1, w0); + a4 = _mm256_loadu_pd(&(aa + bs2 * 
adiag[i])[13]); + w1 = _mm256_fmadd_pd(a4, v1, w1); + a5 = _mm256_loadu_pd(&(aa + bs2 * adiag[i])[17]); + w2 = _mm256_fmadd_pd(a5, v1, w2); + + /* third row */ + v2 = _mm256_set1_pd(ls[2]); + a0 = _mm256_loadu_pd(&(aa + bs2 * adiag[i])[18]); + w0 = _mm256_fmadd_pd(a0, v2, w0); + a1 = _mm256_loadu_pd(&(aa + bs2 * adiag[i])[22]); + w1 = _mm256_fmadd_pd(a1, v2, w1); + a2 = _mm256_loadu_pd(&(aa + bs2 * adiag[i])[26]); + w2 = _mm256_fmadd_pd(a2, v2, w2); + + /* fourth row */ + v3 = _mm256_set1_pd(ls[3]); + a3 = _mm256_loadu_pd(&(aa + bs2 * adiag[i])[27]); + w0 = _mm256_fmadd_pd(a3, v3, w0); + a4 = _mm256_loadu_pd(&(aa + bs2 * adiag[i])[31]); + w1 = _mm256_fmadd_pd(a4, v3, w1); + a5 = _mm256_loadu_pd(&(aa + bs2 * adiag[i])[35]); + w2 = _mm256_fmadd_pd(a5, v3, w2); + + /* fifth row */ + v0 = _mm256_set1_pd(ls[4]); + a0 = _mm256_loadu_pd(&(aa + bs2 * adiag[i])[36]); + w0 = _mm256_fmadd_pd(a0, v0, w0); + a1 = _mm256_loadu_pd(&(aa + bs2 * adiag[i])[40]); + w1 = _mm256_fmadd_pd(a1, v0, w1); + a2 = _mm256_loadu_pd(&(aa + bs2 * adiag[i])[44]); + w2 = _mm256_fmadd_pd(a2, v0, w2); + + /* sixth row */ + v1 = _mm256_set1_pd(ls[5]); + a3 = _mm256_loadu_pd(&(aa + bs2 * adiag[i])[45]); + w0 = _mm256_fmadd_pd(a3, v1, w0); + a4 = _mm256_loadu_pd(&(aa + bs2 * adiag[i])[49]); + w1 = _mm256_fmadd_pd(a4, v1, w1); + a5 = _mm256_loadu_pd(&(aa + bs2 * adiag[i])[53]); + w2 = _mm256_fmadd_pd(a5, v1, w2); + + /* seventh row */ + v2 = _mm256_set1_pd(ls[6]); + a0 = _mm256_loadu_pd(&(aa + bs2 * adiag[i])[54]); + w0 = _mm256_fmadd_pd(a0, v2, w0); + a1 = _mm256_loadu_pd(&(aa + bs2 * adiag[i])[58]); + w1 = _mm256_fmadd_pd(a1, v2, w1); + a2 = _mm256_loadu_pd(&(aa + bs2 * adiag[i])[62]); + w2 = _mm256_fmadd_pd(a2, v2, w2); + + /* eighth row */ + v3 = _mm256_set1_pd(ls[7]); + a3 = _mm256_loadu_pd(&(aa + bs2 * adiag[i])[63]); + w0 = _mm256_fmadd_pd(a3, v3, w0); + a4 = _mm256_loadu_pd(&(aa + bs2 * adiag[i])[67]); + w1 = _mm256_fmadd_pd(a4, v3, w1); + a5 = _mm256_loadu_pd(&(aa + bs2 * adiag[i])[71]); + w2 = 
_mm256_fmadd_pd(a5, v3, w2); + + /* ninth row */ + v0 = _mm256_set1_pd(ls[8]); + a3 = _mm256_loadu_pd(&(aa + bs2 * adiag[i])[72]); + w0 = _mm256_fmadd_pd(a3, v0, w0); + a4 = _mm256_loadu_pd(&(aa + bs2 * adiag[i])[76]); + w1 = _mm256_fmadd_pd(a4, v0, w1); + a2 = _mm256_maskload_pd(&(aa + bs2 * adiag[i])[80], _mm256_set_epi64x(0LL, 0LL, 0LL, 1LL << 63)); + w2 = _mm256_fmadd_pd(a2, v0, w2); + + _mm256_storeu_pd(&(t + i * bs)[0], w0); + _mm256_storeu_pd(&(t + i * bs)[4], w1); + _mm256_maskstore_pd(&(t + i * bs)[8], _mm256_set_epi64x(0LL, 0LL, 0LL, 1LL << 63), w2); + + PetscCall(PetscArraycpy(x + i * bs, t + i * bs, bs)); + } + + PetscCall(VecRestoreArrayRead(bb, &b)); + PetscCall(VecRestoreArray(xx, &x)); + PetscCall(PetscLogFlops(2.0 * (a->bs2) * (a->nz) - A->rmap->bs * A->cmap->n)); + PetscFunctionReturn(PETSC_SUCCESS); +} +#endif From e5ab922b98e46d08bff5648dc83519731eee5dc4 Mon Sep 17 00:00:00 2001 From: Stefano Zampini Date: Mon, 21 Oct 2024 06:56:18 +0000 Subject: [PATCH 53/59] DMPLEX: remove flush of VTK viewer --- src/dm/impls/plex/plex.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/dm/impls/plex/plex.c b/src/dm/impls/plex/plex.c index 98c46c0d216..6c45ee6ca83 100644 --- a/src/dm/impls/plex/plex.c +++ b/src/dm/impls/plex/plex.c @@ -578,8 +578,6 @@ PetscErrorCode VecView_Plex(Vec v, PetscViewer viewer) PetscCall(VecView_Plex_Local(locv, viewer)); PetscCall(PetscObjectCompose((PetscObject)locv, "__Vec_bc_zero__", NULL)); PetscCall(DMRestoreLocalVector(dm, &locv)); - /* Call flush for proper logging of VecView timings */ - if (isvtk) PetscCall(PetscViewerFlush(viewer)); } else if (ishdf5) { #if defined(PETSC_HAVE_HDF5) PetscCall(VecView_Plex_HDF5_Internal(v, viewer)); From 4a21bc2239901b4c4ff4e1a41ba4d77a8d4785f8 Mon Sep 17 00:00:00 2001 From: James Wright Date: Wed, 2 Oct 2024 11:18:43 -0600 Subject: [PATCH 54/59] doc: Add info to PetscMalloc and PetscCalloc - Adds docs to address potential overflowing and the general handling of the function parameters 
Co-authored-by: Barry Smith Co-authored-by: Jose E. Roman --- include/petscsys.h | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/include/petscsys.h b/include/petscsys.h index 7992b80a362..ac05dae8205 100644 --- a/include/petscsys.h +++ b/include/petscsys.h @@ -504,8 +504,8 @@ M*/ Level: beginner Note: - This uses the sizeof() of the memory type requested to determine the total memory to be allocated, therefore you should not - multiply the number of elements requested by the `sizeof()` the type. For example use + This uses `sizeof()` of the memory type requested to determine the total memory to be allocated; therefore, you should not + multiply the number of elements requested by the `sizeof()` of the type. For example, use .vb PetscInt *id; PetscMalloc1(10,&id); @@ -516,7 +516,26 @@ M*/ PetscMalloc1(10*sizeof(PetscInt),&id); .ve - Does not zero the memory allocated, use `PetscCalloc1()` to obtain memory that has been zeroed. + Does not zero the memory allocated; use `PetscCalloc1()` to obtain memory that has been zeroed. + + The `PetscMalloc[N]()` and `PetscCalloc[N]()` take an argument of type `size_t`! However, most codes pass a value computed from `int` or `PetscInt` variables. This can overflow in + 32-bit `int` computation - while computation in 64-bit `size_t` would not overflow! + It's best if any arithmetic that is done for size computations is done with `size_t` type - avoiding arithmetic overflow! + + `PetscMalloc[N]()` and `PetscCalloc[N]()` attempt to work around this by casting the first variable to `size_t`. + This works for most expressions, but not all, such as +.vb + PetscInt *id, a, b; + PetscMalloc1(use_a_squared ?
a * a * b : a * b, &id); // use_a_squared is cast to size_t, but a and b are still PetscInt + PetscMalloc1(a + b * b, &id); // a is cast to size_t, but b * b is performed at PetscInt precision first due to order-of-operations +.ve + + These expressions should either be avoided or rewritten with the variables appropriately cast to `size_t`: +.vb + PetscInt *id, a, b; + PetscMalloc1(use_a_squared ? (size_t)a * a * b : (size_t)a * b, &id); // Cast a to size_t before multiplication + PetscMalloc1(b * b + a, &id); // b is automatically cast to size_t and order-of-operations ensures size_t precision is maintained +.ve .seealso: `PetscFree()`, `PetscNew()`, `PetscMalloc()`, `PetscCalloc1()`, `PetscMalloc2()` M*/ From 32603206efff945aaef677fd0dad12e9aa894f1f Mon Sep 17 00:00:00 2001 From: James Wright Date: Wed, 2 Oct 2024 16:25:37 -0600 Subject: [PATCH 55/59] Address potential Malloc/Calloc ternary operator bugs - The bugs arise either from the argument of the ternary operator being inappropriately cast to `size_t` or from the values themselves not being cast to `size_t` when necessary.
--- src/dm/field/impls/ds/dmfieldds.c | 4 ++-- src/dm/impls/plex/plexfem.c | 10 +++++----- src/dm/impls/plex/plextree.c | 2 +- src/ksp/pc/impls/hpddm/pchpddm.cxx | 4 ++-- src/mat/impls/aij/mpi/mpiaij.c | 4 ++-- src/mat/impls/aij/seq/inode.c | 2 +- src/ts/utils/dmplexlandau/plexland.c | 2 +- src/vec/vec/impls/mpi/pbvec.c | 2 +- 8 files changed, 15 insertions(+), 15 deletions(-) diff --git a/src/dm/field/impls/ds/dmfieldds.c b/src/dm/field/impls/ds/dmfieldds.c index 19f04a69d2e..8f689614f9f 100644 --- a/src/dm/field/impls/ds/dmfieldds.c +++ b/src/dm/field/impls/ds/dmfieldds.c @@ -269,8 +269,8 @@ static PetscErrorCode DMFieldEvaluate_DS(DMField field, Vec points, PetscDataTyp } PetscCall(PetscMalloc3(gatherSize * dim, &cellPoints, gatherMax * dim, &coordsReal, gatherMax * dimR, &coordsRef)); PetscCall(PetscMalloc4(gatherMax * dimR, &v, gatherMax * dimR * dimR, &J, gatherMax * dimR * dimR, &invJ, gatherMax, &detJ)); - if (datatype == PETSC_SCALAR) PetscCall(PetscMalloc3(B ? nc * gatherSize : 0, &cellBs, D ? nc * dim * gatherSize : 0, &cellDs, H ? nc * dim * dim * gatherSize : 0, &cellHs)); - else PetscCall(PetscMalloc3(B ? nc * gatherSize : 0, &cellBr, D ? nc * dim * gatherSize : 0, &cellDr, H ? nc * dim * dim * gatherSize : 0, &cellHr)); + if (datatype == PETSC_SCALAR) PetscCall(PetscMalloc3((B ? (size_t)nc * gatherSize : 0), &cellBs, (D ? (size_t)nc * dim * gatherSize : 0), &cellDs, (H ? (size_t)nc * dim * dim * gatherSize : 0), &cellHs)); + else PetscCall(PetscMalloc3((B ? (size_t)nc * gatherSize : 0), &cellBr, (D ? (size_t)nc * dim * gatherSize : 0), &cellDr, (H ? 
(size_t)nc * dim * dim * gatherSize : 0), &cellHr)); PetscCallMPI(MPI_Type_contiguous((PetscMPIInt)dim, MPIU_SCALAR, &pointType)); PetscCallMPI(MPI_Type_commit(&pointType)); diff --git a/src/dm/impls/plex/plexfem.c b/src/dm/impls/plex/plexfem.c index 6a1c43cfcc8..79d4d6b5531 100644 --- a/src/dm/impls/plex/plexfem.c +++ b/src/dm/impls/plex/plexfem.c @@ -2689,7 +2689,7 @@ static PetscErrorCode DMPlexComputeBdIntegral_Internal(DM dm, Vec locX, IS point PetscCall(ISGetLocalSize(pointIS, &numFaces)); PetscCall(ISGetIndices(pointIS, &points)); - PetscCall(PetscCalloc2(numFaces * totDim, &u, locA ? numFaces * totDimAux : 0, &a)); + PetscCall(PetscCalloc2(numFaces * totDim, &u, (locA ? (size_t)numFaces * totDimAux : 0), &a)); PetscCall(DMFieldGetDegree(coordField, pointIS, NULL, &maxDegree)); for (face = 0; face < numFaces; ++face) { const PetscInt point = points[face], *support; @@ -4857,7 +4857,7 @@ static PetscErrorCode DMPlexComputeBdResidual_Single_Internal(DM dm, PetscReal t } PetscCall(ISGetLocalSize(pointIS, &numFaces)); PetscCall(ISGetIndices(pointIS, &points)); - PetscCall(PetscMalloc4(numFaces * totDim, &u, locX_t ? numFaces * totDim : 0, &u_t, numFaces * totDim, &elemVec, locA ? numFaces * totDimAux : 0, &a)); + PetscCall(PetscMalloc4(numFaces * totDim, &u, (locX_t ? (size_t)numFaces * totDim : 0), &u_t, numFaces * totDim, &elemVec, (locA ? (size_t)numFaces * totDimAux : 0), &a)); PetscCall(DMFieldGetDegree(coordField, pointIS, NULL, &maxDegree)); if (maxDegree <= 1) PetscCall(DMFieldCreateDefaultQuadrature(coordField, pointIS, &qGeom)); if (!qGeom) { @@ -5738,7 +5738,7 @@ static PetscErrorCode DMPlexComputeBdJacobian_Single_Internal(DM dm, PetscReal t } PetscCall(ISGetLocalSize(pointIS, &numFaces)); PetscCall(ISGetIndices(pointIS, &points)); - PetscCall(PetscMalloc5(numFaces * totDim, &u, locX_t ? numFaces * totDim : 0, &u_t, hasJac ? numFaces * totDim * totDim : 0, &elemMat, hasPrec ? numFaces * totDim * totDim : 0, &elemMatP, locA ? 
numFaces * totDimAux : 0, &a)); + PetscCall(PetscMalloc5(numFaces * totDim, &u, (locX_t ? (size_t)numFaces * totDim : 0), &u_t, (hasJac ? (size_t)numFaces * totDim * totDim : 0), &elemMat, (hasPrec ? (size_t)numFaces * totDim * totDim : 0), &elemMatP, (locA ? (size_t)numFaces * totDimAux : 0), &a)); PetscCall(DMFieldGetDegree(coordField, pointIS, NULL, &maxDegree)); if (maxDegree <= 1) PetscCall(DMFieldCreateDefaultQuadrature(coordField, pointIS, &qGeom)); if (!qGeom) { @@ -5940,7 +5940,7 @@ PetscErrorCode DMPlexComputeJacobian_Internal(DM dm, PetscFormKey key, IS cellIS if (hasJac && Jac == JacP) hasPrec = PETSC_FALSE; PetscCall(PetscDSHasDynamicJacobian(prob, &hasDyn)); hasDyn = hasDyn && (X_tShift != 0.0) ? PETSC_TRUE : PETSC_FALSE; - PetscCall(PetscMalloc5(numCells * totDim, &u, X_t ? numCells * totDim : 0, &u_t, hasJac ? numCells * totDim * totDim : 0, &elemMat, hasPrec ? numCells * totDim * totDim : 0, &elemMatP, hasDyn ? numCells * totDim * totDim : 0, &elemMatD)); + PetscCall(PetscMalloc5(numCells * totDim, &u, (X_t ? (size_t)numCells * totDim : 0), &u_t, (hasJac ? (size_t)numCells * totDim * totDim : 0), &elemMat, (hasPrec ? (size_t)numCells * totDim * totDim : 0), &elemMatP, (hasDyn ? (size_t)numCells * totDim * totDim : 0), &elemMatD)); if (dmAux) PetscCall(PetscMalloc1(numCells * totDimAux, &a)); for (c = cStart; c < cEnd; ++c) { const PetscInt cell = cells ? cells[c] : c; @@ -6492,7 +6492,7 @@ PetscErrorCode DMPlexComputeJacobian_Action_Internal(DM dm, PetscFormKey key, IS PetscCall(PetscDSGetTotalDimension(probAux, &totDimAux)); } PetscCall(VecSet(Z, 0.0)); - PetscCall(PetscMalloc6(numCells * totDim, &u, X_t ? numCells * totDim : 0, &u_t, numCells * totDim * totDim, &elemMat, hasDyn ? numCells * totDim * totDim : 0, &elemMatD, numCells * totDim, &y, totDim, &z)); + PetscCall(PetscMalloc6(numCells * totDim, &u, (X_t ? (size_t)numCells * totDim : 0), &u_t, numCells * totDim * totDim, &elemMat, (hasDyn ? 
(size_t)numCells * totDim * totDim : 0), &elemMatD, numCells * totDim, &y, totDim, &z)); if (dmAux) PetscCall(PetscMalloc1(numCells * totDimAux, &a)); PetscCall(DMGetCoordinateField(dm, &coordField)); for (c = cStart; c < cEnd; ++c) { diff --git a/src/dm/impls/plex/plextree.c b/src/dm/impls/plex/plextree.c index f6dcf1336b8..254ebaedca5 100644 --- a/src/dm/impls/plex/plextree.c +++ b/src/dm/impls/plex/plextree.c @@ -3316,7 +3316,7 @@ static PetscErrorCode DMPlexTransferInjectorTree(DM coarse, DM fine, PetscSF coa PetscCall(PetscMalloc1(numPointsWithDofs, &pointsWithDofs)); PetscCall(PetscSectionSetUp(leafIndicesSec)); PetscCall(PetscSectionGetStorageSize(leafIndicesSec, &numIndices)); - PetscCall(PetscMalloc1(gatheredIndices ? numIndices : (maxDof + 1), &leafInds)); + PetscCall(PetscMalloc1((gatheredIndices ? numIndices : (maxDof + 1)), &leafInds)); if (gatheredValues) PetscCall(PetscMalloc1(numIndices, &leafVals)); for (l = 0, offset = 0; l < nleaves; l++) { p = leaves ? leaves[l] : l; diff --git a/src/ksp/pc/impls/hpddm/pchpddm.cxx b/src/ksp/pc/impls/hpddm/pchpddm.cxx index 659149baf1b..f4ef3dc38d0 100644 --- a/src/ksp/pc/impls/hpddm/pchpddm.cxx +++ b/src/ksp/pc/impls/hpddm/pchpddm.cxx @@ -2084,7 +2084,7 @@ static PetscErrorCode PCSetUp_HPDDM(PC pc) if (!flg) PetscCall(PetscOptionsHasName(nullptr, prefix, "-svd_relative_threshold", &flg)); PetscCall(ISSort(ov[0])); if (!flg) PetscCall(ISSort(ov[1])); - PetscCall(PetscMalloc1(!flg ? 5 : 3, &h->is)); + PetscCall(PetscCalloc1(5, &h->is)); PetscCall(MatCreateSubMatrices(uaux ? uaux : P, 1, ov + !flg, ov + 1, MAT_INITIAL_MATRIX, &a)); /* submatrix from above, either square (!flg) or rectangular (flg) */ for (PetscInt j = 0; j < 2; ++j) { PetscCall(ISGetIndices(ov[j], i + j)); @@ -3089,7 +3089,7 @@ static PetscErrorCode MatDestroy_Harmonic(Mat A) PetscFunctionBegin; PetscCall(MatShellGetContext(A, &h)); - for (PetscInt i = 0; i < (h->A[1] ? 
5 : 3); ++i) PetscCall(ISDestroy(h->is + i)); + for (PetscInt i = 0; i < 5; ++i) PetscCall(ISDestroy(h->is + i)); PetscCall(PetscFree(h->is)); PetscCall(VecDestroy(&h->v)); for (PetscInt i = 0; i < 2; ++i) PetscCall(MatDestroy(h->A + i)); diff --git a/src/mat/impls/aij/mpi/mpiaij.c b/src/mat/impls/aij/mpi/mpiaij.c index 65e4f3d0d52..df21bdb30cc 100644 --- a/src/mat/impls/aij/mpi/mpiaij.c +++ b/src/mat/impls/aij/mpi/mpiaij.c @@ -7816,7 +7816,7 @@ PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmet b = d->B; } PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc)); - PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz)); + PetscCall(PetscMalloc2(nloc, &d_nnz, (isseqaij ? 0 : nloc), &o_nnz)); for (c = a, kk = 0; c && kk < 2; c = b, kk++) { PetscInt *nnz = (c == a) ? d_nnz : o_nnz; const PetscInt *cols1, *cols2; @@ -7938,7 +7938,7 @@ PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmet Determine the preallocation needed for the scalar matrix derived from the vector matrix. */ PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n")); - PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz)); + PetscCall(PetscMalloc2(nloc, &d_nnz, (isseqaij ? 0 : nloc), &o_nnz)); if (isseqaij) { PetscInt max_d_nnz; diff --git a/src/mat/impls/aij/seq/inode.c b/src/mat/impls/aij/seq/inode.c index 0936727980f..f49f0d48366 100644 --- a/src/mat/impls/aij/seq/inode.c +++ b/src/mat/impls/aij/seq/inode.c @@ -4521,7 +4521,7 @@ PetscErrorCode MatInodeAdjustForInodes_SeqAIJ_Inode(Mat A, IS *rperm, IS *cperm) if (a->inode.node_count == m) PetscFunctionReturn(PETSC_SUCCESS); /* all inodes are of size 1 */ PetscCall(MatCreateColInode_Private(A, &nslim_col, &ns_col)); - PetscCall(PetscMalloc1(((nslim_row > nslim_col) ? nslim_row : nslim_col) + 1, &tns)); + PetscCall(PetscMalloc1(((nslim_row > nslim_col ? 
nslim_row : nslim_col) + 1), &tns)); PetscCall(PetscMalloc2(m, &permr, n, &permc)); PetscCall(ISGetIndices(ris, &ridx)); diff --git a/src/ts/utils/dmplexlandau/plexland.c b/src/ts/utils/dmplexlandau/plexland.c index 52229227527..03c98e7a7c9 100644 --- a/src/ts/utils/dmplexlandau/plexland.c +++ b/src/ts/utils/dmplexlandau/plexland.c @@ -241,7 +241,7 @@ static PetscErrorCode LandauFormJacobian_Internal(Vec a_X, Mat JacP, const Petsc starttime = MPI_Wtime(); #endif PetscCall(PetscLogEventBegin(ctx->events[8], 0, 0, 0, 0)); - PetscCall(PetscMalloc4(IPf_sz_tot, &ff, IPf_sz_tot, &dudx, IPf_sz_tot, &dudy, dim == 3 ? IPf_sz_tot : 0, &dudz)); + PetscCall(PetscMalloc4(IPf_sz_tot, &ff, IPf_sz_tot, &dudx, IPf_sz_tot, &dudy, (dim == 3 ? IPf_sz_tot : 0), &dudz)); // F df/dx for (PetscInt tid = 0; tid < ctx->batch_sz * elem_offset[num_grids]; tid++) { // for each element const PetscInt b_Nelem = elem_offset[num_grids], b_elem_idx = tid % b_Nelem, b_id = tid / b_Nelem; // b_id == OMP thd_id in batch diff --git a/src/vec/vec/impls/mpi/pbvec.c b/src/vec/vec/impls/mpi/pbvec.c index 22e786936e6..4f8527826de 100644 --- a/src/vec/vec/impls/mpi/pbvec.c +++ b/src/vec/vec/impls/mpi/pbvec.c @@ -295,7 +295,7 @@ static PetscErrorCode VecAssemblyEnd_MPI_BTS(Vec X) PetscCheck(x->segrecvframe, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing segrecvframe! Probably you forgot to call VecAssemblyBegin() first"); PetscCall(VecGetArray(X, &xarray)); PetscCall(PetscSegBufferExtractInPlace(x->segrecvframe, &frame)); - PetscCall(PetscMalloc2(4 * x->nrecvranks, &some_indices, x->use_status ? 4 * x->nrecvranks : 0, &some_statuses)); + PetscCall(PetscMalloc2(4 * x->nrecvranks, &some_indices, (x->use_status ? 
(size_t)4 * x->nrecvranks : 0), &some_statuses)); for (r = 0, npending = 0; r < x->nrecvranks; r++) npending += frame[r].pendings + frame[r].pendingb; while (npending > 0) { PetscMPIInt ndone = 0, ii; From 6d8ac330347fe166beb0e98a4ca629a709e384d3 Mon Sep 17 00:00:00 2001 From: James Wright Date: Tue, 22 Oct 2024 09:29:00 -0600 Subject: [PATCH 56/59] doc: Fix testing globbing example --- doc/manual/tests.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/manual/tests.rst b/doc/manual/tests.rst index 89d25e5b5d6..74640c3861d 100644 --- a/doc/manual/tests.rst +++ b/doc/manual/tests.rst @@ -105,9 +105,9 @@ Some examples are: .. code-block:: console - $ make test search='ts%' # Run all TS examples + $ make test search='ts*' # Run all TS examples $ make test searchin='tutorials' # Run all tutorials - $ make test search='ts%' searchin='tutorials' # Run all TS tutorials + $ make test search='ts*' searchin='tutorials' # Run all TS tutorials $ make test argsearch='cuda' # Run examples with cuda in arguments $ make test test-fail='1' $ make test query='requires' queryval='*MPI_PROCESS_SHARED_MEMORY*' From 6afbcc8f81ad2e3196e6026a043ded42f7f55267 Mon Sep 17 00:00:00 2001 From: Junchao Zhang Date: Sat, 26 Oct 2024 23:25:56 +0000 Subject: [PATCH 57/59] Update arch-alcf-polaris.py --- config/examples/arch-alcf-polaris.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/examples/arch-alcf-polaris.py b/config/examples/arch-alcf-polaris.py index d2684e85dbd..6a2699dfba1 100755 --- a/config/examples/arch-alcf-polaris.py +++ b/config/examples/arch-alcf-polaris.py @@ -5,7 +5,7 @@ # Note cray-libsci provides BLAS etc. 
In summary, we have # module use /soft/modulefiles # module unload darshan -# module load cudatoolkit-standalone/12.4.1 PrgEnv-gnu cray-libsci nvhpc-mixed craype-accel-nvidia80 +# module load PrgEnv-gnu cray-libsci nvhpc-mixed craype-accel-nvidia80 cudatoolkit-standalone/12.4.1 # export MPICH_GPU_SUPPORT_ENABLED=1 # export MPICH_GPU_IPC_ENABLED=0 # From a2290cb0f6d8be03a5c7e3e7d47c5f61aebdb148 Mon Sep 17 00:00:00 2001 From: Junchao Zhang Date: Sun, 27 Oct 2024 17:57:26 +0000 Subject: [PATCH 58/59] Unify the name for petsc annual user meetings (PAUM) --- doc/community/meetings/meeting.rst | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/doc/community/meetings/meeting.rst b/doc/community/meetings/meeting.rst index aeb25082a0b..37d1968a504 100644 --- a/doc/community/meetings/meeting.rst +++ b/doc/community/meetings/meeting.rst @@ -1,10 +1,10 @@ .. _meetings: -********************* -Annual PETSc Meetings -********************* +********************************* +PETSc Annual User Meetings (PAUM) +********************************* -The PETSc community hosts an annual PETSc-users conference in order to foster +The PETSc community hosts annual user meetings in order to foster continuous dialogue with our community. These yearly meetings allow us to (among other things): @@ -17,7 +17,7 @@ things): developers and designers of simulation packages that use PETSc. - Receive valuable feedback on current or requested features. -PETSc user meetings rely on two-way communication: PETSc developers provide insights into +PETSc annual user meetings rely on two-way communication: PETSc developers provide insights into the latest developments, whereas PETSc users provide input on both technical and non-technical matters, hence aligning development with user needs. In particular, we encourage you to present work illustrating your own use of PETSc. We also invite you to @@ -28,7 +28,8 @@ simulations by scientists and engineers. 
Upcoming Meetings ================= -- The 2025 user meeting will take place May 21-22, 2024 in Buffalo, New York, USA. +- The PETSc Annual User Meeting (PAUM) 2025 will take place in Buffalo, New York, USA on May 19-21, 2025. + - `Submit a presentation or poster `__ Previous Meetings ================= From 7823c76e06f7ea1a1b4f691ec013101633e1735b Mon Sep 17 00:00:00 2001 From: Satish Balay Date: Mon, 28 Oct 2024 11:25:12 -0500 Subject: [PATCH 59/59] Increase patchlevel to 3.22.1 --- doc/install/download.rst | 6 +++--- include/petscversion.h | 2 +- src/binding/petsc4py/src/petsc4py/__init__.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/doc/install/download.rst b/doc/install/download.rst index 6df083ce547..ae599d39087 100644 --- a/doc/install/download.rst +++ b/doc/install/download.rst @@ -31,16 +31,16 @@ Alternative: Obtain Release Version with Tarball Tarball which contains only the source. Documentation available `online `__. -- `petsc-3.22.0.tar.gz `__ +- `petsc-3.22.1.tar.gz `__ Tarball which includes all documentation, recommended for offline use. -- `petsc-with-docs-3.22.0.tar.gz `__ +- `petsc-with-docs-3.22.1.tar.gz `__ Tarball to enable a separate installation of petsc4py. 
-- `petsc4py-3.22.0.tar.gz `__ +- `petsc4py-3.22.1.tar.gz `__ To extract the sources use: diff --git a/include/petscversion.h b/include/petscversion.h index a27ae6e7b5f..a3297273a3d 100644 --- a/include/petscversion.h +++ b/include/petscversion.h @@ -5,7 +5,7 @@ #define PETSC_VERSION_RELEASE 1 #define PETSC_VERSION_MAJOR 3 #define PETSC_VERSION_MINOR 22 -#define PETSC_VERSION_SUBMINOR 0 +#define PETSC_VERSION_SUBMINOR 1 #define PETSC_RELEASE_DATE "Sep 28, 2024" #define PETSC_VERSION_DATE "unknown" diff --git a/src/binding/petsc4py/src/petsc4py/__init__.py b/src/binding/petsc4py/src/petsc4py/__init__.py index 59a251e0cfe..a09febeb05f 100644 --- a/src/binding/petsc4py/src/petsc4py/__init__.py +++ b/src/binding/petsc4py/src/petsc4py/__init__.py @@ -16,7 +16,7 @@ """ __author__ = 'Lisandro Dalcin' -__version__ = '3.22.0' +__version__ = '3.22.1' __credits__ = 'PETSc Team '