Skip to content

Commit

Permalink
Refactor libCEED QFunction source code to accelerate JIT compilation
Browse files Browse the repository at this point in the history
  • Loading branch information
sebastiangrimberg committed Feb 2, 2024
1 parent e7c0456 commit 4a464e8
Show file tree
Hide file tree
Showing 122 changed files with 3,478 additions and 2,738 deletions.
2 changes: 0 additions & 2 deletions palace/fem/integ/curlcurl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,7 @@
PalacePragmaDiagnosticPush
PalacePragmaDiagnosticDisableUnused

#include "fem/qfunctions/hdiv_build_qf.h"
#include "fem/qfunctions/hdiv_qf.h"
#include "fem/qfunctions/l2_build_qf.h"
#include "fem/qfunctions/l2_qf.h"

PalacePragmaDiagnosticPop
Expand Down
1 change: 0 additions & 1 deletion palace/fem/integ/curlcurlmass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
#include "fem/libceed/coefficient.hpp"
#include "fem/libceed/integrator.hpp"

#include "fem/qfunctions/hdivmass_build_qf.h"
#include "fem/qfunctions/hdivmass_qf.h"

namespace palace
Expand Down
1 change: 0 additions & 1 deletion palace/fem/integ/diffusion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
#include "fem/libceed/coefficient.hpp"
#include "fem/libceed/integrator.hpp"

#include "fem/qfunctions/hcurl_build_qf.h"
#include "fem/qfunctions/hcurl_qf.h"

namespace palace
Expand Down
1 change: 0 additions & 1 deletion palace/fem/integ/diffusionmass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
#include "fem/libceed/coefficient.hpp"
#include "fem/libceed/integrator.hpp"

#include "fem/qfunctions/hcurlmass_build_qf.h"
#include "fem/qfunctions/hcurlmass_qf.h"

namespace palace
Expand Down
1 change: 0 additions & 1 deletion palace/fem/integ/divdiv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
#include "fem/libceed/coefficient.hpp"
#include "fem/libceed/integrator.hpp"

#include "fem/qfunctions/l2_build_qf.h"
#include "fem/qfunctions/l2_qf.h"

namespace palace
Expand Down
1 change: 0 additions & 1 deletion palace/fem/integ/divdivmass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
#include "fem/libceed/coefficient.hpp"
#include "fem/libceed/integrator.hpp"

#include "fem/qfunctions/l2mass_build_qf.h"
#include "fem/qfunctions/l2mass_qf.h"

namespace palace
Expand Down
1 change: 0 additions & 1 deletion palace/fem/integ/grad.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
#include "fem/libceed/coefficient.hpp"
#include "fem/libceed/integrator.hpp"

#include "fem/qfunctions/hcurlh1d_build_qf.h"
#include "fem/qfunctions/hcurlh1d_qf.h"

namespace palace
Expand Down
1 change: 0 additions & 1 deletion palace/fem/integ/mass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
#include "fem/libceed/coefficient.hpp"
#include "fem/libceed/integrator.hpp"

#include "fem/qfunctions/h1_build_qf.h"
#include "fem/qfunctions/h1_qf.h"

namespace palace
Expand Down
2 changes: 0 additions & 2 deletions palace/fem/integ/mixedveccurl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,7 @@
PalacePragmaDiagnosticPush
PalacePragmaDiagnosticDisableUnused

#include "fem/qfunctions/hcurlhdiv_build_qf.h"
#include "fem/qfunctions/hcurlhdiv_qf.h"
#include "fem/qfunctions/hdiv_build_qf.h"
#include "fem/qfunctions/hdiv_qf.h"

PalacePragmaDiagnosticPop
Expand Down
1 change: 0 additions & 1 deletion palace/fem/integ/mixedvecgrad.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
#include "fem/libceed/coefficient.hpp"
#include "fem/libceed/integrator.hpp"

#include "fem/qfunctions/hcurl_build_qf.h"
#include "fem/qfunctions/hcurl_qf.h"

namespace palace
Expand Down
3 changes: 0 additions & 3 deletions palace/fem/integ/vecfemass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,8 @@
#include "fem/libceed/coefficient.hpp"
#include "fem/libceed/integrator.hpp"

#include "fem/qfunctions/hcurl_build_qf.h"
#include "fem/qfunctions/hcurl_qf.h"
#include "fem/qfunctions/hcurlhdiv_build_qf.h"
#include "fem/qfunctions/hcurlhdiv_qf.h"
#include "fem/qfunctions/hdiv_build_qf.h"
#include "fem/qfunctions/hdiv_qf.h"

namespace palace
Expand Down
41 changes: 15 additions & 26 deletions palace/fem/libceed/coefficient.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
#include "fem/libceed/ceed.hpp"
#include "models/materialoperator.hpp"

#include "fem/qfunctions/coeff_qf.h"
#include "fem/qfunctions/coeff/coeff_qf.h"

namespace palace::ceed
{
Expand Down Expand Up @@ -36,26 +36,15 @@ auto InitDefaultCoefficient(int dim)

void MakeDiagonalCoefficient(int dim, CeedIntScalar *mat_coeff, CeedScalar a, CeedInt k)
{
switch (dim)
const int coeff_dim = CoeffDim(dim);
for (int i = 0; i < coeff_dim; i++)
{
case 1:
mat_coeff[k].second = a;
break;
case 2:
mat_coeff[3 * k + 0].second = a;
mat_coeff[3 * k + 1].second = 0.0;
mat_coeff[3 * k + 2].second = a;
break;
case 3:
mat_coeff[6 * k + 0].second = a;
mat_coeff[6 * k + 1].second = 0.0;
mat_coeff[6 * k + 2].second = 0.0;
mat_coeff[6 * k + 3].second = a;
mat_coeff[6 * k + 4].second = 0.0;
mat_coeff[6 * k + 5].second = a;
break;
default:
MFEM_ABORT("Unsupported dimension for diagonal coefficient!");
mat_coeff[coeff_dim * k + i].second = 0.0;
}
for (int di = 0; di < dim; ++di)
{
const int idx = (di * dim) - (((di - 1) * di) / 2);
mat_coeff[coeff_dim * k + idx].second = a;
}
}

Expand Down Expand Up @@ -97,8 +86,8 @@ PopulateCoefficientContext(int dim, const MaterialPropertyCoefficient *Q, double

// Map unassigned attributes to zero material property coefficient (the last material
// property is reserved for zero).
std::vector<CeedIntScalar> ctx(2 + attr_mat.Size() +
CoeffDim(dim) * (mat_coeff.SizeK() + 1));
const int coeff_dim = CoeffDim(dim);
std::vector<CeedIntScalar> ctx(2 + attr_mat.Size() + coeff_dim * (mat_coeff.SizeK() + 1));
ctx[0].first = attr_mat.Size();
const int zero_mat = mat_coeff.SizeK();
for (int i = 0; i < attr_mat.Size(); i++)
Expand All @@ -123,16 +112,16 @@ PopulateCoefficientContext(int dim, const MaterialPropertyCoefficient *Q, double
{
for (int di = dj; di < dim; ++di)
{
// Column-major ordering.
const int idx = (dj * dim) - (((dj - 1) * dj) / 2) + di - dj;
MatCoeff(ctx.data())[CoeffDim(dim) * k + idx].second =
a * mat_coeff(di, dj, k); // Column-major
MatCoeff(ctx.data())[coeff_dim * k + idx].second = a * mat_coeff(di, dj, k);
}
}
}
}
for (int d = 0; d < CoeffDim(dim); d++)
for (int d = 0; d < coeff_dim; d++)
{
MatCoeff(ctx.data())[CoeffDim(dim) * zero_mat + d].second = 0.0;
MatCoeff(ctx.data())[coeff_dim * zero_mat + d].second = 0.0;
}

return ctx;
Expand Down
24 changes: 24 additions & 0 deletions palace/fem/qfunctions/1/h1_1_qf.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

#ifndef PALACE_LIBCEED_H1_1_QF_H
#define PALACE_LIBCEED_H1_1_QF_H

#include "../coeff/coeff_1_qf.h"

// Applies the one-component operator at each of the Q points: v = coeff * wdetJ * u.
//   in[0]  : packed array, attr in the first Q entries followed by wdetJ in the next Q.
//   in[1]  : input values u (one per point).
//   out[0] : output values v (one per point).
// The scalar coefficient is obtained from CoeffUnpack1 using ctx and attr converted to
// CeedInt. Always returns 0.
// NOTE(review): wdetJ is presumably quadrature weight times Jacobian determinant — confirm
// against the geometry QFunction that produces it.
CEED_QFUNCTION(f_apply_h1_1)(void *__restrict__ ctx, CeedInt Q, const CeedScalar *const *in,
                             CeedScalar *const *out)
{
  const CeedScalar *attr = in[0];
  const CeedScalar *wdetJ = attr + Q;
  const CeedScalar *u = in[1];
  CeedScalar *v = out[0];
  const CeedIntScalar *coeff_ctx = (const CeedIntScalar *)ctx;

  CeedPragmaSIMD for (CeedInt q = 0; q < Q; q++)
  {
    const CeedScalar mat = CoeffUnpack1(coeff_ctx, (CeedInt)attr[q]);
    const CeedScalar scale = mat * wdetJ[q];

    v[q] = scale * u[q];
  }
  return 0;
}

#endif // PALACE_LIBCEED_H1_1_QF_H
24 changes: 24 additions & 0 deletions palace/fem/qfunctions/1/h1_build_1_qf.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

#ifndef PALACE_LIBCEED_H1_BUILD_1_QF_H
#define PALACE_LIBCEED_H1_BUILD_1_QF_H

#include "../coeff/coeff_1_qf.h"

// Builds the one-component quadrature data at each of the Q points: qd = coeff * wdetJ.
//   in[0]  : packed array, attr in the first Q entries followed by wdetJ in the next Q.
//   out[0] : quadrature data qd (one value per point).
// The scalar coefficient is obtained from CoeffUnpack1 using ctx and attr converted to
// CeedInt. Always returns 0.
CEED_QFUNCTION(f_build_h1_1)(void *__restrict__ ctx, CeedInt Q, const CeedScalar *const *in,
                             CeedScalar *const *out)
{
  const CeedScalar *attr = in[0];
  const CeedScalar *wdetJ = attr + Q;
  CeedScalar *qd = out[0];
  const CeedIntScalar *coeff_ctx = (const CeedIntScalar *)ctx;

  CeedPragmaSIMD for (CeedInt q = 0; q < Q; q++)
  {
    const CeedScalar mat = CoeffUnpack1(coeff_ctx, (CeedInt)attr[q]);

    qd[q] = mat * wdetJ[q];
  }
  return 0;
}

#endif // PALACE_LIBCEED_H1_BUILD_1_QF_H
24 changes: 24 additions & 0 deletions palace/fem/qfunctions/1/l2_1_qf.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

#ifndef PALACE_LIBCEED_L2_1_QF_H
#define PALACE_LIBCEED_L2_1_QF_H

#include "../coeff/coeff_1_qf.h"

// Applies the one-component operator at each of the Q points:
//   v = (coeff * qw * qw / wdetJ) * u.
//   in[0]  : packed array, attr in the first Q entries followed by wdetJ in the next Q.
//   in[1]  : qw (one value per point).
//   in[2]  : input values u (one per point).
//   out[0] : output values v (one per point).
// The scalar coefficient is obtained from CoeffUnpack1 using ctx and attr converted to
// CeedInt. Always returns 0.
// NOTE(review): qw is presumably the bare quadrature weight — confirm with the caller.
CEED_QFUNCTION(f_apply_l2_1)(void *__restrict__ ctx, CeedInt Q, const CeedScalar *const *in,
                             CeedScalar *const *out)
{
  const CeedScalar *attr = in[0];
  const CeedScalar *wdetJ = attr + Q;
  const CeedScalar *qw = in[1];
  const CeedScalar *u = in[2];
  CeedScalar *v = out[0];
  const CeedIntScalar *coeff_ctx = (const CeedIntScalar *)ctx;

  CeedPragmaSIMD for (CeedInt q = 0; q < Q; q++)
  {
    const CeedScalar mat = CoeffUnpack1(coeff_ctx, (CeedInt)attr[q]);
    const CeedScalar scale = mat * qw[q] * qw[q] / wdetJ[q];

    v[q] = scale * u[q];
  }
  return 0;
}

#endif // PALACE_LIBCEED_L2_1_QF_H
24 changes: 24 additions & 0 deletions palace/fem/qfunctions/1/l2_build_1_qf.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

#ifndef PALACE_LIBCEED_L2_BUILD_1_QF_H
#define PALACE_LIBCEED_L2_BUILD_1_QF_H

#include "../coeff/coeff_1_qf.h"

// Builds the one-component quadrature data at each of the Q points:
//   qd = coeff * qw * qw / wdetJ.
//   in[0]  : packed array, attr in the first Q entries followed by wdetJ in the next Q.
//   in[1]  : qw (one value per point).
//   out[0] : quadrature data qd (one value per point).
// The scalar coefficient is obtained from CoeffUnpack1 using ctx and attr converted to
// CeedInt. Always returns 0.
CEED_QFUNCTION(f_build_l2_1)(void *__restrict__ ctx, CeedInt Q, const CeedScalar *const *in,
                             CeedScalar *const *out)
{
  const CeedScalar *attr = in[0];
  const CeedScalar *wdetJ = attr + Q;
  const CeedScalar *qw = in[1];
  CeedScalar *qd = out[0];
  const CeedIntScalar *coeff_ctx = (const CeedIntScalar *)ctx;

  CeedPragmaSIMD for (CeedInt q = 0; q < Q; q++)
  {
    const CeedScalar mat = CoeffUnpack1(coeff_ctx, (CeedInt)attr[q]);

    qd[q] = mat * qw[q] * qw[q] / wdetJ[q];
  }
  return 0;
}

#endif // PALACE_LIBCEED_L2_BUILD_1_QF_H
28 changes: 28 additions & 0 deletions palace/fem/qfunctions/2/h1_2_qf.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

#ifndef PALACE_LIBCEED_H1_2_QF_H
#define PALACE_LIBCEED_H1_2_QF_H

#include "../coeff/coeff_2_qf.h"

// Applies the two-component operator at each of the Q points: v = wdetJ * C * u, where the
// three values filled in by CoeffUnpack2 are used as the symmetric 2x2 matrix
//   C = [c[0] c[1]; c[1] c[2]].
//   in[0]  : packed array, attr in the first Q entries followed by wdetJ in the next Q.
//   in[1]  : input u, two components stored with stride Q (component k at u[q + Q * k]).
//   out[0] : output v, same two-component layout as u.
// The coefficient is looked up using ctx and attr converted to CeedInt. Always returns 0.
CEED_QFUNCTION(f_apply_h1_2)(void *__restrict__ ctx, CeedInt Q, const CeedScalar *const *in,
                             CeedScalar *const *out)
{
  const CeedScalar *attr = in[0];
  const CeedScalar *wdetJ = attr + Q;
  const CeedScalar *u = in[1];
  CeedScalar *v = out[0];
  const CeedIntScalar *coeff_ctx = (const CeedIntScalar *)ctx;

  CeedPragmaSIMD for (CeedInt q = 0; q < Q; q++)
  {
    CeedScalar c[3];
    CoeffUnpack2(coeff_ctx, (CeedInt)attr[q], c);

    // Load both input components before storing either output component.
    const CeedScalar uc0 = u[q];
    const CeedScalar uc1 = u[q + Q];
    v[q] = wdetJ[q] * (c[0] * uc0 + c[1] * uc1);
    v[q + Q] = wdetJ[q] * (c[1] * uc0 + c[2] * uc1);
  }
  return 0;
}

#endif // PALACE_LIBCEED_H1_2_QF_H
27 changes: 27 additions & 0 deletions palace/fem/qfunctions/2/h1_build_2_qf.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

#ifndef PALACE_LIBCEED_H1_BUILD_2_QF_H
#define PALACE_LIBCEED_H1_BUILD_2_QF_H

#include "../coeff/coeff_2_qf.h"

// Builds the two-dimensional quadrature data at each of the Q points: the three values
// filled in by CoeffUnpack2 are each scaled by wdetJ.
//   in[0]  : packed array, attr in the first Q entries followed by wdetJ in the next Q.
//   out[0] : quadrature data qd, three entries per point stored with stride Q (entry k at
//            qd[q + Q * k]).
// The coefficient is looked up using ctx and attr converted to CeedInt. Always returns 0.
CEED_QFUNCTION(f_build_h1_2)(void *__restrict__ ctx, CeedInt Q, const CeedScalar *const *in,
                             CeedScalar *const *out)
{
  const CeedScalar *attr = in[0];
  const CeedScalar *wdetJ = attr + Q;
  CeedScalar *qd = out[0];
  const CeedIntScalar *coeff_ctx = (const CeedIntScalar *)ctx;

  CeedPragmaSIMD for (CeedInt q = 0; q < Q; q++)
  {
    CeedScalar c[3];
    CoeffUnpack2(coeff_ctx, (CeedInt)attr[q], c);

    qd[q] = wdetJ[q] * c[0];
    qd[q + Q] = wdetJ[q] * c[1];
    qd[q + Q * 2] = wdetJ[q] * c[2];
  }
  return 0;
}

#endif // PALACE_LIBCEED_H1_BUILD_2_QF_H
29 changes: 29 additions & 0 deletions palace/fem/qfunctions/2/l2_2_qf.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

#ifndef PALACE_LIBCEED_L2_2_QF_H
#define PALACE_LIBCEED_L2_2_QF_H

#include "../coeff/coeff_2_qf.h"

// Applies the two-component operator at each of the Q points: v = (qw * qw / wdetJ) * C * u,
// where the three values filled in by CoeffUnpack2 are used as the symmetric 2x2 matrix
//   C = [c[0] c[1]; c[1] c[2]].
//   in[0]  : packed array, attr in the first Q entries followed by wdetJ in the next Q.
//   in[1]  : qw (one value per point).
//   in[2]  : input u, two components stored with stride Q (component k at u[q + Q * k]).
//   out[0] : output v, same two-component layout as u.
// The coefficient is looked up using ctx and attr converted to CeedInt. Always returns 0.
CEED_QFUNCTION(f_apply_l2_2)(void *__restrict__ ctx, CeedInt Q, const CeedScalar *const *in,
                             CeedScalar *const *out)
{
  const CeedScalar *attr = in[0];
  const CeedScalar *wdetJ = attr + Q;
  const CeedScalar *qw = in[1];
  const CeedScalar *u = in[2];
  CeedScalar *v = out[0];
  const CeedIntScalar *coeff_ctx = (const CeedIntScalar *)ctx;

  CeedPragmaSIMD for (CeedInt q = 0; q < Q; q++)
  {
    CeedScalar c[3];
    CoeffUnpack2(coeff_ctx, (CeedInt)attr[q], c);
    const CeedScalar scale = qw[q] * qw[q] / wdetJ[q];

    // Load both input components before storing either output component.
    const CeedScalar uc0 = u[q];
    const CeedScalar uc1 = u[q + Q];
    v[q] = scale * (c[0] * uc0 + c[1] * uc1);
    v[q + Q] = scale * (c[1] * uc0 + c[2] * uc1);
  }
  return 0;
}

#endif // PALACE_LIBCEED_L2_2_QF_H
28 changes: 28 additions & 0 deletions palace/fem/qfunctions/2/l2_build_2_qf.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

#ifndef PALACE_LIBCEED_L2_BUILD_2_QF_H
#define PALACE_LIBCEED_L2_BUILD_2_QF_H

#include "../coeff/coeff_2_qf.h"

// Builds the two-dimensional quadrature data at each of the Q points: the three values
// filled in by CoeffUnpack2 are each scaled by qw * qw / wdetJ.
//   in[0]  : packed array, attr in the first Q entries followed by wdetJ in the next Q.
//   in[1]  : qw (one value per point).
//   out[0] : quadrature data qd, three entries per point stored with stride Q (entry k at
//            qd[q + Q * k]).
// The coefficient is looked up using ctx and attr converted to CeedInt. Always returns 0.
CEED_QFUNCTION(f_build_l2_2)(void *__restrict__ ctx, CeedInt Q, const CeedScalar *const *in,
                             CeedScalar *const *out)
{
  const CeedScalar *attr = in[0];
  const CeedScalar *wdetJ = attr + Q;
  const CeedScalar *qw = in[1];
  CeedScalar *qd = out[0];
  const CeedIntScalar *coeff_ctx = (const CeedIntScalar *)ctx;

  CeedPragmaSIMD for (CeedInt q = 0; q < Q; q++)
  {
    CeedScalar c[3];
    CoeffUnpack2(coeff_ctx, (CeedInt)attr[q], c);
    const CeedScalar scale = qw[q] * qw[q] / wdetJ[q];

    qd[q] = scale * c[0];
    qd[q + Q] = scale * c[1];
    qd[q + Q * 2] = scale * c[2];
  }
  return 0;
}

#endif // PALACE_LIBCEED_L2_BUILD_2_QF_H
Loading

0 comments on commit 4a464e8

Please sign in to comment.