awslabs · sebastiangrimberg · Aug 22, 2023 · Jun 16, 2023 · Jun 19, 2023 · Jun 27, 2023
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -24,10 +24,14 @@ The format of this changelog is based on
   - Changed implementation of numeric wave ports to use MFEM's `SubMesh` functionality. As
     of [#3379](https://github.com/mfem/mfem/pull/3379) in MFEM, this has full ND and RT
     basis support. For now, support for nonconforming mesh boundaries is limited.
-  - Added Apptainer/Singularity container build definition for Palace.
+  - Added support for operator partial assembly for high-order finite element spaces based
+    on libCEED for non-tensor product element meshes. This option is disabled by default,
+    but can be activated using `config["Solver"]["PartialAssemblyOrder"]` set to some number
+    less than `"Order"` and `config["Solver"]["Device"]: "ceed-cpu"`.
   - Added build dependencies on [libCEED](https://github.com/CEED/libCEED) and
     [LIBXSMM](https://github.com/libxsmm/libxsmm) to support operator partial assembly (CPU-
     based for now).
+  - Added Apptainer/Singularity container build definition for Palace.
 
 ## [0.11.2] - 2023-07-14
 

diff --git a/docs/src/config/solver.md b/docs/src/config/solver.md
@@ -9,6 +9,8 @@
 "Solver":
 {
     "Order": <int>,
+    "PartialAssemblyOrder": <int>,
+    "Device": <string>,
     "Eigenmode":
     {
         ...
@@ -40,6 +42,16 @@ with
 
 `"Order" [1]` :  Finite element order (degree). Arbitrary high-order spaces are supported.
 
+`"PartialAssemblyOrder" [100]` :  Order at which to switch from full assembly of finite
+element operators to [partial assembly](https://mfem.org/howto/assembly_levels/). Setting
+this parameter equal to 1 will fully activate operator partial assembly on all levels.
+
+`"Device" ["cpu"]` :  The device configuration passed to [MFEM]
+(https://mfem.org/howto/assembly_levels/) in order to activate different backends at
+runtime. CPU-based partial assembly is supported by the `"cpu"` backend for tensor-product
+meshes using the native MFEM kernels and `"ceed-cpu"` backend for all mesh types using
+libCEED.
+
 `"Eigenmode"` :  Top-level object for configuring the eigenvalue solver for the eigenmode
 simulation type. Thus, this object is only relevant for
 [`config["Problem"]["Type"]: "Eigenmode"`](problem.md#config%5B%22Problem%22%5D).
@@ -299,13 +311,13 @@ directory specified by [`config["Problem"]["Output"]`]
     "Tol": <float>,
     "MaxIts": <int>,
     "MaxSize": <int>,
-    "UsePCMatShifted": <bool>,
-    "PCSide": <string>,
-    "UseMultigrid": <bool>,
-    "MGAuxiliarySmoother": <bool>,
+    "MGMaxLevels": <int>,
+    "MGCoarsenType": <string>,
     "MGCycleIts": <int>,
     "MGSmoothIts": <int>,
     "MGSmoothOrder": <int>,
+    "PCMatShifted": <bool>,
+    "PCSide": <string>,
     "DivFreeTol": <float>,
     "DivFreeMaxIts": <float>,
     "GSOrthogonalization": <string>
@@ -365,26 +377,15 @@ equations arising for each simulation type. The available options are:
 `"MaxSize" [0]` :  Maximum Krylov space size for the GMRES and FGMRES solvers. A value less
 than 1 defaults to the value specified by `"MaxIts"`.
 
-`"UsePCMatShifted" [false]` :  When set to `true`, constructs the preconditioner for frequency
-domain problems using a real SPD approximation of the system matrix, which can help
-performance at high frequencies (relative to the lowest nonzero eigenfrequencies of the
-model).
-
-`"PCSide" ["Default"]` :  Side for preconditioning. Not all options are available for all
-iterative solver choices, and the default choice depends on the iterative solver used.
-
-  - `"Left"`
-  - `"Right"`
-  - `"Default"`
-
-`"UseMultigrid" [true]` :  Chose whether to enable [geometric multigrid preconditioning]
+`"MGMaxLevels" [100]` :  Chose whether to enable [geometric multigrid preconditioning]
 (https://en.wikipedia.org/wiki/Multigrid_method) which uses p- and h-multigrid coarsening as
 available to construct the multigrid hierarchy. The solver specified by `"Type"` is used on
 the coarsest level. Relaxation on the fine levels is performed with Chebyshev smoothing.
 
-`"MGAuxiliarySmoother"` :  Activate hybrid smoothing from Hiptmair for multigrid levels when
-`"UseMultigrid"` is `true`. For non-singular problems involving curl-curl operators, this
-option is `true` by default.
+`"MGCoarsenType" ["Logarithmic"]` :  Coarsening to create p-multigrid levels.
+
+  - `"Logarithmic"`
+  - `"Linear"`
 
 `"MGCycleIts" [1]` :  Number of V-cycle iterations per preconditioner application for
 multigrid preconditioners (when `"UseMultigrid"` is `true` or `"Type"` is `"AMS"` or
@@ -396,6 +397,18 @@ preconditioners (when `"UseMultigrid"` is `true` or `"Type"` is `"AMS"` or `"Boo
 `"MGSmoothOrder" [3]` :  Order of polynomial smoothing for geometric multigrid
 preconditioning (when `"UseMultigrid"` is `true`).
 
+`"PCMatShifted" [false]` :  When set to `true`, constructs the preconditioner for frequency
+domain problems using a real SPD approximation of the system matrix, which can help
+performance at high frequencies (relative to the lowest nonzero eigenfrequencies of the
+model).
+
+`"PCSide" ["Default"]` :  Side for preconditioning. Not all options are available for all
+iterative solver choices, and the default choice depends on the iterative solver used.
+
+  - `"Left"`
+  - `"Right"`
+  - `"Default"`
+
 `"DivFreeTol" [1.0e-12]` :  Relative tolerance for divergence-free cleaning used in the
 eigenmode simulation type.
 
@@ -411,10 +424,11 @@ vectors in Krylov subspace methods or other parts of the code.
 
 ### Advanced linear solver options
 
-  - `"UseInitialGuess" [true]`
-  - `"UsePartialAssembly" [false]`
-  - `"UseLowOrderRefined" [false]`
-  - `"Reordering" ["Default"]` :  `"METIS"`, `"ParMETIS"`,`"Scotch"`, `"PTScotch"`,
+  - `"InitialGuess" [true]`
+  - `"MGLegacyTransfer" [false]`
+  - `"MGAuxiliarySmoother" [true]`
+  - `"PCLowOrderRefined" [false]`
+  - `"ColumnOrdering" ["Default"]` :  `"METIS"`, `"ParMETIS"`,`"Scotch"`, `"PTScotch"`,
     `"Default"`
   - `"STRUMPACKCompressionType" ["None"]` :  `"None"`, `"BLR"`, `"HSS"`, `"HODLR"`, `"ZFP"`,
     `"BLR-HODLR"`, `"ZFP-BLR-HODLR"`

diff --git a/palace/drivers/eigensolver.cpp b/palace/drivers/eigensolver.cpp
@@ -163,7 +163,8 @@ void EigenSolver::Solve(std::vector<std::unique_ptr<mfem::ParMesh>> &mesh,
     divfree = std::make_unique<DivFreeSolver>(
         spaceop.GetMaterialOp(), spaceop.GetNDSpace(), spaceop.GetH1Spaces(),
         spaceop.GetAuxBdrTDofLists(), iodata.solver.linear.divfree_tol,
-        iodata.solver.linear.divfree_max_it, divfree_verbose);
+        iodata.solver.linear.divfree_max_it, divfree_verbose,
+        iodata.solver.pa_order_threshold);
     eigen->SetDivFreeProjector(*divfree);
   }
 

diff --git a/palace/fem/multigrid.hpp b/palace/fem/multigrid.hpp
@@ -9,6 +9,7 @@
 #include <mfem.hpp>
 #include "linalg/operator.hpp"
 #include "linalg/rap.hpp"
+#include "utils/iodata.hpp"
 
 namespace palace::utils
 {
@@ -17,61 +18,118 @@ namespace palace::utils
 // Methods for constructing hierarchies of finite element spaces for geometric multigrid.
 //
 
+// Helper function for getting the order of the finite element space underlying a bilinear
+// form.
+inline auto GetMaxElementOrder(mfem::BilinearForm &a)
+{
+  return a.FESpace()->GetMaxElementOrder();
+}
+
+// Helper function for getting the order of the finite element space underlying a mixed
+// bilinear form.
+inline auto GetMaxElementOrder(mfem::MixedBilinearForm &a)
+{
+  return std::max(a.TestFESpace()->GetMaxElementOrder(),
+                  a.TrialFESpace()->GetMaxElementOrder());
+}
+
+// Assemble a bilinear or mixed bilinear form. If the order is lower than the specified
+// threshold, the operator is assembled as a sparse matrix.
+template <typename BilinearForm>
+inline std::unique_ptr<Operator>
+AssembleOperator(std::unique_ptr<BilinearForm> &&a, bool mfem_pa_support,
+                 int pa_order_threshold, int skip_zeros = 1)
+{
+  mfem::AssemblyLevel assembly_level =
+      (mfem::DeviceCanUseCeed() ||
+       (mfem_pa_support && GetMaxElementOrder(*a) >= pa_order_threshold))
+          ? mfem::AssemblyLevel::PARTIAL
+          : mfem::AssemblyLevel::LEGACY;
+  a->SetAssemblyLevel(assembly_level);
+  a->Assemble(skip_zeros);
+  a->Finalize(skip_zeros);
+  if (assembly_level == mfem::AssemblyLevel::LEGACY ||
+      (assembly_level == mfem::AssemblyLevel::PARTIAL &&
+       GetMaxElementOrder(*a) < pa_order_threshold &&
+       std::is_base_of<mfem::BilinearForm, BilinearForm>::value))
+  {
+    // libCEED full assembly does not support mixed forms.
+#ifdef MFEM_USE_CEED
+    mfem::SparseMatrix *spm =
+        a->HasExt() ? mfem::ceed::CeedOperatorFullAssemble(*a) : a->LoseMat();
+#else
+    mfem::SparseMatrix *spm = a->LoseMat();
+#endif
+    MFEM_VERIFY(spm, "Missing assembled sparse matrix!");
+    return std::unique_ptr<Operator>(spm);
+  }
+  else
+  {
+    return std::move(a);
+  }
+}
+
 // Construct sequence of FECollection objects.
 template <typename FECollection>
-std::vector<std::unique_ptr<FECollection>> ConstructFECollections(bool pc_pmg, bool pc_lor,
-                                                                  int p, int dim)
+std::vector<std::unique_ptr<FECollection>> inline ConstructFECollections(
+    int p, int dim, int mg_max_levels,
+    config::LinearSolverData::MultigridCoarsenType mg_coarsen_type, bool mat_lor)
 {
   // If the solver will use a LOR preconditioner, we need to construct with a specific basis
   // type.
-  constexpr int pmin = (std::is_same<FECollection, mfem::H1_FECollection>::value ||
-                        std::is_same<FECollection, mfem::ND_FECollection>::value)
+  constexpr int pmin = (std::is_base_of<mfem::H1_FECollection, FECollection>::value ||
+                        std::is_base_of<mfem::ND_FECollection, FECollection>::value)
                            ? 1
                            : 0;
   MFEM_VERIFY(p >= pmin, "FE space order must not be less than " << pmin << "!");
   int b1 = mfem::BasisType::GaussLobatto, b2 = mfem::BasisType::GaussLegendre;
-  if (pc_lor)
+  if (mat_lor)
   {
     b2 = mfem::BasisType::IntegratedGLL;
   }
+
+  // Construct the p-multigrid hierarchy, first finest to coarsest and then reverse the
+  // order.
   std::vector<std::unique_ptr<FECollection>> fecs;
-  if (pc_pmg)
-  {
-    fecs.reserve(p);
-    for (int o = pmin; o <= p; o++)
-    {
-      if constexpr (std::is_same<FECollection, mfem::ND_FECollection>::value ||
-                    std::is_same<FECollection, mfem::RT_FECollection>::value)
-      {
-        fecs.push_back(std::make_unique<FECollection>(o, dim, b1, b2));
-      }
-      else
-      {
-        fecs.push_back(std::make_unique<FECollection>(o, dim, b1));
-      }
-    }
-  }
-  else
+  for (int l = 0; l < std::max(1, mg_max_levels); l++)
   {
-    fecs.reserve(1);
-    if constexpr (std::is_same<FECollection, mfem::ND_FECollection>::value ||
-                  std::is_same<FECollection, mfem::RT_FECollection>::value)
+    if constexpr (std::is_base_of<mfem::ND_FECollection, FECollection>::value ||
+                  std::is_base_of<mfem::RT_FECollection, FECollection>::value)
     {
       fecs.push_back(std::make_unique<FECollection>(p, dim, b1, b2));
     }
     else
     {
       fecs.push_back(std::make_unique<FECollection>(p, dim, b1));
+      MFEM_CONTRACT_VAR(b2);
+    }
+    if (p == pmin)
+    {
+      break;
+    }
+    switch (mg_coarsen_type)
+    {
+      case config::LinearSolverData::MultigridCoarsenType::LINEAR:
+        p--;
+        break;
+      case config::LinearSolverData::MultigridCoarsenType::LOGARITHMIC:
+        p = (p + pmin) / 2;
+        break;
+      case config::LinearSolverData::MultigridCoarsenType::INVALID:
+        MFEM_ABORT("Invalid coarsening type for p-multigrid levels!");
+        break;
     }
   }
+  std::reverse(fecs.begin(), fecs.end());
   return fecs;
 }
 
 // Construct a hierarchy of finite element spaces given a sequence of meshes and
 // finite element collections. Dirichlet boundary conditions are additionally
 // marked.
 template <typename FECollection>
-mfem::ParFiniteElementSpaceHierarchy ConstructFiniteElementSpaceHierarchy(
+inline mfem::ParFiniteElementSpaceHierarchy ConstructFiniteElementSpaceHierarchy(
+    int mg_max_levels, bool mg_legacy_transfer, int pa_order_threshold,
     const std::vector<std::unique_ptr<mfem::ParMesh>> &mesh,
     const std::vector<std::unique_ptr<FECollection>> &fecs,
     const mfem::Array<int> *dbc_marker = nullptr,
@@ -80,17 +138,18 @@ mfem::ParFiniteElementSpaceHierarchy ConstructFiniteElementSpaceHierarchy(
   MFEM_VERIFY(!mesh.empty() && !fecs.empty() &&
                   (!dbc_tdof_lists || dbc_tdof_lists->empty()),
               "Empty mesh or FE collection for FE space construction!");
-  auto *fespace = new mfem::ParFiniteElementSpace(mesh[0].get(), fecs[0].get());
+  int coarse_mesh_l =
+      std::max(0, static_cast<int>(mesh.size() + fecs.size()) - 1 - mg_max_levels);
+  auto *fespace = new mfem::ParFiniteElementSpace(mesh[coarse_mesh_l].get(), fecs[0].get());
   if (dbc_marker && dbc_tdof_lists)
   {
     fespace->GetEssentialTrueDofs(*dbc_marker, dbc_tdof_lists->emplace_back());
   }
-  mfem::ParFiniteElementSpaceHierarchy fespaces(mesh[0].get(), fespace, false, true);
-
-  // XX TODO: LibCEED transfer operators!
+  mfem::ParFiniteElementSpaceHierarchy fespaces(mesh[coarse_mesh_l].get(), fespace, false,
+                                                true);
 
   // h-refinement
-  for (std::size_t l = 1; l < mesh.size(); l++)
+  for (std::size_t l = coarse_mesh_l + 1; l < mesh.size(); l++)
   {
     fespace = new mfem::ParFiniteElementSpace(mesh[l].get(), fecs[0].get());
     if (dbc_marker && dbc_tdof_lists)
@@ -111,31 +170,27 @@ mfem::ParFiniteElementSpaceHierarchy ConstructFiniteElementSpaceHierarchy(
     {
       fespace->GetEssentialTrueDofs(*dbc_marker, dbc_tdof_lists->emplace_back());
     }
-    auto *P = new ParOperator(
-        std::make_unique<mfem::TransferOperator>(fespaces.GetFinestFESpace(), *fespace),
-        fespaces.GetFinestFESpace(), *fespace, true);
+    ParOperator *P;
+    if (!mg_legacy_transfer && mfem::DeviceCanUseCeed())
+    {
+      // Partial and full assembly for this operator is only available with libCEED backend.
+      auto p = std::make_unique<mfem::DiscreteLinearOperator>(&fespaces.GetFinestFESpace(),
+                                                              fespace);
+      p->AddDomainInterpolator(new mfem::IdentityInterpolator);
+      P = new ParOperator(AssembleOperator(std::move(p), false, pa_order_threshold),
+                          fespaces.GetFinestFESpace(), *fespace, true);
+    }
+    else
+    {
+      P = new ParOperator(
+          std::make_unique<mfem::TransferOperator>(fespaces.GetFinestFESpace(), *fespace),
+          fespaces.GetFinestFESpace(), *fespace, true);
+    }
     fespaces.AddLevel(mesh.back().get(), fespace, P, false, true, true);
   }
   return fespaces;
 }
 
-// Construct a single-level finite element space hierarchy from a single mesh and
-// finite element collection. Unnecessary to pass the dirichlet boundary
-// conditions as they need not be incorporated in any inter-space projectors.
-template <typename FECollection>
-mfem::ParFiniteElementSpaceHierarchy
-ConstructFiniteElementSpaceHierarchy(mfem::ParMesh &mesh, const FECollection &fec,
-                                     const mfem::Array<int> *dbc_marker = nullptr,
-                                     mfem::Array<int> *dbc_tdof_list = nullptr)
-{
-  auto *fespace = new mfem::ParFiniteElementSpace(&mesh, &fec);
-  if (dbc_marker && dbc_tdof_list)
-  {
-    fespace->GetEssentialTrueDofs(*dbc_marker, *dbc_tdof_list);
-  }
-  return mfem::ParFiniteElementSpaceHierarchy(&mesh, fespace, false, true);
-}
-
 }  // namespace palace::utils
 
 #endif  // PALACE_FEM_MULTIGRID_HPP
diff --git a/palace/linalg/CMakeLists.txt b/palace/linalg/CMakeLists.txt
@@ -11,10 +11,10 @@ target_sources(${TARGET_NAME}
   ${CMAKE_CURRENT_SOURCE_DIR}/ams.cpp
   ${CMAKE_CURRENT_SOURCE_DIR}/arpack.cpp
   ${CMAKE_CURRENT_SOURCE_DIR}/chebyshev.cpp
-  ${CMAKE_CURRENT_SOURCE_DIR}/curlcurl.cpp
   ${CMAKE_CURRENT_SOURCE_DIR}/distrelaxation.cpp
   ${CMAKE_CURRENT_SOURCE_DIR}/divfree.cpp
   ${CMAKE_CURRENT_SOURCE_DIR}/gmg.cpp
+  ${CMAKE_CURRENT_SOURCE_DIR}/hcurl.cpp
   ${CMAKE_CURRENT_SOURCE_DIR}/jacobi.cpp
   ${CMAKE_CURRENT_SOURCE_DIR}/ksp.cpp
   ${CMAKE_CURRENT_SOURCE_DIR}/iterative.cpp

diff --git a/palace/linalg/amg.hpp b/palace/linalg/amg.hpp
@@ -18,8 +18,8 @@ class BoomerAmgSolver : public mfem::HypreBoomerAMG
 {
 public:
   BoomerAmgSolver(int cycle_it = 1, int smooth_it = 1, int print = 0);
-  BoomerAmgSolver(const IoData &iodata, int print)
-    : BoomerAmgSolver(iodata.solver.linear.pc_mg ? 1 : iodata.solver.linear.mg_cycle_it,
+  BoomerAmgSolver(const IoData &iodata, bool coarse_solver, int print)
+    : BoomerAmgSolver(coarse_solver ? 1 : iodata.solver.linear.mg_cycle_it,
                       iodata.solver.linear.mg_smooth_it, print)
   {
   }