From 5e0aad34d1bbeafb2cc8bf7a8cf94f04a7d068cb Mon Sep 17 00:00:00 2001
From: Anderson <anderson316@llnl.gov>
Date: Wed, 15 Nov 2023 13:59:47 -0800
Subject: [PATCH 01/13] Added local prom for maxwell

---
 CMakeLists.txt                             |   2 +
 examples/prom/maxwell_local_rom_greedy.cpp | 737 +++++++++++++++++++++
 examples/prom/maxwell_local_rom_greedy.hpp |  13 +
 3 files changed, 752 insertions(+)
 create mode 100644 examples/prom/maxwell_local_rom_greedy.cpp
 create mode 100644 examples/prom/maxwell_local_rom_greedy.hpp
diff --git a/CMakeLists.txt b/CMakeLists.txt
index c78dbe7f5..9f77e7f69 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -138,6 +138,7 @@ if (ENABLE_EXAMPLES)
       nonlinear_elasticity_global_rom
       linear_elasticity_global_rom
       maxwell_global_rom
+      maxwell_local_rom_greedy     
       dg_advection
       nonlinear_elasticity
       heat_conduction
@@ -159,6 +160,7 @@ if (ENABLE_EXAMPLES)
       prom
       prom
       prom
+      prom
       dmd
       dmd
       dmd
diff --git a/examples/prom/maxwell_local_rom_greedy.cpp b/examples/prom/maxwell_local_rom_greedy.cpp
new file mode 100644
index 000000000..c45119607
--- /dev/null
+++ b/examples/prom/maxwell_local_rom_greedy.cpp
@@ -0,0 +1,737 @@
+/******************************************************************************
+ *
+ * Copyright (c) 2013-2023, Lawrence Livermore National Security, LLC
+ * and other libROM project developers. See the top-level COPYRIGHT
+ * file for details.
+ *
+ * SPDX-License-Identifier: (Apache-2.0 OR MIT)
+ *
+ *****************************************************************************/
+
+//               libROM MFEM Example: parametric local ROM for Maxwell equation (adapted from ex3p.cpp)
+//
+// Description:  This example code uses MFEM and librom to define a projection-based
+//               reduced-order model for a simple electromagnetic diffusion
+//               problem corresponding to the second order definite Maxwell
+//               equation: curl curl E + E = f. The boundary condition is given by
+//               E x n = <given tangential field>. Here, we use a given exact
+//               solution E and compute the corresponding r.h.s. f. Each component
+//               of the vector field E is assumed to be a sin wave, with frequency
+//               controlled by the parameter kappa.
+//
+//               The example highlights the greedy algorithm. The build_database phase
+//               builds a global ROM database using different frequncies and a latin-hypercube
+//               sampling procedure. The use_database phase uses the global ROM database,
+//               builds the ROM operator, solves thereduced order system, and
+//               lifts the solution to the full order space.
+//
+// build_database phase: maxwell_local_rom_greedy -build_database -greedy-param-min 1.0 -greedy-param-max 1.2 -greedy-param-size 5 -greedysubsize 2 -greedyconvsize 3 -greedyrelerrortol 0.01
+// use_database phase:   maxwell_local_rom_greedy -fom -f 1.15 (create a new solution to compare with)
+// use_database phase:   maxwell_local_rom_greedy -use_database -online -f 1.15 (use the database to compute at f 1.15 while comparing to the true offline solution at f 1.15)
+//
+// Larger example:
+// build_database phase: maxwell_local_rom_greedy -build_database -greedy-param-min 0.5 -greedy-param-max 1.5 -greedy-param-size 15 -greedysubsize 4 -greedyconvsize 6 -greedyrelerrortol 0.01
+// use_database phase:   maxwell_local_rom_greedy -fom -f X.XX (create a new solution to compare with. Set X.XX to your desired frequency.)
+// use_database phase:   maxwell_local_rom_greedy -use_database -online -f X.XX (use the database to compute at f X.XX while comparing to the true offline solution at f X.XX)
+//
+// This example runs in parallel with MPI, by using the same number of MPI ranks
+// in all phases (build_database, fom, online).
+
+#include "mfem.hpp"
+#include <fstream>
+#include <iostream>
+#include "linalg/Vector.h"
+#include "linalg/BasisGenerator.h"
+#include "linalg/BasisReader.h"
+#include "mfem/Utilities.hpp"
+#include <cmath>
+#include <set>
+#include <vector>
+#include "algo/greedy/GreedyRandomSampler.h"
+
+using namespace std;
+using namespace mfem;
+
+// Exact solution, E, and r.h.s., f. See below for implementation.
+void E_exact(const Vector &, Vector &);
+void f_exact(const Vector &, Vector &);
+double freq = 1.0, kappa;
+int dim;
+
+int main(int argc, char *argv[])
+{
+    // 1. Initialize MPI and HYPRE
+    Mpi::Init(argc, argv);
+    int num_procs = Mpi::WorldSize();
+    int myid = Mpi::WorldRank();
+    Hypre::Init();
+
+    // 2. Parse command-line options.
+    const char *mesh_file = "../data/star.mesh";
+    int order = 1;
+    bool static_cond = false;
+    bool pa = false;
+    const char *device_config = "cpu";
+    bool visualization = false;
+    bool visit = false;
+    bool fom = false;
+    bool offline = false;
+    bool merge = false;
+    bool online = false;
+    int precision = 8;
+//    int id = 0;
+//    int nsets = 0;
+    double coef = 1.0;
+    bool build_database = false;
+    bool use_database = false;
+    double greedy_param_space_min = 1.0;
+    double greedy_param_space_max = 1.0;
+    int greedy_param_space_size = 0;
+    double greedy_relative_error_tol = 0.01;
+    int greedy_subset_size = 0;
+    int greedy_convergence_subset_size = 0;
+#ifdef MFEM_USE_AMGX
+    bool useAmgX = false;
+#endif
+
+
+    OptionsParser args(argc, argv);
+    args.AddOption(&mesh_file, "-m", "--mesh",
+                   "Mesh file to use.");
+    args.AddOption(&order, "-o", "--order",
+                   "Finite element order (polynomial degree).");
+    args.AddOption(&freq, "-f", "--frequency", "Set the frequency for the exact"
+                   " solution.");
+    args.AddOption(&static_cond, "-sc", "--static-condensation", "-no-sc",
+                   "--no-static-condensation", "Enable static condensation.");
+    args.AddOption(&pa, "-pa", "--partial-assembly", "-no-pa",
+                   "--no-partial-assembly", "Enable Partial Assembly.");
+    args.AddOption(&device_config, "-d", "--device",
+                   "Device configuration string, see Device::Configure().");
+    args.AddOption(&visit, "-visit", "--visit-datafiles", "-no-visit",
+                   "--no-visit-datafiles",
+                   "Save data files for VisIt (visit.llnl.gov) visualization.");
+    args.AddOption(&visualization, "-vis", "--visualization", "-no-vis",
+                   "--no-visualization",
+                   "Enable or disable GLVis visualization.");
+#ifdef MFEM_USE_AMGX
+    args.AddOption(&useAmgX, "-amgx", "--useAmgX", "-no-amgx",
+                   "--no-useAmgX",
+                   "Enable or disable AmgX in MatrixFreeAMS.");
+#endif
+    args.AddOption(&fom, "-fom", "--fom", "-no-fom", "--no-fom",
+                   "Enable or disable the fom phase.");
+    args.AddOption(&offline, "-offline", "--offline", "-no-offline","--no-offline",
+                   "Enable or disable the offline phase.");
+    args.AddOption(&online, "-online", "--online", "-no-online", "--no-online",
+                   "Enable or disable the online phase.");
+    args.AddOption(&merge, "-merge", "--merge", "-no-merge", "--no-merge",
+                   "Enable or disable the merge phase.");
+//    args.AddOption(&id, "-id", "--id", "Parametric id");
+//    args.AddOption(&nsets, "-ns", "--nset", "Number of parametric snapshot sets");
+    args.AddOption(&build_database, "-build_database", "--build_database",
+                   "-no-build_database", "--no-build_database",
+                   "Enable or disable the build_database phase of the greedy algorithm.");
+    args.AddOption(&use_database, "-use_database", "--use_database",
+                   "-no-use_database", "--no-use_database",
+                   "Enable or disable the use_database phase of the greedy algorithm.");
+    args.AddOption(&greedy_param_space_min, "-greedy-param-min",
+                   "--greedy-param-min", "The minimum value of the parameter point space.");
+    args.AddOption(&greedy_param_space_max, "-greedy-param-max",
+                   "--greedy-param-max", "The maximum value of the parameter point space.");
+    args.AddOption(&greedy_param_space_size, "-greedy-param-size",
+                   "--greedy-param-size",
+                   "The number of values to search in the parameter point space.");
+    args.AddOption(&greedy_relative_error_tol, "-greedyrelerrortol",
+                   "--greedyrelerrortol", "The greedy algorithm relative error tolerance.");
+    args.AddOption(&greedy_subset_size, "-greedysubsize", "--greedysubsize",
+                   "The greedy algorithm subset size.");
+    args.AddOption(&greedy_convergence_subset_size, "-greedyconvsize",
+                   "--greedyconvsize", "The greedy algorithm convergence subset size.");
+
+    args.Parse();
+    if (!args.Good())
+    {
+        if (myid == 0)
+        {
+            args.PrintUsage(cout);
+        }
+        return 1;
+    }
+    if (myid == 0)
+    {
+        args.PrintOptions(cout);
+    }
+
+    if (fom)
+    {
+        MFEM_VERIFY(fom && !offline && !online
+                    && !merge, "everything must be turned off if fom is used.");
+    }
+
+    CAROM::GreedySampler* greedy_sampler = NULL;
+    MFEM_VERIFY(!build_database
+                || !use_database,
+                "both build_database and use_database can not be used at the same time.");
+
+    // 3a. Set up the ROM database for the greedy algorithm to run.
+    if (build_database)
+    {
+        MFEM_VERIFY(!offline
+                    && !online,
+                    "offline and online must be turned off during the build_database phase.");
+        MFEM_VERIFY(!visit
+                    && !visualization,
+                    "visit and visualization must be turned off during the build_database phase.")
+        std::ifstream infile("maxwell_local_rom_greedy_algorithm_data");
+        if (infile.good())
+        {
+            if (myid == 0) cout << "The database has already been built."
+                                    << "Exiting." << endl;
+            return 0;
+        }
+        infile.close();
+        greedy_sampler = new CAROM::GreedyRandomSampler(greedy_param_space_min,
+                greedy_param_space_max,
+                greedy_param_space_size, false, greedy_relative_error_tol, 1.05,
+                2.0, greedy_subset_size, greedy_convergence_subset_size,
+                true, "maxwell_local_rom_greedy_algorithm_log.txt");
+    }
+
+    // 3b. Or use the database set up by the greedy algorithm.
+    else if (use_database)
+    {
+        MFEM_VERIFY(!offline
+                    && online,
+                    "offline must be turned off and online must be turned on during the build_database phase.");
+        std::ifstream infile("maxwell_local_rom_greedy_algorithm_data");
+        if (!infile.good())
+        {
+            if (myid == 0) cout << "The database has not been built. Exiting."
+                                    << endl;
+            return 0;
+        }
+        infile.close();
+    }
+
+    vector<string> basisIdentifiers;
+    // The simulation is wrapped in a do-while statement so that the greedy
+    // algorithm (build_database) can run multiple simulations in succession.
+    do
+    {
+
+        if (build_database)
+        {
+            offline = false;
+            online = false;
+        }
+        bool calc_rel_error = false;
+        bool calc_err_indicator = false;
+        std::string curr_basis_identifier = "";
+
+        // 4a. Set the correct stage of the greedy algorithm (i.e. sampling new point,
+        //    calculating relative error of the last sampled point, or calculating
+        //    an error indicator at a new point.)
+        if (build_database)
+        {
+            double local_rom_freq = 0.0;
+            double curr_freq = 0.0;
+            struct CAROM::GreedyErrorIndicatorPoint pointRequiringRelativeError =
+                greedy_sampler->getNextPointRequiringRelativeError();
+            CAROM::Vector* relativeErrorPointData = pointRequiringRelativeError.point.get();
+            struct CAROM::GreedyErrorIndicatorPoint pointRequiringErrorIndicator =
+                greedy_sampler->getNextPointRequiringErrorIndicator();
+            CAROM::Vector* errorIndicatorPointData =
+                pointRequiringErrorIndicator.point.get();
+
+            if (relativeErrorPointData != NULL)
+            {
+                if (myid == 0) cout << "Calculating the relative error of "
+                                        << "the last sampled point." << endl;
+
+                local_rom_freq = pointRequiringRelativeError.localROM.get()->item(0);
+                curr_freq = pointRequiringRelativeError.point.get()->item(0);
+                if (myid == 0) cout << "Using the basis obtained at the frequency: " <<
+                                        local_rom_freq << endl;
+                online = true;
+                calc_rel_error = true;
+            }
+            else if (errorIndicatorPointData != NULL)
+            {
+                if (myid == 0) cout << "Calculating an error indicator at "
+                                        << "a new point." << endl;
+
+                local_rom_freq = pointRequiringErrorIndicator.localROM.get()->item(0);
+                curr_freq = pointRequiringErrorIndicator.point.get()->item(0);
+                if (myid == 0) cout << "Using the basis obtained at the frequency: "
+                                        << local_rom_freq << endl;
+                online = true;
+                calc_err_indicator = true;
+            }
+            else
+            {
+                std::shared_ptr<CAROM::Vector> nextSampleParameterPoint =
+                    greedy_sampler->getNextParameterPoint();
+                CAROM::Vector* samplePointData = nextSampleParameterPoint.get();
+                if (samplePointData != NULL)
+                {
+                    if (myid == 0) cout << "Sampling a new point." << endl;
+                    local_rom_freq = samplePointData->item(0);
+                    curr_freq = samplePointData->item(0);
+                    offline = true;
+                }
+                else
+                {
+                    if (myid == 0) cout << "The greedy algorithm has finished." << endl;
+                    greedy_sampler->save("maxwell_local_rom_greedy_algorithm_data");
+                    build_database = false;
+                    continue;
+                }
+            }
+            // 4b. Set the correct frequency as commanded by the greedy algorithm.
+            curr_basis_identifier += "_" + to_string(curr_freq);
+            freq = curr_freq;
+        }
+
+        if (myid == 0) cout << "Running loop at frequency: " << freq << endl;
+
+        kappa = freq * M_PI;
+
+        // 5. Enable hardware devices such as GPUs, and programming models such as
+        //    CUDA, OCCA, RAJA and OpenMP based on command line options.
+        Device device(device_config);
+        if (myid == 0) {
+            device.Print();
+        }
+
+        // 6. Read the (serial) mesh from the given mesh file on all processors.  We
+        //    can handle triangular, quadrilateral, tetrahedral, hexahedral, surface
+        //    and volume meshes with the same code.
+        Mesh *mesh = new Mesh(mesh_file, 1, 1);
+        dim = mesh->Dimension();
+        int sdim = mesh->SpaceDimension();
+
+        // 7. Refine the serial mesh on all processors to increase the resolution. In
+        //    this example we do 'ref_levels' of uniform refinement. We choose
+        //    'ref_levels' to be the largest number that gives a final mesh with no
+        //    more than 1,000 elements.
+        {
+            int ref_levels = (int)floor(log(1000./mesh->GetNE())/log(2.)/dim);
+            for (int l = 0; l < ref_levels; l++)
+            {
+                mesh->UniformRefinement();
+            }
+        }
+
+        // 8. Define a parallel mesh by a partitioning of the serial mesh. Refine
+        //    this mesh further in parallel to increase the resolution. Once the
+        //    parallel mesh is defined, the serial mesh can be deleted.
+        ParMesh *pmesh = new ParMesh(MPI_COMM_WORLD, *mesh);
+        delete mesh;
+        {
+            int par_ref_levels = 2;
+            for (int l = 0; l < par_ref_levels; l++)
+            {
+                pmesh->UniformRefinement();
+            }
+        }
+
+        // 9. Define a parallel finite element space on the parallel mesh. Here we
+        //    use the Nedelec finite elements of the specified order.
+        FiniteElementCollection *fec = new ND_FECollection(order, dim);
+        ParFiniteElementSpace *fespace = new ParFiniteElementSpace(pmesh, fec);
+        HYPRE_BigInt size = fespace->GlobalTrueVSize();
+        if (myid == 0)
+        {
+            cout << "Number of finite element unknowns: " << size << endl;
+        }
+
+        // 10. Determine the list of true (i.e. parallel conforming) essential
+        //    boundary dofs. In this example, the boundary conditions are defined
+        //    by marking all the boundary attributes from the mesh as essential
+        //    (Dirichlet) and converting them to a list of true dofs.
+        Array<int> ess_tdof_list;
+        Array<int> ess_bdr;
+        if (pmesh->bdr_attributes.Size())
+        {
+            ess_bdr.SetSize(pmesh->bdr_attributes.Max());
+            ess_bdr = 1;
+            fespace->GetEssentialTrueDofs(ess_bdr, ess_tdof_list);
+        }
+
+        // 11. Initiate ROM related variables
+        int max_num_snapshots = 100;
+        bool update_right_SV = false;
+        bool isIncremental = false;
+        const std::string saveBasisName = "basis" + curr_basis_identifier;
+        std::string loadBasisName = "basis";
+
+        // 11a. If using the greedy algorithm, load the global greedy basis.
+        if (build_database || use_database)
+        {
+            loadBasisName += "_greedy";
+        }
+
+//    const std::string basisName = "basis";
+//    const std::string basisFileName = basisName + std::to_string(id);
+        const CAROM::Matrix* spatialbasis;
+        CAROM::Options* options;
+        CAROM::BasisGenerator *generator;
+        int numRowRB, numColumnRB;
+        StopWatch solveTimer, assembleTimer, mergeTimer;
+
+        // 12. Set BasisGenerator if offline
+        if (offline)
+        {
+            options = new CAROM::Options(fespace->GetTrueVSize(), max_num_snapshots, 1,
+                                         update_right_SV);
+            if (myid == 0) cout << "Saving basis to: " << saveBasisName << endl;
+            generator = new CAROM::BasisGenerator(*options, isIncremental, saveBasisName);
+        }
+
+        // 13. Set up the parallel linear form b(.) which corresponds to the
+        //    right-hand side of the FEM linear system, which in this case is
+        //    (f,phi_i) where f is given by the function f_exact and phi_i are the
+        //    basis functions in the finite element fespace.
+        assembleTimer.Start();
+        VectorFunctionCoefficient f(sdim, f_exact);
+        ParLinearForm *b = new ParLinearForm(fespace);
+        b->AddDomainIntegrator(new VectorFEDomainLFIntegrator(f));
+        b->Assemble();
+
+        // 14. Define the solution vector x as a parallel finite element grid function
+        //     corresponding to fespace. Initialize x by projecting the exact
+        //     solution. Note that only values from the boundary edges will be used
+        //     when eliminating the non-homogeneous boundary condition to modify the
+        //     r.h.s. vector b.
+        ParGridFunction x(fespace);
+        VectorFunctionCoefficient E(sdim, E_exact);
+        x.ProjectCoefficient(E);
+
+        // 15. Set up the parallel bilinear form corresponding to the EM diffusion
+        //     operator curl muinv curl + sigma I, by adding the curl-curl and the
+        //     mass domain integrators.
+        Coefficient *muinv = new ConstantCoefficient(1.0);
+        Coefficient *sigma = new ConstantCoefficient(1.0);
+        ParBilinearForm *a = new ParBilinearForm(fespace);
+        if (pa) {
+            a->SetAssemblyLevel(AssemblyLevel::PARTIAL);
+        }
+        a->AddDomainIntegrator(new CurlCurlIntegrator(*muinv));
+        a->AddDomainIntegrator(new VectorFEMassIntegrator(*sigma));
+
+        // 16. Assemble the parallel bilinear form and the corresponding linear
+        //     system, applying any necessary transformations such as: parallel
+        //     assembly, eliminating boundary conditions, applying conforming
+        //     constraints for non-conforming AMR, static condensation, etc.
+        if (static_cond) {
+            a->EnableStaticCondensation();
+        }
+        a->Assemble();
+
+        OperatorPtr A;
+        Vector B, X;
+        a->FormLinearSystem(ess_tdof_list, x, *b, A, X, B);
+        assembleTimer.Stop();
+
+        // 17. Offline Phase: Solve the system AX=B using PCG with an AMS preconditioner.
+        if(fom || offline)
+        {
+            // 18. Solve the full order linear system A X = B
+            if (pa)
+            {
+#ifdef MFEM_USE_AMGX
+                MatrixFreeAMS ams(*a, *A, *fespace, muinv, sigma, NULL, ess_bdr, useAmgX);
+#else
+                MatrixFreeAMS ams(*a, *A, *fespace, muinv, sigma, NULL, ess_bdr);
+#endif
+                CGSolver cg(MPI_COMM_WORLD);
+                cg.SetRelTol(1e-12);
+                cg.SetMaxIter(1000);
+                cg.SetPrintLevel(1);
+                cg.SetOperator(*A);
+                cg.SetPreconditioner(ams);
+                solveTimer.Start();
+                cg.Mult(B, X);
+                solveTimer.Stop();
+            }
+            else
+            {
+                if (myid == 0)
+                {
+                    cout << "Size of linear system: "
+                         << A.As<HypreParMatrix>()->GetGlobalNumRows() << endl;
+                }
+
+                ParFiniteElementSpace *prec_fespace =
+                    (a->StaticCondensationIsEnabled() ? a->SCParFESpace() : fespace);
+                HypreAMS ams(*A.As<HypreParMatrix>(), prec_fespace);
+                HyprePCG pcg(*A.As<HypreParMatrix>());
+                pcg.SetTol(1e-12);
+                pcg.SetMaxIter(500);
+                pcg.SetPrintLevel(2);
+                pcg.SetPreconditioner(ams);
+                solveTimer.Start();
+                pcg.Mult(B, X);
+                solveTimer.Stop();
+            }
+            // 19. take and write snapshot for ROM
+            if (offline)
+            {
+                bool addSample = generator->takeSample(X.GetData(), 0.0, 0.01);
+                generator->writeSnapshot();
+                basisIdentifiers.push_back(saveBasisName);
+                delete generator;
+                delete options;
+            }
+        }
+
+        // 20. The online phase
+        if (online) {
+            // 21. read the reduced basis
+            assembleTimer.Start();
+            CAROM::BasisReader reader(loadBasisName);
+            spatialbasis = reader.getSpatialBasis(0.0);
+            numRowRB = spatialbasis->numRows();
+            numColumnRB = spatialbasis->numColumns();
+            if (myid == 0) printf("spatial basis dimension is %d x %d\n", numRowRB,
+                                      numColumnRB);
+
+            // 22. form inverse ROM operator
+            CAROM::Matrix invReducedA(numColumnRB, numColumnRB, false);
+            ComputeCtAB(*A.As<HypreParMatrix>(), *spatialbasis, *spatialbasis,
+                        invReducedA);
+            invReducedA.inverse();
+
+            CAROM::Vector B_carom(B.GetData(), B.Size(), true, false);
+            CAROM::Vector X_carom(X.GetData(), X.Size(), true, false);
+            CAROM::Vector *reducedRHS = spatialbasis->transposeMult(&B_carom);
+            CAROM::Vector reducedSol(numColumnRB, false);
+            assembleTimer.Stop();
+
+            // 23. solve ROM
+            solveTimer.Start();
+            invReducedA.mult(*reducedRHS, reducedSol);
+            solveTimer.Stop();
+
+            // 24. reconstruct FOM state
+            spatialbasis->mult(reducedSol, X_carom);
+            delete spatialbasis;
+            delete reducedRHS;
+        }
+
+        // 25. Recover the parallel grid function corresponding to X. This is the
+        //     local finite element solution on each processor.
+        a->RecoverFEMSolution(X, *b, x);
+
+        // 26. Save the refined mesh and the solution in parallel. This output can
+        //     be viewed later using GLVis: "glvis -np <np> -m Mesh -g Sol".
+        if (fom || offline)
+        {
+            ostringstream mesh_name, sol_name;
+            mesh_name << "Mesh" << curr_basis_identifier << myid;
+            sol_name << "Sol" << curr_basis_identifier << myid;
+
+            ofstream mesh_ofs(mesh_name.str().c_str());
+            mesh_ofs.precision(precision);
+            pmesh->Print(mesh_ofs);
+
+            ofstream sol_ofs(sol_name.str().c_str());
+            if (myid == 0) cout << "Saving solution to: " << sol_name.str() << endl;
+
+            sol_ofs.precision(precision);
+            for (int i = 0; i < X.Size(); ++i)
+            {
+                sol_ofs << X[i] << endl;
+            }
+        }
+
+        // 27. Save data in the VisIt format.
+        DataCollection *dc = NULL;
+        if (visit)
+        {
+            if (offline) dc = new VisItDataCollection("maxwell_local", pmesh);
+            else if (fom) dc = new VisItDataCollection("maxwell_local_fom", pmesh);
+            else if (online) dc = new VisItDataCollection("maxwell_local_rom", pmesh);
+            dc->SetPrecision(precision);
+            dc->RegisterField("solution", &x);
+            dc->Save();
+            delete dc;
+        }
+
+        // 28. Send the solution by socket to a GLVis server.
+        if (visualization)
+        {
+            char vishost[] = "localhost";
+            int  visport   = 19916;
+            socketstream sol_sock(vishost, visport);
+            sol_sock << "parallel " << num_procs << " " << myid << "\n";
+            sol_sock.precision(8);
+            sol_sock << "solution\n" << *pmesh << x << flush;
+        }
+
+        double curr_error = 0;
+
+        // 29a. Calculate the relative error as commanded by the greedy algorithm.
+        if (calc_rel_error)
+        {
+            Vector true_solution(X.Size());
+            ifstream solution_file;
+            std::string solution_filename = "Sol" + curr_basis_identifier + to_string(myid);
+            if (myid == 0) cout << "Comparing current run to solution at: "
+                                    << solution_filename << endl;
+            solution_file.open(solution_filename);
+            true_solution.Load(solution_file, X.Size());
+            solution_file.close();
+            Vector residual(X.Size());
+            subtract(X, true_solution, residual);
+
+            const double abs_error = sqrt(InnerProduct(MPI_COMM_WORLD, residual, residual));
+            const double sol_norm = sqrt(InnerProduct(MPI_COMM_WORLD, true_solution,
+                                         true_solution));
+
+            curr_error = abs_error / sol_norm;
+            if (myid == 0) cout << "The relative error is: " << curr_error << endl;
+
+            greedy_sampler->setPointRelativeError(curr_error);
+        }
+        // 29b. Or calculate the error indicator as commanded by the greedy algorithm.
+        else if (calc_err_indicator)
+        {
+            Vector AX(X.Size());
+//            A.Mult(X, AX);
+            A.As<HypreParMatrix>()->Mult(X, AX);
+            Vector residual(X.Size());
+            subtract(B, AX, residual);
+
+            curr_error = sqrt(InnerProduct(MPI_COMM_WORLD, residual, residual));
+            if (myid == 0) cout << "The error indicator is: "
+                                    << curr_error << endl;
+            greedy_sampler->setPointErrorIndicator(curr_error, 1);
+        }
+        // 29c. Or if not using the greedy algorithm, output the relative error
+        //     the error indicator of the point. (just for extra information)
+        else if (!build_database && online)
+        {
+            Vector true_solution(X.Size());
+            ifstream solution_file;
+            std::string solution_filename = "Sol" + curr_basis_identifier + to_string(myid);
+            if (myid == 0) cout << "Comparing current run to solution at: "
+                                    << solution_filename << endl;
+            solution_file.open(solution_filename);
+            true_solution.Load(solution_file, X.Size());
+            solution_file.close();
+            Vector residual(X.Size());
+            subtract(X, true_solution, residual);
+
+            const double abs_error = sqrt(InnerProduct(MPI_COMM_WORLD, residual, residual));
+            const double sol_norm = sqrt(InnerProduct(MPI_COMM_WORLD, true_solution,
+                                         true_solution));
+
+            curr_error = abs_error / sol_norm;
+            if (myid == 0) cout << "The relative error is: " << curr_error << endl;
+
+            Vector AX(X.Size());
+//            A.Mult(X, AX);
+            A.As<HypreParMatrix>()->Mult(X, AX);
+            subtract(B, AX, residual);
+            curr_error = sqrt(InnerProduct(MPI_COMM_WORLD, residual, residual));
+            if (myid == 0) cout << "The error indicator is: "
+                                    << curr_error << endl;
+        }
+
+        // 30. If calculating the relative error, or after we sampled our first point,
+        //     create a global ROM basis.
+        if (calc_rel_error || (offline && basisIdentifiers.size() == 1))
+        {
+            mergeTimer.Start();
+            std::unique_ptr<CAROM::BasisGenerator> basis_generator;
+            options = new CAROM::Options(fespace->GetTrueVSize(), max_num_snapshots, 1,
+                                         update_right_SV);
+            generator = new CAROM::BasisGenerator(*options, isIncremental, loadBasisName);
+            for (int i = 0; i < basisIdentifiers.size(); ++i)
+            {
+                std::string snapshot_filename = basisIdentifiers[i] + "_snapshot";
+                generator->loadSamples(snapshot_filename,"snapshot");
+            }
+            generator->endSamples(); // save the merged basis file
+            mergeTimer.Stop();
+            if (myid == 0)
+            {
+                printf("Elapsed time for merging and building ROM basis: %e second\n",
+                       mergeTimer.RealTime());
+            }
+            delete generator;
+            delete options;
+        }
+
+        // 31. print timing info
+        if (myid == 0)
+        {
+            if(fom || offline)
+            {
+                printf("Elapsed time for assembling FOM: %e second\n",
+                       assembleTimer.RealTime());
+                printf("Elapsed time for solving FOM: %e second\n", solveTimer.RealTime());
+            }
+            if(online)
+            {
+                printf("Elapsed time for assembling ROM: %e second\n",
+                       assembleTimer.RealTime());
+                printf("Elapsed time for solving ROM: %e second\n", solveTimer.RealTime());
+            }
+        }
+
+        // 32. Free the used memory.
+        delete a;
+        delete sigma;
+        delete muinv;
+        delete b;
+        delete fespace;
+        delete fec;
+        delete pmesh;
+
+    } while(build_database);
+    MPI_Finalize();
+
+    return 0;
+}
+
+// 33. define Exact E
+void E_exact(const Vector &x, Vector &E)
+{
+    if (dim == 3)
+    {
+        E(0) = sin(kappa * x(1));
+        E(1) = sin(kappa * x(2));
+        E(2) = sin(kappa * x(0));
+    }
+    else
+    {
+        E(0) = sin(kappa * x(1));
+        E(1) = sin(kappa * x(0));
+        if (x.Size() == 3) {
+            E(2) = 0.0;
+        }
+    }
+}
+
+// 34. define spatially varying righthand side function
+void f_exact(const Vector &x, Vector &f)
+{
+    if (dim == 3)
+    {
+        f(0) = (1. + kappa * kappa) * sin(kappa * x(1));
+        f(1) = (1. + kappa * kappa) * sin(kappa * x(2));
+        f(2) = (1. + kappa * kappa) * sin(kappa * x(0));
+    }
+    else
+    {
+        f(0) = (1. + kappa * kappa) * sin(kappa * x(1));
+        f(1) = (1. + kappa * kappa) * sin(kappa * x(0));
+        if (x.Size() == 3) {
+            f(2) = 0.0;
+        }
+    }
+}
+
diff --git a/examples/prom/maxwell_local_rom_greedy.hpp b/examples/prom/maxwell_local_rom_greedy.hpp
new file mode 100644
index 000000000..e25915f21
--- /dev/null
+++ b/examples/prom/maxwell_local_rom_greedy.hpp
@@ -0,0 +1,13 @@
+//
+//  maxwell_local_rom_greedy.hpp
+//  
+//
+//  Created by Anderson, William Michael on 11/15/23.
+//
+
+#ifndef maxwell_local_rom_greedy_hpp
+#define maxwell_local_rom_greedy_hpp
+
+#include <stdio.h>
+
+#endif /* maxwell_local_rom_greedy_hpp */

From b36cf287daa511efae692732f688d678123845a5 Mon Sep 17 00:00:00 2001
From: Anderson <anderson316@llnl.gov>
Date: Wed, 15 Nov 2023 14:06:09 -0800
Subject: [PATCH 02/13] Removed hpp file

---
 examples/prom/maxwell_local_rom_greedy.hpp | 13 -------------
 1 file changed, 13 deletions(-)
 delete mode 100644 examples/prom/maxwell_local_rom_greedy.hpp

diff --git a/examples/prom/maxwell_local_rom_greedy.hpp b/examples/prom/maxwell_local_rom_greedy.hpp
deleted file mode 100644
index e25915f21..000000000
--- a/examples/prom/maxwell_local_rom_greedy.hpp
+++ /dev/null
@@ -1,13 +0,0 @@
-//
-//  maxwell_local_rom_greedy.hpp
-//  
-//
-//  Created by Anderson, William Michael on 11/15/23.
-//
-
-#ifndef maxwell_local_rom_greedy_hpp
-#define maxwell_local_rom_greedy_hpp
-
-#include <stdio.h>
-
-#endif /* maxwell_local_rom_greedy_hpp */

From dc403970bce2377b4d45886c39077c451391508c Mon Sep 17 00:00:00 2001
From: Anderson <anderson316@llnl.gov>
Date: Tue, 12 Dec 2023 12:37:43 -0800
Subject: [PATCH 03/13] Removes extra variables/lines

---
 examples/prom/maxwell_local_rom_greedy.cpp | 15 +--------------
 1 file changed, 1 insertion(+), 14 deletions(-)

diff --git a/examples/prom/maxwell_local_rom_greedy.cpp b/examples/prom/maxwell_local_rom_greedy.cpp
index c45119607..d1f350d02 100644
--- a/examples/prom/maxwell_local_rom_greedy.cpp
+++ b/examples/prom/maxwell_local_rom_greedy.cpp
@@ -76,12 +76,8 @@ int main(int argc, char *argv[])
     bool visit = false;
     bool fom = false;
     bool offline = false;
-    bool merge = false;
     bool online = false;
     int precision = 8;
-//    int id = 0;
-//    int nsets = 0;
-    double coef = 1.0;
     bool build_database = false;
     bool use_database = false;
     double greedy_param_space_min = 1.0;
@@ -125,10 +121,6 @@ int main(int argc, char *argv[])
                    "Enable or disable the offline phase.");
     args.AddOption(&online, "-online", "--online", "-no-online", "--no-online",
                    "Enable or disable the online phase.");
-    args.AddOption(&merge, "-merge", "--merge", "-no-merge", "--no-merge",
-                   "Enable or disable the merge phase.");
-//    args.AddOption(&id, "-id", "--id", "Parametric id");
-//    args.AddOption(&nsets, "-ns", "--nset", "Number of parametric snapshot sets");
     args.AddOption(&build_database, "-build_database", "--build_database",
                    "-no-build_database", "--no-build_database",
                    "Enable or disable the build_database phase of the greedy algorithm.");
@@ -165,8 +157,7 @@ int main(int argc, char *argv[])
 
     if (fom)
     {
-        MFEM_VERIFY(fom && !offline && !online
-                    && !merge, "everything must be turned off if fom is used.");
+        MFEM_VERIFY(fom && !offline && !online, "everything must be turned off if fom is used.");
     }
 
     CAROM::GreedySampler* greedy_sampler = NULL;
@@ -372,8 +363,6 @@ int main(int argc, char *argv[])
             loadBasisName += "_greedy";
         }
 
-//    const std::string basisName = "basis";
-//    const std::string basisFileName = basisName + std::to_string(id);
         const CAROM::Matrix* spatialbasis;
         CAROM::Options* options;
         CAROM::BasisGenerator *generator;
@@ -599,7 +588,6 @@ int main(int argc, char *argv[])
         else if (calc_err_indicator)
         {
             Vector AX(X.Size());
-//            A.Mult(X, AX);
             A.As<HypreParMatrix>()->Mult(X, AX);
             Vector residual(X.Size());
             subtract(B, AX, residual);
@@ -632,7 +620,6 @@ int main(int argc, char *argv[])
             if (myid == 0) cout << "The relative error is: " << curr_error << endl;
 
             Vector AX(X.Size());
-//            A.Mult(X, AX);
             A.As<HypreParMatrix>()->Mult(X, AX);
             subtract(B, AX, residual);
             curr_error = sqrt(InnerProduct(MPI_COMM_WORLD, residual, residual));

From 8dbaabf397dd8f8abdd2832001f0fcdbcf375a1d Mon Sep 17 00:00:00 2001
From: Anderson <anderson316@llnl.gov>
Date: Tue, 12 Dec 2023 12:41:34 -0800
Subject: [PATCH 04/13] Astyle

---
 examples/prom/maxwell_local_rom_greedy.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/examples/prom/maxwell_local_rom_greedy.cpp b/examples/prom/maxwell_local_rom_greedy.cpp
index d1f350d02..e828ceba9 100644
--- a/examples/prom/maxwell_local_rom_greedy.cpp
+++ b/examples/prom/maxwell_local_rom_greedy.cpp
@@ -157,7 +157,8 @@ int main(int argc, char *argv[])
 
     if (fom)
     {
-        MFEM_VERIFY(fom && !offline && !online, "everything must be turned off if fom is used.");
+        MFEM_VERIFY(fom && !offline
+                    && !online, "everything must be turned off if fom is used.");
     }
 
     CAROM::GreedySampler* greedy_sampler = NULL;

From ee7296a4bc034ad8118a4a9fb70011603a33f87c Mon Sep 17 00:00:00 2001
From: Anderson <anderson316@llnl.gov>
Date: Tue, 2 Jan 2024 08:35:00 -0800
Subject: [PATCH 05/13] Updated description of 'use_database' to 'FOM'

---
 .../dmd/parametric_dmdc_heat_conduction.cpp   | 999 ++++++++++++++++++
 examples/prom/maxwell_local_rom_greedy.cpp    |   4 +-
 2 files changed, 1001 insertions(+), 2 deletions(-)
 create mode 100644 examples/dmd/parametric_dmdc_heat_conduction.cpp

diff --git a/examples/dmd/parametric_dmdc_heat_conduction.cpp b/examples/dmd/parametric_dmdc_heat_conduction.cpp
new file mode 100644
index 000000000..788e14161
--- /dev/null
+++ b/examples/dmd/parametric_dmdc_heat_conduction.cpp
@@ -0,0 +1,999 @@
+/******************************************************************************
+ *
+ * Copyright (c) 2013-2023, Lawrence Livermore National Security, LLC
+ * and other libROM project developers. See the top-level COPYRIGHT
+ * file for details.
+ *
+ * SPDX-License-Identifier: (Apache-2.0 OR MIT)
+ *
+ *****************************************************************************/
+
+//      libROM MFEM Example: DMDc for Heat_Conduction (adapted from ex16p.cpp)
+//
+// Compile with: make heat_conduction_dmdc
+//
+// =================================================================================
+//
+// Sample runs and results for DMDc:
+//
+// Command 1:
+//   mpirun -np 8 heat_conduction_dmdc -s 1 -a 0.0 -k 1.0 -visit
+//
+// Output 1:
+//   Relative error of DMDc temperature (u) at t_final: 0.5 is 0.0021705658
+//
+// Command 2:
+//   mpirun -np 8 heat_conduction_dmdc -s 1 -a 0.5 -k 0.5 -o 4 -tf 0.7 -vs 1 -visit
+//
+// Output 2:
+//   Relative error of DMDc temperature (u) at t_final: 0.7 is 0.00099736216
+//
+// =================================================================================
+//
+// Description:  This example solves a time dependent nonlinear heat equation
+//               problem of the form du/dt = C(u) + s, with a non-linear diffusion
+//               operator C(u) = \nabla \cdot (\kappa + \alpha u) \nabla u
+//               and time-varying external inlet and outlet source.
+//               The inlet and the outlet is located at (0,0) and (0.5,0.5)
+//               in the reference domain [-1,1]^d, where the shut down time and
+//               the amplitude of the sources are the control variables.
+//
+//               The example demonstrates the use of nonlinear operators (the
+//               class ConductionOperator defining C(u)), as well as their
+//               implicit time integration. Note that implementing the method
+//               ConductionOperator::ImplicitSolve is the only requirement for
+//               high-order implicit (SDIRK) time integration. Optional saving
+//               with ADIOS2 (adios2.readthedocs.io) is also illustrated.
+
+#include "mfem.hpp"
+#include "algo/DMDc.h"
+//#include "algo/DMD.h"
+#include "linalg/Vector.h"
+#include <cmath>
+#include <fstream>
+#include <iostream>
+
+#include "utils/CSVDatabase.h"
+#include "utils/HDFDatabase.h"
+
+#ifndef _WIN32
+#include <sys/stat.h>  // mkdir
+#else
+#include <direct.h>    // _mkdir
+#define mkdir(dir, mode) _mkdir(dir)
+#endif
+
+using namespace std;
+using namespace mfem;
+
+/** After spatial discretization, the conduction model can be written as:
+ *
+ *     du/dt = M^{-1}(-Ku)
+ *
+ *  where u is the vector representing the temperature, M is the mass matrix,
+ *  and K is the diffusion operator with diffusivity depending on u:
+ *  (\kappa + \alpha u).
+ *
+ *  Class ConductionOperator represents the right-hand side of the above ODE.
+ */
+class ConductionOperator : public TimeDependentOperator
+{
+protected:
+    ParFiniteElementSpace &fespace;
+    Array<int> ess_tdof_list; // this list remains empty for pure Neumann b.c.
+
+    ParBilinearForm *M;
+    ParBilinearForm *K;
+    ParLinearForm *B;
+
+    HypreParMatrix Mmat;
+    HypreParMatrix Kmat;
+    HypreParMatrix *T; // T = M + dt K
+    double current_dt;
+
+    CGSolver M_solver;    // Krylov solver for inverting the mass matrix M
+    HypreSmoother M_prec; // Preconditioner for the mass matrix M
+
+    CGSolver T_solver;    // Implicit solver for T = M + dt K
+    HypreSmoother T_prec; // Preconditioner for the implicit solver
+
+    FunctionCoefficient &src_func; // Source function coefficient
+    double alpha, kappa; // Nonlinear parameters
+
+    mutable Vector z; // auxiliary vector
+    HypreParVector *b; // source vector
+
+public:
+    ConductionOperator(ParFiniteElementSpace &f, FunctionCoefficient &s,
+                       double alpha, double kappa, const Vector &u);
+
+    virtual void Mult(const Vector &u, Vector &du_dt) const;
+    /** Solve the Backward-Euler equation: k = f(u + dt*k, t), for the unknown k.
+        This is the only requirement for high-order SDIRK implicit integration.*/
+    virtual void ImplicitSolve(const double dt, const Vector &u, Vector &k);
+
+    /// Update the diffusion BilinearForm K using the given true-dof vector `u`.
+    void SetSourceTime(const double t);
+
+    /// Update the diffusion BilinearForm K using the given true-dof vector `u`.
+    void SetParameters(const Vector &u);
+
+    virtual ~ConductionOperator();
+};
+
+double InitialTemperature(const Vector &x);
+double TimeWindowFunction(const double t, const double t_begin,
+                          const double t_end);
+double Amplitude(const double t, const int index);
+double SourceFunction(const Vector &x, double t);
+
+Vector bb_min, bb_max; // Mesh bounding box
+double amp_in = 0.2;
+double t_end_in = 0.1;
+double amp_out = 0.1;
+double t_end_out = 0.3;
+double dt = 1.0e-2;
+
+int main(int argc, char *argv[])
+{
+    // 1. Initialize MPI.
+    int num_procs, myid;
+    MPI_Init(&argc, &argv);
+    MPI_Comm_size(MPI_COMM_WORLD, &num_procs);
+    MPI_Comm_rank(MPI_COMM_WORLD, &myid);
+
+    // 2. Parse command-line options.
+    const char *mesh_file = "";
+    int ser_ref_levels = 2;
+    int par_ref_levels = 1;
+    int order = 2;
+    int ode_solver_type = 1;
+    double t_final = 0.5;
+    double alpha = 1.0e-2;
+    double kappa = 0.5;
+    double ef = 0.9999;
+    int rdim = -1;
+    bool visualization = true;
+    bool visit = false;
+    int vis_steps = 5;
+    bool adios2 = false;
+    bool offline = false;
+    bool online = false;
+    
+    double closest_rbf_val = 0.9;
+    bool predict = false;
+    bool save_dofs = false;
+    bool csvFormat = true;
+    const char *basename = "";
+
+    int precision = 8;
+    cout.precision(precision);
+
+    OptionsParser args(argc, argv);
+    args.AddOption(&mesh_file, "-m", "--mesh",
+                   "Mesh file to use.");
+    args.AddOption(&ser_ref_levels, "-rs", "--refine-serial",
+                   "Number of times to refine the mesh uniformly in serial.");
+    args.AddOption(&par_ref_levels, "-rp", "--refine-parallel",
+                   "Number of times to refine the mesh uniformly in parallel.");
+    args.AddOption(&order, "-o", "--order",
+                   "Order (degree) of the finite elements.");
+    args.AddOption(&ode_solver_type, "-s", "--ode-solver",
+                   "ODE solver: 1 - Backward Euler, 2 - SDIRK2, 3 - SDIRK3,\n\t"
+                   "\t   11 - Forward Euler, 12 - RK2, 13 - RK3 SSP, 14 - RK4.");
+    args.AddOption(&t_final, "-tf", "--t-final",
+                   "Final time; start time is 0.");
+    args.AddOption(&dt, "-dt", "--time-step",
+                   "Time step.");
+    args.AddOption(&alpha, "-a", "--alpha",
+                   "Alpha coefficient.");
+    args.AddOption(&kappa, "-k", "--kappa",
+                   "Kappa coefficient offset.");
+    args.AddOption(&amp_in, "-amp-in", "--amplitude-in",
+                   "Amplitude of inlet source at (0,0).");
+    args.AddOption(&amp_out, "-amp-out", "--amplitude-out",
+                   "Amplitude of outlet source at (0.5,0.5).");
+    args.AddOption(&t_end_in, "-t-end-in", "--t-end-in",
+                   "End time of inlet source at (0,0).");
+    args.AddOption(&t_end_out, "-t-end-out", "--t-end-out",
+                   "End time of outlet source at (0.5,0.5).");
+    args.AddOption(&visualization, "-vis", "--visualization", "-no-vis",
+                   "--no-visualization",
+                   "Enable or disable GLVis visualization.");
+    args.AddOption(&visit, "-visit", "--visit-datafiles", "-no-visit",
+                   "--no-visit-datafiles",
+                   "Save data files for VisIt (visit.llnl.gov) visualization.");
+    args.AddOption(&vis_steps, "-vs", "--visualization-steps",
+                   "Visualize every n-th timestep.");
+    args.AddOption(&adios2, "-adios2", "--adios2-streams", "-no-adios2",
+                   "--no-adios2-streams",
+                   "Save data using adios2 streams.");
+    args.AddOption(&ef, "-ef", "--energy_fraction",
+                   "Energy fraction for DMDc.");
+    args.AddOption(&rdim, "-rdim", "--rdim",
+                   "Reduced dimension for DMDc.");
+    
+    args.AddOption(&offline, "-offline", "--offline", "-no-offline", "--no-offline",
+                   "Enable or disable the offline phase.");
+    args.AddOption(&online, "-online", "--online", "-no-online", "--no-online",
+                   "Enable or disable the online phase.");
+    args.AddOption(&closest_rbf_val, "-crv", "--crv",
+                   "DMD Closest RBF Value.");
+    args.AddOption(&predict, "-predict", "--predict", "-no-predict", "--no-predict",
+                   "Enable or disable DMD prediction.");
+    args.AddOption(&save_dofs, "-save", "--save", "-no-save", "--no-save",
+                   "Enable or disable MFEM DOF solution snapshot files).");
+    args.AddOption(&csvFormat, "-csv", "--csv", "-hdf", "--hdf",
+                   "Use CSV or HDF format for files output by -save option.");
+    args.AddOption(&basename, "-out", "--outputfile-name",
+                   "Name of the sub-folder to dump files within the run directory.");
+    
+    args.Parse();
+    if (!args.Good())
+    {
+        args.PrintUsage(cout);
+        MPI_Finalize();
+        return 1;
+    }
+
+    if (myid == 0)
+    {
+        args.PrintOptions(cout);
+    }
+    
+    string outputPath = ".";
+    if (save_dofs)
+    {
+        outputPath = "run";
+        if (string(basename) != "") {
+            outputPath += "/" + string(basename);
+        }
+        if (myid == 0)
+        {
+            const char path_delim = '/';
+            string::size_type pos = 0;
+            do {
+                pos = outputPath.find(path_delim, pos+1);
+                string subdir = outputPath.substr(0, pos);
+                mkdir(subdir.c_str(), 0777);
+            }
+            while (pos != string::npos);
+        }
+    }
+
+    // 3. Read the serial mesh from the given mesh file on all processors. We can
+    //    handle triangular, quadrilateral, tetrahedral and hexahedral meshes
+    //    with the same code.
+    Mesh *mesh;
+    if (mesh_file == "")
+    {
+        mesh = new Mesh(Mesh::MakeCartesian2D(2, 2, Element::QUADRILATERAL));
+    }
+    else
+    {
+        mesh = new Mesh(mesh_file, 1, 1);
+    }
+    int dim = mesh->Dimension();
+
+    mesh->GetBoundingBox(bb_min, bb_max, max(order, 1));
+
+    // 4. Define the ODE solver used for time integration. Several implicit
+    //    singly diagonal implicit Runge-Kutta (SDIRK) methods, as well as
+    //    explicit Runge-Kutta methods are available.
+    ODESolver *ode_solver;
+    bool explicit_solver;
+    switch (ode_solver_type)
+    {
+    // Implicit L-stable methods
+    case 1:
+        ode_solver = new BackwardEulerSolver;
+        explicit_solver = false;
+        break;
+    case 2:
+        ode_solver = new SDIRK23Solver(2);
+        explicit_solver = false;
+        break;
+    case 3:
+        ode_solver = new SDIRK33Solver;
+        explicit_solver = false;
+        break;
+    // Explicit methods
+    case 11:
+        ode_solver = new ForwardEulerSolver;
+        explicit_solver = true;
+        break;
+    case 12:
+        ode_solver = new RK2Solver(0.5);
+        explicit_solver = true;
+        break; // midpoint method
+    case 13:
+        ode_solver = new RK3SSPSolver;
+        explicit_solver = true;
+        break;
+    case 14:
+        ode_solver = new RK4Solver;
+        explicit_solver = true;
+        break;
+    case 15:
+        ode_solver = new GeneralizedAlphaSolver(0.5);
+        explicit_solver = true;
+        break;
+    // Implicit A-stable methods (not L-stable)
+    case 22:
+        ode_solver = new ImplicitMidpointSolver;
+        explicit_solver = false;
+        break;
+    case 23:
+        ode_solver = new SDIRK23Solver;
+        explicit_solver = false;
+        break;
+    case 24:
+        ode_solver = new SDIRK34Solver;
+        explicit_solver = false;
+        break;
+    default:
+        cout << "Unknown ODE solver type: " << ode_solver_type << '\n';
+        delete mesh;
+        return 3;
+    }
+
+    // 5. Refine the mesh in serial to increase the resolution. In this example
+    //    we do 'ser_ref_levels' of uniform refinement, where 'ser_ref_levels' is
+    //    a command-line parameter.
+    for (int lev = 0; lev < ser_ref_levels; lev++)
+    {
+        mesh->UniformRefinement();
+    }
+
+    // 6. Define a parallel mesh by a partitioning of the serial mesh. Refine
+    //    this mesh further in parallel to increase the resolution. Once the
+    //    parallel mesh is defined, the serial mesh can be deleted.
+    ParMesh *pmesh = new ParMesh(MPI_COMM_WORLD, *mesh);
+    delete mesh;
+    for (int lev = 0; lev < par_ref_levels; lev++)
+    {
+        pmesh->UniformRefinement();
+    }
+
+    // 7. Define the vector finite element space representing the current and the
+    //    initial temperature, u_ref.
+    H1_FECollection fe_coll(order, dim);
+    ParFiniteElementSpace fespace(pmesh, &fe_coll);
+
+    int fe_size = fespace.GlobalTrueVSize();
+    if (myid == 0)
+    {
+        cout << "Number of temperature unknowns: " << fe_size << endl;
+    }
+
+    ParGridFunction u_gf(&fespace);
+
+    // 8. Set the initial conditions for u. All boundaries are considered
+    //    natural.
+    FunctionCoefficient u_0(InitialTemperature);
+    u_gf.ProjectCoefficient(u_0);
+    Vector u;
+    u_gf.GetTrueDofs(u);
+
+    // 9. Initialize the conduction operator and the VisIt visualization.
+    FunctionCoefficient s(SourceFunction);
+    ConductionOperator oper(fespace, s, alpha, kappa, u);
+
+    u_gf.SetFromTrueDofs(u);
+    {
+        ostringstream mesh_name, sol_name;
+        mesh_name << outputPath << "/parametric_dmdc_heat_conduction_" << to_string(
+                      amp_in) << "_" << to_string(amp_out) << "-mesh." << setfill('0') << setw(6) << myid;
+        sol_name << outputPath << "/parametric_dmdc_heat_conduction_" << to_string(
+                     amp_in) << "_" << to_string(amp_out) <<  "-init." << setfill('0') << setw(6) << myid;
+        ofstream omesh(mesh_name.str().c_str());
+        omesh.precision(precision);
+        pmesh->Print(omesh);
+        ofstream osol(sol_name.str().c_str());
+        osol.precision(precision);
+        u_gf.Save(osol);
+    }
+//    {
+//        ostringstream mesh_name, sol_name;
+//        mesh_name << "heat_conduction_dmdc-mesh." << setfill('0') << setw(6) << myid;
+//        sol_name << "heat_conduction_dmdc-init." << setfill('0') << setw(6) << myid;
+//        ofstream omesh(mesh_name.str().c_str());
+//        omesh.precision(precision);
+//        pmesh->Print(omesh);
+//        ofstream osol(sol_name.str().c_str());
+//        osol.precision(precision);
+//        u_gf.Save(osol);
+//    }
+    
+
+//    VisItDataCollection visit_dc("Heat_Conduction", pmesh);
+    VisItDataCollection visit_dc(outputPath + "/parametric_dmdc_Heat_Conduction_" +
+                                 to_string(amp_in) + "_" + to_string(amp_out), pmesh);
+    visit_dc.RegisterField("temperature", &u_gf);
+    if (visit)
+    {
+        visit_dc.SetCycle(0);
+        visit_dc.SetTime(0.0);
+        visit_dc.Save();
+    }
+
+    // Optionally output a BP (binary pack) file using ADIOS2. This can be
+    // visualized with the ParaView VTX reader.
+#ifdef MFEM_USE_ADIOS2
+    ADIOS2DataCollection* adios2_dc = NULL;
+    if (adios2)
+    {
+        std::string postfix(mesh_file);
+        postfix.erase(0, std::string("../data/").size() );
+        postfix += "_o" + std::to_string(order);
+        postfix += "_solver" + std::to_string(ode_solver_type);
+        const std::string collection_name = "heat_conduction_dmdc-p-" + postfix + ".bp";
+
+        adios2_dc = new ADIOS2DataCollection(MPI_COMM_WORLD, collection_name, pmesh);
+        adios2_dc->SetParameter("SubStreams", std::to_string(num_procs/2) );
+        adios2_dc->RegisterField("temperature", &u_gf);
+        adios2_dc->SetCycle(0);
+        adios2_dc->SetTime(0.0);
+        adios2_dc->Save();
+    }
+#endif
+
+    socketstream sout;
+    if (visualization)
+    {
+        char vishost[] = "localhost";
+        int  visport   = 19916;
+        sout.open(vishost, visport);
+        sout << "parallel " << num_procs << " " << myid << endl;
+        int good = sout.good(), all_good;
+        MPI_Allreduce(&good, &all_good, 1, MPI_INT, MPI_MIN, pmesh->GetComm());
+        if (!all_good)
+        {
+            sout.close();
+            visualization = false;
+            if (myid == 0)
+            {
+                cout << "Unable to connect to GLVis server at "
+                     << vishost << ':' << visport << endl;
+                cout << "GLVis visualization disabled.\n";
+            }
+        }
+        else
+        {
+            sout.precision(precision);
+            sout << "solution\n" << *pmesh << u_gf;
+            sout << flush;
+        }
+    }
+
+    StopWatch fom_timer, dmd_training_timer, dmd_prediction_timer;
+
+    fom_timer.Start();
+
+    // 10. Perform time-integration (looping over the time iterations, ti, with a
+    //     time-step dt).
+    ode_solver->Init(oper);
+    double t = 0.0;
+    vector<double> ts;
+    double f[2];
+    
+    CAROM::Vector* init = NULL;
+
+    CAROM::Database *db = NULL;
+    if (csvFormat)
+        db = new CAROM::CSVDatabase();
+    else
+        db = new CAROM::HDFDatabase();
+
+    vector<int> snap_list;
+
+    fom_timer.Stop();
+
+//    dmd_training_timer.Start();
+    
+    CAROM::DMDc* dmd_u = NULL;
+    
+    if (offline)
+    {
+        dmd_training_timer.Start();
+        
+        // 11. Create DMDc object and take initial sample.
+        u_gf.SetFromTrueDofs(u);
+        f[0] = Amplitude(t, 0);
+        f[1] = Amplitude(t, 1);
+        
+        std::cout << "t =  " << t << std::endl;
+        
+//        CAROM::DMDc* dmd_u; //redefined this on 494
+        dmd_u = new CAROM::DMDc(u.Size(), 2, dt);
+        dmd_u->takeSample(u.GetData(), t, f, false);
+//        ts.push_back(t);
+        
+        dmd_training_timer.Stop();
+    }
+    
+    if (online)
+    {
+        u_gf.SetFromTrueDofs(u);
+        init = new CAROM::Vector(u.GetData(), u.Size(), true);
+    }
+    
+    if (save_dofs && myid == 0)
+    {
+        if (csvFormat)
+        {
+            mkdir((outputPath + "/step0").c_str(), 0777);
+            db->putDoubleArray(outputPath + "/step0/sol.csv", u.GetData(), u.Size());
+        }
+        else
+        {
+            db->create(outputPath + "/dmd_0.hdf");
+            db->putDoubleArray("step0sol", u.GetData(), u.Size());
+        }
+    }
+    
+    ts.push_back(t);
+    snap_list.push_back(0);
+    
+    bool last_step = false;
+    for (int ti = 1; !last_step; ti++)
+    {
+        fom_timer.Start();
+
+        if (t + dt >= t_final - dt/2)
+        {
+            last_step = true;
+        }
+
+        // Assuming Euler method is used
+        if (explicit_solver)
+        {
+            oper.SetSourceTime(t);
+        }
+        else
+        {
+            oper.SetSourceTime(t + dt);
+        }
+        ode_solver->Step(u, t, dt);
+
+        fom_timer.Stop();
+        
+        if (offline)
+        {
+            dmd_training_timer.Start();
+            
+            u_gf.SetFromTrueDofs(u);
+            f[0] = Amplitude(t, 0);
+            f[1] = Amplitude(t, 1);
+            dmd_u->takeSample(u.GetData(), t, f, last_step);
+//            ts.push_back(t);
+            
+            if (myid == 0)
+            {
+                std::cout << "Taking snapshot at: " << t << std::endl;
+            }
+            
+            dmd_training_timer.Stop();
+        }
+        
+        if (save_dofs && myid == 0)
+        {
+            if (csvFormat)
+            {
+                mkdir((outputPath + "/step" + to_string(ti)).c_str(), 0777);
+                db->putDoubleArray(outputPath + "/step" + to_string(ti) + "/sol.csv",
+                                   u.GetData(), u.Size());
+            }
+            else
+                db->putDoubleArray("step" + to_string(ti) + "sol",
+                                   u.GetData(), u.Size());
+        }
+        
+        ts.push_back(t);
+        snap_list.push_back(ti);
+        
+        if (last_step || (ti % vis_steps) == 0)
+        {
+            if (myid == 0)
+            {
+                cout << "step " << ti << ", t = " << t << endl;
+            }
+
+            u_gf.SetFromTrueDofs(u);
+            if (visualization)
+            {
+                sout << "parallel " << num_procs << " " << myid << "\n";
+                sout << "solution\n" << *pmesh << u_gf << flush;
+            }
+
+            if (visit)
+            {
+                visit_dc.SetCycle(ti);
+                visit_dc.SetTime(t);
+                visit_dc.Save();
+            }
+
+#ifdef MFEM_USE_ADIOS2
+            if (adios2)
+            {
+                adios2_dc->SetCycle(ti);
+                adios2_dc->SetTime(t);
+                adios2_dc->Save();
+            }
+#endif
+        }
+        oper.SetParameters(u);
+    }
+
+#ifdef MFEM_USE_ADIOS2
+    if (adios2)
+    {
+        delete adios2_dc;
+    }
+#endif
+    
+    if (save_dofs && myid == 0)
+    {
+        if (csvFormat)
+        {
+            db->putDoubleVector(outputPath + "/tval.csv", ts, ts.size());
+            db->putInteger(outputPath + "/numsnap", snap_list.size());
+            db->putIntegerArray(outputPath + "/snap_list.csv", snap_list.data(),
+                                snap_list.size());
+        }
+        else
+        {
+            db->putDoubleVector("tval", ts, ts.size());
+            db->putInteger("numsnap", snap_list.size());
+            db->putInteger("snap_bound_size", 0);
+            db->putIntegerArray("snap_list", snap_list.data(),
+                                snap_list.size());
+        }
+    }
+
+    // 12. Save the final solution in parallel. This output can be viewed later
+    //     using GLVis: "glvis -np <np> -m heat_conduction_dmdc-mesh -g heat_conduction_dmdc-final".
+    {
+        ostringstream sol_name;
+        sol_name << outputPath << "parametric_dmdc_Heat_Conduction" << to_string(amp_in) << "_" << to_string(amp_out) << "-final." << setfill('0') << setw(6) << myid;
+        ofstream osol(sol_name.str().c_str());
+        osol.precision(precision);
+        u_gf.Save(osol);
+//        ostringstream sol_name;
+//        sol_name << "heat_conduction_dmdc-final." << setfill('0') << setw(6) << myid;
+//        ofstream osol(sol_name.str().c_str());
+//        osol.precision(precision);
+//        u_gf.Save(osol);
+    }
+
+    // 13. Calculate the DMDc modes.
+    // pretty sure it is easier to get rid of EF because each snapshot must have same rdim
+    if (offline || online)
+    {
+        if (offline)
+        {
+            if (myid == 0 && rdim != -1 && ef != -1)
+            {
+                std::cout << "Both rdim and ef are set. ef will be ignored." << std::endl;
+            }
+            
+            dmd_training_timer.Start();
+            
+            if (rdim != -1)
+            {
+                if (myid == 0)
+                {
+                    std::cout << "Creating DMDc with rdim: " << rdim << std::endl;
+                }
+
+                dmd_u->train(rdim);
+
+            }
+            else if (ef != -1)
+            {
+                if (myid == 0)
+                {
+                    std::cout << "Creating DMDc with energy fraction: " << ef << std::endl;
+                }
+                dmd_u->train(ef);
+            }
+            
+            dmd_training_timer.Stop();
+            
+            dmd_u->save(outputPath + "/" + to_string(amp_in) + "_" + to_string(
+                                                                               amp_out));
+            
+            if (myid == 0)
+            {
+                std::ofstream fout;
+                fout.open("parameters.txt", std::ios::app);
+                fout << amp_in << " " << amp_out << std::endl;
+                fout.close();
+            }
+        }
+        
+//         probably will need to change a lot here
+        if (online)
+        {
+            if (myid == 0)
+            {
+                std::cout << "Creating DMD using the rdim of the offline phase" << std::endl;
+            }
+
+            std::fstream fin("parameters.txt", std::ios_base::in);
+            double curr_param;
+            std::vector<std::string> dmd_paths;
+            std::vector<CAROM::Vector*> param_vectors;
+
+            while (fin >> curr_param)
+            {
+                double curr_amp_in = curr_param;
+                fin >> curr_param;
+                double curr_amp_out = curr_param;
+
+                dmd_paths.push_back(outputPath + "/" + to_string(curr_amp_in) + "_" + to_string(curr_amp_out) );
+                CAROM::Vector* param_vector = new CAROM::Vector(2, false);
+                param_vector->item(0) = curr_amp_in;
+                param_vector->item(1) = curr_amp_out;
+                param_vectors.push_back(param_vector);
+            }
+            fin.close();
+
+            CAROM::Vector* desired_param = new CAROM::Vector(2, false);
+            desired_param->item(0) = amp_in;
+            desired_param->item(1) = amp_out;
+
+            dmd_training_timer.Start();
+
+            CAROM::getParametricDMD(dmd_u, param_vectors, dmd_paths, desired_param,
+                                    "G", "LS", closest_rbf_val);
+
+            dmd_u->project(init,param_vectors);
+
+            dmd_training_timer.Stop();
+            delete desired_param;
+        }
+        
+        if (predict)
+        {
+            Vector true_solution_u(u.Size());
+            true_solution_u = u.GetData();
+            
+            dmd_prediction_timer.Start();
+            
+            // 14. Predict the state at t_final using DMDc.
+            if (myid == 0)
+            {
+                std::cout << "Predicting temperature using DMDc" << std::endl;
+            }
+            
+            CAROM::Vector* result_u = dmd_u->predict(ts[0]);
+            Vector initial_dmd_solution_u(result_u->getData(), result_u->dim());
+            u_gf.SetFromTrueDofs(initial_dmd_solution_u);
+            
+            VisItDataCollection dmd_visit_dc("parametric_dmdc_Heat_Conduction", pmesh);
+            dmd_visit_dc.RegisterField("temperature", &u_gf);
+            
+            if (visit)
+            {
+                dmd_visit_dc.SetCycle(0);
+                dmd_visit_dc.SetTime(0.0);
+                dmd_visit_dc.Save();
+            }
+            
+            delete result_u;
+            
+            if (visit)
+            {
+                for (int i = 1; i < ts.size(); i++)
+                {
+                    if (i == ts.size() - 1 || (i % vis_steps) == 0)
+                    {
+                        result_u = dmd_u->predict(ts[i]);
+                        Vector dmd_solution_u(result_u->getData(), result_u->dim());
+                        u_gf.SetFromTrueDofs(dmd_solution_u);
+                        
+                        dmd_visit_dc.SetCycle(i);
+                        dmd_visit_dc.SetTime(ts[i]);
+                        dmd_visit_dc.Save();
+                        
+                        delete result_u;
+                    }
+                }
+            }
+            
+            dmd_prediction_timer.Stop();
+            
+            result_u = dmd_u->predict(t_final);
+            
+            // 15. Calculate the relative error between the DMDc final solution and the true solution.
+            Vector dmd_solution_u(result_u->getData(), result_u->dim());
+            Vector diff_u(true_solution_u.Size());
+            subtract(dmd_solution_u, true_solution_u, diff_u);
+            
+            double tot_diff_norm_u = sqrt(InnerProduct(MPI_COMM_WORLD, diff_u, diff_u));
+            double tot_true_solution_u_norm = sqrt(InnerProduct(MPI_COMM_WORLD,
+                                                                true_solution_u, true_solution_u));
+            
+            if (myid == 0)
+            {
+                
+                std::cout << "Relative error of DMDc temperature (u) at t_final: " << t_final <<
+                " is " << tot_diff_norm_u / tot_true_solution_u_norm << std::endl;
+                printf("Elapsed time for solving FOM: %e second\n", fom_timer.RealTime());
+                printf("Elapsed time for training DMDc: %e second\n",
+                       dmd_training_timer.RealTime());
+                printf("Elapsed time for predicting DMDc: %e second\n",
+                       dmd_prediction_timer.RealTime());
+            }
+            
+            delete result_u;
+            
+        }
+
+    }
+
+    // 16. Free the used memory.
+    delete ode_solver;
+    delete pmesh;
+//    delete result_u;
+    if (offline)
+    {
+        delete dmd_u;
+    }
+
+    MPI_Finalize();
+
+    return 0;
+}
+
+ConductionOperator::ConductionOperator(ParFiniteElementSpace &f,
+                                       FunctionCoefficient &s,
+                                       double al, double kap, const Vector &u)
+    : TimeDependentOperator(f.GetTrueVSize(), 0.0), fespace(f), src_func(s),
+      M(NULL), K(NULL), T(NULL), current_dt(0.0),
+      M_solver(f.GetComm()), T_solver(f.GetComm()), z(height)
+{
+    const double rel_tol = 1e-8;
+
+    M = new ParBilinearForm(&fespace);
+    M->AddDomainIntegrator(new MassIntegrator());
+    M->Assemble(0); // keep sparsity pattern of M and K the same
+    M->FormSystemMatrix(ess_tdof_list, Mmat);
+
+    M_solver.iterative_mode = false;
+    M_solver.SetRelTol(rel_tol);
+    M_solver.SetAbsTol(0.0);
+    M_solver.SetMaxIter(100);
+    M_solver.SetPrintLevel(0);
+    M_prec.SetType(HypreSmoother::Jacobi);
+    M_solver.SetPreconditioner(M_prec);
+    M_solver.SetOperator(Mmat);
+
+    alpha = al;
+    kappa = kap;
+
+    T_solver.iterative_mode = false;
+    T_solver.SetRelTol(rel_tol);
+    T_solver.SetAbsTol(0.0);
+    T_solver.SetMaxIter(100);
+    T_solver.SetPrintLevel(0);
+    T_solver.SetPreconditioner(T_prec);
+
+    SetParameters(u);
+
+    B = new ParLinearForm(&fespace);
+    B->AddDomainIntegrator(new DomainLFIntegrator(src_func));
+}
+
+void ConductionOperator::Mult(const Vector &u, Vector &du_dt) const
+{
+    // Compute:
+    //    du_dt = M^{-1}*-K(u)
+    // for du_dt
+    Kmat.Mult(u, z);
+    z.Neg(); // z = -z
+    z += *b;
+    M_solver.Mult(z, du_dt);
+}
+
+void ConductionOperator::ImplicitSolve(const double dt,
+                                       const Vector &u, Vector &du_dt)
+{
+    // Solve the equation:
+    //    du_dt = M^{-1}*[-K(u + dt*du_dt)]
+    // for du_dt
+    if (!T)
+    {
+        T = Add(1.0, Mmat, dt, Kmat);
+        current_dt = dt;
+        T_solver.SetOperator(*T);
+    }
+    MFEM_VERIFY(dt == current_dt, ""); // SDIRK methods use the same dt
+    Kmat.Mult(u, z);
+    z.Neg();
+    z += *b;
+    T_solver.Mult(z, du_dt);
+}
+
+void ConductionOperator::SetSourceTime(double t)
+{
+    src_func.SetTime(t);
+    B->Assemble();
+    b = B->ParallelAssemble();
+}
+
+void ConductionOperator::SetParameters(const Vector &u)
+{
+    ParGridFunction u_alpha_gf(&fespace);
+    u_alpha_gf.SetFromTrueDofs(u);
+    for (int i = 0; i < u_alpha_gf.Size(); i++)
+    {
+        u_alpha_gf(i) = kappa + alpha*u_alpha_gf(i);
+    }
+
+    delete K;
+    K = new ParBilinearForm(&fespace);
+
+    GridFunctionCoefficient u_coeff(&u_alpha_gf);
+
+    K->AddDomainIntegrator(new DiffusionIntegrator(u_coeff));
+    K->Assemble(0); // keep sparsity pattern of M and K the same
+    K->FormSystemMatrix(ess_tdof_list, Kmat);
+    delete T;
+    T = NULL; // re-compute T on the next ImplicitSolve
+}
+
+ConductionOperator::~ConductionOperator()
+{
+    delete T;
+    delete M;
+    delete K;
+    delete B;
+}
+
+double InitialTemperature(const Vector &x)
+{
+    if (x.Norml2() < 0.5)
+    {
+        return 2.0;
+    }
+    else
+    {
+        return 1.0;
+    }
+}
+
+double TimeWindowFunction(const double t, const double t_begin,
+                          const double t_end)
+{
+    return 0.5 * (tanh((t - t_begin) / (5*dt)) - tanh((t - t_end) / (5*dt)));
+}
+
+double Amplitude(const double t, const int index)
+{
+    if (index == 0)
+    {
+        return amp_in * TimeWindowFunction(t, 0.0, t_end_in);
+    }
+    else
+    {
+        return amp_out * TimeWindowFunction(t, 0.0, t_end_out);
+    }
+}
+
+double SourceFunction(const Vector &x, double t)
+{
+    // map to the reference [-1,1] domain
+    Vector X(x.Size()), Y(x.Size());
+    for (int i = 0; i < x.Size(); i++)
+    {
+        double center = (bb_min[i] + bb_max[i]) * 0.5;
+        X(i) = 2 * (x(i) - center) / (bb_max[i] - bb_min[i]);
+        Y(i) = X(i) - 0.5;
+    }
+
+    double r1 = X.Norml2() / 0.01;
+    double r2 = Y.Norml2() / 0.01;
+    return Amplitude(t,0) * exp(-0.5*r1*r1) - Amplitude(t,1) * exp(-0.5*r2*r2);
+}
diff --git a/examples/prom/maxwell_local_rom_greedy.cpp b/examples/prom/maxwell_local_rom_greedy.cpp
index e828ceba9..ccbd97cdb 100644
--- a/examples/prom/maxwell_local_rom_greedy.cpp
+++ b/examples/prom/maxwell_local_rom_greedy.cpp
@@ -26,12 +26,12 @@
 //               lifts the solution to the full order space.
 //
 // build_database phase: maxwell_local_rom_greedy -build_database -greedy-param-min 1.0 -greedy-param-max 1.2 -greedy-param-size 5 -greedysubsize 2 -greedyconvsize 3 -greedyrelerrortol 0.01
-// use_database phase:   maxwell_local_rom_greedy -fom -f 1.15 (create a new solution to compare with)
+// FOM phase:            maxwell_local_rom_greedy -fom -f 1.15 (create a new solution to compare with)
 // use_database phase:   maxwell_local_rom_greedy -use_database -online -f 1.15 (use the database to compute at f 1.15 while comparing to the true offline solution at f 1.15)
 //
 // Larger example:
 // build_database phase: maxwell_local_rom_greedy -build_database -greedy-param-min 0.5 -greedy-param-max 1.5 -greedy-param-size 15 -greedysubsize 4 -greedyconvsize 6 -greedyrelerrortol 0.01
-// use_database phase:   maxwell_local_rom_greedy -fom -f X.XX (create a new solution to compare with. Set X.XX to your desired frequency.)
+// FOM phase:            maxwell_local_rom_greedy -fom -f X.XX (create a new solution to compare with. Set X.XX to your desired frequency.)
 // use_database phase:   maxwell_local_rom_greedy -use_database -online -f X.XX (use the database to compute at f X.XX while comparing to the true offline solution at f X.XX)
 //
 // This example runs in parallel with MPI, by using the same number of MPI ranks

From 6e8ff6f0803015152a40d50670b6ef3fcb65d397 Mon Sep 17 00:00:00 2001
From: William Anderson <53445030+andersonw1@users.noreply.github.com>
Date: Tue, 2 Jan 2024 08:40:18 -0800
Subject: [PATCH 06/13] Deleted extra file

---
 .../dmd/parametric_dmdc_heat_conduction.cpp   | 999 ------------------
 1 file changed, 999 deletions(-)
 delete mode 100644 examples/dmd/parametric_dmdc_heat_conduction.cpp

diff --git a/examples/dmd/parametric_dmdc_heat_conduction.cpp b/examples/dmd/parametric_dmdc_heat_conduction.cpp
deleted file mode 100644
index 788e14161..000000000
--- a/examples/dmd/parametric_dmdc_heat_conduction.cpp
+++ /dev/null
@@ -1,999 +0,0 @@
-/******************************************************************************
- *
- * Copyright (c) 2013-2023, Lawrence Livermore National Security, LLC
- * and other libROM project developers. See the top-level COPYRIGHT
- * file for details.
- *
- * SPDX-License-Identifier: (Apache-2.0 OR MIT)
- *
- *****************************************************************************/
-
-//      libROM MFEM Example: DMDc for Heat_Conduction (adapted from ex16p.cpp)
-//
-// Compile with: make heat_conduction_dmdc
-//
-// =================================================================================
-//
-// Sample runs and results for DMDc:
-//
-// Command 1:
-//   mpirun -np 8 heat_conduction_dmdc -s 1 -a 0.0 -k 1.0 -visit
-//
-// Output 1:
-//   Relative error of DMDc temperature (u) at t_final: 0.5 is 0.0021705658
-//
-// Command 2:
-//   mpirun -np 8 heat_conduction_dmdc -s 1 -a 0.5 -k 0.5 -o 4 -tf 0.7 -vs 1 -visit
-//
-// Output 2:
-//   Relative error of DMDc temperature (u) at t_final: 0.7 is 0.00099736216
-//
-// =================================================================================
-//
-// Description:  This example solves a time dependent nonlinear heat equation
-//               problem of the form du/dt = C(u) + s, with a non-linear diffusion
-//               operator C(u) = \nabla \cdot (\kappa + \alpha u) \nabla u
-//               and time-varying external inlet and outlet source.
-//               The inlet and the outlet is located at (0,0) and (0.5,0.5)
-//               in the reference domain [-1,1]^d, where the shut down time and
-//               the amplitude of the sources are the control variables.
-//
-//               The example demonstrates the use of nonlinear operators (the
-//               class ConductionOperator defining C(u)), as well as their
-//               implicit time integration. Note that implementing the method
-//               ConductionOperator::ImplicitSolve is the only requirement for
-//               high-order implicit (SDIRK) time integration. Optional saving
-//               with ADIOS2 (adios2.readthedocs.io) is also illustrated.
-
-#include "mfem.hpp"
-#include "algo/DMDc.h"
-//#include "algo/DMD.h"
-#include "linalg/Vector.h"
-#include <cmath>
-#include <fstream>
-#include <iostream>
-
-#include "utils/CSVDatabase.h"
-#include "utils/HDFDatabase.h"
-
-#ifndef _WIN32
-#include <sys/stat.h>  // mkdir
-#else
-#include <direct.h>    // _mkdir
-#define mkdir(dir, mode) _mkdir(dir)
-#endif
-
-using namespace std;
-using namespace mfem;
-
-/** After spatial discretization, the conduction model can be written as:
- *
- *     du/dt = M^{-1}(-Ku)
- *
- *  where u is the vector representing the temperature, M is the mass matrix,
- *  and K is the diffusion operator with diffusivity depending on u:
- *  (\kappa + \alpha u).
- *
- *  Class ConductionOperator represents the right-hand side of the above ODE.
- */
-class ConductionOperator : public TimeDependentOperator
-{
-protected:
-    ParFiniteElementSpace &fespace;
-    Array<int> ess_tdof_list; // this list remains empty for pure Neumann b.c.
-
-    ParBilinearForm *M;
-    ParBilinearForm *K;
-    ParLinearForm *B;
-
-    HypreParMatrix Mmat;
-    HypreParMatrix Kmat;
-    HypreParMatrix *T; // T = M + dt K
-    double current_dt;
-
-    CGSolver M_solver;    // Krylov solver for inverting the mass matrix M
-    HypreSmoother M_prec; // Preconditioner for the mass matrix M
-
-    CGSolver T_solver;    // Implicit solver for T = M + dt K
-    HypreSmoother T_prec; // Preconditioner for the implicit solver
-
-    FunctionCoefficient &src_func; // Source function coefficient
-    double alpha, kappa; // Nonlinear parameters
-
-    mutable Vector z; // auxiliary vector
-    HypreParVector *b; // source vector
-
-public:
-    ConductionOperator(ParFiniteElementSpace &f, FunctionCoefficient &s,
-                       double alpha, double kappa, const Vector &u);
-
-    virtual void Mult(const Vector &u, Vector &du_dt) const;
-    /** Solve the Backward-Euler equation: k = f(u + dt*k, t), for the unknown k.
-        This is the only requirement for high-order SDIRK implicit integration.*/
-    virtual void ImplicitSolve(const double dt, const Vector &u, Vector &k);
-
-    /// Update the diffusion BilinearForm K using the given true-dof vector `u`.
-    void SetSourceTime(const double t);
-
-    /// Update the diffusion BilinearForm K using the given true-dof vector `u`.
-    void SetParameters(const Vector &u);
-
-    virtual ~ConductionOperator();
-};
-
-double InitialTemperature(const Vector &x);
-double TimeWindowFunction(const double t, const double t_begin,
-                          const double t_end);
-double Amplitude(const double t, const int index);
-double SourceFunction(const Vector &x, double t);
-
-Vector bb_min, bb_max; // Mesh bounding box
-double amp_in = 0.2;
-double t_end_in = 0.1;
-double amp_out = 0.1;
-double t_end_out = 0.3;
-double dt = 1.0e-2;
-
-int main(int argc, char *argv[])
-{
-    // 1. Initialize MPI.
-    int num_procs, myid;
-    MPI_Init(&argc, &argv);
-    MPI_Comm_size(MPI_COMM_WORLD, &num_procs);
-    MPI_Comm_rank(MPI_COMM_WORLD, &myid);
-
-    // 2. Parse command-line options.
-    const char *mesh_file = "";
-    int ser_ref_levels = 2;
-    int par_ref_levels = 1;
-    int order = 2;
-    int ode_solver_type = 1;
-    double t_final = 0.5;
-    double alpha = 1.0e-2;
-    double kappa = 0.5;
-    double ef = 0.9999;
-    int rdim = -1;
-    bool visualization = true;
-    bool visit = false;
-    int vis_steps = 5;
-    bool adios2 = false;
-    bool offline = false;
-    bool online = false;
-    
-    double closest_rbf_val = 0.9;
-    bool predict = false;
-    bool save_dofs = false;
-    bool csvFormat = true;
-    const char *basename = "";
-
-    int precision = 8;
-    cout.precision(precision);
-
-    OptionsParser args(argc, argv);
-    args.AddOption(&mesh_file, "-m", "--mesh",
-                   "Mesh file to use.");
-    args.AddOption(&ser_ref_levels, "-rs", "--refine-serial",
-                   "Number of times to refine the mesh uniformly in serial.");
-    args.AddOption(&par_ref_levels, "-rp", "--refine-parallel",
-                   "Number of times to refine the mesh uniformly in parallel.");
-    args.AddOption(&order, "-o", "--order",
-                   "Order (degree) of the finite elements.");
-    args.AddOption(&ode_solver_type, "-s", "--ode-solver",
-                   "ODE solver: 1 - Backward Euler, 2 - SDIRK2, 3 - SDIRK3,\n\t"
-                   "\t   11 - Forward Euler, 12 - RK2, 13 - RK3 SSP, 14 - RK4.");
-    args.AddOption(&t_final, "-tf", "--t-final",
-                   "Final time; start time is 0.");
-    args.AddOption(&dt, "-dt", "--time-step",
-                   "Time step.");
-    args.AddOption(&alpha, "-a", "--alpha",
-                   "Alpha coefficient.");
-    args.AddOption(&kappa, "-k", "--kappa",
-                   "Kappa coefficient offset.");
-    args.AddOption(&amp_in, "-amp-in", "--amplitude-in",
-                   "Amplitude of inlet source at (0,0).");
-    args.AddOption(&amp_out, "-amp-out", "--amplitude-out",
-                   "Amplitude of outlet source at (0.5,0.5).");
-    args.AddOption(&t_end_in, "-t-end-in", "--t-end-in",
-                   "End time of inlet source at (0,0).");
-    args.AddOption(&t_end_out, "-t-end-out", "--t-end-out",
-                   "End time of outlet source at (0.5,0.5).");
-    args.AddOption(&visualization, "-vis", "--visualization", "-no-vis",
-                   "--no-visualization",
-                   "Enable or disable GLVis visualization.");
-    args.AddOption(&visit, "-visit", "--visit-datafiles", "-no-visit",
-                   "--no-visit-datafiles",
-                   "Save data files for VisIt (visit.llnl.gov) visualization.");
-    args.AddOption(&vis_steps, "-vs", "--visualization-steps",
-                   "Visualize every n-th timestep.");
-    args.AddOption(&adios2, "-adios2", "--adios2-streams", "-no-adios2",
-                   "--no-adios2-streams",
-                   "Save data using adios2 streams.");
-    args.AddOption(&ef, "-ef", "--energy_fraction",
-                   "Energy fraction for DMDc.");
-    args.AddOption(&rdim, "-rdim", "--rdim",
-                   "Reduced dimension for DMDc.");
-    
-    args.AddOption(&offline, "-offline", "--offline", "-no-offline", "--no-offline",
-                   "Enable or disable the offline phase.");
-    args.AddOption(&online, "-online", "--online", "-no-online", "--no-online",
-                   "Enable or disable the online phase.");
-    args.AddOption(&closest_rbf_val, "-crv", "--crv",
-                   "DMD Closest RBF Value.");
-    args.AddOption(&predict, "-predict", "--predict", "-no-predict", "--no-predict",
-                   "Enable or disable DMD prediction.");
-    args.AddOption(&save_dofs, "-save", "--save", "-no-save", "--no-save",
-                   "Enable or disable MFEM DOF solution snapshot files).");
-    args.AddOption(&csvFormat, "-csv", "--csv", "-hdf", "--hdf",
-                   "Use CSV or HDF format for files output by -save option.");
-    args.AddOption(&basename, "-out", "--outputfile-name",
-                   "Name of the sub-folder to dump files within the run directory.");
-    
-    args.Parse();
-    if (!args.Good())
-    {
-        args.PrintUsage(cout);
-        MPI_Finalize();
-        return 1;
-    }
-
-    if (myid == 0)
-    {
-        args.PrintOptions(cout);
-    }
-    
-    string outputPath = ".";
-    if (save_dofs)
-    {
-        outputPath = "run";
-        if (string(basename) != "") {
-            outputPath += "/" + string(basename);
-        }
-        if (myid == 0)
-        {
-            const char path_delim = '/';
-            string::size_type pos = 0;
-            do {
-                pos = outputPath.find(path_delim, pos+1);
-                string subdir = outputPath.substr(0, pos);
-                mkdir(subdir.c_str(), 0777);
-            }
-            while (pos != string::npos);
-        }
-    }
-
-    // 3. Read the serial mesh from the given mesh file on all processors. We can
-    //    handle triangular, quadrilateral, tetrahedral and hexahedral meshes
-    //    with the same code.
-    Mesh *mesh;
-    if (mesh_file == "")
-    {
-        mesh = new Mesh(Mesh::MakeCartesian2D(2, 2, Element::QUADRILATERAL));
-    }
-    else
-    {
-        mesh = new Mesh(mesh_file, 1, 1);
-    }
-    int dim = mesh->Dimension();
-
-    mesh->GetBoundingBox(bb_min, bb_max, max(order, 1));
-
-    // 4. Define the ODE solver used for time integration. Several implicit
-    //    singly diagonal implicit Runge-Kutta (SDIRK) methods, as well as
-    //    explicit Runge-Kutta methods are available.
-    ODESolver *ode_solver;
-    bool explicit_solver;
-    switch (ode_solver_type)
-    {
-    // Implicit L-stable methods
-    case 1:
-        ode_solver = new BackwardEulerSolver;
-        explicit_solver = false;
-        break;
-    case 2:
-        ode_solver = new SDIRK23Solver(2);
-        explicit_solver = false;
-        break;
-    case 3:
-        ode_solver = new SDIRK33Solver;
-        explicit_solver = false;
-        break;
-    // Explicit methods
-    case 11:
-        ode_solver = new ForwardEulerSolver;
-        explicit_solver = true;
-        break;
-    case 12:
-        ode_solver = new RK2Solver(0.5);
-        explicit_solver = true;
-        break; // midpoint method
-    case 13:
-        ode_solver = new RK3SSPSolver;
-        explicit_solver = true;
-        break;
-    case 14:
-        ode_solver = new RK4Solver;
-        explicit_solver = true;
-        break;
-    case 15:
-        ode_solver = new GeneralizedAlphaSolver(0.5);
-        explicit_solver = true;
-        break;
-    // Implicit A-stable methods (not L-stable)
-    case 22:
-        ode_solver = new ImplicitMidpointSolver;
-        explicit_solver = false;
-        break;
-    case 23:
-        ode_solver = new SDIRK23Solver;
-        explicit_solver = false;
-        break;
-    case 24:
-        ode_solver = new SDIRK34Solver;
-        explicit_solver = false;
-        break;
-    default:
-        cout << "Unknown ODE solver type: " << ode_solver_type << '\n';
-        delete mesh;
-        return 3;
-    }
-
-    // 5. Refine the mesh in serial to increase the resolution. In this example
-    //    we do 'ser_ref_levels' of uniform refinement, where 'ser_ref_levels' is
-    //    a command-line parameter.
-    for (int lev = 0; lev < ser_ref_levels; lev++)
-    {
-        mesh->UniformRefinement();
-    }
-
-    // 6. Define a parallel mesh by a partitioning of the serial mesh. Refine
-    //    this mesh further in parallel to increase the resolution. Once the
-    //    parallel mesh is defined, the serial mesh can be deleted.
-    ParMesh *pmesh = new ParMesh(MPI_COMM_WORLD, *mesh);
-    delete mesh;
-    for (int lev = 0; lev < par_ref_levels; lev++)
-    {
-        pmesh->UniformRefinement();
-    }
-
-    // 7. Define the vector finite element space representing the current and the
-    //    initial temperature, u_ref.
-    H1_FECollection fe_coll(order, dim);
-    ParFiniteElementSpace fespace(pmesh, &fe_coll);
-
-    int fe_size = fespace.GlobalTrueVSize();
-    if (myid == 0)
-    {
-        cout << "Number of temperature unknowns: " << fe_size << endl;
-    }
-
-    ParGridFunction u_gf(&fespace);
-
-    // 8. Set the initial conditions for u. All boundaries are considered
-    //    natural.
-    FunctionCoefficient u_0(InitialTemperature);
-    u_gf.ProjectCoefficient(u_0);
-    Vector u;
-    u_gf.GetTrueDofs(u);
-
-    // 9. Initialize the conduction operator and the VisIt visualization.
-    FunctionCoefficient s(SourceFunction);
-    ConductionOperator oper(fespace, s, alpha, kappa, u);
-
-    u_gf.SetFromTrueDofs(u);
-    {
-        ostringstream mesh_name, sol_name;
-        mesh_name << outputPath << "/parametric_dmdc_heat_conduction_" << to_string(
-                      amp_in) << "_" << to_string(amp_out) << "-mesh." << setfill('0') << setw(6) << myid;
-        sol_name << outputPath << "/parametric_dmdc_heat_conduction_" << to_string(
-                     amp_in) << "_" << to_string(amp_out) <<  "-init." << setfill('0') << setw(6) << myid;
-        ofstream omesh(mesh_name.str().c_str());
-        omesh.precision(precision);
-        pmesh->Print(omesh);
-        ofstream osol(sol_name.str().c_str());
-        osol.precision(precision);
-        u_gf.Save(osol);
-    }
-//    {
-//        ostringstream mesh_name, sol_name;
-//        mesh_name << "heat_conduction_dmdc-mesh." << setfill('0') << setw(6) << myid;
-//        sol_name << "heat_conduction_dmdc-init." << setfill('0') << setw(6) << myid;
-//        ofstream omesh(mesh_name.str().c_str());
-//        omesh.precision(precision);
-//        pmesh->Print(omesh);
-//        ofstream osol(sol_name.str().c_str());
-//        osol.precision(precision);
-//        u_gf.Save(osol);
-//    }
-    
-
-//    VisItDataCollection visit_dc("Heat_Conduction", pmesh);
-    VisItDataCollection visit_dc(outputPath + "/parametric_dmdc_Heat_Conduction_" +
-                                 to_string(amp_in) + "_" + to_string(amp_out), pmesh);
-    visit_dc.RegisterField("temperature", &u_gf);
-    if (visit)
-    {
-        visit_dc.SetCycle(0);
-        visit_dc.SetTime(0.0);
-        visit_dc.Save();
-    }
-
-    // Optionally output a BP (binary pack) file using ADIOS2. This can be
-    // visualized with the ParaView VTX reader.
-#ifdef MFEM_USE_ADIOS2
-    ADIOS2DataCollection* adios2_dc = NULL;
-    if (adios2)
-    {
-        std::string postfix(mesh_file);
-        postfix.erase(0, std::string("../data/").size() );
-        postfix += "_o" + std::to_string(order);
-        postfix += "_solver" + std::to_string(ode_solver_type);
-        const std::string collection_name = "heat_conduction_dmdc-p-" + postfix + ".bp";
-
-        adios2_dc = new ADIOS2DataCollection(MPI_COMM_WORLD, collection_name, pmesh);
-        adios2_dc->SetParameter("SubStreams", std::to_string(num_procs/2) );
-        adios2_dc->RegisterField("temperature", &u_gf);
-        adios2_dc->SetCycle(0);
-        adios2_dc->SetTime(0.0);
-        adios2_dc->Save();
-    }
-#endif
-
-    socketstream sout;
-    if (visualization)
-    {
-        char vishost[] = "localhost";
-        int  visport   = 19916;
-        sout.open(vishost, visport);
-        sout << "parallel " << num_procs << " " << myid << endl;
-        int good = sout.good(), all_good;
-        MPI_Allreduce(&good, &all_good, 1, MPI_INT, MPI_MIN, pmesh->GetComm());
-        if (!all_good)
-        {
-            sout.close();
-            visualization = false;
-            if (myid == 0)
-            {
-                cout << "Unable to connect to GLVis server at "
-                     << vishost << ':' << visport << endl;
-                cout << "GLVis visualization disabled.\n";
-            }
-        }
-        else
-        {
-            sout.precision(precision);
-            sout << "solution\n" << *pmesh << u_gf;
-            sout << flush;
-        }
-    }
-
-    StopWatch fom_timer, dmd_training_timer, dmd_prediction_timer;
-
-    fom_timer.Start();
-
-    // 10. Perform time-integration (looping over the time iterations, ti, with a
-    //     time-step dt).
-    ode_solver->Init(oper);
-    double t = 0.0;
-    vector<double> ts;
-    double f[2];
-    
-    CAROM::Vector* init = NULL;
-
-    CAROM::Database *db = NULL;
-    if (csvFormat)
-        db = new CAROM::CSVDatabase();
-    else
-        db = new CAROM::HDFDatabase();
-
-    vector<int> snap_list;
-
-    fom_timer.Stop();
-
-//    dmd_training_timer.Start();
-    
-    CAROM::DMDc* dmd_u = NULL;
-    
-    if (offline)
-    {
-        dmd_training_timer.Start();
-        
-        // 11. Create DMDc object and take initial sample.
-        u_gf.SetFromTrueDofs(u);
-        f[0] = Amplitude(t, 0);
-        f[1] = Amplitude(t, 1);
-        
-        std::cout << "t =  " << t << std::endl;
-        
-//        CAROM::DMDc* dmd_u; //redefined this on 494
-        dmd_u = new CAROM::DMDc(u.Size(), 2, dt);
-        dmd_u->takeSample(u.GetData(), t, f, false);
-//        ts.push_back(t);
-        
-        dmd_training_timer.Stop();
-    }
-    
-    if (online)
-    {
-        u_gf.SetFromTrueDofs(u);
-        init = new CAROM::Vector(u.GetData(), u.Size(), true);
-    }
-    
-    if (save_dofs && myid == 0)
-    {
-        if (csvFormat)
-        {
-            mkdir((outputPath + "/step0").c_str(), 0777);
-            db->putDoubleArray(outputPath + "/step0/sol.csv", u.GetData(), u.Size());
-        }
-        else
-        {
-            db->create(outputPath + "/dmd_0.hdf");
-            db->putDoubleArray("step0sol", u.GetData(), u.Size());
-        }
-    }
-    
-    ts.push_back(t);
-    snap_list.push_back(0);
-    
-    bool last_step = false;
-    for (int ti = 1; !last_step; ti++)
-    {
-        fom_timer.Start();
-
-        if (t + dt >= t_final - dt/2)
-        {
-            last_step = true;
-        }
-
-        // Assuming Euler method is used
-        if (explicit_solver)
-        {
-            oper.SetSourceTime(t);
-        }
-        else
-        {
-            oper.SetSourceTime(t + dt);
-        }
-        ode_solver->Step(u, t, dt);
-
-        fom_timer.Stop();
-        
-        if (offline)
-        {
-            dmd_training_timer.Start();
-            
-            u_gf.SetFromTrueDofs(u);
-            f[0] = Amplitude(t, 0);
-            f[1] = Amplitude(t, 1);
-            dmd_u->takeSample(u.GetData(), t, f, last_step);
-//            ts.push_back(t);
-            
-            if (myid == 0)
-            {
-                std::cout << "Taking snapshot at: " << t << std::endl;
-            }
-            
-            dmd_training_timer.Stop();
-        }
-        
-        if (save_dofs && myid == 0)
-        {
-            if (csvFormat)
-            {
-                mkdir((outputPath + "/step" + to_string(ti)).c_str(), 0777);
-                db->putDoubleArray(outputPath + "/step" + to_string(ti) + "/sol.csv",
-                                   u.GetData(), u.Size());
-            }
-            else
-                db->putDoubleArray("step" + to_string(ti) + "sol",
-                                   u.GetData(), u.Size());
-        }
-        
-        ts.push_back(t);
-        snap_list.push_back(ti);
-        
-        if (last_step || (ti % vis_steps) == 0)
-        {
-            if (myid == 0)
-            {
-                cout << "step " << ti << ", t = " << t << endl;
-            }
-
-            u_gf.SetFromTrueDofs(u);
-            if (visualization)
-            {
-                sout << "parallel " << num_procs << " " << myid << "\n";
-                sout << "solution\n" << *pmesh << u_gf << flush;
-            }
-
-            if (visit)
-            {
-                visit_dc.SetCycle(ti);
-                visit_dc.SetTime(t);
-                visit_dc.Save();
-            }
-
-#ifdef MFEM_USE_ADIOS2
-            if (adios2)
-            {
-                adios2_dc->SetCycle(ti);
-                adios2_dc->SetTime(t);
-                adios2_dc->Save();
-            }
-#endif
-        }
-        oper.SetParameters(u);
-    }
-
-#ifdef MFEM_USE_ADIOS2
-    if (adios2)
-    {
-        delete adios2_dc;
-    }
-#endif
-    
-    if (save_dofs && myid == 0)
-    {
-        if (csvFormat)
-        {
-            db->putDoubleVector(outputPath + "/tval.csv", ts, ts.size());
-            db->putInteger(outputPath + "/numsnap", snap_list.size());
-            db->putIntegerArray(outputPath + "/snap_list.csv", snap_list.data(),
-                                snap_list.size());
-        }
-        else
-        {
-            db->putDoubleVector("tval", ts, ts.size());
-            db->putInteger("numsnap", snap_list.size());
-            db->putInteger("snap_bound_size", 0);
-            db->putIntegerArray("snap_list", snap_list.data(),
-                                snap_list.size());
-        }
-    }
-
-    // 12. Save the final solution in parallel. This output can be viewed later
-    //     using GLVis: "glvis -np <np> -m heat_conduction_dmdc-mesh -g heat_conduction_dmdc-final".
-    {
-        ostringstream sol_name;
-        sol_name << outputPath << "parametric_dmdc_Heat_Conduction" << to_string(amp_in) << "_" << to_string(amp_out) << "-final." << setfill('0') << setw(6) << myid;
-        ofstream osol(sol_name.str().c_str());
-        osol.precision(precision);
-        u_gf.Save(osol);
-//        ostringstream sol_name;
-//        sol_name << "heat_conduction_dmdc-final." << setfill('0') << setw(6) << myid;
-//        ofstream osol(sol_name.str().c_str());
-//        osol.precision(precision);
-//        u_gf.Save(osol);
-    }
-
-    // 13. Calculate the DMDc modes.
-    // pretty sure it is easier to get rid of EF because each snapshot must have same rdim
-    if (offline || online)
-    {
-        if (offline)
-        {
-            if (myid == 0 && rdim != -1 && ef != -1)
-            {
-                std::cout << "Both rdim and ef are set. ef will be ignored." << std::endl;
-            }
-            
-            dmd_training_timer.Start();
-            
-            if (rdim != -1)
-            {
-                if (myid == 0)
-                {
-                    std::cout << "Creating DMDc with rdim: " << rdim << std::endl;
-                }
-
-                dmd_u->train(rdim);
-
-            }
-            else if (ef != -1)
-            {
-                if (myid == 0)
-                {
-                    std::cout << "Creating DMDc with energy fraction: " << ef << std::endl;
-                }
-                dmd_u->train(ef);
-            }
-            
-            dmd_training_timer.Stop();
-            
-            dmd_u->save(outputPath + "/" + to_string(amp_in) + "_" + to_string(
-                                                                               amp_out));
-            
-            if (myid == 0)
-            {
-                std::ofstream fout;
-                fout.open("parameters.txt", std::ios::app);
-                fout << amp_in << " " << amp_out << std::endl;
-                fout.close();
-            }
-        }
-        
-//         probably will need to change a lot here
-        if (online)
-        {
-            if (myid == 0)
-            {
-                std::cout << "Creating DMD using the rdim of the offline phase" << std::endl;
-            }
-
-            std::fstream fin("parameters.txt", std::ios_base::in);
-            double curr_param;
-            std::vector<std::string> dmd_paths;
-            std::vector<CAROM::Vector*> param_vectors;
-
-            while (fin >> curr_param)
-            {
-                double curr_amp_in = curr_param;
-                fin >> curr_param;
-                double curr_amp_out = curr_param;
-
-                dmd_paths.push_back(outputPath + "/" + to_string(curr_amp_in) + "_" + to_string(curr_amp_out) );
-                CAROM::Vector* param_vector = new CAROM::Vector(2, false);
-                param_vector->item(0) = curr_amp_in;
-                param_vector->item(1) = curr_amp_out;
-                param_vectors.push_back(param_vector);
-            }
-            fin.close();
-
-            CAROM::Vector* desired_param = new CAROM::Vector(2, false);
-            desired_param->item(0) = amp_in;
-            desired_param->item(1) = amp_out;
-
-            dmd_training_timer.Start();
-
-            CAROM::getParametricDMD(dmd_u, param_vectors, dmd_paths, desired_param,
-                                    "G", "LS", closest_rbf_val);
-
-            dmd_u->project(init,param_vectors);
-
-            dmd_training_timer.Stop();
-            delete desired_param;
-        }
-        
-        if (predict)
-        {
-            Vector true_solution_u(u.Size());
-            true_solution_u = u.GetData();
-            
-            dmd_prediction_timer.Start();
-            
-            // 14. Predict the state at t_final using DMDc.
-            if (myid == 0)
-            {
-                std::cout << "Predicting temperature using DMDc" << std::endl;
-            }
-            
-            CAROM::Vector* result_u = dmd_u->predict(ts[0]);
-            Vector initial_dmd_solution_u(result_u->getData(), result_u->dim());
-            u_gf.SetFromTrueDofs(initial_dmd_solution_u);
-            
-            VisItDataCollection dmd_visit_dc("parametric_dmdc_Heat_Conduction", pmesh);
-            dmd_visit_dc.RegisterField("temperature", &u_gf);
-            
-            if (visit)
-            {
-                dmd_visit_dc.SetCycle(0);
-                dmd_visit_dc.SetTime(0.0);
-                dmd_visit_dc.Save();
-            }
-            
-            delete result_u;
-            
-            if (visit)
-            {
-                for (int i = 1; i < ts.size(); i++)
-                {
-                    if (i == ts.size() - 1 || (i % vis_steps) == 0)
-                    {
-                        result_u = dmd_u->predict(ts[i]);
-                        Vector dmd_solution_u(result_u->getData(), result_u->dim());
-                        u_gf.SetFromTrueDofs(dmd_solution_u);
-                        
-                        dmd_visit_dc.SetCycle(i);
-                        dmd_visit_dc.SetTime(ts[i]);
-                        dmd_visit_dc.Save();
-                        
-                        delete result_u;
-                    }
-                }
-            }
-            
-            dmd_prediction_timer.Stop();
-            
-            result_u = dmd_u->predict(t_final);
-            
-            // 15. Calculate the relative error between the DMDc final solution and the true solution.
-            Vector dmd_solution_u(result_u->getData(), result_u->dim());
-            Vector diff_u(true_solution_u.Size());
-            subtract(dmd_solution_u, true_solution_u, diff_u);
-            
-            double tot_diff_norm_u = sqrt(InnerProduct(MPI_COMM_WORLD, diff_u, diff_u));
-            double tot_true_solution_u_norm = sqrt(InnerProduct(MPI_COMM_WORLD,
-                                                                true_solution_u, true_solution_u));
-            
-            if (myid == 0)
-            {
-                
-                std::cout << "Relative error of DMDc temperature (u) at t_final: " << t_final <<
-                " is " << tot_diff_norm_u / tot_true_solution_u_norm << std::endl;
-                printf("Elapsed time for solving FOM: %e second\n", fom_timer.RealTime());
-                printf("Elapsed time for training DMDc: %e second\n",
-                       dmd_training_timer.RealTime());
-                printf("Elapsed time for predicting DMDc: %e second\n",
-                       dmd_prediction_timer.RealTime());
-            }
-            
-            delete result_u;
-            
-        }
-
-    }
-
-    // 16. Free the used memory.
-    delete ode_solver;
-    delete pmesh;
-//    delete result_u;
-    if (offline)
-    {
-        delete dmd_u;
-    }
-
-    MPI_Finalize();
-
-    return 0;
-}
-
-ConductionOperator::ConductionOperator(ParFiniteElementSpace &f,
-                                       FunctionCoefficient &s,
-                                       double al, double kap, const Vector &u)
-    : TimeDependentOperator(f.GetTrueVSize(), 0.0), fespace(f), src_func(s),
-      M(NULL), K(NULL), T(NULL), current_dt(0.0),
-      M_solver(f.GetComm()), T_solver(f.GetComm()), z(height)
-{
-    const double rel_tol = 1e-8;
-
-    M = new ParBilinearForm(&fespace);
-    M->AddDomainIntegrator(new MassIntegrator());
-    M->Assemble(0); // keep sparsity pattern of M and K the same
-    M->FormSystemMatrix(ess_tdof_list, Mmat);
-
-    M_solver.iterative_mode = false;
-    M_solver.SetRelTol(rel_tol);
-    M_solver.SetAbsTol(0.0);
-    M_solver.SetMaxIter(100);
-    M_solver.SetPrintLevel(0);
-    M_prec.SetType(HypreSmoother::Jacobi);
-    M_solver.SetPreconditioner(M_prec);
-    M_solver.SetOperator(Mmat);
-
-    alpha = al;
-    kappa = kap;
-
-    T_solver.iterative_mode = false;
-    T_solver.SetRelTol(rel_tol);
-    T_solver.SetAbsTol(0.0);
-    T_solver.SetMaxIter(100);
-    T_solver.SetPrintLevel(0);
-    T_solver.SetPreconditioner(T_prec);
-
-    SetParameters(u);
-
-    B = new ParLinearForm(&fespace);
-    B->AddDomainIntegrator(new DomainLFIntegrator(src_func));
-}
-
-void ConductionOperator::Mult(const Vector &u, Vector &du_dt) const
-{
-    // Compute:
-    //    du_dt = M^{-1}*-K(u)
-    // for du_dt
-    Kmat.Mult(u, z);
-    z.Neg(); // z = -z
-    z += *b;
-    M_solver.Mult(z, du_dt);
-}
-
-void ConductionOperator::ImplicitSolve(const double dt,
-                                       const Vector &u, Vector &du_dt)
-{
-    // Solve the equation:
-    //    du_dt = M^{-1}*[-K(u + dt*du_dt)]
-    // for du_dt
-    if (!T)
-    {
-        T = Add(1.0, Mmat, dt, Kmat);
-        current_dt = dt;
-        T_solver.SetOperator(*T);
-    }
-    MFEM_VERIFY(dt == current_dt, ""); // SDIRK methods use the same dt
-    Kmat.Mult(u, z);
-    z.Neg();
-    z += *b;
-    T_solver.Mult(z, du_dt);
-}
-
-void ConductionOperator::SetSourceTime(double t)
-{
-    src_func.SetTime(t);
-    B->Assemble();
-    b = B->ParallelAssemble();
-}
-
-void ConductionOperator::SetParameters(const Vector &u)
-{
-    ParGridFunction u_alpha_gf(&fespace);
-    u_alpha_gf.SetFromTrueDofs(u);
-    for (int i = 0; i < u_alpha_gf.Size(); i++)
-    {
-        u_alpha_gf(i) = kappa + alpha*u_alpha_gf(i);
-    }
-
-    delete K;
-    K = new ParBilinearForm(&fespace);
-
-    GridFunctionCoefficient u_coeff(&u_alpha_gf);
-
-    K->AddDomainIntegrator(new DiffusionIntegrator(u_coeff));
-    K->Assemble(0); // keep sparsity pattern of M and K the same
-    K->FormSystemMatrix(ess_tdof_list, Kmat);
-    delete T;
-    T = NULL; // re-compute T on the next ImplicitSolve
-}
-
-ConductionOperator::~ConductionOperator()
-{
-    delete T;
-    delete M;
-    delete K;
-    delete B;
-}
-
-double InitialTemperature(const Vector &x)
-{
-    if (x.Norml2() < 0.5)
-    {
-        return 2.0;
-    }
-    else
-    {
-        return 1.0;
-    }
-}
-
-double TimeWindowFunction(const double t, const double t_begin,
-                          const double t_end)
-{
-    return 0.5 * (tanh((t - t_begin) / (5*dt)) - tanh((t - t_end) / (5*dt)));
-}
-
-double Amplitude(const double t, const int index)
-{
-    if (index == 0)
-    {
-        return amp_in * TimeWindowFunction(t, 0.0, t_end_in);
-    }
-    else
-    {
-        return amp_out * TimeWindowFunction(t, 0.0, t_end_out);
-    }
-}
-
-double SourceFunction(const Vector &x, double t)
-{
-    // map to the reference [-1,1] domain
-    Vector X(x.Size()), Y(x.Size());
-    for (int i = 0; i < x.Size(); i++)
-    {
-        double center = (bb_min[i] + bb_max[i]) * 0.5;
-        X(i) = 2 * (x(i) - center) / (bb_max[i] - bb_min[i]);
-        Y(i) = X(i) - 0.5;
-    }
-
-    double r1 = X.Norml2() / 0.01;
-    double r2 = Y.Norml2() / 0.01;
-    return Amplitude(t,0) * exp(-0.5*r1*r1) - Amplitude(t,1) * exp(-0.5*r2*r2);
-}

From 368f0f363788633031f6b6aef245fbfa72995a72 Mon Sep 17 00:00:00 2001
From: Anderson <anderson316@llnl.gov>
Date: Thu, 4 Jan 2024 08:36:09 -0800
Subject: [PATCH 07/13] Changed some comments to be clearer and deleted pointer
 after using

---
 examples/prom/maxwell_local_rom_greedy.cpp | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/examples/prom/maxwell_local_rom_greedy.cpp b/examples/prom/maxwell_local_rom_greedy.cpp
index ccbd97cdb..a00e80876 100644
--- a/examples/prom/maxwell_local_rom_greedy.cpp
+++ b/examples/prom/maxwell_local_rom_greedy.cpp
@@ -20,7 +20,7 @@
 //               controlled by the parameter kappa.
 //
 //               The example highlights the greedy algorithm. The build_database phase
-//               builds a global ROM database using different frequncies and a latin-hypercube
+//               builds a global ROM database using different frequencies and a latin-hypercube
 //               sampling procedure. The use_database phase uses the global ROM database,
 //               builds the ROM operator, solves thereduced order system, and
 //               lifts the solution to the full order space.
@@ -158,7 +158,8 @@ int main(int argc, char *argv[])
     if (fom)
     {
         MFEM_VERIFY(fom && !offline
-                    && !online, "everything must be turned off if fom is used.");
+                    && !online,
+                    "The FOM phase cannot be run with the offline or online ROM phase.");
     }
 
     CAROM::GreedySampler* greedy_sampler = NULL;
@@ -280,7 +281,7 @@ int main(int argc, char *argv[])
                     continue;
                 }
             }
-            // 4b. Set the correct frequency as commanded by the greedy algorithm.
+            // 4b. Set the frequency as commanded by the greedy algorithm.
             curr_basis_identifier += "_" + to_string(curr_freq);
             freq = curr_freq;
         }
@@ -680,6 +681,9 @@ int main(int argc, char *argv[])
         delete pmesh;
 
     } while(build_database);
+
+    delete greedy_sampler;
+
     MPI_Finalize();
 
     return 0;

From 743dda61348585d91b8703a48f320765cb626900 Mon Sep 17 00:00:00 2001
From: William Anderson <53445030+andersonw1@users.noreply.github.com>
Date: Mon, 8 Jan 2024 07:21:29 -0800
Subject: [PATCH 08/13] fixed typo

---
 examples/prom/maxwell_local_rom_greedy.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/prom/maxwell_local_rom_greedy.cpp b/examples/prom/maxwell_local_rom_greedy.cpp
index a00e80876..f3dc4453d 100644
--- a/examples/prom/maxwell_local_rom_greedy.cpp
+++ b/examples/prom/maxwell_local_rom_greedy.cpp
@@ -22,7 +22,7 @@
 //               The example highlights the greedy algorithm. The build_database phase
 //               builds a global ROM database using different frequencies and a latin-hypercube
 //               sampling procedure. The use_database phase uses the global ROM database,
-//               builds the ROM operator, solves thereduced order system, and
+//               builds the ROM operator, solves the reduced order system, and
 //               lifts the solution to the full order space.
 //
 // build_database phase: maxwell_local_rom_greedy -build_database -greedy-param-min 1.0 -greedy-param-max 1.2 -greedy-param-size 5 -greedysubsize 2 -greedyconvsize 3 -greedyrelerrortol 0.01

From 617a815b9456a351ac2738608135d43888045b51 Mon Sep 17 00:00:00 2001
From: Dylan Copeland <dylancopeland@gmail.com>
Date: Wed, 15 Nov 2023 16:21:01 -0800
Subject: [PATCH 09/13] Comment that maxwell example runs in parallel. (#248)

---
 examples/prom/maxwell_global_rom.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/examples/prom/maxwell_global_rom.cpp b/examples/prom/maxwell_global_rom.cpp
index 842e76ec2..fa5d030c4 100644
--- a/examples/prom/maxwell_global_rom.cpp
+++ b/examples/prom/maxwell_global_rom.cpp
@@ -42,6 +42,8 @@
 //
 // Online phase:  maxwell_global_rom -online -f 1.15
 //
+// This example runs in parallel with MPI, by using the same number of MPI ranks
+// in all phases (offline, merge, fom, online).
 
 #include "mfem.hpp"
 #include <fstream>

From 583881c585daa1662f305fbe794127e7ba824fd9 Mon Sep 17 00:00:00 2001
From: Chris Vales <cv1038@unh.edu>
Date: Fri, 15 Dec 2023 10:52:35 -0500
Subject: [PATCH 10/13] Changes in Gram-Schmidt functions (#239)

* fix issues in Matrix::orthogonalize

* add Matrix::orthogonalize_last method

* run astyle

* add another test for Matrix::orthogonalize_last

* rerun astyle properly

* review n.1 changes

* review n.2
---
 lib/linalg/Matrix.cpp      | 105 ++++++++++++++++++--------
 lib/linalg/Matrix.h        |  27 ++++++-
 unit_tests/test_Matrix.cpp | 147 +++++++++++++++++++++++++++++++++++++
 3 files changed, 245 insertions(+), 34 deletions(-)

diff --git a/lib/linalg/Matrix.cpp b/lib/linalg/Matrix.cpp
index d0b6514d3..4918b1ba1 100644
--- a/lib/linalg/Matrix.cpp
+++ b/lib/linalg/Matrix.cpp
@@ -1788,47 +1788,88 @@ const
 }
 
 void
-Matrix::orthogonalize()
+Matrix::orthogonalize(double zero_tol)
 {
-    for (int work = 1; work < d_num_cols; ++work) {
-        double tmp;
-        for (int col = 0; col < work; ++col) {
+    for (int work = 0; work < d_num_cols; ++work)
+    {
+        // Orthogonalize the column.
+        for (int col = 0; col < work; ++col)
+        {
             double factor = 0.0;
-            tmp = 0.0;
-            for (int i = 0; i < d_num_rows; ++i) {
-                tmp += item(i, col)*item(i, work);
-            }
-            if (d_num_procs > 1) {
-                MPI_Allreduce(&tmp,
-                              &factor,
-                              1,
-                              MPI_DOUBLE,
-                              MPI_SUM,
-                              MPI_COMM_WORLD);
-            }
-            else {
-                factor = tmp;
-            }
 
-            for (int i = 0; i < d_num_rows; ++i) {
-                item(i, work) -= factor*item(i, col);
+            for (int i = 0; i < d_num_rows; ++i)
+                factor += item(i, col) * item(i, work);
+
+            if (d_distributed && d_num_procs > 1)
+            {
+                CAROM_VERIFY( MPI_Allreduce(MPI_IN_PLACE, &factor, 1,
+                                            MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD) == MPI_SUCCESS );
             }
+            for (int i = 0; i < d_num_rows; ++i)
+                item(i, work) -= factor * item(i, col);
         }
+
+        // Normalize the column.
         double norm = 0.0;
-        tmp = 0.0;
-        for (int i = 0; i < d_num_rows; ++i) {
-            tmp += item(i, work)*item(i, work);
-        }
-        if (d_num_procs > 1) {
-            MPI_Allreduce(&tmp, &norm, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+
+        for (int i = 0; i < d_num_rows; ++i)
+            norm += item(i, work) * item(i, work);
+
+        if (d_distributed && d_num_procs > 1)
+        {
+            CAROM_VERIFY( MPI_Allreduce(MPI_IN_PLACE, &norm, 1, MPI_DOUBLE,
+                                        MPI_SUM, MPI_COMM_WORLD) == MPI_SUCCESS );
         }
-        else {
-            norm = tmp;
+        if (norm > zero_tol)
+        {
+            norm = 1.0 / sqrt(norm);
+            for (int i = 0; i < d_num_rows; ++i)
+                item(i, work) *= norm;
         }
-        norm = sqrt(norm);
-        for (int i = 0; i < d_num_rows; ++i) {
-            item(i, work) /= norm;
+    }
+}
+
+void
+Matrix::orthogonalize_last(int ncols, double zero_tol)
+{
+    if (ncols == -1) ncols = d_num_cols;
+    CAROM_VERIFY((ncols > 0) && (ncols <= d_num_cols));
+
+    const int last_col = ncols - 1; // index of column to be orthonormalized
+
+    // Orthogonalize the column.
+    for (int col = 0; col < last_col; ++col)
+    {
+        double factor = 0.0;
+
+        for (int i = 0; i < d_num_rows; ++i)
+            factor += item(i, col) * item(i, last_col);
+
+        if (d_distributed && d_num_procs > 1)
+        {
+            CAROM_VERIFY( MPI_Allreduce(MPI_IN_PLACE, &factor, 1, MPI_DOUBLE,
+                                        MPI_SUM, MPI_COMM_WORLD) == MPI_SUCCESS );
         }
+        for (int i = 0; i < d_num_rows; ++i)
+            item(i, last_col) -= factor * item(i, col);
+    }
+
+    // Normalize the column.
+    double norm = 0.0;
+
+    for (int i = 0; i < d_num_rows; ++i)
+        norm += item(i, last_col) * item(i, last_col);
+
+    if (d_distributed && d_num_procs > 1)
+    {
+        CAROM_VERIFY( MPI_Allreduce(MPI_IN_PLACE, &norm, 1, MPI_DOUBLE,
+                                    MPI_SUM, MPI_COMM_WORLD) == MPI_SUCCESS );
+    }
+    if (norm > zero_tol)
+    {
+        norm = 1.0 / sqrt(norm);
+        for (int i = 0; i < d_num_rows; ++i)
+            item(i, last_col) *= norm;
     }
 }
 
diff --git a/lib/linalg/Matrix.h b/lib/linalg/Matrix.h
index 4f7f74aba..e5f5bc988 100644
--- a/lib/linalg/Matrix.h
+++ b/lib/linalg/Matrix.h
@@ -941,10 +941,33 @@ class Matrix
                           int  pivots_requested) const;
 
     /**
-     * @brief Orthogonalizes the matrix.
+     * @brief Orthonormalizes the matrix.
+     *
+     * If the norm of a matrix column is below the value of zero_tol then it
+     * is considered to be zero, and we do not divide by it.
+     * Therefore, that column is considered to be zero and is not normalized.
+     * By default, zero_tol = 1.0e-15.
+     */
+    void
+    orthogonalize(double zero_tol = 1.0e-15);
+
+    /**
+     * @brief Orthonormalizes the matrix's last column, assuming the previous
+     * columns are already orthonormal.
+     *
+     * By default, ncols == -1, and the function considers the whole matrix.
+     * If ncols != -1 and ncols < d_num_cols, then a subset of the matrix
+     * is considered.
+     * This allows one to reorthonormalize the matrix every time a new column
+     * is added, assuming the previous columns have remained unchanged.
+     *
+     * If the norm of a matrix column is below the value of zero_tol then it
+     * is considered to be zero, and we do not divide by it.
+     * Therefore, that column is considered to be zero and is not normalized.
+     * By default, zero_tol = 1.0e-15.
      */
     void
-    orthogonalize();
+    orthogonalize_last(int ncols = -1, double zero_tol = 1.0e-15);
 
     /**
      * @brief Rescale every matrix row by its maximum absolute value.
diff --git a/unit_tests/test_Matrix.cpp b/unit_tests/test_Matrix.cpp
index 7534d5b19..8f3d77503 100644
--- a/unit_tests/test_Matrix.cpp
+++ b/unit_tests/test_Matrix.cpp
@@ -457,6 +457,153 @@ TEST(MatrixSerialTest, Test_Matrix_rescale_cols_max2)
                     "(i, j) = (" << i << ", " << j << ")";
 }
 
+TEST(MatrixSerialTest, Test_Matrix_orthogonalize)
+{
+    // Matrix data to orthonormalize.
+    double d_mat[16] = {3.5, 7.1, 0.0, 0.0,
+                        0.0, 1.9, 8.3, 0.0,
+                        0.0, 0.0, 5.7, 4.6,
+                        0.0, 0.0, 0.0, 3.2
+                       };
+
+    // Target matrix data.
+    double d_mat2[16] = {1.0, 0.0, 0.0, 0.0,
+                         0.0, 1.0, 0.0, 0.0,
+                         0.0, 0.0, 1.0, 0.0,
+                         0.0, 0.0, 0.0, 1.0
+                        };
+
+    CAROM::Matrix matrix(d_mat, 4, 4, false);
+    CAROM::Matrix target(d_mat2, 4, 4, false);
+
+    double abs_error = 1.0e-15; // absolute error threshold
+
+    matrix.orthogonalize();
+
+    for (int i = 0; i < 4; i++)
+        for (int j = 0; j < 4; j++)
+            EXPECT_NEAR(matrix.item(i, j), target.item(i, j), abs_error) <<
+                    "(i, j) = (" << i << ", " << j << ")";
+}
+
+TEST(MatrixSerialTest, Test_Matrix_orthogonalize2)
+{
+    // Matrix data to orthonormalize.
+    double d_mat[16] = {3.5, 7.1, 0.0, 0.0,
+                        0.0, 1.9, 8.3, 1e-14,
+                        0.0, 0.0, 5.7, 1.0+1.0e-14,
+                        0.0, 0.0, 0.0, 0.0
+                       };
+
+    // Target matrix data.
+    double d_mat2[16] = {1.0, 0.0, 0.0, 0.0,
+                         0.0, 1.0, 0.0, 0.0,
+                         0.0, 0.0, 1.0, 0.0,
+                         0.0, 0.0, 0.0, 0.0
+                        };
+
+    CAROM::Matrix matrix(d_mat, 4, 4, false);
+    CAROM::Matrix target(d_mat2, 4, 4, false);
+
+    double abs_error = 1.0e-15; // absolute error threshold
+
+    matrix.orthogonalize();
+
+    for (int i = 0; i < 4; i++)
+        for (int j = 0; j < 4; j++)
+            EXPECT_NEAR(matrix.item(i, j), target.item(i, j), abs_error) <<
+                    "(i, j) = (" << i << ", " << j << ")";
+}
+
+TEST(MatrixSerialTest, Test_Matrix_orthogonalize_last)
+{
+    // Matrix data to orthonormalize.
+    double d_mat[16] = {1.0, 0.0, 0.0, 1.3,
+                        0.0, 1.0, 0.0, 4.7,
+                        0.0, 0.0, 1.0, 2.5,
+                        0.0, 0.0, 0.0, 7.3
+                       };
+
+    // Target matrix data.
+    double d_mat2[16] = {1.0, 0.0, 0.0, 0.0,
+                         0.0, 1.0, 0.0, 0.0,
+                         0.0, 0.0, 1.0, 0.0,
+                         0.0, 0.0, 0.0, 1.0
+                        };
+
+    CAROM::Matrix matrix(d_mat, 4, 4, false);
+    CAROM::Matrix target(d_mat2, 4, 4, false);
+
+    double abs_error = 1.0e-15; // absolute error threshold
+
+    matrix.orthogonalize_last();
+
+    for (int i = 0; i < 4; i++)
+        for (int j = 0; j < 4; j++)
+            EXPECT_NEAR(matrix.item(i, j), target.item(i, j), abs_error) <<
+                    "(i, j) = (" << i << ", " << j << ")";
+}
+
+TEST(MatrixSerialTest, Test_Matrix_orthogonalize_last2)
+{
+    // Matrix data to orthonormalize.
+    double d_mat[16] = {1.0, 0.0, 3.8, 1.3,
+                        0.0, 1.0, 5.6, 4.7,
+                        0.0, 0.0, 9.8, 2.5,
+                        0.0, 0.0, 0.0, 7.3
+                       };
+
+    // Target matrix data.
+    double d_mat2[16] = {1.0, 0.0, 0.0, 0.0,
+                         0.0, 1.0, 0.0, 0.0,
+                         0.0, 0.0, 1.0, 0.0,
+                         0.0, 0.0, 0.0, 1.0
+                        };
+
+    CAROM::Matrix matrix(d_mat, 4, 4, false);
+    CAROM::Matrix target(d_mat2, 4, 4, false);
+
+    double abs_error = 1.0e-15; // absolute error threshold
+
+    matrix.orthogonalize_last(3);
+    matrix.orthogonalize_last(4);
+
+    for (int i = 0; i < 4; i++)
+        for (int j = 0; j < 4; j++)
+            EXPECT_NEAR(matrix.item(i, j), target.item(i, j), abs_error) <<
+                    "(i, j) = (" << i << ", " << j << ")";
+}
+
+TEST(MatrixSerialTest, Test_Matrix_orthogonalize_last3)
+{
+    // Matrix data to orthonormalize.
+    double d_mat[16] = {3.5, 7.1, 0.0, 0.0,
+                        0.0, 1.9, 8.3, 0.0,
+                        0.0, 0.0, 5.7, 4.6,
+                        0.0, 0.0, 0.0, 3.2
+                       };
+
+    // Target matrix data.
+    double d_mat2[16] = {1.0, 0.0, 0.0, 0.0,
+                         0.0, 1.0, 0.0, 0.0,
+                         0.0, 0.0, 1.0, 0.0,
+                         0.0, 0.0, 0.0, 1.0
+                        };
+
+    CAROM::Matrix matrix(d_mat, 4, 4, false);
+    CAROM::Matrix target(d_mat2, 4, 4, false);
+
+    for (int i = 0; i < 4; i++)
+        matrix.orthogonalize_last(i+1);
+
+    double abs_error = 1.0e-15; // absolute error threshold
+
+    for (int i = 0; i < 4; i++)
+        for (int j = 0; j < 4; j++)
+            EXPECT_NEAR(matrix.item(i, j), target.item(i, j), abs_error) <<
+                    "(i, j) = (" << i << ", " << j << ")";
+}
+
 TEST(MatrixSerialTest, Test_pMatrix_mult_reference)
 {
     /**

From 8d1b7d4c286b48a2acff205b28978169e7f7a62a Mon Sep 17 00:00:00 2001
From: Coleman Kendrick <6309092+ckendrick@users.noreply.github.com>
Date: Mon, 18 Dec 2023 08:23:57 -0700
Subject: [PATCH 11/13] Add timing info to DG local advection prom example
 (#251)

* Add assemble timer and timing prints

* Fix issue with -online_interp when running in parallel

* Update timing print

* Fix minor typo in examples
---
 .../dg_advection_local_rom_matrix_interp.cpp  | 26 ++++++++++++++-----
 1 file changed, 19 insertions(+), 7 deletions(-)

diff --git a/examples/prom/dg_advection_local_rom_matrix_interp.cpp b/examples/prom/dg_advection_local_rom_matrix_interp.cpp
index 244b42908..d0db96f14 100644
--- a/examples/prom/dg_advection_local_rom_matrix_interp.cpp
+++ b/examples/prom/dg_advection_local_rom_matrix_interp.cpp
@@ -32,7 +32,7 @@
 //    dg_advection_local_rom_matrix_interp -interp_prep -ff 1.08 -rdim 40
 //    dg_advection_local_rom_matrix_interp -fom -ff 1.05
 //    dg_advection_local_rom_matrix_interp -online_interp -ff 1.05 -rdim 40 (interpolate using a linear solve)
-//    dg_advection_local_rom_matrix_interp -online_interp -ff 1.05 -rdim 40 -im "LP" (interpolate using lagragian polynomials)
+//    dg_advection_local_rom_matrix_interp -online_interp -ff 1.05 -rdim 40 -im "LP" (interpolate using lagrangian polynomials)
 //    dg_advection_local_rom_matrix_interp -online_interp -ff 1.05 -rdim 40 -im "IDW" (interpolate using inverse distance weighting)
 //
 // Sample runs:
@@ -527,7 +527,9 @@ int main(int argc, char *argv[])
     VectorFunctionCoefficient velocity(dim, velocity_function);
     FunctionCoefficient inflow(inflow_function);
     FunctionCoefficient u0(u0_function);
+    StopWatch fom_timer, assemble_timer;
 
+    assemble_timer.Start();
     ParBilinearForm *m = new ParBilinearForm(fes);
     ParBilinearForm *k = new ParBilinearForm(fes);
     if (pa)
@@ -574,6 +576,8 @@ int main(int argc, char *argv[])
     u->ProjectCoefficient(u0);
     HypreParVector *U = u->GetTrueDofs();
 
+    assemble_timer.Stop();
+
     {
         ostringstream mesh_name, sol_name;
         mesh_name << "dg_advection_local_rom_matrix_interp-mesh." << setfill('0') <<
@@ -681,9 +685,7 @@ int main(int argc, char *argv[])
         }
     }
 
-    StopWatch fom_timer;
     double t = 0.0;
-
     int max_num_snapshots = t_final / dt + 1;
     bool update_right_SV = false;
     bool isIncremental = false;
@@ -714,6 +716,8 @@ int main(int argc, char *argv[])
 
     if (online)
     {
+        assemble_timer.Start();
+
         if (!online_interp)
         {
             CAROM::BasisReader reader(basisName);
@@ -805,14 +809,11 @@ int main(int argc, char *argv[])
             std::vector<CAROM::Matrix*> M_hats;
             std::vector<CAROM::Vector*> b_hats;
             std::vector<CAROM::Vector*> u_init_hats;
-            std::ofstream fout;
-            fout.open("frequencies.txt");
             for(auto it = frequencies.begin(); it != frequencies.end(); it++)
             {
                 CAROM::Vector* point = new CAROM::Vector(1, false);
                 point->item(0) = *it;
                 parameter_points.push_back(point);
-                fout << *it << std::endl;
 
                 std::string parametricBasisName = "basis_" + std::to_string(*it);
                 CAROM::BasisReader reader(parametricBasisName);
@@ -839,7 +840,6 @@ int main(int argc, char *argv[])
                 parametricuinithat->read("u_init_hat_" + std::to_string(*it));
                 u_init_hats.push_back(parametricuinithat);
             }
-            fout.close();
             if (myid == 0) printf("spatial basis dimension is %d x %d\n", numRowRB,
                                       numColumnRB);
 
@@ -884,6 +884,8 @@ int main(int argc, char *argv[])
 
         u_in = new Vector(numColumnRB);
         *u_in = 0.0;
+
+        assemble_timer.Stop();
     }
 
     TimeDependentOperator* adv;
@@ -1037,6 +1039,16 @@ int main(int argc, char *argv[])
         osol.close();
     }
 
+    // 13. print timing info
+    if (myid == 0)
+    {
+        const std::string type = online ? "ROM" : "FOM";
+        std::cout << "Elapsed time for assembling " << type << ": " << std::scientific
+                  << std::setprecision(8) << assemble_timer.RealTime() << " second\n";
+        std::cout << "Elapsed time for solving " << type << ": " << std::scientific <<
+                  std::setprecision(8) << fom_timer.RealTime() << " second\n";
+    }
+
     // 16. Free the used memory.
     delete U;
     delete u;

From 646ffe83d0845507191e852a3bb47d0fe4a5aa4e Mon Sep 17 00:00:00 2001
From: Dylan Copeland <dylancopeland@gmail.com>
Date: Tue, 2 Jan 2024 18:57:14 -0800
Subject: [PATCH 12/13] Sample mesh manager bug fix (#258)

* Bug fix.

* Cleaning up a few things.
---
 examples/prom/dg_advection_global_rom.cpp     |  1 -
 .../prom/linear_elasticity_global_rom.cpp     |  1 -
 examples/prom/maxwell_global_rom.cpp          |  1 -
 examples/prom/poisson_global_rom.cpp          |  1 -
 examples/prom/poisson_local_rom_greedy.cpp    |  1 -
 lib/mfem/SampleMesh.cpp                       | 64 ++++++++++---------
 6 files changed, 34 insertions(+), 35 deletions(-)

diff --git a/examples/prom/dg_advection_global_rom.cpp b/examples/prom/dg_advection_global_rom.cpp
index 6855b8743..3d159a1e5 100644
--- a/examples/prom/dg_advection_global_rom.cpp
+++ b/examples/prom/dg_advection_global_rom.cpp
@@ -704,7 +704,6 @@ int main(int argc, char *argv[])
     if (merge)
     {
         mergeTimer.Start();
-        std::unique_ptr<CAROM::BasisGenerator> basis_generator;
         options = new CAROM::Options(U->Size(), max_num_snapshots, 1, update_right_SV);
         generator = new CAROM::BasisGenerator(*options, isIncremental, basisName);
         for (int paramID=0; paramID<nsets; ++paramID)
diff --git a/examples/prom/linear_elasticity_global_rom.cpp b/examples/prom/linear_elasticity_global_rom.cpp
index 6bf81a497..0de7a3005 100644
--- a/examples/prom/linear_elasticity_global_rom.cpp
+++ b/examples/prom/linear_elasticity_global_rom.cpp
@@ -244,7 +244,6 @@ int main(int argc, char* argv[])
     if (merge)
     {
         mergeTimer.Start();
-        std::unique_ptr<CAROM::BasisGenerator> basis_generator;
         options = new CAROM::Options(fespace->GetTrueVSize(), max_num_snapshots, 1,
                                      update_right_SV);
         generator = new CAROM::BasisGenerator(*options, isIncremental, basisName);
diff --git a/examples/prom/maxwell_global_rom.cpp b/examples/prom/maxwell_global_rom.cpp
index fa5d030c4..cdc081960 100644
--- a/examples/prom/maxwell_global_rom.cpp
+++ b/examples/prom/maxwell_global_rom.cpp
@@ -238,7 +238,6 @@ int main(int argc, char *argv[])
     if (merge)
     {
         mergeTimer.Start();
-        std::unique_ptr<CAROM::BasisGenerator> basis_generator;
         options = new CAROM::Options(fespace->GetTrueVSize(), max_num_snapshots, 1,
                                      update_right_SV);
         generator = new CAROM::BasisGenerator(*options, isIncremental, basisName);
diff --git a/examples/prom/poisson_global_rom.cpp b/examples/prom/poisson_global_rom.cpp
index 73f72aeb0..a5a393124 100644
--- a/examples/prom/poisson_global_rom.cpp
+++ b/examples/prom/poisson_global_rom.cpp
@@ -247,7 +247,6 @@ int main(int argc, char *argv[])
     if (merge)
     {
         mergeTimer.Start();
-        std::unique_ptr<CAROM::BasisGenerator> basis_generator;
         options = new CAROM::Options(fespace.GetTrueVSize(), max_num_snapshots, 1,
                                      update_right_SV);
         generator = new CAROM::BasisGenerator(*options, isIncremental, basisName);
diff --git a/examples/prom/poisson_local_rom_greedy.cpp b/examples/prom/poisson_local_rom_greedy.cpp
index 761901d7c..6633110f5 100644
--- a/examples/prom/poisson_local_rom_greedy.cpp
+++ b/examples/prom/poisson_local_rom_greedy.cpp
@@ -617,7 +617,6 @@ int main(int argc, char *argv[])
         if (calc_rel_error || (offline && basisIdentifiers.size() == 1))
         {
             mergeTimer.Start();
-            std::unique_ptr<CAROM::BasisGenerator> basis_generator;
             options = new CAROM::Options(fespace.GetTrueVSize(), max_num_snapshots, 1,
                                          update_right_SV);
             generator = new CAROM::BasisGenerator(*options, isIncremental, loadBasisName);
diff --git a/lib/mfem/SampleMesh.cpp b/lib/mfem/SampleMesh.cpp
index cd5df7c52..fcc51a770 100644
--- a/lib/mfem/SampleMesh.cpp
+++ b/lib/mfem/SampleMesh.cpp
@@ -29,7 +29,6 @@ void FindStencilElements(const vector<int>& sample_dofs_gid,
             for (int i = 0; i < dofs.Size(); i++)
             {
                 const int dof_i = dofs[i] >= 0 ? dofs[i] : -1 - dofs[i];
-                //int ltdof = fespace.GetLocalTDofNumber(dof_i);
                 int global_dof = fespace.GetGlobalTDofNumber(dof_i);
                 if (global_dof == *it)
                 {
@@ -115,7 +114,6 @@ void GetLocalSampleMeshElements(ParMesh& pmesh, ParFiniteElementSpace& fespace,
 
     // Construct the map of local true dofs to global true dofs.
     map<int, int> ltdof2gtdof;
-    //const int ndofs = fespace.GetNDofs();
     const int ndofs = fespace.GetVSize();
     for (int i = 0; i < fespace.GetVSize(); ++i) {
         int ltdof = fespace.GetLocalTDofNumber(i);
@@ -390,8 +388,8 @@ void BuildSampleMesh(ParMesh& pmesh, vector<ParFiniteElementSpace*> & fespace,
     int numElVert = elVert.Size();  // number of vertices per element
     MFEM_VERIFY(numElVert > 0, "");
 
-    vector<int> my_element_vgid(
-        numElVert*local_num_elems);  // vertex global indices, for each element
+    // vertex global indices, for each element
+    vector<int> my_element_vgid(numElVert*local_num_elems);
     vector<double> my_element_coords(d*numElVert*local_num_elems);
 
     for (set<int>::iterator it = elems.begin(); it != elems.end(); ++it) {
@@ -411,7 +409,7 @@ void BuildSampleMesh(ParMesh& pmesh, vector<ParFiniteElementSpace*> & fespace,
         Array<int> dofs;
         H1DummySpace.GetElementDofs(elId, dofs);
         MFEM_VERIFY(numElVert == dofs.Size(),
-                    "");  // Assuming a bijection between vertices and H1 dummy space DOF's.
+                    "Assuming a bijection between vertices and H1 dummy space DOF's");
 
         for (int i = 0; i < numElVert; ++i) {
             my_element_vgid[conn_idx++] = H1DummySpace.GetGlobalTDofNumber(dofs[i]);
@@ -464,10 +462,10 @@ void BuildSampleMesh(ParMesh& pmesh, vector<ParFiniteElementSpace*> & fespace,
     delete [] cts;
     delete [] offsets;
 
-    // element_vgid holds vertices as global ids.  Vertices may be shared
+    // element_vgid holds vertices as global ids. Vertices may be shared
     // between elements so we don't know the number of unique vertices in the
-    // sample mesh.  Find all the unique vertices and construct the map of
-    // global dof ids to local dof ids (vertices).  Keep track of the number of
+    // sample mesh. Find all the unique vertices and construct the map of
+    // global dof ids to local dof ids (vertices). Keep track of the number of
     // unique vertices.
     set<int> unique_gdofs;
     map<int, int> unique_gdofs_first_appearance;
@@ -534,8 +532,12 @@ void BuildSampleMesh(ParMesh& pmesh, vector<ParFiniteElementSpace*> & fespace,
 }
 
 void GetLocalDofsToLocalElementMap(ParFiniteElementSpace& fespace,
-                                   const vector<int>& dofs, const vector<int>& localNumDofs, const set<int>& elems,
-                                   vector<int>& dofToElem, vector<int>& dofToElemDof, const bool useTDof)
+                                   const vector<int>& dofs,
+                                   const vector<int>& localNumDofs,
+                                   const set<int>& elems,
+                                   vector<int>& dofToElem,
+                                   vector<int>& dofToElemDof,
+                                   const bool useTDof)
 {
     int myid;
     MPI_Comm_rank(MPI_COMM_WORLD, &myid);
@@ -568,8 +570,9 @@ void GetLocalDofsToLocalElementMap(ParFiniteElementSpace& fespace,
                 if (ltdof == dofs[myoffset + i])  // dofs contains true DOF's.
 #endif
                 {
-                    dofToElem[i] =
-                        elId;  // Possibly overwrite another element index, which is fine since we just want any element index.
+                    // Possibly overwrite another element index, which is fine
+                    // since we just want any element index.
+                    dofToElem[i] = elId;
                     dofToElemDof[i] = j;
                 }
             }
@@ -622,8 +625,8 @@ void Set_s2sp(const int myid, const int num_procs, vector<int> const& spNtrue,
     // Gather all the sample DOF to element and element DOF indices.
     vector<int> mySampleToElement(2*local_num_sample_dofs[myid]);
 
-    mySampleToElement.assign(mySampleToElement.size(),
-                             -1);  // Initialize with invalid values, to verify later that everything was set.
+    // Initialize with invalid values, to verify later that everything was set.
+    mySampleToElement.assign(mySampleToElement.size(), -1);
 
     for (int s=0; s<nspaces; ++s)  // Loop over subspaces
     {
@@ -673,9 +676,8 @@ void Set_s2sp(const int myid, const int num_procs, vector<int> const& spNtrue,
         spaceOS[i] = spaceOS[i-1] + spfespace[i-1]->GetVSize();
 
     s2sp.resize(global_num_sample_dofs);
-
-    s2sp.assign(s2sp.size(),
-                -1);  // Initialize with invalid values, to verify later that everything was set.
+    // Initialize with invalid values, to verify later that everything was set.
+    s2sp.assign(s2sp.size(), -1);
 
     for (int s=0; s<nspaces; ++s)
         os[s] = 0;
@@ -852,8 +854,6 @@ void Finish_s2sp_augmented(const int rank, const int nprocs,
                             MFEM_VERIFY(it->second == sp, "");
                         }
                     }
-
-                    //gid[(2*i) + 1] = s2sp[dofs_sub_to_sdofs[s][os + i]];
                 }
             }
 
@@ -863,7 +863,6 @@ void Finish_s2sp_augmented(const int rank, const int nprocs,
                 for (int i=0; i<local_num_dofs_sub[s][p]; ++i)
                 {
                     const int g = allgid[offsets[p] + (2*i)];
-                    //const int sp = allgid[offsets[p] + (2*i) + 1];  ?? //why isn't this used? because it already set gi2sp, which is now used.
 
                     map<int, int>::const_iterator it = gi2sp.find(g);
                     MFEM_VERIFY(it != gi2sp.end() && it->first == g, "");
@@ -883,11 +882,14 @@ void Finish_s2sp_augmented(const int rank, const int nprocs,
             {
                 if (s2sp_[i] == -1)
                     s2sp_[i] = s2sp[i];
-
-                MFEM_VERIFY(s2sp_[i] >= 0 && s2sp_[i] == s2sp[i], "");
+                else
+                    MFEM_VERIFY(s2sp_[i] == s2sp[i], "Consistency check");
             }
         }
     }
+
+    for (int i=0; i<s2sp_.size(); ++i)
+        MFEM_VERIFY(s2sp_[i] >= 0 && s2sp_[i] == s2sp[i], "");
 }
 #endif
 
@@ -1162,7 +1164,8 @@ void SampleMeshManager::SetSampleMaps()
             }
         }
 
-        // For each variable v and each of the num_sample_dofs_per_proc[v][p] samples, set s2sp_var[v][] to be its index in sample_dofs
+        // For each variable v and each of the num_sample_dofs_per_proc[v][p]
+        // samples, set s2sp_var[v][] to be its index in sample_dofs.
         s2sp_var.resize(nvar);
         for (int v=0; v<nvar; ++v)
         {
@@ -1179,7 +1182,8 @@ void SampleMeshManager::SetSampleMaps()
             {
                 const int sample = sample_dofs_var[v][os + j];
 
-                // Note: this has quadratic complexity and could be improved with a std::map<int, int>, but it should not be a bottleneck.
+                // Note: this has quadratic complexity and could be improved
+                // with a std::map<int, int>, but it should not be a bottleneck.
                 int k = -1;
                 int cnt = 0;
                 for (set<int>::const_iterator it = sample_dofs_proc[space][p].begin();
@@ -1440,16 +1444,16 @@ void GatherDistributedMatrixRows_aux(const CAROM::Matrix& B, const int rdim,
                 if (allos0[i] <= all_sprows[sti] && all_sprows[sti] < allos1[i])
                 {
                     MFEM_VERIFY(0 <= st2sp[sti] - ossp && st2sp[sti] - ossp < Bsp.numRows(), "");
+                    // Note that this may redundantly overwrite some rows corresponding to shared DOF's.
                     MPI_Recv(&Bsp(st2sp[sti] - ossp, 0), rdim, MPI_DOUBLE,
-                             i, offsets[i]+j, MPI_COMM_WORLD,
-                             &status);  // Note that this may redundantly overwrite some rows corresponding to shared DOF's.
+                             i, offsets[i]+j, MPI_COMM_WORLD, &status);
                 }
 #else
                 if (allos0[i] <= all_sprows[Bsp_row] && all_sprows[Bsp_row] < allos1[i])
                 {
+                    // Note that this may redundantly overwrite some rows corresponding to shared DOF's.
                     MPI_Recv(&Bsp(st2sp[Bsp_row], 0), rdim, MPI_DOUBLE,
-                             i, offsets[i]+j, MPI_COMM_WORLD,
-                             &status);  // Note that this may redundantly overwrite some rows corresponding to shared DOF's.
+                             i, offsets[i]+j, MPI_COMM_WORLD, &status);
                 }
 #endif
                 ++Bsp_row;
@@ -1623,8 +1627,8 @@ void SampleMeshManager::CreateSampleMesh()
                    MPI_INT, &all_sprows[0], &local_num_stencil_dofs[0],
                    offsets, MPI_INT, MPI_COMM_WORLD);
 
-    // Note that all_stencil_dofs may contain DOF's on different processes that are identical (shared DOF's), which is fine
-    // (see comments in Set_s2sp).
+    // Note that all_stencil_dofs may contain DOF's on different processes that
+    // are identical (shared DOF's), which is fine (see comments in Set_s2sp).
 
     delete [] offsets;
 

From da32af8c0403e7a00f973fff1b5bfda07a433aaa Mon Sep 17 00:00:00 2001
From: Axel Larsson <65452706+axla-io@users.noreply.github.com>
Date: Mon, 8 Jan 2024 15:10:08 -0500
Subject: [PATCH 13/13] Enable EQP for nonlinear elastic example (#218)

* initial commit

* Updated ROM operator class definition
 and input arguments

* EQP hyperreduction setup

* Added hyperreduction until line 1000

* Setting BCs

* Finished implementation in main

* Started ROM operator initialization
Need to review before finishing

* Gone through the code
and implemented what was straightforward

* Mult_hyperreduced

* small fixes
to make consistent with class definition

* removed eqp_lifting code, since it's not necessary

* Started eqp coefficient computation implementation
And made fast calculation TODO

* Updated SetupEQP to make tests run

* Some more fixes for the compiler

* Changed rrdim -> rxdim

* Started SetupEqp_snapshots
for hyperelastic nonlinear operator

* Added sketch for compute reduced eqp

* Code compiles

* Added element function to G row computation

* Testing element vector error

* Debugging.
Vshape not implemented for this class...

* fixed bug

* Explicitly adding model to eqp loop

* segfault fix

* segfault fix 2

* debug segfault

* actual debug fix

* perhaps segfault fix

* small fix

* New test.

* testing

* changed back

* testing back again

* Hopefully

* Finished writing the test.

* Fixed compilation

* Debugging elvect size

* passing by reference

* Fix for element vector

* check all errors

* Synced with hyperelasticintegrator.

* Divide error by two

* changed points to check error

* Clean up

* testing SetupEQPSnapshots

* testing errors for NNLS

* small fix

* removed testing

* Remove normalize constraints

* Fixed segfault

* Reduced EQP is building

* Running but wrong

* Runs!

* Fixed some issues

* rank-myid fix

* Syntax improvements

* fixed bug for v0

* small fix

* Implemented time windowing

* Added command line option for time windowing

* Added EQP command line examples

* Cleaned up code

* Sketch for optimized EQP

* Continued fast eqp implementation

* Improved regular EQP

* V-basis speedup

* Storing DS now

* Solved segmentation fault in non eqp hyperreduction

* Adding QDEIM option to some examples.

* updated gitignore

* Matrix multiplication optimization

* Tested precompute optimization

* Numbering works now.

* Testing .hpp fix again

* Fixed .hpp style issue

* Delete the RomOperator to remove a memory leak

* Updated RomOperator structure
to have less leaks in timestepping

* Cause of segfault error found

* Segfault fixed

* Removed EQP bug

* Add x0 to setupEQP

* add matrix row/col rescaling methods

* normalize NNLS constraints

* Added x0

* Fixed bug in NNLS constraint forming loop.

* Max iters increased

* improved eqp speed

* resetting temp variable now

* Moved temporary variable to the correct position

* Minor formatting changes

* Removed duplicate lines in examples

* build succeeds

* Updated formatting

* Astyle 3.1.

* updated .gitignore

* ++ increment

* Removed ReducedSystemOperator

* Removed unnecessary comments

* Added consts

* Added more ++ increments

* Removed unnecessary HSINV

* Comments

* LibROM native read / write

* Added pointer deletion

* librom runs

* New benchmarks

* elementmatrix first implementation

* Construct em outside loop

* Added element matrices to ROM class

* Reduced EQP fast uses em

* Em on everything

* Debugged EQP

* Formatting

* Removed unnecessary comment

* Removed more unnecessary comments

* Implemented subsampling

* measure elapsed time for EQP

* Measure elapsed time for all methods

* Update .gitignore

* Optimized lifting of FOM dimension in Nonlinear elasticity EQP (#259)

* implementation works and gives speed up

* Removed unnecessary variables

* Final benchmark

---------

Co-authored-by: Dylan Copeland <dylancopeland@gmail.com>
Co-authored-by: Chris Vales <cv1038@unh.edu>
Co-authored-by: Dylan Copeland <copeland11@llnl.gov>
---
 .../prom/nonlinear_elasticity_global_rom.cpp  | 1792 +++++++++++++----
 .../nonlinear_elasticity_global_rom_eqp.hpp   |  137 ++
 2 files changed, 1574 insertions(+), 355 deletions(-)
 create mode 100644 examples/prom/nonlinear_elasticity_global_rom_eqp.hpp

diff --git a/examples/prom/nonlinear_elasticity_global_rom.cpp b/examples/prom/nonlinear_elasticity_global_rom.cpp
index 984f1624d..7936d143c 100644
--- a/examples/prom/nonlinear_elasticity_global_rom.cpp
+++ b/examples/prom/nonlinear_elasticity_global_rom.cpp
@@ -31,7 +31,6 @@
 //
 // Offline phase:
 //      ./nonlinear_elasticity_global_rom -offline -dt 0.01 -tf 5.0 -s 14 -vs 100 -sc 0.9 -id 0
-//
 //      ./nonlinear_elasticity_global_rom -offline -dt 0.01 -tf 5.0 -s 14 -vs 100 -sc 1.1 -id 1
 //
 // Merge phase:
@@ -39,27 +38,49 @@
 //
 // Create FOM comparison data:
 //      ./nonlinear_elasticity_global_rom -offline -dt 0.01 -tf 5.0 -s 14 -vs 100 -sc 1.0 -id 2
+// Output message:
+//      Elapsed time for time integration loop 9.17153
+//      Elapsed time for entire simulation 10.0001
 //
 // Online phase with full sampling:
 //      ./nonlinear_elasticity_global_rom -online -dt 0.01 -tf 5.0 -s 14 -vs 100 -hyp -hrtype gnat -rvdim 40 -rxdim 10 -hdim 71 -nsr 1170 -sc 1.0
 // Output message:
-//      Elapsed time for time integration loop 1.80759
+//      Elapsed time for time integration loop 7.03877
 //      Relative error of ROM position (x) at t_final: 5 is 0.000231698
 //      Relative error of ROM velocity (v) at t_final: 5 is 0.466941
+//      Elapsed time for entire simulation 8.98334
 //
 // Online phase with strong hyper-reduction, using GNAT (over-sampled DEIM):
 //      ./nonlinear_elasticity_global_rom -online -dt 0.01 -tf 5.0 -s 14 -vs 100 -hyp -hrtype gnat -rvdim 40 -rxdim 10 -hdim 71 -nsr 100 -sc 1.0
 // Output message:
-//      Elapsed time for time integration loop 1.0111
+//      Elapsed time for time integration loop 4.18114
 //      Relative error of ROM position (x) at t_final: 5 is 0.00209877
 //      Relative error of ROM velocity (v) at t_final: 5 is 1.39472
+//      Elapsed time for entire simulation 4.52802
 //
 // Online phase with strong hyper-reduction, using QDEIM:
 //      ./nonlinear_elasticity_global_rom -online -dt 0.01 -tf 5.0 -s 14 -vs 100 -hyp -hrtype qdeim -rvdim 40 -rxdim 10 -hdim 71 -nsr 100 -sc 1.0
 // Output message:
-//      Elapsed time for time integration loop 1.02559
+//      Elapsed time for time integration loop 4.2972
 //      Relative error of ROM position (x) at t_final: 5 is 0.00188458
 //      Relative error of ROM velocity (v) at t_final: 5 is 0.978726
+//      Elapsed time for entire simulation 5.33154
+//
+// Online phase with EQP hyper-reduction
+//      ./nonlinear_elasticity_global_rom -online -dt 0.01 -tf 5.0 -s 14 -vs 100 -eqp -ns 2 -ntw 50 -rvdim 40 -rxdim 10 -hdim 1 -sc 1.0
+// Output message:
+//      Elapsed time for time integration loop 2.70006
+//      Relative error of ROM position (x) at t_final: 5 is 0.00169666
+//      Relative error of ROM velocity (v) at t_final: 5 is 17.71
+//      Elapsed time for entire simulation 831.216
+//
+// Online phase with EQP hyper-reduction and subsampling of snapshots
+//      ./nonlinear_elasticity_global_rom -online -dt 0.01 -tf 5.0 -s 14 -vs 100 -eqp -ns 2 -ntw 50 -rvdim 40 -rxdim 10 -hdim 1 -sc 1.0 -sbs 10
+// Output message:
+//      Elapsed time for time integration loop 0.552546
+//      Relative error of ROM position (x) at t_final: 5 is 0.00247479
+//      Relative error of ROM velocity (v) at t_final: 5 is 1.33349
+//      Elapsed time for entire simulation 11.469
 //
 // =================================================================================
 //
@@ -74,27 +95,47 @@
 //
 // Create FOM comparison data:
 //      ./nonlinear_elasticity_global_rom -offline -dt 0.01 -tf 5.0 -s 14 -vs 100 -sc 1.0 -xbo -def-ic -id 2
+// Output message:
+//      Elapsed time for time integration loop 9.61062
+//      Elapsed time for entire simulation 10.3806
 //
 // Online phase with full sampling:
 //      ./nonlinear_elasticity_global_rom -online -dt 0.01 -tf 5.0 -s 14 -vs 100 -hyp -hrtype gnat -rxdim 57 -hdim 183 -nsr 1170 -sc 1.0 -xbo -def-ic
 // Output message:
-//      Elapsed time for time integration loop 18.9874
+//      Elapsed time for time integration loop 8.43858
 //      Relative error of ROM position (x) at t_final: 5 is 7.08272e-05
 //      Relative error of ROM velocity (v) at t_final: 5 is 0.00387647
+//      Elapsed time for entire simulation 24.7245
 //
 // Online phase with strong hyper reduction, using GNAT (over-sampled DEIM):
 //      ./nonlinear_elasticity_global_rom -online -dt 0.01 -tf 5.0 -s 14 -vs 100 -hyp -hrtype gnat -rxdim 2 -hdim 4 -nsr 10 -sc 1.0 -xbo -def-ic
 // Output message:
-//      Elapsed time for time integration loop 0.120194
+//      Elapsed time for time integration loop 0.507339
 //      Relative error of ROM position (x) at t_final: 5 is 0.0130818
 //      Relative error of ROM velocity (v) at t_final: 5 is 0.979978
+//      Elapsed time for entire simulation 0.583618
 //
 // Online phase with strong hyper reduction, using QDEIM:
 //      ./nonlinear_elasticity_global_rom -online -dt 0.01 -tf 5.0 -s 14 -vs 100 -hyp -hrtype qdeim -rxdim 2 -hdim 4 -nsr 10 -sc 1.0 -xbo -def-ic
 // Output message:
-//      Elapsed time for time integration loop 0.10806
-//      Relative error of ROM position (x) at t_final: 5 is 0.0108709
-//      Relative error of ROM velocity (v) at t_final: 5 is 1.30704
+//      Elapsed time for time integration loop 0.504657
+//      Relative error of ROM position (x) at t_final: 5 is 0.00883079
+//      Relative error of ROM velocity (v) at t_final: 5 is 1.26721
+//      Elapsed time for entire simulation 0.581446
+//
+// Online phase with EQP hyper reduction:
+//      ./nonlinear_elasticity_global_rom -online -dt 0.01 -tf 5.0 -s 14 -vs 100 -eqp -ns 2 -rxdim 2 -hdim 1 -ntw 25 -sc 1.00 -xbo -def-ic
+// Elapsed time for time integration loop 0.144368
+//      Relative error of ROM position (x) at t_final: 5 is 0.0189361
+//      Relative error of ROM velocity (v) at t_final: 5 is 0.830724
+//      Elapsed time for entire simulation 4.97996
+//
+// Online phase with EQP hyper reduction and subsampling of snapshots:
+//      ./nonlinear_elasticity_global_rom -online -dt 0.01 -tf 5.0 -s 14 -vs 100 -eqp -ns 2 -rxdim 2 -hdim 1 -ntw 25 -sc 1.00 -xbo -def-ic -sbs 10
+// Elapsed time for time integration loop 0.0141128
+//      Relative error of ROM position (x) at t_final: 5 is 0.0243952
+//      Relative error of ROM velocity (v) at t_final: 5 is 0.986434
+//      Elapsed time for entire simulation 0.782108
 //
 // This example runs in parallel with MPI, by using the same number of MPI ranks
 // in all phases (offline, merge, online).
@@ -103,6 +144,7 @@
 #include "linalg/Vector.h"
 #include "linalg/BasisGenerator.h"
 #include "linalg/BasisReader.h"
+#include "linalg/NNLS.h"
 #include "hyperreduction/Hyperreduction.h"
 #include "mfem/SampleMesh.hpp"
 #include "mfem/Utilities.hpp"
@@ -114,44 +156,7 @@
 using namespace std;
 using namespace mfem;
 
-class ReducedSystemOperator;
-
-class HyperelasticOperator : public TimeDependentOperator
-{
-
-protected:
-    ParBilinearForm *M, *S;
-
-    CGSolver M_solver;    // Krylov solver for inverting the mass matrix M
-    HypreSmoother M_prec; // Preconditioner for the mass matrix M
-
-public:
-    HyperelasticOperator(ParFiniteElementSpace& f, Array<int>& ess_tdof_list_,
-                         double visc, double mu, double K);
-
-    /// Compute the right-hand side of the ODE system.
-    virtual void Mult(const Vector& vx, Vector& dvx_dt) const;
-
-    double ElasticEnergy(const ParGridFunction& x) const;
-    double KineticEnergy(const ParGridFunction& v) const;
-    void GetElasticEnergyDensity(const ParGridFunction& x,
-                                 ParGridFunction& w) const;
-
-    mutable Vector H_sp;
-    mutable Vector dvxdt_sp;
-
-    ParFiniteElementSpace &fespace;
-    double viscosity;
-    Array<int> ess_tdof_list;
-    ParNonlinearForm* H;
-    HyperelasticModel* model;
-    mutable Vector z; // auxiliary vector
-    mutable Vector z2; // auxiliary vector
-    HypreParMatrix* Mmat; // Mass matrix from ParallelAssemble()
-    HypreParMatrix Smat;
-
-    virtual ~HyperelasticOperator();
-};
+#include "nonlinear_elasticity_global_rom_eqp.hpp"
 
 class RomOperator : public TimeDependentOperator
 {
@@ -160,70 +165,99 @@ class RomOperator : public TimeDependentOperator
     int nsamp_H;
     double current_dt;
     bool oversampling;
-    CAROM::Matrix* V_v_sp, * V_x_sp;
-    CAROM::Matrix* V_v_sp_dist;
-    CAROM::Vector* psp_librom, * psp_v_librom;
-    Vector* psp;
-    Vector* psp_x;
-    Vector* psp_v;
+    CAROM::Matrix *V_v_sp, *V_x_sp;
+    CAROM::Vector *V_xTv_0_sp;
+    CAROM::Matrix *V_xTV_v_sp;
+    CAROM::Matrix *V_v_sp_dist;
+    CAROM::Vector *psp_librom, *psp_v_librom;
+    CAROM::Vector *Vx_librom_temp;
+    Vector *psp;
+    Vector *psp_x;
+    Vector *psp_v;
+    Vector *Vx_temp;
     mutable Vector zH;
     mutable CAROM::Vector zX;
+    mutable Vector zX_MFEM;
     mutable CAROM::Vector zN;
-    const CAROM::Matrix* Hsinv;
-    mutable CAROM::Vector* z_librom;
+    const CAROM::Matrix *Hsinv;
+    mutable CAROM::Vector *z_librom;
     mutable Vector z;
     mutable Vector z_x;
     mutable Vector z_v;
 
+    CAROM::Vector *v0_fom_librom, *v0_librom, *V_xTv_0;
+    CAROM::Matrix *V_xTV_v;
+
     bool hyperreduce;
     bool x_base_only;
 
     CAROM::Vector *pfom_librom, *pfom_v_librom;
-    Vector* pfom;
-    Vector* pfom_x;
-    Vector* pfom_v;
-    mutable Vector* zfom_x;
-    mutable Vector* zfom_v;
-    CAROM::Vector* zfom_x_librom;
+    Vector *pfom;
+    Vector *pfom_x;
+    Vector *pfom_v;
+    mutable Vector *zfom_x;
+    mutable Vector *zfom_v;
+    CAROM::Vector *zfom_x_librom;
+
+    CAROM::SampleMeshManager *smm;
+
+    CAROM::Vector *z_v_librom;
+    CAROM::Vector *z_x_librom;
+
+    // Data for EQP
+    bool eqp;
+    const IntegrationRule *ir_eqp;
+    std::vector<double> eqp_rw;
+    std::vector<int> eqp_qp;
+    Vector eqp_coef;
+    Vector eqp_DS_coef;
+    const bool fastIntegration = true;
+    ElemMatrices *em;
 
-    CAROM::SampleMeshManager* smm;
+    CAROM::Matrix eqp_lifting;
+    std::vector<int> eqp_liftDOFs;
+    mutable CAROM::Vector eqp_lifted;
 
-    CAROM::Vector* z_v_librom;
-    CAROM::Vector* z_x_librom;
+    int rank;
+
+    NeoHookeanModel *model;
 
 protected:
-    CAROM::Matrix* S_hat;
-    CAROM::Vector* S_hat_v0;
-    Vector* S_hat_v0_temp;
-    CAROM::Vector* S_hat_v0_temp_librom;
-    CAROM::Matrix* M_hat;
-    CAROM::Matrix* M_hat_inv;
+    CAROM::Matrix *S_hat;
+    CAROM::Vector *S_hat_v0;
+    Vector *S_hat_v0_temp;
+    CAROM::Vector *S_hat_v0_temp_librom;
+    CAROM::Matrix *M_hat;
+    CAROM::Matrix *M_hat_inv;
 
-    const CAROM::Matrix* U_H;
+    const CAROM::Matrix *U_H;
 
-    HyperelasticOperator* fomSp;
+    HyperelasticOperator *fomSp;
 
     CGSolver M_hat_solver;    // Krylov solver for inverting the reduced mass matrix M_hat
     HypreSmoother M_hat_prec; // Preconditioner for the reduced mass matrix M_hat
 
 public:
-    HyperelasticOperator* fom;
-
-    RomOperator(HyperelasticOperator* fom_,
-                HyperelasticOperator* fomSp_, const int rvdim_, const int rxdim_,
-                const int hdim_,CAROM::SampleMeshManager* smm_, const Vector* v0_,
-                const Vector* x0_,const Vector v0_fom_,const CAROM::Matrix* V_v_,
-                const CAROM::Matrix* V_x_, const CAROM::Matrix* U_H_,
-                const CAROM::Matrix* Hsinv_,const int myid, const bool oversampling_,
-                const bool hyperreduce_,const bool x_base_only_);
-
-    virtual void Mult(const Vector& y, Vector& dy_dt) const;
-    void Mult_Hyperreduced(const Vector& y, Vector& dy_dt) const;
-    void Mult_FullOrder(const Vector& y, Vector& dy_dt) const;
+    HyperelasticOperator *fom;
+
+    RomOperator(HyperelasticOperator *fom_,
+                HyperelasticOperator *fomSp_, const int rvdim_, const int rxdim_,
+                const int hdim_, CAROM::SampleMeshManager *smm_, const Vector *v0_,
+                const Vector *x0_, const Vector v0_fom_, const CAROM::Matrix *V_v_,
+                const CAROM::Matrix *V_x_, const CAROM::Matrix *U_H_,
+                const CAROM::Matrix *Hsinv_, const int myid, const bool oversampling_,
+                const bool hyperreduce_, const bool x_base_only_, const bool use_eqp,
+                CAROM::Vector *eqpSol,
+                const IntegrationRule *ir_eqp_, NeoHookeanModel *model_);
+
+    virtual void Mult(const Vector &y, Vector &dy_dt) const;
+    void Mult_Hyperreduced(const Vector &y, Vector &dy_dt) const;
+    void Mult_FullOrder(const Vector &y, Vector &dy_dt) const;
+    void SetEQP(CAROM::Vector *eqpSol);
 
     CAROM::Matrix V_v, V_x, V_vTU_H;
-    const Vector* x0;
-    const Vector* v0;
+    const Vector *x0;
+    const Vector *v0;
     Vector v0_fom;
 
     virtual ~RomOperator();
@@ -234,33 +268,33 @@ class RomOperator : public TimeDependentOperator
 class ElasticEnergyCoefficient : public Coefficient
 {
 private:
-    HyperelasticModel& model;
-    const ParGridFunction& x;
-    DenseMatrix            J;
+    HyperelasticModel &model;
+    const ParGridFunction &x;
+    DenseMatrix J;
 
 public:
-    ElasticEnergyCoefficient(HyperelasticModel& m, const ParGridFunction& x_)
-        : model(m), x(x_) { }
-    virtual double Eval(ElementTransformation& T, const IntegrationPoint& ip);
-    virtual ~ElasticEnergyCoefficient() { }
+    ElasticEnergyCoefficient(HyperelasticModel &m, const ParGridFunction &x_)
+        : model(m), x(x_) {}
+    virtual double Eval(ElementTransformation &T, const IntegrationPoint &ip);
+    virtual ~ElasticEnergyCoefficient() {}
 };
 
-void InitialDeformationIC1(const Vector& x, Vector& y);
+void InitialDeformationIC1(const Vector &x, Vector &y);
 
-void InitialVelocityIC1(const Vector& x, Vector& v);
+void InitialVelocityIC1(const Vector &x, Vector &v);
 
-void InitialDeformationIC2(const Vector& x, Vector& y);
+void InitialDeformationIC2(const Vector &x, Vector &y);
 
-void InitialVelocityIC2(const Vector& x, Vector& v);
+void InitialVelocityIC2(const Vector &x, Vector &v);
 
-void visualize(ostream& out, ParMesh* mesh, ParGridFunction* deformed_nodes,
-               ParGridFunction* field, const char* field_name = NULL,
+void visualize(ostream &out, ParMesh *mesh, ParGridFunction *deformed_nodes,
+               ParGridFunction *field, const char *field_name = NULL,
                bool init_vis = false);
 
 // TODO: move this to the library?
-CAROM::Matrix* GetFirstColumns(const int N, const CAROM::Matrix* A)
+CAROM::Matrix *GetFirstColumns(const int N, const CAROM::Matrix *A)
 {
-    CAROM::Matrix* S = new CAROM::Matrix(A->numRows(), std::min(N, A->numColumns()),
+    CAROM::Matrix *S = new CAROM::Matrix(A->numRows(), std::min(N, A->numColumns()),
                                          A->distributed());
     for (int i = 0; i < S->numRows(); ++i)
     {
@@ -273,33 +307,45 @@ CAROM::Matrix* GetFirstColumns(const int N, const CAROM::Matrix* A)
 }
 
 // TODO: move this to the library?
-void BasisGeneratorFinalSummary(CAROM::BasisGenerator* bg,
-                                const double energyFraction, int& cutoff,
+void BasisGeneratorFinalSummary(CAROM::BasisGenerator *bg,
+                                const double energyFraction, int &cutoff,
                                 const std::string cutoffOutputPath)
 {
     const int rom_dim = bg->getSpatialBasis()->numColumns();
-    const CAROM::Vector* sing_vals = bg->getSingularValues();
+    const CAROM::Vector *sing_vals = bg->getSingularValues();
 
     MFEM_VERIFY(rom_dim <= sing_vals->dim(), "");
 
     double sum = 0.0;
-    for (int sv = 0; sv < sing_vals->dim(); ++sv) {
+    for (int sv = 0; sv < sing_vals->dim(); ++sv)
+    {
         sum += (*sing_vals)(sv);
     }
 
-    vector<double> energy_fractions = { 0.9999999, 0.999999, 0.99999, 0.9999, 0.999, 0.99, 0.9 };
+    vector<double> energy_fractions = {0.9999999, 0.999999, 0.99999, 0.9999, 0.999, 0.99, 0.9};
     bool reached_cutoff = false;
 
     ofstream outfile(cutoffOutputPath);
 
     double partialSum = 0.0;
-    for (int sv = 0; sv < sing_vals->dim(); ++sv) {
+    stringstream prec;
+    int ctr = 1;
+    char buffer[100];
+    for (int sv = 0; sv < sing_vals->dim(); ++sv)
+    {
         partialSum += (*sing_vals)(sv);
         for (int i = energy_fractions.size() - 1; i >= 0; i--)
         {
             if (partialSum / sum > energy_fractions[i])
             {
-                outfile << "For energy fraction: " << energy_fractions[i] << ", take first "
+                // Format string
+                prec.str(std::string());
+                prec << "%." << ctr << "f";
+                sprintf(buffer, prec.str().c_str(), energy_fractions[i]);
+                ctr++;
+
+                // Output string
+                outfile << "For energy fraction: " << string(buffer) << ", take first "
                         << sv + 1 << " of " << sing_vals->dim() << " basis vectors" << endl;
                 energy_fractions.pop_back();
             }
@@ -316,7 +362,8 @@ void BasisGeneratorFinalSummary(CAROM::BasisGenerator* bg,
         }
     }
 
-    if (!reached_cutoff) cutoff = sing_vals->dim();
+    if (!reached_cutoff)
+        cutoff = sing_vals->dim();
     outfile << "Take first " << cutoff << " of " << sing_vals->dim() <<
             " basis vectors" << endl;
     outfile.close();
@@ -347,8 +394,33 @@ void MergeBasis(const int dimFOM, const int nparam, const int max_num_snapshots,
                                "mergedSV_" + name + ".txt");
 }
 
+const CAROM::Matrix *GetSnapshotMatrix(const int dimFOM, const int nparam,
+                                       const int max_num_snapshots, std::string name)
+{
+    MFEM_VERIFY(nparam > 0, "Must specify a positive number of parameter sets");
+
+    bool update_right_SV = false;
+    bool isIncremental = false;
+
+    CAROM::Options options(dimFOM, nparam * max_num_snapshots, 1, update_right_SV);
+    CAROM::BasisGenerator generator(options, isIncremental, "basis" + name);
+
+    for (int paramID = 0; paramID < nparam; ++paramID)
+    {
+        std::string snapshot_filename = "basis" + std::to_string(
+                                            paramID) + "_" + name + "_snapshot";
+        generator.loadSamples(snapshot_filename, "snapshot");
+    }
+
+    // TODO: this deep copy is inefficient, just to get around generator owning the matrix.
+    CAROM::Matrix *s = new CAROM::Matrix(*generator.getSnapshotMatrix());
+
+    return s;
+    // return generator.getSnapshotMatrix();  // BUG: the matrix is deleted when generator goes out of scope.
+}
+
 // TODO: remove this by making online computation serial?
-void BroadcastUndistributedRomVector(CAROM::Vector* v)
+void BroadcastUndistributedRomVector(CAROM::Vector *v)
 {
     const int N = v->dim();
 
@@ -358,21 +430,21 @@ void BroadcastUndistributedRomVector(CAROM::Vector* v)
 
     MFEM_VERIFY(d != 0, "");
 
-    for (int i=0; i<N; ++i)
+    for (int i = 0; i < N; ++i)
         d[i] = (*v)(i);
 
     MPI_Bcast(d, N, MPI_DOUBLE, 0, MPI_COMM_WORLD);
 
-    for (int i=0; i<N; ++i)
+    for (int i = 0; i < N; ++i)
         (*v)(i) = d[i];
 
-    delete [] d;
+    delete[] d;
 }
 
 // Scaling factor for parameterization
 double s = 1.0;
 
-int main(int argc, char* argv[])
+int main(int argc, char *argv[])
 {
     // 1. Initialize MPI.
     int num_procs, myid;
@@ -381,7 +453,7 @@ int main(int argc, char* argv[])
     MPI_Comm_rank(MPI_COMM_WORLD, &myid);
 
     // 2. Parse command-line options.
-    const char* mesh_file = "../data/beam-quad.mesh";
+    const char *mesh_file = "../data/beam-quad.mesh";
     int ser_ref_levels = 2;
     int par_ref_levels = 0;
     int order = 2;
@@ -401,6 +473,8 @@ int main(int argc, char* argv[])
     bool offline = false;
     bool merge = false;
     bool online = false;
+    bool use_eqp = false;
+    bool writeSampleMesh = false;
     bool hyperreduce = true;
     bool x_base_only = false;
     int num_samples_req = -1;
@@ -414,6 +488,16 @@ int main(int argc, char* argv[])
     int rvdim = -1;
     int hdim = -1;
 
+    bool preconditionNNLS = false;
+    double tolNNLS = 1.0e-14;
+    int maxNNLSnnz = 0;
+
+    // Number of time windows
+    int n_windows = 0;
+
+    // Subsampling steps
+    int snap_step = 1;
+
     OptionsParser args(argc, argv);
     args.AddOption(&mesh_file, "-m", "--mesh",
                    "Mesh file to use.");
@@ -476,6 +560,20 @@ int main(int argc, char* argv[])
                    "--velocity-ic",
                    "Use a deformation-, or velocity initial condition. Default is velocity IC.");
     args.AddOption(&s, "-sc", "--scaling", "Scaling factor for initial condition.");
+    args.AddOption(&use_eqp, "-eqp", "--eqp", "-no-eqp", "--no-eqp",
+                   "Use EQP instead of DEIM for the hyperreduction.");
+    args.AddOption(&writeSampleMesh, "-smesh", "--sample-mesh", "-no-smesh",
+                   "--no-sample-mesh", "Write the sample mesh to file.");
+    args.AddOption(&preconditionNNLS, "-preceqp", "--preceqp", "-no-preceqp",
+                   "--no-preceqp", "Precondition the NNLS system for EQP.");
+    args.AddOption(&tolNNLS, "-tolnnls", "--tol-nnls",
+                   "Tolerance for NNLS solver.");
+    args.AddOption(&maxNNLSnnz, "-maxnnls", "--max-nnls",
+                   "Maximum nnz for NNLS");
+    args.AddOption(&n_windows, "-ntw", "--n-time-windows",
+                   "Number of time windows. Default is 0");
+    args.AddOption(&snap_step, "-sbs", "--subsampling-step",
+                   "Subsamble every n-th snapshot. Default is 1, meaning no subsampling");
 
     args.Parse();
     if (!args.Good())
@@ -502,13 +600,13 @@ int main(int argc, char* argv[])
     // 3. Read the serial mesh from the given mesh file on all processors. We can
     //    handle triangular, quadrilateral, tetrahedral and hexahedral meshes
     //    with the same code.
-    Mesh* mesh = new Mesh(mesh_file, 1, 1);
+    Mesh *mesh = new Mesh(mesh_file, 1, 1);
     int dim = mesh->Dimension();
 
     // 4. Define the ODE solver used for time integration. Several implicit
     //    singly diagonal implicit Runge-Kutta (SDIRK) methods, as well as
     //    explicit Runge-Kutta methods are available.
-    ODESolver* ode_solver;
+    ODESolver *ode_solver;
     switch (ode_solver_type)
     {
     // Implicit L-stable methods
@@ -553,7 +651,7 @@ int main(int argc, char* argv[])
     // 6. Define a parallel mesh by a partitioning of the serial mesh. Refine
     //    this mesh further in parallel to increase the resolution. Once the
     //    parallel mesh is defined, the serial mesh can be deleted.
-    ParMesh* pmesh = new ParMesh(MPI_COMM_WORLD, *mesh);
+    ParMesh *pmesh = new ParMesh(MPI_COMM_WORLD, *mesh);
     delete mesh;
     for (int lev = 0; lev < par_ref_levels; lev++)
     {
@@ -626,8 +724,8 @@ int main(int argc, char* argv[])
 
     // 8. Set the initial conditions for v_gf, x_gf and vx, and define the
     //    boundary conditions on a beam-like mesh (see description above).
-    VectorFunctionCoefficient* velo = 0;
-    VectorFunctionCoefficient* deform = 0;
+    VectorFunctionCoefficient *velo = 0;
+    VectorFunctionCoefficient *deform = 0;
 
     if (def_ic)
     {
@@ -668,24 +766,25 @@ int main(int argc, char* argv[])
     BlockVector vx_diff = BlockVector(vx);
 
     // Reduced order solution
-    Vector* wMFEM = 0;
-    CAROM::Vector* w = 0;
-    CAROM::Vector* w_v = 0;
-    CAROM::Vector* w_x = 0;
+    Vector *wMFEM = 0;
+    CAROM::Vector *w = 0;
+    CAROM::Vector *w_v = 0;
+    CAROM::Vector *w_x = 0;
 
     // Initialize reconstructed solution
-    Vector * v_rec = new Vector(v_gf.GetTrueVector());
-    Vector * x_rec = new Vector(x_gf.GetTrueVector());
+    Vector *v_rec = new Vector(v_gf.GetTrueVector());
+    Vector *x_rec = new Vector(x_gf.GetTrueVector());
 
-    CAROM::Vector* v_rec_librom = new CAROM::Vector(v_rec->GetData(), v_rec->Size(),
+    CAROM::Vector *v_rec_librom = new CAROM::Vector(v_rec->GetData(), v_rec->Size(),
             true, false);
-    CAROM::Vector* x_rec_librom = new CAROM::Vector(x_rec->GetData(), x_rec->Size(),
+    CAROM::Vector *x_rec_librom = new CAROM::Vector(x_rec->GetData(), x_rec->Size(),
             true, false);
 
     // 9. Initialize the hyperelastic operator, the GLVis visualization and print
     //    the initial energies.
     HyperelasticOperator oper(fespace, ess_tdof_list, visc, mu, K);
-    HyperelasticOperator* soper = 0;
+    NeoHookeanModel *model = new NeoHookeanModel(mu, K);
+    HyperelasticOperator *soper = 0;
 
     // Fill dvdt and dxdt
     Vector dvxdt(true_size * 2);
@@ -696,7 +795,7 @@ int main(int argc, char* argv[])
     if (visualization)
     {
         char vishost[] = "localhost";
-        int  visport = 19916;
+        int visport = 19916;
         vis_v.open(vishost, visport);
         vis_v.precision(8);
         visualize(vis_v, pmesh, &x_gf, &v_gf, "Velocity", true);
@@ -714,7 +813,7 @@ int main(int argc, char* argv[])
 
     // Create data collection for solution output: either VisItDataCollection for
     // ascii data files, or SidreDataCollection for binary data files.
-    DataCollection* dc = NULL;
+    DataCollection *dc = NULL;
     if (visit)
     {
         if (offline)
@@ -743,11 +842,12 @@ int main(int argc, char* argv[])
     }
 
     // 10. Create pROM object.
-    CAROM::BasisGenerator* basis_generator_v = 0;
-    CAROM::BasisGenerator* basis_generator_x = 0;
-    CAROM::BasisGenerator* basis_generator_H = 0;
+    CAROM::BasisGenerator *basis_generator_v = 0;
+    CAROM::BasisGenerator *basis_generator_x = 0;
+    CAROM::BasisGenerator *basis_generator_H = 0;
 
-    if (offline) {
+    if (offline)
+    {
         CAROM::Options options(fespace.GetTrueVSize(), max_num_snapshots, 1,
                                update_right_SV);
 
@@ -764,23 +864,30 @@ int main(int argc, char* argv[])
                 basisFileName + "_H");
     }
 
-    RomOperator* romop = 0;
+    RomOperator *romop = 0;
 
-    const CAROM::Matrix* BV_librom = 0;
-    const CAROM::Matrix* BX_librom = 0;
-    const CAROM::Matrix* H_librom = 0;
-    const CAROM::Matrix* Hsinv = 0;
+    const CAROM::Matrix *BV_librom = 0;
+    const CAROM::Matrix *BX_librom = 0;
+    const CAROM::Matrix *H_librom = 0;
+    const CAROM::Matrix *Hsinv = 0;
 
     int nsamp_H = -1;
 
-    CAROM::SampleMeshManager* smm = nullptr;
+    CAROM::SampleMeshManager *smm = nullptr;
+
+    CAROM::Vector *eqpSol = nullptr;
+    CAROM::Vector *eqpSol_S = nullptr;
+
+    CAROM::Vector *window_ids = nullptr;
+    CAROM::Vector *load_eqpsol = new CAROM::Vector(1,
+            false); // Will be resized later
 
     // 11. Initialize ROM operator
     // I guess most of this should be done on id =0
     if (online)
     {
         // Read bases
-        CAROM::BasisReader* readerV = 0;
+        CAROM::BasisReader *readerV = 0;
 
         if (x_base_only)
         {
@@ -829,7 +936,7 @@ int main(int argc, char* argv[])
         {
             hdim = H_librom->numColumns();
         }
-
+        CAROM::Matrix *Hsinv = new CAROM::Matrix(hdim, hdim, false);
         MFEM_VERIFY(H_librom->numColumns() >= hdim, "");
 
         if (H_librom->numColumns() > hdim)
@@ -838,52 +945,84 @@ int main(int argc, char* argv[])
         if (myid == 0)
             printf("reduced H dim = %d\n", hdim);
 
-        vector<int> num_sample_dofs_per_proc(num_procs);
+        // Setup hyperreduction, using either EQP or sampled DOFs and a sample mesh.
+        ParFiniteElementSpace *sp_XV_space;
+        const IntegrationRule *ir0 = NULL;
 
-        if (num_samples_req != -1)
+        if (ir0 == NULL)
         {
-            nsamp_H = num_samples_req;
+            const FiniteElement &fe = *fespace.GetFE(0);
+            ir0 = &(IntRules.Get(fe.GetGeomType(), 2 * fe.GetOrder() + 3));
         }
-        else
-        {
-            nsamp_H = hdim;
-        }
-
-        CAROM::Matrix* Hsinv = new CAROM::Matrix(nsamp_H, hdim, false);
-        vector<int> sample_dofs(nsamp_H);
 
-        // Setup hyperreduction using DEIM, GNAT, or S-OPT
-        CAROM::Hyperreduction hr(samplingType);
-        hr.ComputeSamples(H_librom,
-                          hdim,
-                          sample_dofs,
-                          num_sample_dofs_per_proc,
-                          *Hsinv,
-                          myid,
-                          num_procs,
-                          nsamp_H);
+        // Timewindowing setup for EQP
+        int n_step = int(t_final / dt);
 
-        // Construct sample mesh
-        const int nspaces = 1;
-        std::vector<ParFiniteElementSpace*> spfespace(nspaces);
-        spfespace[0] = &fespace;
-
-        ParFiniteElementSpace* sp_XV_space;
-
-        smm = new CAROM::SampleMeshManager(spfespace);
+        if (n_windows > 1)
+        {
+            window_ids = new CAROM::Vector(n_windows + 1, false);
+            get_window_ids(n_step, n_windows, window_ids);
+        }
 
-        vector<int> sample_dofs_empty;
-        vector<int> num_sample_dofs_per_proc_empty;
-        num_sample_dofs_per_proc_empty.assign(num_procs, 0);
+        if (use_eqp)
+        {
+            // EQP setup
+            eqpSol = new CAROM::Vector(ir0->GetNPoints() * fespace.GetNE(), true);
+            SetupEQP_snapshots(ir0, myid, &fespace, nsets, BV_librom,
+                               GetSnapshotMatrix(fespace.GetTrueVSize(), nsets, max_num_snapshots, "X"),
+                               vx0.GetBlock(0),
+                               preconditionNNLS, tolNNLS, maxNNLSnnz, model, *eqpSol, window_ids, snap_step);
+
+            if (writeSampleMesh)
+                WriteMeshEQP(pmesh, myid, ir0->GetNPoints(), *eqpSol);
+        }
+        else
+        {
+            vector<int> num_sample_dofs_per_proc(num_procs);
 
-        smm->RegisterSampledVariable("V", 0, sample_dofs,
-                                     num_sample_dofs_per_proc);
-        smm->RegisterSampledVariable("X", 0, sample_dofs,
-                                     num_sample_dofs_per_proc);
-        smm->RegisterSampledVariable("H", 0, sample_dofs,
-                                     num_sample_dofs_per_proc);
+            if (num_samples_req != -1)
+            {
+                nsamp_H = num_samples_req;
+            }
+            else
+            {
+                nsamp_H = hdim;
+            }
 
-        smm->ConstructSampleMesh();
+            Hsinv->setSize(nsamp_H, hdim);
+            vector<int> sample_dofs(nsamp_H);
+
+            // Setup hyperreduction using DEIM, GNAT, or S-OPT
+            CAROM::Hyperreduction hr(samplingType);
+            hr.ComputeSamples(H_librom,
+                              hdim,
+                              sample_dofs,
+                              num_sample_dofs_per_proc,
+                              *Hsinv,
+                              myid,
+                              num_procs,
+                              nsamp_H);
+
+            // Construct sample mesh
+            const int nspaces = 1;
+            std::vector<ParFiniteElementSpace *> spfespace(nspaces);
+            spfespace[0] = &fespace;
+
+            smm = new CAROM::SampleMeshManager(spfespace);
+
+            vector<int> sample_dofs_empty;
+            vector<int> num_sample_dofs_per_proc_empty;
+            num_sample_dofs_per_proc_empty.assign(num_procs, 0);
+
+            smm->RegisterSampledVariable("V", 0, sample_dofs,
+                                         num_sample_dofs_per_proc);
+            smm->RegisterSampledVariable("X", 0, sample_dofs,
+                                         num_sample_dofs_per_proc);
+            smm->RegisterSampledVariable("H", 0, sample_dofs,
+                                         num_sample_dofs_per_proc);
+
+            smm->ConstructSampleMesh();
+        }
 
         w = new CAROM::Vector(rxdim + rvdim, false);
         w_v = new CAROM::Vector(rvdim, false);
@@ -895,12 +1034,13 @@ int main(int argc, char* argv[])
         wMFEM = new Vector(&((*w)(0)), rxdim + rvdim);
 
         // Initial conditions
-        Vector*  w_v0 = 0;
-        Vector*  w_x0 = 0;
+        Vector *w_v0 = 0;
+        Vector *w_x0 = 0;
 
         int sp_size = 0;
+        Array<int> ess_tdof_list_sp; // Initialize sample essential boundary conditions
 
-        if (myid == 0)
+        if (myid == 0 && !use_eqp)
         {
             sp_XV_space = smm->GetSampleFESpace(0);
 
@@ -921,8 +1061,8 @@ int main(int argc, char* argv[])
             sp_v_gf.MakeTRef(sp_XV_space, sp_vx, sp_offset[0]);
             sp_x_gf.MakeTRef(sp_XV_space, sp_vx, sp_offset[1]);
 
-            VectorFunctionCoefficient* velo = 0;
-            VectorFunctionCoefficient* deform = 0;
+            VectorFunctionCoefficient *velo = 0;
+            VectorFunctionCoefficient *deform = 0;
 
             if (def_ic)
             {
@@ -954,61 +1094,72 @@ int main(int argc, char* argv[])
             w_x0 = new Vector(sp_x_gf.GetTrueVector());
         }
 
-        // Convert essential boundary list from FOM mesh to sample mesh
-        // Create binary list where 1 means essential boundary element, 0 means nonessential.
-        CAROM::Matrix Ess_mat(true_size, 1, true);
-        for (size_t i = 0; i < true_size; i++)
+        if (!use_eqp)
         {
-            Ess_mat(i,0) = 0;
-            for (size_t j = 0; j < ess_tdof_list.Size(); j++)
+            // Convert essential boundary list from FOM mesh to sample mesh
+            // Create binary list where 1 means essential boundary element, 0 means nonessential.
+            CAROM::Matrix Ess_mat(true_size, 1, true);
+            for (size_t i = 0; i < true_size; i++)
             {
-                if (ess_tdof_list[j] == i )
+                Ess_mat(i, 0) = 0;
+                for (size_t j = 0; j < ess_tdof_list.Size(); j++)
                 {
-                    Ess_mat(i,0) = 1;
+                    if (ess_tdof_list[j] == i)
+                    {
+                        Ess_mat(i, 0) = 1;
+                    }
                 }
             }
-        }
 
-        // Project binary FOM list onto sampling space
-        MPI_Bcast(&sp_size, 1, MPI_INT, 0, MPI_COMM_WORLD);
-        CAROM::Matrix Ess_mat_sp(sp_size, 1, false);
-        smm->GatherDistributedMatrixRows("X", Ess_mat, 1, Ess_mat_sp);
+            // Project binary FOM list onto sampling space
+            MPI_Bcast(&sp_size, 1, MPI_INT, 0, MPI_COMM_WORLD);
+            CAROM::Matrix Ess_mat_sp(sp_size, 1, false);
+            smm->GatherDistributedMatrixRows("X", Ess_mat, 1, Ess_mat_sp);
 
-        // Count number of true elements in new matrix
-        int num_ess_sp = 0;
+            // Count number of true elements in new matrix
+            int num_ess_sp = 0;
 
-        for (size_t i = 0; i < sp_size; i++)
-        {
-            if (Ess_mat_sp(i,0) == 1)
+            for (size_t i = 0; i < sp_size; i++)
             {
-                num_ess_sp += 1;
+                if (Ess_mat_sp(i, 0) == 1)
+                {
+                    num_ess_sp++;
+                }
             }
-        }
 
-        // Initialize essential dof list in sampling space
-        Array<int> ess_tdof_list_sp(num_ess_sp);
+            // Set essential dof list in sampling space size
+            ess_tdof_list_sp.SetSize(num_ess_sp);
 
-        // Add indices to list
-        int ctr = 0;
-        for (size_t i = 0; i < sp_size; i++)
-        {
-            if (Ess_mat_sp(i,0) == 1)
+            // Add indices to list
+            int ctr = 0;
+            for (size_t i = 0; i < sp_size; i++)
             {
-                ess_tdof_list_sp[ctr] = i;
-                ctr += 1;
+                if (Ess_mat_sp(i, 0) == 1)
+                {
+                    ess_tdof_list_sp[ctr] = i;
+                    ctr++;
+                }
             }
         }
 
         if (myid == 0)
         {
-            // Define operator in sample space
-            soper = new HyperelasticOperator(*sp_XV_space, ess_tdof_list_sp, visc, mu, K);
+            if (!use_eqp)
+            {
+                // Define operator in sample space
+                soper = new HyperelasticOperator(*sp_XV_space, ess_tdof_list_sp, visc, mu, K);
+            }
+            else
+            {
+                soper = new HyperelasticOperator(fespace, ess_tdof_list, visc, mu, K);
+            }
         }
 
-        if (hyperreduce)
-        {   romop = new RomOperator(&oper, soper, rvdim, rxdim, hdim, smm, w_v0, w_x0,
+        if (!use_eqp)
+        {
+            romop = new RomOperator(&oper, soper, rvdim, rxdim, hdim, smm, w_v0, w_x0,
                                     vx0.GetBlock(0), BV_librom, BX_librom, H_librom, Hsinv, myid,
-                                    num_samples_req != -1, hyperreduce, x_base_only);
+                                    num_samples_req != -1, hyperreduce, x_base_only, use_eqp, eqpSol, ir0, model);
         }
         else
         {
@@ -1016,16 +1167,16 @@ int main(int argc, char* argv[])
                                     &(vx0.GetBlock(0)),
                                     &(vx0.GetBlock(1)), vx0.GetBlock(0), BV_librom, BX_librom, H_librom, Hsinv,
                                     myid,
-                                    num_samples_req != -1, hyperreduce, x_base_only);
+                                    num_samples_req != -1, hyperreduce, x_base_only, use_eqp, eqpSol, ir0, model);
         }
 
         // Print lifted initial energies
         BroadcastUndistributedRomVector(w);
 
-        for (int i=0; i<rvdim; ++i)
+        for (int i = 0; i < rvdim; ++i)
             (*w_v)(i) = (*w)(i);
 
-        for (int i=0; i<rxdim; ++i)
+        for (int i = 0; i < rxdim; ++i)
             (*w_x)(i) = (*w)(rvdim + i);
 
         romop->V_v.mult(*w_v, *v_rec_librom);
@@ -1044,11 +1195,9 @@ int main(int argc, char* argv[])
         {
             cout << "Lifted initial energies, EE = " << ee
                  << ", KE = " << ke << ", ΔTE = " << (ee + ke) - (ee0 + ke0) << endl;
-
         }
 
         ode_solver->Init(*romop);
-
     }
     else
     {
@@ -1065,6 +1214,9 @@ int main(int argc, char* argv[])
     oper.SetTime(t);
 
     bool last_step = false;
+
+    int current_window = 0;
+
     for (int ti = 1; !last_step; ti++)
     {
         double dt_real = min(dt, t_final - t);
@@ -1073,6 +1225,16 @@ int main(int argc, char* argv[])
         {
             if (myid == 0)
             {
+                if (use_eqp && window_ids && current_window < n_windows
+                        && ti == window_ids->item(current_window))
+                {
+                    // Load eqp and reinitialize ROM operator
+                    cout << "Time window start at " << ti << endl;
+                    get_EQPsol(current_window, load_eqpsol);
+                    romop->SetEQP(load_eqpsol);
+                    ode_solver->Init(*romop);
+                    current_window++;
+                }
                 solveTimer.Start();
                 ode_solver->Step(*wMFEM, t, dt_real);
                 solveTimer.Stop();
@@ -1091,7 +1253,6 @@ int main(int argc, char* argv[])
 
         if (offline)
         {
-
             if (basis_generator_x->isNextSample(t) || x_base_only == false
                     && basis_generator_v->isNextSample(t))
             {
@@ -1128,10 +1289,10 @@ int main(int argc, char* argv[])
             {
                 BroadcastUndistributedRomVector(w);
 
-                for (int i=0; i<rvdim; ++i)
+                for (int i = 0; i < rvdim; ++i)
                     (*w_v)(i) = (*w)(i);
 
-                for (int i=0; i<rxdim; ++i)
+                for (int i = 0; i < rxdim; ++i)
                     (*w_x)(i) = (*w)(rvdim + i);
 
                 romop->V_v.mult(*w_v, *v_rec_librom);
@@ -1142,13 +1303,11 @@ int main(int argc, char* argv[])
 
                 v_gf.SetFromTrueDofs(*v_rec);
                 x_gf.SetFromTrueDofs(*x_rec);
-
             }
             else
             {
                 v_gf.SetFromTrueVector();
                 x_gf.SetFromTrueVector();
-
             }
 
             double ee = oper.ElasticEnergy(x_gf);
@@ -1172,7 +1331,7 @@ int main(int argc, char* argv[])
 
             if (visit)
             {
-                GridFunction* nodes = &x_gf;
+                GridFunction *nodes = &x_gf;
                 int owns_nodes = 0;
                 pmesh->SwapNodes(nodes, owns_nodes);
 
@@ -1184,13 +1343,14 @@ int main(int argc, char* argv[])
 
     } // timestep loop
 
-    if (myid == 0) cout << "Elapsed time for time integration loop " <<
-                            solveTimer.RealTime() << endl;
+    if (myid == 0)
+        cout << "Elapsed time for time integration loop " << solveTimer.RealTime() <<
+             endl;
 
     ostringstream velo_name, pos_name;
 
-    velo_name << "velocity_s"<< s << "." << setfill('0') << setw(6) << myid;
-    pos_name << "position_s"<< s << "." << setfill('0') << setw(6) << myid;
+    velo_name << "velocity_s" << s << "." << setfill('0') << setw(6) << myid;
+    pos_name << "position_s" << s << "." << setfill('0') << setw(6) << myid;
 
     if (offline)
     {
@@ -1217,7 +1377,7 @@ int main(int argc, char* argv[])
         delete basis_generator_x;
 
         // 14. Save the displaced mesh, the velocity and elastic energy.
-        GridFunction* nodes = &x_gf;
+        GridFunction *nodes = &x_gf;
         int owns_nodes = 0;
         pmesh->SwapNodes(nodes, owns_nodes);
 
@@ -1251,7 +1411,6 @@ int main(int argc, char* argv[])
         ee_ofs.precision(8);
         oper.GetElasticEnergyDensity(x_gf, w_gf);
         w_gf.Save(ee_ofs);
-
     }
 
     // 15. Calculate the relative error between the ROM final solution and the true solution.
@@ -1301,30 +1460,43 @@ int main(int argc, char* argv[])
     // 16. Free the used memory.
     delete ode_solver;
     delete pmesh;
+    delete romop;
+    delete soper;
+
+    delete BV_librom;
+    delete BX_librom;
+    delete H_librom;
+    delete Hsinv;
+    delete smm;
+    delete eqpSol;
+    delete eqpSol_S;
+    delete window_ids;
+    delete load_eqpsol;
 
     totalTimer.Stop();
-    if (myid == 0) cout << "Elapsed time for entire simulation " <<
-                            totalTimer.RealTime() << endl;
+    if (myid == 0)
+        cout << "Elapsed time for entire simulation " << totalTimer.RealTime() << endl;
 
     MPI_Finalize();
     return 0;
 }
 
-void visualize(ostream& out, ParMesh* mesh, ParGridFunction* deformed_nodes,
-               ParGridFunction* field, const char* field_name, bool init_vis)
+void visualize(ostream &out, ParMesh *mesh, ParGridFunction *deformed_nodes,
+               ParGridFunction *field, const char *field_name, bool init_vis)
 {
     if (!out)
     {
         return;
     }
 
-    GridFunction* nodes = deformed_nodes;
+    GridFunction *nodes = deformed_nodes;
     int owns_nodes = 0;
 
     mesh->SwapNodes(nodes, owns_nodes);
 
     out << "parallel " << mesh->GetNRanks() << " " << mesh->GetMyRank() << "\n";
-    out << "solution\n" << *mesh << *field;
+    out << "solution\n"
+        << *mesh << *field;
 
     mesh->SwapNodes(nodes, owns_nodes);
 
@@ -1343,14 +1515,14 @@ void visualize(ostream& out, ParMesh* mesh, ParGridFunction* deformed_nodes,
     out << flush;
 }
 
-HyperelasticOperator::HyperelasticOperator(ParFiniteElementSpace& f,
-        Array<int>& ess_tdof_list_, double visc,
+HyperelasticOperator::HyperelasticOperator(ParFiniteElementSpace &f,
+        Array<int> &ess_tdof_list_, double visc,
         double mu, double K)
     : TimeDependentOperator(2 * f.TrueVSize(), 0.0), fespace(f),
       ess_tdof_list(ess_tdof_list_),
       M(NULL), S(NULL), H(NULL),
       viscosity(visc), M_solver(f.GetComm()),
-      z(height / 2), z2(height / 2), H_sp(height/2), dvxdt_sp(height/2)
+      z(height / 2), z2(height / 2), H_sp(height / 2), dvxdt_sp(height / 2)
 {
     const double rel_tol = 1e-8;
     const int skip_zero_entries = 0;
@@ -1363,13 +1535,13 @@ HyperelasticOperator::HyperelasticOperator(ParFiniteElementSpace& f,
     M->Assemble(skip_zero_entries);
     M->Finalize(skip_zero_entries);
     Mmat = M->ParallelAssemble();
-    HypreParMatrix* Me = Mmat->EliminateRowsCols(ess_tdof_list);
+    HypreParMatrix *Me = Mmat->EliminateRowsCols(ess_tdof_list);
     delete Me;
 
     M_solver.iterative_mode = false;
     M_solver.SetRelTol(rel_tol);
     M_solver.SetAbsTol(0.0);
-    M_solver.SetMaxIter(30);
+    M_solver.SetMaxIter(1000);
     M_solver.SetPrintLevel(0);
     M_prec.SetType(HypreSmoother::Jacobi);
     M_solver.SetPreconditioner(M_prec);
@@ -1388,7 +1560,7 @@ HyperelasticOperator::HyperelasticOperator(ParFiniteElementSpace& f,
     S->FormSystemMatrix(ess_tdof_list, Smat);
 }
 
-void HyperelasticOperator::Mult(const Vector& vx, Vector& dvx_dt) const
+void HyperelasticOperator::Mult(const Vector &vx, Vector &dvx_dt) const
 {
     // Create views to the sub-vectors v, x of vx, and dv_dt, dx_dt of dvx_dt
     int sc = height / 2;
@@ -1413,12 +1585,12 @@ void HyperelasticOperator::Mult(const Vector& vx, Vector& dvx_dt) const
     dvxdt_sp = dvx_dt;
 }
 
-double HyperelasticOperator::ElasticEnergy(const ParGridFunction& x) const
+double HyperelasticOperator::ElasticEnergy(const ParGridFunction &x) const
 {
     return H->GetEnergy(x);
 }
 
-double HyperelasticOperator::KineticEnergy(const ParGridFunction& v) const
+double HyperelasticOperator::KineticEnergy(const ParGridFunction &v) const
 {
     double loc_energy = 0.5 * M->InnerProduct(v, v);
     double energy;
@@ -1428,7 +1600,7 @@ double HyperelasticOperator::KineticEnergy(const ParGridFunction& v) const
 }
 
 void HyperelasticOperator::GetElasticEnergyDensity(
-    const ParGridFunction& x, ParGridFunction& w) const
+    const ParGridFunction &x, ParGridFunction &w) const
 {
     ElasticEnergyCoefficient w_coeff(*model, x);
     w.ProjectCoefficient(w_coeff);
@@ -1443,8 +1615,8 @@ HyperelasticOperator::~HyperelasticOperator()
     delete H;
 }
 
-double ElasticEnergyCoefficient::Eval(ElementTransformation& T,
-                                      const IntegrationPoint& ip)
+double ElasticEnergyCoefficient::Eval(ElementTransformation &T,
+                                      const IntegrationPoint &ip)
 {
     model.SetTransformation(T);
     x.GetVectorGradient(T, J);
@@ -1457,64 +1629,75 @@ void InitialDeformationIC1(const Vector &x, Vector &y)
     y = x;
 }
 
-void InitialVelocityIC1(const Vector& x, Vector& v)
+void InitialVelocityIC1(const Vector &x, Vector &v)
 {
     const int dim = x.Size();
     const double s_eff = s / 80.0;
 
     v = 0.0;
-    v(dim - 1) = -s_eff * sin(s*x(0));
+    v(dim - 1) = -s_eff * sin(s * x(0));
 }
 
-void InitialDeformationIC2(const Vector &x, Vector &y) //See MFEM ex19
+void InitialDeformationIC2(const Vector &x, Vector &y) // See MFEM ex19
 {
     // Set the initial configuration. Having this different from the reference
     // configuration can help convergence
     const int dim = x.Size();
     const double s_eff = s;
     y = x;
-    y(dim-1) = x(dim - 1) + 0.25*x(0) * s_eff;
+    y(dim - 1) = x(dim - 1) + 0.25 * x(0) * s_eff;
 }
 
-void InitialVelocityIC2(const Vector& x, Vector& v)
+void InitialVelocityIC2(const Vector &x, Vector &v)
 {
     v = 0.0;
 }
 
-RomOperator::RomOperator(HyperelasticOperator* fom_,
-                         HyperelasticOperator* fomSp_, const int rvdim_, const int rxdim_,
-                         const int hdim_, CAROM::SampleMeshManager* smm_, const Vector* v0_,
-                         const Vector* x0_, const Vector v0_fom_, const CAROM::Matrix* V_v_,
-                         const CAROM::Matrix* V_x_, const CAROM::Matrix* U_H_,
-                         const CAROM::Matrix* Hsinv_, const int myid, const bool oversampling_,
-                         const bool hyperreduce_, const bool x_base_only_)
+RomOperator::RomOperator(HyperelasticOperator *fom_,
+                         HyperelasticOperator *fomSp_, const int rvdim_, const int rxdim_,
+                         const int hdim_, CAROM::SampleMeshManager *smm_, const Vector *v0_,
+                         const Vector *x0_, const Vector v0_fom_, const CAROM::Matrix *V_v_,
+                         const CAROM::Matrix *V_x_, const CAROM::Matrix *U_H_,
+                         const CAROM::Matrix *Hsinv_, const int myid, const bool oversampling_,
+                         const bool hyperreduce_, const bool x_base_only_, const bool use_eqp,
+                         CAROM::Vector *eqpSol, const IntegrationRule *ir_eqp_, NeoHookeanModel *model_)
     : TimeDependentOperator(rxdim_ + rvdim_, 0.0), fom(fom_), fomSp(fomSp_),
       rxdim(rxdim_), rvdim(rvdim_), hdim(hdim_), x0(x0_), v0(v0_), v0_fom(v0_fom_),
-      smm(smm_), nsamp_H(smm_->GetNumVarSamples("H")), V_x(*V_x_), V_v(*V_v_),
-      U_H(U_H_), Hsinv(Hsinv_), zN(std::max(nsamp_H, 1), false), zX(std::max(nsamp_H,
-              1), false), M_hat_solver(fom_->fespace.GetComm()), oversampling(oversampling_),
-      z(height / 2), hyperreduce(hyperreduce_), x_base_only(x_base_only_)
+      smm(smm_), V_x(*V_x_), V_v(*V_v_), U_H(U_H_), Hsinv(Hsinv_),
+      M_hat_solver(fom_->fespace.GetComm()), oversampling(oversampling_),
+      z(height / 2), hyperreduce(hyperreduce_), x_base_only(x_base_only_),
+      eqp(use_eqp),
+      ir_eqp(ir_eqp_), model(model_), rank(myid)
 {
-    if (myid == 0)
+    if (!eqp)
+    {
+        nsamp_H = smm_->GetNumVarSamples("H");
+        zN = CAROM::Vector(std::max(nsamp_H, 1), false);
+        zX = CAROM::Vector(std::max(nsamp_H, 1), false);
+    }
+
+    if (myid == 0 && !eqp)
     {
         V_v_sp = new CAROM::Matrix(fomSp->Height() / 2, rvdim, false);
         V_x_sp = new CAROM::Matrix(fomSp->Height() / 2, rxdim, false);
     }
 
     // Gather distributed vectors
-    if (x_base_only)
-    {
-        smm->GatherDistributedMatrixRows("X", V_v, rvdim, *V_v_sp);
-    }
-    else
+    if (!eqp)
     {
-        smm->GatherDistributedMatrixRows("V", V_v, rvdim, *V_v_sp);
-    }
-
-    smm->GatherDistributedMatrixRows("X", V_x, rxdim, *V_x_sp);
+        if (x_base_only)
+        {
+            smm->GatherDistributedMatrixRows("X", V_v, rvdim, *V_v_sp);
+        }
+        else
+        {
+            smm->GatherDistributedMatrixRows("V", V_v, rvdim, *V_v_sp);
+        }
 
-    // Create V_vTU_H, for hyperreduction
-    V_v.transposeMult(*U_H, V_vTU_H);
+        smm->GatherDistributedMatrixRows("X", V_x, rxdim, *V_x_sp);
+        // Create V_vTU_H, for hyperreduction
+        V_v.transposeMult(*U_H, V_vTU_H);
+    }
 
     S_hat = new CAROM::Matrix(rvdim, rvdim, false);
     S_hat_v0 = new CAROM::Vector(rvdim, false);
@@ -1524,59 +1707,64 @@ RomOperator::RomOperator(HyperelasticOperator* fom_,
     M_hat = new CAROM::Matrix(rvdim, rvdim, false);
     M_hat_inv = new CAROM::Matrix(rvdim, rvdim, false);
 
+    // Set the max iterations for the mass matrix solver
+    M_hat_solver.SetMaxIter(1000);
+
     // Create S_hat
     ComputeCtAB(fom->Smat, V_v, V_v, *S_hat);
-
     // Apply S_hat to the initial velocity and store
     fom->Smat.Mult(v0_fom, *S_hat_v0_temp);
     V_v.transposeMult(*S_hat_v0_temp_librom, S_hat_v0);
-
     // Create M_hat
-    ComputeCtAB(*fom->Mmat, V_v, V_v, *M_hat);
+    ComputeCtAB(*(fom->Mmat), V_v, V_v, *M_hat);
 
     // Invert M_hat and store
     M_hat->inverse(*M_hat_inv);
 
-    if (myid == 0)
+    if (myid == 0 && hyperreduce)
     {
-        const int spdim = fomSp->Height();  // Reduced height
-
-        zH.SetSize(spdim / 2); // Samples of H
-
-        // Allocate auxillary vectors
-        z.SetSize(spdim / 2);
-        z_v.SetSize(spdim / 2);
-        z_x.SetSize(spdim / 2);
-        z_librom = new CAROM::Vector(z.GetData(), z.Size(), false, false);
-        z_v_librom = new CAROM::Vector(z_v.GetData(), z_v.Size(), false, false);
-        z_x_librom = new CAROM::Vector(z_x.GetData(), z_x.Size(), false, false);
-
-        // This is for saving the recreated predictions
-        psp_librom = new CAROM::Vector(spdim, false);
-        psp = new Vector(&((*psp_librom)(0)), spdim);
-
-        // Define sub-vectors of psp.
-        psp_x = new Vector(psp->GetData(), spdim / 2);
-        psp_v = new Vector(psp->GetData() + spdim / 2, spdim / 2);
-
-        psp_v_librom = new CAROM::Vector(psp_v->GetData(), psp_v->Size(), false, false);
+        if (!eqp)
+        {
+            const int spdim = fomSp->Height(); // Reduced height
+
+            // Allocate auxillary vectors
+            z.SetSize(spdim / 2);
+            z_v.SetSize(spdim / 2);
+            z_x.SetSize(spdim / 2);
+            zH.SetSize(spdim / 2); // Samples of H
+            z_librom = new CAROM::Vector(z.GetData(), z.Size(), false, false);
+            z_v_librom = new CAROM::Vector(z_v.GetData(), z_v.Size(), false, false);
+            z_x_librom = new CAROM::Vector(z_x.GetData(), z_x.Size(), false, false);
+
+            v0_librom = new CAROM::Vector(v0->GetData(), v0->Size(), false, false);
+            V_xTV_v_sp = new CAROM::Matrix(rxdim, rvdim, false);
+            V_xTv_0_sp = new CAROM::Vector(spdim / 2, false);
+            V_x_sp->transposeMult(*V_v_sp, *V_xTV_v_sp);
+            V_x_sp->transposeMult(*v0_librom, V_xTv_0_sp);
+
+            // This is for saving the recreated predictions
+            psp_librom = new CAROM::Vector(spdim, false);
+            psp = new Vector(&((*psp_librom)(0)), spdim);
+
+            // Define sub-vectors of psp.
+            psp_x = new Vector(psp->GetData(), spdim / 2);
+            psp_v = new Vector(psp->GetData() + spdim / 2, spdim / 2);
+
+            psp_v_librom = new CAROM::Vector(psp_v->GetData(), psp_v->Size(), false, false);
+        }
+        else
+        {
+            z.SetSize(rvdim);
+            z_librom = new CAROM::Vector(z.GetData(), z.Size(), false, false);
+        }
     }
 
-    if (!hyperreduce)
+    const int fdim = fom->Height(); // Unreduced height
+    if (!hyperreduce || (eqp && myid == 0))
     {
-        const int fdim = fom->Height(); // Unreduced height
-
-        z.SetSize(fdim / 2);
-        z_v.SetSize(fdim / 2);
-        z_x.SetSize(fdim / 2);
-        z_librom = new CAROM::Vector(z.GetData(), z.Size(), false, false);
-        z_v_librom = new CAROM::Vector(z_v.GetData(), z_v.Size(), true, false);
-        z_x_librom = new CAROM::Vector(z_x.GetData(), z_x.Size(), true, false);
-
         // This is for saving the recreated predictions
         pfom_librom = new CAROM::Vector(fdim, false);
         pfom = new Vector(&((*pfom_librom)(0)), fdim);
-
         // Define sub-vectors of pfom.
         pfom_x = new Vector(pfom->GetData(), fdim / 2);
         pfom_v = new Vector(pfom->GetData() + fdim / 2, fdim / 2);
@@ -1584,10 +1772,96 @@ RomOperator::RomOperator(HyperelasticOperator* fom_,
         zfom_x_librom = new CAROM::Vector(zfom_x->GetData(), zfom_x->Size(), true,
                                           false);
 
-        pfom_v_librom = new CAROM::Vector(pfom_v->GetData(), pfom_v->Size(), true,
+        pfom_v_librom = new CAROM::Vector(pfom_v->GetData(), pfom_v->Size(), false,
                                           false);
+
+        // Auxiliary vectors
+        z_v.SetSize(fdim / 2);
+        z_v_librom = new CAROM::Vector(z_v.GetData(), z_v.Size(), true, false);
+        v0_fom_librom = new CAROM::Vector(v0_fom.GetData(), v0_fom.Size(), true, false);
+        V_xTV_v = new CAROM::Matrix(rxdim, rvdim, false);
+        V_xTv_0 = new CAROM::Vector(fdim / 2, false);
+        V_x.transposeMult(V_v, V_xTV_v);
+        V_x.transposeMult(*v0_fom_librom, V_xTv_0);
+        Vx_librom_temp = new CAROM::Vector(fdim / 2, true);
+        Vx_temp = new Vector(&((*Vx_librom_temp)(0)), fdim / 2);
     }
 
+    if (!hyperreduce)
+    {
+        z.SetSize(fdim / 2);
+
+        z_x.SetSize(fdim / 2);
+        z_librom = new CAROM::Vector(z.GetData(), z.Size(), false, false);
+
+        z_x_librom = new CAROM::Vector(z_x.GetData(), z_x.Size(), true, false);
+    }
+
+    if (eqp)
+    {
+        std::set<int> elements;
+        const int nqe = ir_eqp->GetWeights().Size();
+
+        for (int i = 0; i < eqpSol->dim(); ++i)
+        {
+            if ((*eqpSol)(i) > 1.0e-12)
+            {
+                const int e = i / nqe; // Element index
+                elements.insert(e);
+                eqp_rw.push_back((*eqpSol)(i));
+                eqp_qp.push_back(i);
+            }
+        }
+
+        cout << myid << ": EQP using " << elements.size() << " elements out of "
+             << fom->fespace.GetNE() << endl;
+        const FiniteElement &fe1 = *(fom->fespace).GetFE(0);
+        const int dof = fe1.GetDof();
+        const int dim = fe1.GetDim();
+        em = new ElemMatrices(dof, dim);
+
+        GetEQPCoefficients_HyperelasticNLFIntegrator(&(fom->fespace), eqp_rw, eqp_qp,
+                ir_eqp, model, V_v, rank, eqp_coef, eqp_DS_coef, em);
+
+        // Set the matrix of the linear map from the ROM X space to all DOFs on
+        // sampled elements.
+        Array<int> vdofs;
+        int numSampledDofs = 0;
+        for (auto e : elements)
+        {
+            fom->fespace.GetElementVDofs(e, vdofs);
+            numSampledDofs += vdofs.Size();
+        }
+
+        eqp_lifting.setSize(numSampledDofs, rxdim);
+        eqp_liftDOFs.resize(numSampledDofs);
+        eqp_lifted.setSize(numSampledDofs);
+
+        int cnt = 0;
+        for (auto e : elements)
+        {
+            fom->fespace.GetElementVDofs(e, vdofs);
+            for (int i = 0; i < vdofs.Size(); ++i, ++cnt)
+                eqp_liftDOFs[cnt] =
+                    vdofs[i]; // eqp_liftDOFs maps an index of eqp_lifted to the FOM dof index
+        }
+
+        MFEM_VERIFY(cnt == numSampledDofs, "");
+
+        CAROM::Vector ej(rxdim, false);
+
+        for (int j = 0; j < rxdim; ++j)
+        {
+            ej = 0.0;
+            ej(j) = 1.0;
+            V_x.mult(ej, *zfom_x_librom);
+
+            for (int i = 0; i < numSampledDofs; ++i)
+            {
+                eqp_lifting(i, j) = (*zfom_x)[eqp_liftDOFs[i]];
+            }
+        }
+    }
 }
 
 RomOperator::~RomOperator()
@@ -1597,7 +1871,7 @@ RomOperator::~RomOperator()
     delete M_hat_inv;
 }
 
-void RomOperator::Mult_Hyperreduced(const Vector& vx, Vector& dvx_dt) const
+void RomOperator::Mult_Hyperreduced(const Vector &vx, Vector &dvx_dt) const
 {
     // Check that the sizes match
     MFEM_VERIFY(vx.Size() == rvdim + rxdim && dvx_dt.Size() == rvdim + rxdim, "");
@@ -1611,33 +1885,69 @@ void RomOperator::Mult_Hyperreduced(const Vector& vx, Vector& dvx_dt) const
     CAROM::Vector dv_dt_librom(dv_dt.GetData(), rvdim, false, false);
     CAROM::Vector dx_dt_librom(dx_dt.GetData(), rxdim, false, false);
 
-    // Lift the x-, and v-vectors
-    // I.e. perform v = v0 + V_v v^, where v^ is the input
-    V_v_sp->mult(v_librom, *z_v_librom);
-    V_x_sp->mult(x_librom, *z_x_librom);
+    if (eqp)
+    {   // Lift v-vector and save
+        V_xTV_v->mult(v_librom, *pfom_v_librom);
+        pfom_v_librom->plus(*V_xTv_0, dx_dt_librom);
+        Vector resEQP;
+        if (fastIntegration)
+        {
+            HyperelasticNLFIntegrator_ComputeReducedEQP_Fast(&(fom->fespace), eqp_rw,
+                    eqp_qp, ir_eqp, model, x0, rvdim, x_librom,eqp_coef, eqp_DS_coef, rank, resEQP,
+                    em, eqp_lifting, eqp_liftDOFs, eqp_lifted);
+        }
+        else
+            HyperelasticNLFIntegrator_ComputeReducedEQP(&(fom->fespace), eqp_rw, eqp_qp,
+                    ir_eqp, model, x0, V_v, x_librom, rank, resEQP, em, eqp_lifting, eqp_liftDOFs,
+                    eqp_lifted);
+        Vector recv(resEQP);
+        MPI_Allreduce(resEQP.GetData(), recv.GetData(), resEQP.Size(), MPI_DOUBLE,
+                      MPI_SUM, MPI_COMM_WORLD);
+        resEQP = recv;
+
+        // NOTE: in the hyperreduction case, the residual is of dimension nldim,
+        // which is the dimension of the ROM space for the nonlinear term.
+        // In the EQP case, there is no use of a ROM space for the nonlinear
+        // term. Instead, the FOM computation of the nonlinear term is
+        // approximated by the reduced quadrature rule in the FOM space.
+        // Therefore, the residual here is of dimension rxdim.
+        z = 0.0;
+        MFEM_VERIFY(resEQP.Size() == rvdim, "");
+        for (int i = 0; i < rvdim; ++i)
+            z[i] += resEQP[i];
+    }
+    else
+    {
+        // Lift x- and v-vector
+        // I.e. perform v = v0 + V_v v^, where v^ is the input
+        V_xTV_v_sp->mult(v_librom, *z_v_librom);
+        z_v_librom->plus(*V_xTv_0_sp, dx_dt_librom); // Store dx/dt
 
-    add(z_v, *v0, *psp_v); // Store liftings
-    add(z_x, *x0, *psp_x);
+        V_x_sp->mult(x_librom, *z_x_librom);
 
-    // Hyperreduce H
-    // Apply H to x to get zH
-    fomSp->H->Mult(*psp_x, zH);
+        // Store liftings
+        add(z_x, *x0, *psp_x);
 
-    // Sample the values from zH
-    smm->GetSampledValues("H", zH, zN);
+        // Hyperreduce H
+        // Apply H to x to get zH
+        fomSp->H->Mult(*psp_x, zH);
 
-    // Apply inverse H-basis
-    if (oversampling)
-    {
-        Hsinv->transposeMult(zN, zX);
-    }
-    else
-    {
-        Hsinv->mult(zN, zX);
-    }
+        // Sample the values from zH
+        smm->GetSampledValues("H", zH, zN);
+
+        // Apply inverse H-basis
+        if (oversampling)
+        {
+            Hsinv->transposeMult(zN, zX);
+        }
+        else
+        {
+            Hsinv->mult(zN, zX);
+        }
 
-    // Multiply by V_v^T * U_H
-    V_vTU_H.mult(zX, z_librom);
+        // Multiply by V_v^T * U_H
+        V_vTU_H.mult(zX, z_librom);
+    }
 
     if (fomSp->viscosity != 0.0)
     {
@@ -1645,16 +1955,12 @@ void RomOperator::Mult_Hyperreduced(const Vector& vx, Vector& dvx_dt) const
         S_hat->multPlus(*z_librom, v_librom, 1.0);
         *z_librom += *S_hat_v0;
     }
-
     z.Neg(); // z = -z
     M_hat_inv->mult(*z_librom,
                     dv_dt_librom); // to invert reduced mass matrix operator.
-
-    V_x_sp->transposeMult(*psp_v_librom, dx_dt_librom);
-
 }
 
-void RomOperator::Mult_FullOrder(const Vector& vx, Vector& dvx_dt) const
+void RomOperator::Mult_FullOrder(const Vector &vx, Vector &dvx_dt) const
 {
     // Check that the sizes match
     MFEM_VERIFY(vx.Size() == rvdim + rxdim && dvx_dt.Size() == rvdim + rxdim, "");
@@ -1670,11 +1976,13 @@ void RomOperator::Mult_FullOrder(const Vector& vx, Vector& dvx_dt) const
 
     // Lift the x-, and v-vectors
     // I.e. perform v = v0 + V_v v^, where v^ is the input
+
+    V_xTV_v->mult(v_librom, *pfom_v_librom);
+    pfom_v_librom->plus(*V_xTv_0, dx_dt_librom);
+
     V_x.mult(x_librom, *z_x_librom);
-    V_v.mult(v_librom, *z_v_librom);
 
     add(z_x, *x0, *pfom_x); // Store liftings
-    add(z_v, *v0, *pfom_v);
 
     // Apply H to x to get z
     fom->H->Mult(*pfom_x, *zfom_x);
@@ -1691,16 +1999,790 @@ void RomOperator::Mult_FullOrder(const Vector& vx, Vector& dvx_dt) const
     z.Neg(); // z = -z
     M_hat_inv->mult(*z_librom,
                     dv_dt_librom); // to invert reduced mass matrix operator.
-
-    V_x.transposeMult(*pfom_v_librom, dx_dt_librom);
-
 }
 
-void RomOperator::Mult(const Vector& vx, Vector& dvx_dt) const
+void RomOperator::Mult(const Vector &vx, Vector &dvx_dt) const
 {
-
     if (hyperreduce)
         Mult_Hyperreduced(vx, dvx_dt);
     else
         Mult_FullOrder(vx, dvx_dt);
 }
+
+void RomOperator::SetEQP(CAROM::Vector *eqpSol)
+{
+    std::set<int> elements;
+    const int nqe = ir_eqp->GetWeights().Size();
+    eqp_rw.clear();
+    eqp_qp.clear();
+
+    for (int i = 0; i < eqpSol->dim(); ++i)
+    {
+        if ((*eqpSol)(i) > 1.0e-12)
+        {
+            const int e = i / nqe; // Element index
+            elements.insert(e);
+            eqp_rw.push_back((*eqpSol)(i));
+            eqp_qp.push_back(i);
+        }
+    }
+
+    cout << rank << ": EQP using " << elements.size() << " elements out of "
+         << fom->fespace.GetNE() << endl;
+
+    GetEQPCoefficients_HyperelasticNLFIntegrator(&(fom->fespace), eqp_rw, eqp_qp,
+            ir_eqp, model, V_v, rank, eqp_coef, eqp_DS_coef, em);
+
+    // Set the matrix of the linear map from the ROM X space to all DOFs on
+    // sampled elements.
+    Array<int> vdofs;
+    int numSampledDofs = 0;
+    for (auto e : elements)
+    {
+        fom->fespace.GetElementVDofs(e, vdofs);
+        numSampledDofs += vdofs.Size();
+    }
+
+    eqp_lifting.setSize(numSampledDofs, rxdim);
+    eqp_liftDOFs.resize(numSampledDofs);
+    eqp_lifted.setSize(numSampledDofs);
+
+    int cnt = 0;
+    for (auto e : elements)
+    {
+        fom->fespace.GetElementVDofs(e, vdofs);
+        for (int i = 0; i < vdofs.Size(); ++i, ++cnt)
+            eqp_liftDOFs[cnt] =
+                vdofs[i]; // eqp_liftDOFs maps an index of eqp_lifted to the FOM dof index
+    }
+
+    MFEM_VERIFY(cnt == numSampledDofs, "");
+
+    CAROM::Vector ej(rxdim, false);
+
+    for (int j = 0; j < rxdim; ++j)
+    {
+        ej = 0.0;
+        ej(j) = 1.0;
+        V_x.mult(ej, *zfom_x_librom);
+
+        for (int i = 0; i < numSampledDofs; ++i)
+        {
+            eqp_lifting(i, j) = (*zfom_x)[eqp_liftDOFs[i]];
+        }
+    }
+}
+
+// Functions for EQP functionality
+
+// Compute coefficients of the reduced integrator with respect to inputs Q and x
+// in HyperelasticNLFIntegrator_ComputeReducedEQP.
+void GetEQPCoefficients_HyperelasticNLFIntegrator(ParFiniteElementSpace *fesR,
+        std::vector<double> const &rw, std::vector<int> const &qp,
+        const IntegrationRule *ir, NeoHookeanModel *model,
+        CAROM::Matrix const &V_v, const int rank, Vector &coef, Vector &DS_coef,
+        ElemMatrices *em)
+{
+    const int rvdim = V_v.numColumns();
+    const int fomdim = V_v.numRows();
+    MFEM_VERIFY(rw.size() == qp.size(), "");
+    MFEM_VERIFY(fomdim == fesR->GetTrueVSize(), "");
+
+    MFEM_VERIFY(rank == 0,
+                "TODO: generalize to parallel. This uses full dofs in V, which has true dofs");
+
+    const int nqe = ir->GetWeights().Size();
+
+    DofTransformation *doftrans;
+    ElementTransformation *eltrans;
+    Array<int> vdofs;
+
+    int eprev = -1;
+    int dof = 0;
+    int dim = 0;
+
+    // Get prolongation matrix
+    const Operator *P = fesR->GetProlongationMatrix();
+
+    // Vector to be prolongated
+    Vector vj(fomdim);
+
+    // Prolongated vector
+    Vector p_vj(P->Height());
+
+    // Element vector
+    Vector vj_e;
+    // Get the element vector size
+    doftrans = fesR->GetElementVDofs(0, vdofs);
+    int elvect_size = vdofs.Size();
+
+    // Coefficient vector
+    coef.SetSize(elvect_size * rw.size() * rvdim);
+    coef = 0.0;
+
+    // Vector for storing DS
+    const FiniteElement *fe = fesR->GetFE(0);
+    dof = fe->GetDof();
+    dim = fe->GetDim();
+    DS_coef.SetSize(dof * dim * rw.size());
+    DS_coef = 0.0;
+    int index = 0;
+    eprev = -1;
+    // For every quadrature weight
+    for (int i = 0; i < rw.size(); ++i)
+    {
+        const int e = qp[i] / nqe; // Element index
+        // Local (element) index of the quadrature point
+        const int qpi = qp[i] - (e * nqe);
+        const IntegrationPoint &ip = ir->IntPoint(qpi);
+
+        if (e != eprev) // Update element transformation
+        {
+            doftrans = fesR->GetElementVDofs(e, vdofs);
+            eltrans = fesR->GetElementTransformation(e);
+
+            if (doftrans)
+            {
+                MFEM_ABORT("TODO: Implementation for when `doftrans` is not NULL");
+            }
+            eprev = e;
+        }
+
+        // Set integration point in the element transformation
+        eltrans->SetIntPoint(&ip);
+        model->SetTransformation(*eltrans);
+        // Get the transformation weight
+        const double t = eltrans->Weight();
+
+        // Calculate DS and store
+        CalcInverse(eltrans->Jacobian(), em->Jrt);
+        fe->CalcDShape(ip, em->DSh);
+        Mult(em->DSh, em->Jrt, em->DS);
+        for (int ii = 0; ii < dof; ++ii)
+        {
+            for (int jj = 0; jj < dim; ++jj)
+            {
+                index = jj + ii * dim;
+                DS_coef[index + (i * dof * dim)] = em->DS.Elem(ii, jj);
+            }
+        }
+        // For every basis vector
+        for (int j = 0; j < rvdim; ++j)
+        {
+            // Get basis vector and prolongate
+            for (int k = 0; k < V_v.numRows(); ++k)
+                vj[k] = V_v(k, j);
+            P->Mult(vj, p_vj);
+
+            // Get element vectors
+            p_vj.GetSubVector(vdofs, vj_e);
+
+            // Calculate coeff
+            for (int k = 0; k < elvect_size; k++)
+            {
+                coef[k + (i * elvect_size) + (j * rw.size() * elvect_size)] = vj_e[k] * rw[i] *
+                        t;
+            }
+        }
+    }
+}
+
+void HyperelasticNLFIntegrator_ComputeReducedEQP(ParFiniteElementSpace *fesR,
+        std::vector<double> const &rw, std::vector<int> const &qp,
+        const IntegrationRule *ir, NeoHookeanModel *model, const Vector *x0,
+        CAROM::Matrix const &V_v, CAROM::Vector const &x, const int rank, Vector &res,
+        ElemMatrices *em, const CAROM::Matrix eqp_lifting,
+        const std::vector<int> eqp_liftDOFs,CAROM::Vector eqp_lifted)
+
+{
+    const int rvdim = V_v.numColumns();
+    const int fomdim = V_v.numRows();
+    MFEM_VERIFY(rw.size() == qp.size(), "");
+
+    MFEM_VERIFY(rank == 0,
+                "TODO: generalize to parallel. This uses full dofs in V, which has true dofs");
+
+    const int nqe = ir->GetWeights().Size();
+
+    ElementTransformation *eltrans;
+    DofTransformation *doftrans;
+    const FiniteElement *fe = NULL;
+    Array<int> vdofs;
+
+    res.SetSize(rvdim);
+    res = 0.0;
+
+    int eprev = -1;
+    int dof = 0;
+    int dim = 0;
+
+    // Basis vector
+    Vector vj(fomdim);
+
+    // Element vectors
+    Vector Vx_e;
+    Vector vj_e;
+
+    // Lift x, add x0
+    eqp_lifting.mult(x, eqp_lifted);
+
+    for (int i = 0; i < eqp_liftDOFs.size(); ++i)
+        eqp_lifted(i) += x0->Elem(eqp_liftDOFs[i]);
+
+    // Initialize nonlinear operator storage
+    // Assuming all elements have the same dim and n_dof
+    fe = fesR->GetFE(0);
+    dof = fe->GetDof();
+    dim = fe->GetDim();
+    Vx_e.SetSize(dof * dim);
+
+    eprev = -1;
+    double temp = 0.0;
+    int eqp_ctr = 0;
+    // For every quadrature weight
+    for (int i = 0; i < rw.size(); ++i)
+    {
+        const int e = qp[i] / nqe; // Element index
+        // Local (element) index of the quadrature point
+        const int qpi = qp[i] - (e * nqe);
+        const IntegrationPoint &ip = ir->IntPoint(qpi);
+
+        if (e != eprev) // Update element transformation
+        {
+            doftrans = fesR->GetElementVDofs(e, vdofs);
+            fe = fesR->GetFE(e);
+            eltrans = fesR->GetElementTransformation(e);
+
+            dof = fe->GetDof(); // Get number of dofs in element
+            dim = fe->GetDim();
+
+            if (doftrans)
+            {
+                MFEM_ABORT("TODO: Implementation for when `doftrans` is not NULL");
+            }
+
+            // Get element vectors
+            for (int i = 0; i < dof * dim; ++i)
+            {
+                Vx_e.Elem(i) = eqp_lifted(eqp_ctr * dof * dim + i);
+            }
+            eqp_ctr++;
+            eprev = e;
+        }
+
+        // Integration at ip
+        em->PMatI.UseExternalData(Vx_e.GetData(), dof, dim);
+
+        em->elvect = 0.0;
+
+        // Set integration point in the element transformation
+        eltrans->SetIntPoint(&ip);
+        model->SetTransformation(*eltrans);
+
+        // Get the transformation weight
+        double t = eltrans->Weight();
+
+        // Compute action of nonlinear operator
+        CalcInverse(eltrans->Jacobian(), em->Jrt);
+        fe->CalcDShape(ip, em->DSh);
+        Mult(em->DSh, em->Jrt, em->DS);
+        MultAtB(em->PMatI, em->DS, em->Jpt);
+        model->EvalP(em->Jpt, em->P_f);
+        em->P_f *= (t * rw[i]); // NB: Not by ip.weight
+        AddMultABt(em->DS, em->P_f, em->PMatO);
+
+        // For every basis vector
+        for (int j = 0; j < rvdim; ++j)
+        {
+            // Get basis vector and prolongate
+            for (int k = 0; k < V_v.numRows(); ++k)
+                vj[k] = V_v(k, j);
+
+            vj.GetSubVector(vdofs, vj_e);
+
+            temp = 0.0;
+
+            // Calculate r[i] = ve_j^T * elvect
+            for (int k = 0; k < em->elvect.Size(); k++)
+            {
+                temp += vj_e[k] * em->elvect[k];
+            }
+            res[j] += temp;
+        }
+    }
+}
+
+void HyperelasticNLFIntegrator_ComputeReducedEQP_Fast(ParFiniteElementSpace
+        *fesR,std::vector<double> const &rw, std::vector<int> const &qp,
+        const IntegrationRule *ir, NeoHookeanModel *model, const Vector *x0,
+        const int rvdim,
+        CAROM::Vector const &x, Vector const &coef, Vector const &DS_coef,
+        const int rank, Vector &res,
+        ElemMatrices *em, const CAROM::Matrix eqp_lifting,
+        const std::vector<int> eqp_liftDOFs,CAROM::Vector eqp_lifted)
+{
+
+    MFEM_VERIFY(rank == 0,
+                "TODO: generalize to parallel. This uses full dofs in X, which has true dofs");
+
+    const int nqe = ir->GetWeights().Size();
+
+    ElementTransformation *eltrans;
+    DofTransformation *doftrans;
+    const FiniteElement *fe = NULL;
+    Array<int> vdofs;
+
+    res.SetSize(rvdim);
+    res = 0.0;
+
+    int eprev = -1;
+    int dof = 0;
+    int dim = 0;
+
+    // Element vectors
+    Vector Vx_e;
+
+    // Lift x, add x0
+    eqp_lifting.mult(x, eqp_lifted);
+
+    for (int i = 0; i < eqp_liftDOFs.size(); ++i)
+        eqp_lifted(i) += x0->Elem(eqp_liftDOFs[i]);
+
+    // Initialize nonlinear operator storage
+    // Assuming all elements have the same dim and n_dof
+    fe = fesR->GetFE(0);
+    dof = fe->GetDof();
+    dim = fe->GetDim();
+    Vx_e.SetSize(dof * dim);
+    int index = 0;
+
+    eprev = -1;
+    double temp = 0.0;
+    int eqp_ctr = 0;
+
+    // For every quadrature weight
+    for (int i = 0; i < qp.size(); ++i)
+    {
+        const int e = qp[i] / nqe; // Element index
+        // Local (element) index of the quadrature point
+        const int qpi = qp[i] - (e * nqe);
+        const IntegrationPoint &ip = ir->IntPoint(qpi);
+
+        if (e != eprev) // Update element transformation
+        {
+            doftrans = fesR->GetElementVDofs(e, vdofs);
+            fe = fesR->GetFE(e);
+            eltrans = fesR->GetElementTransformation(e);
+
+            dof = fe->GetDof(); // Get number of dofs in element
+            dim = fe->GetDim();
+
+            if (doftrans)
+            {
+                MFEM_ABORT("TODO: Implementation for when `doftrans` is not NULL");
+            }
+
+            // Get element vectors
+            for (int i = 0; i < dof * dim; ++i)
+            {
+                Vx_e.Elem(i) = eqp_lifted(eqp_ctr * dof * dim + i);
+            }
+            eqp_ctr++;
+
+            eprev = e;
+        }
+
+        // Integration at ip
+        em->elvect = 0.0;
+        em->PMatI.UseExternalData(Vx_e.GetData(), dof, dim);
+
+        // Set integration point in the element transformation
+        eltrans->SetIntPoint(&ip);
+        model->SetTransformation(*eltrans);
+
+        const int elvec_size = em->elvect.Size();
+        for (int ii = 0; ii < dof; ++ii)
+        {
+            for (int jj = 0; jj < dim; ++jj)
+            {
+                index = jj + ii * dim;
+                em->DS.Elem(ii, jj) = DS_coef[index + (i * elvec_size)];
+            }
+        }
+
+        MultAtB(em->PMatI, em->DS, em->Jpt);
+        model->EvalP(em->Jpt, em->P_f);
+        AddMultABt(em->DS, em->P_f, em->PMatO);
+
+        // Calculate r[i] = ve_j^T * elvect
+        // coef is size len(vdofs) * rvdim * rw.size
+        const int qp_size = qp.size();
+        for (int j = 0; j < rvdim; ++j)
+        {
+            temp = 0.0;
+            for (int k = 0; k < elvec_size; k++)
+            {
+                temp += coef[k + (i * elvec_size) + (j * qp_size * elvec_size)] *
+                        em->elvect[k];
+            }
+            res[j] += temp;
+        }
+    }
+}
+
+void ComputeElementRowOfG(const IntegrationRule *ir, Array<int> const &vdofs,
+                          Vector const &ve_j, NeoHookeanModel *model, Vector const &elfun,
+                          FiniteElement const &fe, ElementTransformation &Trans, Vector &r, const int dof,
+                          const int dim,
+                          ElemMatrices &em)
+{
+    MFEM_VERIFY(r.Size() == ir->GetNPoints(), "");
+
+    em.PMatI.UseExternalData(elfun.GetData(), dof, dim);
+    model->SetTransformation(Trans);
+
+    // For each integration point
+    for (int i = 0; i < ir->GetNPoints(); i++)
+    {
+        em.elvect = 0.0;
+        // Get integration point
+        const IntegrationPoint &ip = ir->IntPoint(i);
+
+        // Set integration point in the element transformation
+        Trans.SetIntPoint(&ip);
+
+        // Get the transformation weight
+        double t = Trans.Weight();
+
+        // Compute action of nonlinear operator
+        CalcInverse(Trans.Jacobian(), em.Jrt);
+        fe.CalcDShape(ip, em.DSh);
+        Mult(em.DSh, em.Jrt, em.DS);
+        MultAtB(em.PMatI, em.DS, em.Jpt);
+        model->EvalP(em.Jpt, em.P);
+        em.P *= t; // NB: Not by ip.weight
+        AddMultABt(em.DS, em.P, em.PMatO);
+
+        r[i] = 0.0;
+
+        // Calculate r[i] = ve_j^T * elvect
+        for (int k = 0; k < em.elvect.Size(); k++)
+        {
+            r[i] += ve_j[k] * em.elvect[k];
+        }
+    }
+}
+
+void SolveNNLS(const int rank, const double nnls_tol, const int maxNNLSnnz,
+               CAROM::Vector const &w, CAROM::Matrix &Gt,
+               CAROM::Vector &sol)
+{
+    CAROM::NNLSSolver nnls(nnls_tol, 0, maxNNLSnnz, 2);
+
+    CAROM::Vector rhs_ub(Gt.numColumns(), false);
+    // G.mult(w, rhs_ub);  // rhs = Gw
+    // rhs = Gw. Note that by using Gt and multTranspose, we do parallel communication.
+    Gt.transposeMult(w, rhs_ub);
+
+    CAROM::Vector rhs_lb(rhs_ub);
+    CAROM::Vector rhs_Gw(rhs_ub);
+
+    const double delta = 1.0e-11;
+    for (int i = 0; i < rhs_ub.dim(); ++i)
+    {
+        rhs_lb(i) -= delta;
+        rhs_ub(i) += delta;
+    }
+
+    // nnls.normalize_constraints(Gt, rhs_lb, rhs_ub);
+    nnls.solve_parallel_with_scalapack(Gt, rhs_lb, rhs_ub, sol);
+
+    int nnz = 0;
+    for (int i = 0; i < sol.dim(); ++i)
+    {
+        if (sol(i) != 0.0)
+        {
+            nnz++;
+        }
+    }
+
+    cout << rank << ": Number of nonzeros in NNLS solution: " << nnz
+         << ", out of " << sol.dim() << endl;
+
+    MPI_Allreduce(MPI_IN_PLACE, &nnz, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
+
+    if (rank == 0)
+        cout << "Global number of nonzeros in NNLS solution: " << nnz << endl;
+
+    // Check residual of NNLS solution
+    CAROM::Vector res(Gt.numColumns(), false);
+    Gt.transposeMult(sol, res);
+
+    const double normGsol = res.norm();
+    const double normRHS = rhs_Gw.norm();
+
+    res -= rhs_Gw;
+    const double relNorm = res.norm() / std::max(normGsol, normRHS);
+    cout << rank << ": relative residual norm for NNLS solution of Gs = Gw: " <<
+         relNorm << endl;
+}
+
+int getSteps(const int nsnap, const int snap_step)
+{
+    if (nsnap % snap_step != 0.0)
+    {
+        return int(nsnap / snap_step) + 1;
+    }
+    else
+    {
+        return nsnap / snap_step;
+    }
+}
+
+// Compute EQP solution from constraints on snapshots.
+void SetupEQP_snapshots(const IntegrationRule *ir0, const int rank,
+                        ParFiniteElementSpace *fespace_X,
+                        const int nsets, const CAROM::Matrix *BV,
+                        const CAROM::Matrix *BX_snapshots, const Vector x0,
+                        const bool precondition, const double nnls_tol,
+                        const int maxNNLSnnz, NeoHookeanModel *model,
+                        CAROM::Vector &sol, CAROM::Vector *window_ids, const int snap_step)
+{
+    const int nqe = ir0->GetNPoints();
+    const int ne = fespace_X->GetNE();
+    const int NB = BV->numColumns();
+    const int NQ = ne * nqe;
+    const int nsnap = BX_snapshots->numColumns();
+
+    MFEM_VERIFY(nsnap == BX_snapshots->numColumns() ||
+                nsnap + nsets == BX_snapshots->numColumns(),
+                "");
+    MFEM_VERIFY(BV->numRows() == BX_snapshots->numRows(), "");
+    MFEM_VERIFY(BV->numRows() == fespace_X->GetTrueVSize(), "");
+
+    const bool skipFirstW = (nsnap + nsets == BX_snapshots->numColumns());
+
+    // Compute G of size (NB * nsnap) x NQ, but only store its transpose Gt.
+    CAROM::Matrix Gt(NQ, NB * getSteps(nsnap, snap_step), true);
+    int current_size = 0;
+    int i_start = 0;
+    int i_end = 0;
+    int outer_loop_length = 0;
+    if (!window_ids)
+    {
+        current_size = nsnap;
+        outer_loop_length = 1;
+        i_end = nsnap;
+    }
+    else
+    {
+        outer_loop_length = window_ids->dim() - 1;
+    }
+
+    // For 0 <= j < NB, 0 <= i < nsnap, 0 <= e < ne, 0 <= m < nqe,
+    // G(j + (i*NB), (e*nqe) + m)
+    // is the coefficient of v_j^T M(p_i) V v_i at point m of element e,
+    // with respect to the integration rule weight at that point,
+    // where the "exact" quadrature solution is ir0->GetWeights().
+
+    Vector x_i(BX_snapshots->numRows());
+    Vector v_j(BV->numRows());
+
+    Vector r(nqe);
+
+    int skip = 0;
+    const int nsnapPerSet = nsnap / nsets;
+    if (skipFirstW)
+    {
+        MFEM_VERIFY(nsets * nsnapPerSet == nsnap, "");
+        skip = 1;
+    }
+
+    // Get prolongation matrix
+    const Operator *P = fespace_X->GetProlongationMatrix();
+    if (!P)
+    {
+        MFEM_ABORT("P is null, generalize to serial case")
+    }
+
+    // Outer loop for time windowing
+    // NB: Currently time windowing is done using the same velocity basis for every time window.
+    // A possible next step is to also do time windowing on the velocity basis, which should lead to smaller EQP systems.
+    for (int oi = 0; oi < outer_loop_length; ++oi)
+    {
+        if (window_ids)
+        {
+            i_start = window_ids->item(oi) - 1;
+            i_end = window_ids->item(oi + 1) - 1;
+            current_size = i_end - i_start;
+            cout << "i_start = " << i_start << endl;
+            cout << "Number of NNLS equations: " << NB * current_size << endl;
+            Gt.setSize(NQ, NB * getSteps(current_size, snap_step));
+        }
+
+        // Assuming dof and dim are constant, initialize element matrices once
+        const FiniteElement &fe1 = *fespace_X->GetFE(0);
+        const int dof = fe1.GetDof();
+        const int dim = fe1.GetDim();
+        ElemMatrices em(dof, dim);
+
+        // For every snapshot in batch
+        for (int i = i_start; i < i_end; i += snap_step)
+        {
+            // Set the sampled dofs from the snapshot matrix
+            for (int j = 0; j < BX_snapshots->numRows(); ++j)
+                x_i[j] = (*BX_snapshots)(j, i) + x0[j];
+
+            // Get prolongated dofs
+            Vector px_i;
+            Vector elfun;
+
+            px_i.SetSize(P->Height());
+            P->Mult(x_i, px_i);
+
+            if (skipFirstW && i > 0 && i % nsnapPerSet == 0)
+                skip++;
+
+            // TODO: is it better to make the element loop the outer loop?
+            // For each element
+            for (int e = 0; e < ne; ++e)
+            {
+                // Get element and its dofs and transformation.
+                Array<int> vdofs;
+                DofTransformation *doftrans = fespace_X->GetElementVDofs(e, vdofs);
+                const FiniteElement &fe = *fespace_X->GetFE(e);
+
+                ElementTransformation *eltrans = fespace_X->GetElementTransformation(e);
+                px_i.GetSubVector(vdofs, elfun);
+
+                if (doftrans)
+                {
+                    MFEM_ABORT("Doftrans is true, make corresponding edits")
+                }
+
+                // For each basis vector
+                for (int j = 0; j < NB; ++j)
+                {
+                    // Get basis vector
+                    for (int k = 0; k < BV->numRows(); ++k)
+                        v_j[k] = (*BV)(k, j);
+
+                    // Get prolongated dofs
+                    Vector pv_j;
+                    Vector ve_j;
+
+                    pv_j.SetSize(P->Height());
+                    P->Mult(v_j, pv_j);
+                    pv_j.GetSubVector(vdofs, ve_j);
+                    // Compute the row of G corresponding to element e, store in r
+                    ComputeElementRowOfG(ir0, vdofs, ve_j, model, elfun, fe, *eltrans, r, dof, dim,
+                                         em);
+
+                    for (int m = 0; m < nqe; ++m)
+                        Gt((e * nqe) + m, j + (i * NB)) = r[m];
+                }
+            }
+
+            if (precondition)
+            {
+                MFEM_ABORT("TODO: Implement preconditioned NNLS for this example");
+            }
+        } // Loop (i) over snapshots
+
+        // Rescale every Gt column (NNLS equation) by its max absolute value.
+        Gt.rescale_cols_max();
+
+        Array<double> const &w_el = ir0->GetWeights();
+        MFEM_VERIFY(w_el.Size() == nqe, "");
+
+        CAROM::Vector w(ne * nqe, true);
+
+        for (int i = 0; i < ne; ++i)
+        {
+            for (int j = 0; j < nqe; ++j)
+                w((i * nqe) + j) = w_el[j];
+        }
+
+        SolveNNLS(rank, nnls_tol, maxNNLSnnz, w, Gt, sol);
+        if (window_ids)
+        {
+            sol.write("sol_" + std::to_string(oi));
+        }
+    }
+}
+
+// Utility functions
+void WriteMeshEQP(ParMesh *pmesh, const int myid, const int nqe,
+                  CAROM::Vector const &eqpSol)
+{
+    // Find the elements with quadrature points in eqpSol.
+    std::set<int> elements;
+    Vector elemCount(pmesh->GetNE());
+    elemCount = 0.0;
+
+    for (int i = 0; i < eqpSol.dim(); ++i)
+    {
+        if (eqpSol(i) > 1.0e-12)
+        {
+            const int e = i / nqe; // Element index
+            elements.insert(e);
+            elemCount[e] += 1.0;
+        }
+    }
+
+    // Empty sets, since EQP on samples inside elements.
+    std::set<int> faces, edges, vertices;
+    CAROM::SampleVisualization(pmesh, elements, elements, faces, edges,
+                               vertices, "EQPvis", &elemCount);
+}
+
+// Function to compute the indices at which to switch time windows
+void get_window_ids(int n_step, int n_window, CAROM::Vector *ids)
+{
+    int window_size = std::round(n_step / n_window);
+
+    int res = n_step - (n_window * (window_size - 1) + 1);
+    int res_up = res - n_window;
+    int ctr = -1;
+
+    for (int i = 1; i <= n_window + 1; ++i)
+    {
+        ids->item(i - 1) = (i - 1) * window_size + 1 - (i - 1);
+        if (res_up > 0)
+        {
+            if (i <= res - res_up)
+            {
+                ctr++;
+            }
+            if (i > 2 && i <= res_up + 1)
+            {
+                ctr++;
+            }
+        }
+        else
+        {
+            if (i <= res + 1)
+            {
+                ctr++;
+            }
+        }
+        ids->item(i - 1) += ctr;
+    }
+    ids->item(n_window) = n_step + 1;
+}
+
+bool fileExists(const std::string &filename)
+{
+    std::ifstream file(filename);
+    return file.good();
+}
+
+void get_EQPsol(const int current_window, CAROM::Vector *load_eqpsol)
+{
+    string filename = "sol_" + std::to_string(current_window);
+    if (fileExists(filename + ".000000"))
+    {
+        std::cout << "The length of the vector is: " << load_eqpsol->dim() << std::endl;
+        load_eqpsol->read(filename);
+    }
+}
diff --git a/examples/prom/nonlinear_elasticity_global_rom_eqp.hpp b/examples/prom/nonlinear_elasticity_global_rom_eqp.hpp
new file mode 100644
index 000000000..5cd4ef1d6
--- /dev/null
+++ b/examples/prom/nonlinear_elasticity_global_rom_eqp.hpp
@@ -0,0 +1,137 @@
+/******************************************************************************
+ *
+ * Copyright (c) 2013-2023, Lawrence Livermore National Security, LLC
+ * and other libROM project developers. See the top-level COPYRIGHT
+ * file for details.
+ *
+ * SPDX-License-Identifier: (Apache-2.0 OR MIT)
+ *
+ *****************************************************************************/
+
+// Functions used by nonlinear_elastic_global_rom.cpp with EQP.
+#include "mfem/Utilities.hpp"
+#include <cmath>
+using namespace mfem;
+using namespace std;
+
+class HyperelasticOperator : public TimeDependentOperator
+{
+
+protected:
+    ParBilinearForm *M, *S;
+
+    CGSolver M_solver;    // Krylov solver for inverting the mass matrix M
+    HypreSmoother M_prec; // Preconditioner for the mass matrix M
+
+public:
+    HyperelasticOperator(ParFiniteElementSpace &f, Array<int> &ess_tdof_list_,
+                         double visc, double mu, double K);
+
+    /// Compute the right-hand side of the ODE system.
+    virtual void Mult(const Vector &vx, Vector &dvx_dt) const;
+
+    double ElasticEnergy(const ParGridFunction &x) const;
+    double KineticEnergy(const ParGridFunction &v) const;
+    void GetElasticEnergyDensity(const ParGridFunction &x,
+                                 ParGridFunction &w) const;
+
+    mutable Vector H_sp;
+    mutable Vector dvxdt_sp;
+
+    ParFiniteElementSpace &fespace;
+    double viscosity;
+    Array<int> ess_tdof_list;
+    ParNonlinearForm *H;
+    HyperelasticModel *model;
+    mutable Vector z;     // auxiliary vector
+    mutable Vector z2;    // auxiliary vector
+    HypreParMatrix *Mmat; // Mass matrix from ParallelAssemble()
+    HypreParMatrix Smat;
+
+    virtual ~HyperelasticOperator();
+};
+
+
+struct ElemMatrices
+{
+    DenseMatrix DSh;
+    DenseMatrix DS;
+    DenseMatrix Jrt;
+    DenseMatrix Jpt;
+    DenseMatrix P;
+    DenseMatrix P_f;
+    DenseMatrix PMatI;
+    DenseMatrix PMatO;
+    Vector elvect;
+    // Constructor for matrices struct
+    ElemMatrices(int dof, int dim) : DSh(dof, dim),
+        DS(dof, dim),
+        Jrt(dim),
+        Jpt(dim),
+        P(dim),
+        P_f(dim),
+        PMatI(),
+        PMatO(),
+        elvect(dof * dim)
+    {
+        // Set dimensions for PMatI and PMatO
+        PMatO.UseExternalData(elvect.GetData(), dof, dim);
+    }
+};
+
+// Compute coefficients of the reduced integrator with respect to inputs Q and x
+// in HyperelasticNLFIntegrator_ComputeReducedEQP.
+void GetEQPCoefficients_HyperelasticNLFIntegrator(ParFiniteElementSpace *fesR,
+        std::vector<double> const &rw, std::vector<int> const &qp,
+        const IntegrationRule *ir, NeoHookeanModel *model,
+        CAROM::Matrix const &V_v, const int rank, Vector &coef, Vector &DS_coef,
+        ElemMatrices *em);
+
+// Perform hyperreduction with EQP
+void HyperelasticNLFIntegrator_ComputeReducedEQP(ParFiniteElementSpace *fesR,
+        std::vector<double> const &rw, std::vector<int> const &qp,
+        const IntegrationRule *ir, NeoHookeanModel *model, const Vector *x0,
+        CAROM::Matrix const &V_v, CAROM::Vector const &x, const int rank, Vector &res,
+        ElemMatrices *em, const CAROM::Matrix eqp_lifting,
+        const std::vector<int> eqp_liftDOFs,CAROM::Vector eqp_lifted);
+
+// Optimized EQP hyperreduction routine with preallocated arrays
+void HyperelasticNLFIntegrator_ComputeReducedEQP_Fast(ParFiniteElementSpace
+        *fesR,std::vector<double> const &rw, std::vector<int> const &qp,
+        const IntegrationRule *ir, NeoHookeanModel *model, const Vector *x0,
+        const int rvdim, CAROM::Vector const &x, Vector const &coef,
+        Vector const &DS_coef, const int rank, Vector &res, ElemMatrices *em,
+        const CAROM::Matrix eqp_lifting, const std::vector<int> eqp_liftDOFs,
+        CAROM::Vector eqp_lifted);
+
+// Compute a row in the G matrix which corresponds to a given FE element
+void ComputeElementRowOfG(const IntegrationRule *ir, Array<int> const &vdofs,
+                          Vector const &ve_j, NeoHookeanModel *model, Vector const &elfun,
+                          FiniteElement const &fe, ElementTransformation &Trans, Vector &r, const int dof,
+                          const int dim,
+                          ElemMatrices &em);
+
+void SolveNNLS(const int rank, const double nnls_tol, const int maxNNLSnnz,
+               CAROM::Vector const &w, CAROM::Matrix &Gt,
+               CAROM::Vector &sol);
+
+// Compute EQP solution from constraints on snapshots.
+void SetupEQP_snapshots(const IntegrationRule *ir0, const int rank,
+                        ParFiniteElementSpace *fespace_X,
+                        const int nsets, const CAROM::Matrix *BV,
+                        const CAROM::Matrix *BX_snapshots, const Vector x0,
+                        const bool precondition, const double nnls_tol,
+                        const int maxNNLSnnz, NeoHookeanModel *model,
+                        CAROM::Vector &sol, CAROM::Vector *window_ids, const int snap_step);
+
+void WriteMeshEQP(ParMesh *pmesh, const int myid, const int nqe,
+                  CAROM::Vector const &eqpSol);
+
+// Function to compute the indices at which to switch time windows
+void get_window_ids(int n_step, int n_window, CAROM::Vector *ids);
+
+// Helper function to check if a file exists
+bool fileExists(const std::string &filename);
+
+// Load a EQP solution
+void get_EQPsol(const int current_window, CAROM::Vector *load_eqpsol);