diff --git a/backends/ref/ceed-ref-basis.c b/backends/ref/ceed-ref-basis.c
index 18e9dd007d..d492f3910e 100644
--- a/backends/ref/ceed-ref-basis.c
+++ b/backends/ref/ceed-ref-basis.c
@@ -14,7 +14,7 @@
 #include "ceed-ref.h"
 
 //------------------------------------------------------------------------------
-// Basis Apply
+// Basis Apply H1
 //------------------------------------------------------------------------------
 static int CeedBasisApply_Ref(CeedBasis basis, CeedInt num_elem, CeedTransposeMode t_mode, CeedEvalMode eval_mode, CeedVector U, CeedVector V) {
   Ceed ceed;
diff --git a/examples/Hdiv-mass/Makefile b/examples/Hdiv-mass/Makefile
new file mode 100644
index 0000000000..eecd42bcd2
--- /dev/null
+++ b/examples/Hdiv-mass/Makefile
@@ -0,0 +1,82 @@
+# Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
+# Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
+# All Rights reserved. See files LICENSE and NOTICE for details.
+#
+# This file is part of CEED, a collection of benchmarks, miniapps, software
+# libraries and APIs for efficient high-order finite element and spectral
+# element discretizations for exascale applications. For more information and
+# source code availability see http://github.com/ceed.
+#
+# The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
+# a collaborative effort of two U.S. Department of Energy organizations (Office
+# of Science and the National Nuclear Security Administration) responsible for
+# the planning and preparation of a capable exascale ecosystem, including
+# software, applications, hardware, advanced system engineering and early
+# testbed platforms, in support of the nation's exascale computing imperative.
+
+COMMON ?= ../../common.mk
+-include $(COMMON)
+
+# Note: PETSC_ARCH can be undefined or empty for installations which do not use
+#       PETSC_ARCH - for example when using PETSc installed through Spack.
+PETSc.pc := $(PETSC_DIR)/$(PETSC_ARCH)/lib/pkgconfig/PETSc.pc
+CEED_DIR ?= ../..
+ceed.pc := $(CEED_DIR)/lib/pkgconfig/ceed.pc
+# Compiler and flags use `=` so the pkg-config queries run when the variables are expanded, after pkgconf is defined at the end of this file
+CC = $(call pkgconf, --variable=ccompiler $(PETSc.pc) $(ceed.pc))
+CFLAGS = -std=c99 \
+  $(call pkgconf, --variable=cflags_extra $(PETSc.pc)) \
+  $(call pkgconf, --cflags-only-other $(PETSc.pc)) \
+  $(OPT)
+CPPFLAGS = $(call pkgconf, --cflags-only-I $(PETSc.pc) $(ceed.pc)) \
+  $(call pkgconf, --variable=cflags_dep $(PETSc.pc))
+LDFLAGS = $(call pkgconf, --libs-only-L --libs-only-other $(PETSc.pc) $(ceed.pc))
+LDFLAGS += $(patsubst -L%, $(call pkgconf, --variable=ldflag_rpath $(PETSc.pc))%, $(call pkgconf, --libs-only-L $(PETSc.pc) $(ceed.pc)))
+LDLIBS = $(call pkgconf, --libs-only-l $(PETSc.pc) $(ceed.pc)) -lm
+# Directory layout: objects go to $(OBJDIR); sources are main.c plus every .c file in $(PROBLEMDIR) and $(SRCDIR)
+OBJDIR := build
+SRCDIR := src
+PROBLEMDIR := problems
+
+src.c := main.c $(sort $(wildcard $(PROBLEMDIR)/*.c)) $(sort $(wildcard $(SRCDIR)/*.c))
+src.o = $(src.c:%.c=$(OBJDIR)/%.o)
+
+all: main
+# Link all objects into ./main; the .pc files after `|` are order-only prerequisites, so they are not part of $^
+main: $(src.o) | $(PETSc.pc) $(ceed.pc)
+	$(call quiet,LINK.o) $(CEED_LDFLAGS) $^ $(LOADLIBES) $(LDLIBS) -o $@
+# Create a directory on demand and mark it with a .DIR stamp file
+.SECONDEXPANSION: # to expand $$(@D)/.DIR
+%/.DIR :
+	@mkdir -p $(@D)
+	@touch $@
+
+# Fallback for `quiet` when it is not already defined (e.g. by $(COMMON)): expand to the variable named by $(1)
+quiet ?= $($(1))
+# Compile each source into $(OBJDIR); the order-only .DIR prerequisite creates the object's directory first
+$(OBJDIR)/%.o : %.c | $$(@D)/.DIR
+	$(call quiet,CC) $(CPPFLAGS) $(CFLAGS) -c -o $@ $(abspath $<)
+
+
+# Print the resolved compiler and flag variables; both .pc files are
+# prerequisites of this target
+print: $(PETSc.pc) $(ceed.pc)
+	$(info CC      : $(CC))
+	$(info CFLAGS  : $(CFLAGS))
+	$(info CPPFLAGS: $(CPPFLAGS))
+	$(info LDFLAGS : $(LDFLAGS))
+	$(info LDLIBS  : $(LDLIBS))
+	@true
+
+clean:
+	$(RM) -r $(OBJDIR) main *.vtu *.csv
+# Fail with a hint when the PETSc pkg-config file does not exist
+$(PETSc.pc):
+	$(if $(wildcard $@),,$(error \
+	  PETSc config not found at $@. Please set PETSC_DIR and PETSC_ARCH))
+
+.PHONY: all print clean
+# Helper: run pkg-config with the given arguments and strip a leading and a trailing double quote from its output
+pkgconf = $(shell pkg-config $1 | sed -e 's/^"//g' -e 's/"$$//g')
+# Include per-object dependency files (.d) when they exist
+-include $(src.o:%.o=%.d)
diff --git a/examples/Hdiv-mass/basis/Hdiv-hex.h b/examples/Hdiv-mass/basis/Hdiv-hex.h
new file mode 100644
index 0000000000..9259bede85
--- /dev/null
+++ b/examples/Hdiv-mass/basis/Hdiv-hex.h
@@ -0,0 +1,156 @@
+// Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
+// All Rights reserved. See files LICENSE and NOTICE for details.
+//
+// This file is part of CEED, a collection of benchmarks, miniapps, software
+// libraries and APIs for efficient high-order finite element and spectral
+// element discretizations for exascale applications. For more information and
+// source code availability see http://github.com/ceed.
+//
+// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
+// a collaborative effort of two U.S. Department of Energy organizations (Office
+// of Science and the National Nuclear Security Administration) responsible for
+// the planning and preparation of a capable exascale ecosystem, including
+// software, applications, hardware, advanced system engineering and early
+// testbed platforms, in support of the nation's exascale computing imperative.
+
+// Evaluate the 24 nodal H(div) basis vectors of the reference hex at x = (x[0], x[1], x[2]): basis d has components (Bx[d], By[d], Bz[d]); returns 0.
+// Declared static because this header defines the function. Construction of the nodal basis: https://github.com/rezgarshakeri/H-div-Tests
+static int NodalHdivBasisHex(CeedScalar *x, CeedScalar *Bx, CeedScalar *By, CeedScalar *Bz) {
+  Bx[0] = 0.0625 * x[0] * x[0] - 0.0625;
+  By[0] = -0.0625 * x[0] * x[1] * x[1] + 0.0625 * x[0] + 0.0625 * x[1] * x[1] - 0.0625;
+  Bz[0] = 0.125 * x[0] * x[1] * x[2] - 0.125 * x[0] * x[1] - 0.125 * x[0] * x[2] + 0.125 * x[0] - 0.125 * x[1] * x[2] + 0.125 * x[1] + 0.125 * x[2] -
+          0.125;
+  Bx[1] = 0.0625 - 0.0625 * x[0] * x[0];
+  By[1] = 0.0625 * x[0] * x[1] * x[1] - 0.0625 * x[0] + 0.0625 * x[1] * x[1] - 0.0625;
+  Bz[1] = -0.125 * x[0] * x[1] * x[2] + 0.125 * x[0] * x[1] + 0.125 * x[0] * x[2] - 0.125 * x[0] - 0.125 * x[1] * x[2] + 0.125 * x[1] + 0.125 * x[2] -
+          0.125;
+  Bx[2] = 0.0625 * x[0] * x[0] - 0.0625;
+  By[2] = 0.0625 * x[0] * x[1] * x[1] - 0.0625 * x[0] - 0.0625 * x[1] * x[1] + 0.0625;
+  Bz[2] = -0.125 * x[0] * x[1] * x[2] + 0.125 * x[0] * x[1] - 0.125 * x[0] * x[2] + 0.125 * x[0] + 0.125 * x[1] * x[2] - 0.125 * x[1] + 0.125 * x[2] -
+          0.125;
+  Bx[3] = 0.0625 - 0.0625 * x[0] * x[0];
+  By[3] = -0.0625 * x[0] * x[1] * x[1] + 0.0625 * x[0] - 0.0625 * x[1] * x[1] + 0.0625;
+  Bz[3] = 0.125 * x[0] * x[1] * x[2] - 0.125 * x[0] * x[1] + 0.125 * x[0] * x[2] - 0.125 * x[0] + 0.125 * x[1] * x[2] - 0.125 * x[1] + 0.125 * x[2] -
+          0.125;
+  Bx[4] = 0.0625 * x[0] * x[0] - 0.0625;
+  By[4] = -0.0625 * x[0] * x[1] * x[1] + 0.0625 * x[0] + 0.0625 * x[1] * x[1] - 0.0625;
+  Bz[4] = 0.125 * x[0] * x[1] * x[2] + 0.125 * x[0] * x[1] - 0.125 * x[0] * x[2] - 0.125 * x[0] - 0.125 * x[1] * x[2] - 0.125 * x[1] + 0.125 * x[2] +
+          0.125;
+  Bx[5] = 0.0625 - 0.0625 * x[0] * x[0];
+  By[5] = 0.0625 * x[0] * x[1] * x[1] - 0.0625 * x[0] + 0.0625 * x[1] * x[1] - 0.0625;
+  Bz[5] = -0.125 * x[0] * x[1] * x[2] - 0.125 * x[0] * x[1] + 0.125 * x[0] * x[2] + 0.125 * x[0] - 0.125 * x[1] * x[2] - 0.125 * x[1] + 0.125 * x[2] +
+          0.125;
+  Bx[6] = 0.0625 * x[0] * x[0] - 0.0625;
+  By[6] = 0.0625 * x[0] * x[1] * x[1] - 0.0625 * x[0] - 0.0625 * x[1] * x[1] + 0.0625;
+  Bz[6] = -0.125 * x[0] * x[1] * x[2] - 0.125 * x[0] * x[1] - 0.125 * x[0] * x[2] - 0.125 * x[0] + 0.125 * x[1] * x[2] + 0.125 * x[1] + 0.125 * x[2] +
+          0.125;
+  Bx[7] = 0.0625 - 0.0625 * x[0] * x[0];
+  By[7] = -0.0625 * x[0] * x[1] * x[1] + 0.0625 * x[0] - 0.0625 * x[1] * x[1] + 0.0625;
+  Bz[7] = 0.125 * x[0] * x[1] * x[2] + 0.125 * x[0] * x[1] + 0.125 * x[0] * x[2] + 0.125 * x[0] + 0.125 * x[1] * x[2] + 0.125 * x[1] + 0.125 * x[2] +
+          0.125;
+  Bx[8] = 0.0625 * x[0] * x[0] * x[2] - 0.0625 * x[0] * x[0] - 0.0625 * x[2] + 0.0625;
+  By[8] = -0.125 * x[0] * x[1] * x[2] + 0.125 * x[0] * x[1] + 0.125 * x[0] * x[2] - 0.125 * x[0] - 0.125 * x[1] * x[2] + 0.125 * x[1] + 0.125 * x[2] -
+          0.125;
+  Bz[8] = 0.0625 * x[2] * x[2] - 0.0625;
+  Bx[9] = -0.0625 * x[0] * x[0] * x[2] + 0.0625 * x[0] * x[0] + 0.0625 * x[2] - 0.0625;
+  By[9] = 0.125 * x[0] * x[1] * x[2] - 0.125 * x[0] * x[1] - 0.125 * x[0] * x[2] + 0.125 * x[0] - 0.125 * x[1] * x[2] + 0.125 * x[1] + 0.125 * x[2] -
+          0.125;
+  Bz[9]  = 0.0625 * x[2] * x[2] - 0.0625;
+  Bx[10] = -0.0625 * x[0] * x[0] * x[2] - 0.0625 * x[0] * x[0] + 0.0625 * x[2] + 0.0625;
+  By[10] = 0.125 * x[0] * x[1] * x[2] + 0.125 * x[0] * x[1] - 0.125 * x[0] * x[2] - 0.125 * x[0] + 0.125 * x[1] * x[2] + 0.125 * x[1] - 0.125 * x[2] -
+           0.125;
+  Bz[10] = 0.0625 - 0.0625 * x[2] * x[2];
+  Bx[11] = 0.0625 * x[0] * x[0] * x[2] + 0.0625 * x[0] * x[0] - 0.0625 * x[2] - 0.0625;
+  By[11] = -0.125 * x[0] * x[1] * x[2] - 0.125 * x[0] * x[1] + 0.125 * x[0] * x[2] + 0.125 * x[0] + 0.125 * x[1] * x[2] + 0.125 * x[1] -
+           0.125 * x[2] - 0.125;
+  Bz[11] = 0.0625 - 0.0625 * x[2] * x[2];
+  Bx[12] = 0.0625 * x[0] * x[0] * x[2] - 0.0625 * x[0] * x[0] - 0.0625 * x[2] + 0.0625;
+  By[12] = -0.125 * x[0] * x[1] * x[2] + 0.125 * x[0] * x[1] - 0.125 * x[0] * x[2] + 0.125 * x[0] - 0.125 * x[1] * x[2] + 0.125 * x[1] -
+           0.125 * x[2] + 0.125;
+  Bz[12] = 0.0625 * x[2] * x[2] - 0.0625;
+  Bx[13] = -0.0625 * x[0] * x[0] * x[2] + 0.0625 * x[0] * x[0] + 0.0625 * x[2] - 0.0625;
+  By[13] = 0.125 * x[0] * x[1] * x[2] - 0.125 * x[0] * x[1] + 0.125 * x[0] * x[2] - 0.125 * x[0] - 0.125 * x[1] * x[2] + 0.125 * x[1] - 0.125 * x[2] +
+           0.125;
+  Bz[13] = 0.0625 * x[2] * x[2] - 0.0625;
+  Bx[14] = -0.0625 * x[0] * x[0] * x[2] - 0.0625 * x[0] * x[0] + 0.0625 * x[2] + 0.0625;
+  By[14] = 0.125 * x[0] * x[1] * x[2] + 0.125 * x[0] * x[1] + 0.125 * x[0] * x[2] + 0.125 * x[0] + 0.125 * x[1] * x[2] + 0.125 * x[1] + 0.125 * x[2] +
+           0.125;
+  Bz[14] = 0.0625 - 0.0625 * x[2] * x[2];
+  Bx[15] = 0.0625 * x[0] * x[0] * x[2] + 0.0625 * x[0] * x[0] - 0.0625 * x[2] - 0.0625;
+  By[15] = -0.125 * x[0] * x[1] * x[2] - 0.125 * x[0] * x[1] - 0.125 * x[0] * x[2] - 0.125 * x[0] + 0.125 * x[1] * x[2] + 0.125 * x[1] +
+           0.125 * x[2] + 0.125;
+  Bz[15] = 0.0625 - 0.0625 * x[2] * x[2];
+  Bx[16] = 0.125 * x[0] * x[1] * x[2] - 0.125 * x[0] * x[1] - 0.125 * x[0] * x[2] + 0.125 * x[0] + 0.125 * x[1] * x[2] - 0.125 * x[1] - 0.125 * x[2] +
+           0.125;
+  By[16] = 0.0625 * x[1] * x[1] - 0.0625;
+  Bz[16] = -0.0625 * x[1] * x[2] * x[2] + 0.0625 * x[1] + 0.0625 * x[2] * x[2] - 0.0625;
+  Bx[17] = -0.125 * x[0] * x[1] * x[2] + 0.125 * x[0] * x[1] - 0.125 * x[0] * x[2] + 0.125 * x[0] - 0.125 * x[1] * x[2] + 0.125 * x[1] -
+           0.125 * x[2] + 0.125;
+  By[17] = 0.0625 - 0.0625 * x[1] * x[1];
+  Bz[17] = 0.0625 * x[1] * x[2] * x[2] - 0.0625 * x[1] + 0.0625 * x[2] * x[2] - 0.0625;
+  Bx[18] = -0.125 * x[0] * x[1] * x[2] - 0.125 * x[0] * x[1] + 0.125 * x[0] * x[2] + 0.125 * x[0] - 0.125 * x[1] * x[2] - 0.125 * x[1] +
+           0.125 * x[2] + 0.125;
+  By[18] = 0.0625 * x[1] * x[1] - 0.0625;
+  Bz[18] = 0.0625 * x[1] * x[2] * x[2] - 0.0625 * x[1] - 0.0625 * x[2] * x[2] + 0.0625;
+  Bx[19] = 0.125 * x[0] * x[1] * x[2] + 0.125 * x[0] * x[1] + 0.125 * x[0] * x[2] + 0.125 * x[0] + 0.125 * x[1] * x[2] + 0.125 * x[1] + 0.125 * x[2] +
+           0.125;
+  By[19] = 0.0625 - 0.0625 * x[1] * x[1];
+  Bz[19] = -0.0625 * x[1] * x[2] * x[2] + 0.0625 * x[1] - 0.0625 * x[2] * x[2] + 0.0625;
+  Bx[20] = 0.125 * x[0] * x[1] * x[2] - 0.125 * x[0] * x[1] - 0.125 * x[0] * x[2] + 0.125 * x[0] - 0.125 * x[1] * x[2] + 0.125 * x[1] + 0.125 * x[2] -
+           0.125;
+  By[20] = 0.0625 * x[1] * x[1] - 0.0625;
+  Bz[20] = -0.0625 * x[1] * x[2] * x[2] + 0.0625 * x[1] + 0.0625 * x[2] * x[2] - 0.0625;
+  Bx[21] = -0.125 * x[0] * x[1] * x[2] + 0.125 * x[0] * x[1] - 0.125 * x[0] * x[2] + 0.125 * x[0] + 0.125 * x[1] * x[2] - 0.125 * x[1] +
+           0.125 * x[2] - 0.125;
+  By[21] = 0.0625 - 0.0625 * x[1] * x[1];
+  Bz[21] = 0.0625 * x[1] * x[2] * x[2] - 0.0625 * x[1] + 0.0625 * x[2] * x[2] - 0.0625;
+  Bx[22] = -0.125 * x[0] * x[1] * x[2] - 0.125 * x[0] * x[1] + 0.125 * x[0] * x[2] + 0.125 * x[0] + 0.125 * x[1] * x[2] + 0.125 * x[1] -
+           0.125 * x[2] - 0.125;
+  By[22] = 0.0625 * x[1] * x[1] - 0.0625;
+  Bz[22] = 0.0625 * x[1] * x[2] * x[2] - 0.0625 * x[1] - 0.0625 * x[2] * x[2] + 0.0625;
+  Bx[23] = 0.125 * x[0] * x[1] * x[2] + 0.125 * x[0] * x[1] + 0.125 * x[0] * x[2] + 0.125 * x[0] - 0.125 * x[1] * x[2] - 0.125 * x[1] - 0.125 * x[2] -
+           0.125;
+  By[23] = 0.0625 - 0.0625 * x[1] * x[1];
+  Bz[23] = -0.0625 * x[1] * x[2] * x[2] + 0.0625 * x[1] - 0.0625 * x[2] * x[2] + 0.0625;
+  return 0;
+}
+// Tabulate the 24-function H(div) basis of the reference hex on the Q x Q x Q tensor grid of 1D quadrature points.
+// q_ref holds the x[0], x[1], x[2] coordinates in three consecutive tables of Q^3 entries; q_weights holds the products of the 1D weights.
+// interp holds the Bx, By, Bz tables back to back (24 * Q^3 entries each, indexed [point * 24 + basis]); every div entry is set to 0.125.
+static void HdivBasisHex(CeedInt Q, CeedScalar *q_ref, CeedScalar *q_weights, CeedScalar *interp, CeedScalar *div, CeedQuadMode quad_mode) {
+  CeedScalar q_ref_1d[Q], q_weight_1d[Q];  // 1D quadrature on [-1,1]
+  switch (quad_mode) {
+    case CEED_GAUSS:
+      CeedGaussQuadrature(Q, q_ref_1d, q_weight_1d);
+      break;
+    case CEED_GAUSS_LOBATTO:
+      CeedLobattoQuadrature(Q, q_ref_1d, q_weight_1d);
+      break;
+  }
+  const CeedInt    num_qpts = Q * Q * Q;
+  const CeedScalar D        = 0.125;  // value stored for the divergence of every basis function
+  CeedScalar       Bx[24], By[24], Bz[24], x[3];
+  for (CeedInt k = 0; k < Q; k++) {
+    for (CeedInt i = 0; i < Q; i++) {
+      for (CeedInt j = 0; j < Q; j++) {
+        const CeedInt k1 = Q * Q * k + Q * i + j;
+        x[0]             = q_ref_1d[j];
+        x[1]             = q_ref_1d[i];
+        x[2]             = q_ref_1d[k];
+        // Coordinate tables are num_qpts apart; a stride of Q * Q would make them overlap and overwrite each other
+        q_ref[k1 + 0 * num_qpts] = x[0];
+        q_ref[k1 + 1 * num_qpts] = x[1];
+        q_ref[k1 + 2 * num_qpts] = x[2];
+        q_weights[k1]            = q_weight_1d[j] * q_weight_1d[i] * q_weight_1d[k];
+        NodalHdivBasisHex(x, Bx, By, Bz);
+        for (CeedInt d = 0; d < 24; d++) {
+          interp[k1 * 24 + d]                 = Bx[d];
+          interp[k1 * 24 + d + 24 * num_qpts] = By[d];
+          interp[k1 * 24 + d + 48 * num_qpts] = Bz[d];
+          div[k1 * 24 + d]                    = D;
+        }
+      }
+    }
+  }
+}
diff --git a/examples/Hdiv-mass/basis/Hdiv-quad.h b/examples/Hdiv-mass/basis/Hdiv-quad.h
new file mode 100644
index 0000000000..4bf155fa0a
--- /dev/null
+++ b/examples/Hdiv-mass/basis/Hdiv-quad.h
@@ -0,0 +1,82 @@
+// Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
+// All Rights reserved. See files LICENSE and NOTICE for details.
+//
+// This file is part of CEED, a collection of benchmarks, miniapps, software
+// libraries and APIs for efficient high-order finite element and spectral
+// element discretizations for exascale applications. For more information and
+// source code availability see http://github.com/ceed.
+//
+// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
+// a collaborative effort of two U.S. Department of Energy organizations (Office
+// of Science and the National Nuclear Security Administration) responsible for
+// the planning and preparation of a capable exascale ecosystem, including
+// software, applications, hardware, advanced system engineering and early
+// testbed platforms, in support of the nation's exascale computing imperative.
+
+// H(div) basis for the quadrilateral element in 2D
+// Local numbering is as follows (each edge has 2 vector dof)
+//     b4     b5
+//    2---------3
+//  b7|         |b3
+//    |         |
+//  b6|         |b2
+//    0---------1
+//     b0     b1
+// Bx[0-->7] = b0_x-->b7_x, By[0-->7] = b0_y-->b7_y
+// Evaluates all 8 basis vectors at x = (x[0], x[1]) and always returns 0; declared static because it is defined in a header.
+// To see how the nodal basis is constructed visit: https://github.com/rezgarshakeri/H-div-Tests
+static int NodalHdivBasisQuad(CeedScalar *x, CeedScalar *Bx, CeedScalar *By) {
+  Bx[0] = 0.125 * x[0] * x[0] - 0.125;
+  By[0] = -0.25 * x[0] * x[1] + 0.25 * x[0] + 0.25 * x[1] - 0.25;
+  Bx[1] = 0.125 - 0.125 * x[0] * x[0];
+  By[1] = 0.25 * x[0] * x[1] - 0.25 * x[0] + 0.25 * x[1] - 0.25;
+  Bx[2] = -0.25 * x[0] * x[1] + 0.25 * x[0] - 0.25 * x[1] + 0.25;
+  By[2] = 0.125 * x[1] * x[1] - 0.125;
+  Bx[3] = 0.25 * x[0] * x[1] + 0.25 * x[0] + 0.25 * x[1] + 0.25;
+  By[3] = 0.125 - 0.125 * x[1] * x[1];
+  Bx[4] = 0.125 * x[0] * x[0] - 0.125;
+  By[4] = -0.25 * x[0] * x[1] - 0.25 * x[0] + 0.25 * x[1] + 0.25;
+  Bx[5] = 0.125 - 0.125 * x[0] * x[0];
+  By[5] = 0.25 * x[0] * x[1] + 0.25 * x[0] + 0.25 * x[1] + 0.25;
+  Bx[6] = -0.25 * x[0] * x[1] + 0.25 * x[0] + 0.25 * x[1] - 0.25;
+  By[6] = 0.125 * x[1] * x[1] - 0.125;
+  Bx[7] = 0.25 * x[0] * x[1] + 0.25 * x[0] - 0.25 * x[1] - 0.25;
+  By[7] = 0.125 - 0.125 * x[1] * x[1];
+  return 0;
+}
+// Fill the quadrature and basis tables for the 8-function H(div) quadrilateral basis on a Q x Q tensor grid.
+// q_ref: x[0] coordinates of all Q * Q points followed by their x[1] coordinates; q_weights: products of the 1D weights.
+// interp: Bx table followed by By table, each indexed [point * 8 + basis]; div: 0.25 in every entry.
+static void HdivBasisQuad(CeedInt Q, CeedScalar *q_ref, CeedScalar *q_weights, CeedScalar *interp, CeedScalar *div, CeedQuadMode quad_mode) {
+  const CeedInt    num_qpts = Q * Q;
+  const CeedScalar div_ref  = 0.25;  // divergence of every basis function in NodalHdivBasisQuad
+  CeedScalar       pts_1d[Q], wts_1d[Q], point[2], Bx[8], By[8];
+
+  // 1D rule on [-1,1], selected by quad_mode
+  if (quad_mode == CEED_GAUSS) {
+    CeedGaussQuadrature(Q, pts_1d, wts_1d);
+  } else if (quad_mode == CEED_GAUSS_LOBATTO) {
+    CeedLobattoQuadrature(Q, pts_1d, wts_1d);
+  }
+
+  // Tensor grid: the x[0] index (col) varies fastest
+  for (CeedInt row = 0; row < Q; row++) {
+    for (CeedInt col = 0; col < Q; col++) {
+      const CeedInt q        = Q * row + col;
+      CeedScalar   *interp_x = &interp[q * 8], *interp_y = &interp[q * 8 + 8 * num_qpts], *div_q = &div[q * 8];
+
+      point[0]            = pts_1d[col];
+      point[1]            = pts_1d[row];
+      q_ref[q]            = point[0];
+      q_ref[q + num_qpts] = point[1];
+      q_weights[q]        = wts_1d[col] * wts_1d[row];
+      NodalHdivBasisQuad(point, Bx, By);
+      for (CeedInt b = 0; b < 8; b++) {
+        interp_x[b] = Bx[b];
+        interp_y[b] = By[b];
+        div_q[b]    = div_ref;
+      }
+    }
+  }
+}
diff --git a/examples/Hdiv-mass/basis/L2-P0.h b/examples/Hdiv-mass/basis/L2-P0.h
new file mode 100644
index 0000000000..6149d4a34d
--- /dev/null
+++ b/examples/Hdiv-mass/basis/L2-P0.h
@@ -0,0 +1,58 @@
+// Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
+// All Rights reserved. See files LICENSE and NOTICE for details.
+//
+// This file is part of CEED, a collection of benchmarks, miniapps, software
+// libraries and APIs for efficient high-order finite element and spectral
+// element discretizations for exascale applications. For more information and
+// source code availability see http://github.com/ceed.
+//
+// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
+// a collaborative effort of two U.S. Department of Energy organizations (Office
+// of Science and the National Nuclear Security Administration) responsible for
+// the planning and preparation of a capable exascale ecosystem, including
+// software, applications, hardware, advanced system engineering and early
+// testbed platforms, in support of the nation's exascale computing imperative.
+
+// Build the L2 piecewise-constant (P0) basis on a tensor grid with Q quadrature points per direction.
+// dim == 2 fills a Q x Q grid, any other dim a Q x Q x Q grid; q_ref holds one coordinate table per direction, each as long as q_weights.
+static void L2BasisP0(CeedInt dim, CeedInt Q, CeedScalar *q_ref, CeedScalar *q_weights, CeedScalar *interp, CeedQuadMode quad_mode) {
+  // Get 1D quadrature on [-1,1]
+  CeedScalar q_ref_1d[Q], q_weight_1d[Q];
+  switch (quad_mode) {
+    case CEED_GAUSS:
+      CeedGaussQuadrature(Q, q_ref_1d, q_weight_1d);
+      break;
+    case CEED_GAUSS_LOBATTO:
+      CeedLobattoQuadrature(Q, q_ref_1d, q_weight_1d);
+      break;
+  }
+  // P0 L2 basis is just a constant
+  CeedScalar P0 = 1.0;
+  if (dim == 2) {
+    for (CeedInt i = 0; i < Q; i++) {
+      for (CeedInt j = 0; j < Q; j++) {
+        CeedInt k1        = Q * i + j;
+        q_ref[k1]         = q_ref_1d[j];
+        q_ref[k1 + Q * Q] = q_ref_1d[i];
+        q_weights[k1]     = q_weight_1d[j] * q_weight_1d[i];
+        interp[k1]        = P0;
+      }
+    }
+  } else {
+    // In 3D each coordinate table has Q^3 entries; a stride of Q * Q would make the tables overlap and overwrite each other
+    const CeedInt num_qpts = Q * Q * Q;
+    for (CeedInt k = 0; k < Q; k++) {
+      for (CeedInt i = 0; i < Q; i++) {
+        for (CeedInt j = 0; j < Q; j++) {
+          CeedInt k1               = Q * Q * k + Q * i + j;
+          q_ref[k1 + 0 * num_qpts] = q_ref_1d[j];
+          q_ref[k1 + 1 * num_qpts] = q_ref_1d[i];
+          q_ref[k1 + 2 * num_qpts] = q_ref_1d[k];
+          q_weights[k1]            = q_weight_1d[j] * q_weight_1d[i] * q_weight_1d[k];
+          interp[k1]               = P0;
+        }
+      }
+    }
+  }
+}
diff --git a/examples/Hdiv-mass/conv_rate.py b/examples/Hdiv-mass/conv_rate.py
new file mode 100644
index 0000000000..18aff73f3c
--- /dev/null
+++ b/examples/Hdiv-mass/conv_rate.py
@@ -0,0 +1,60 @@
+#!/usr/bin/env python3
+
+# Copyright (c) 2017, Lawrence Livermore National Security, LLC.
+# Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
+# All Rights reserved. See files LICENSE and NOTICE for details.
+#
+# This file is part of CEED, a collection of benchmarks, miniapps, software
+# libraries and APIs for efficient high-order finite element and spectral
+# element discretizations for exascale applications. For more information and
+# source code availability see http://github.com/ceed.
+#
+# The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
+# a collaborative effort of two U.S. Department of Energy organizations (Office
+# of Science and the National Nuclear Security Administration) responsible for
+# the planning and preparation of a capable exascale ecosystem, including
+# software, applications, hardware, advanced system engineering and early
+# testbed platforms, in support of the nation's exascale computing imperative.
+
+# After ./conv_test.sh you can get the table of convergence order by
+# python conv_rate.py -f conv_test_result.csv
+
+import pandas as pd
+import argparse
+from pylab import *
+from matplotlib import use
+
+
+def convergence_rate():
+    # Define argparse for the input variables
+    parser = argparse.ArgumentParser(description='Get input arguments')
+    parser.add_argument('-f',
+                        dest='conv_result_file',
+                        type=str,
+                        required=True,
+                        help='Path to the CSV file')
+    args = parser.parse_args()
+    conv_result_file = args.conv_result_file
+
+    # Load the data
+    data = pd.read_csv(conv_result_file)
+    fig, ax = plt.subplots()
+
+    data = data.sort_values('run')
+
+    E_u = data['error_u']
+    h = 1/data['mesh_res']
+    N = data['mesh_res']
+    conv_u = []
+    conv_u.append(0)
+
+    for i in range(1,len(E_u)):
+        conv_u.append(log10(E_u[i]/E_u[i-1])/log10(h[i]/h[i-1]))
+
+    result = {'Number of element/direction':N, 'convergence order of u':conv_u}
+    df = pd.DataFrame(result)
+    print(df)
+
+
+if __name__ == "__main__":
+    convergence_rate()
diff --git a/examples/Hdiv-mass/conv_test.sh b/examples/Hdiv-mass/conv_test.sh
new file mode 100755
index 0000000000..e13f7161b4
--- /dev/null
+++ b/examples/Hdiv-mass/conv_test.sh
@@ -0,0 +1,72 @@
+#!/bin/bash
+
+# Copyright (c) 2017, Lawrence Livermore National Security, LLC.
+# Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
+# All Rights reserved. See files LICENSE and NOTICE for details.
+#
+# This file is part of CEED, a collection of benchmarks, miniapps, software
+# libraries and APIs for efficient high-order finite element and spectral
+# element discretizations for exascale applications. For more information and
+# source code availability see http://github.com/ceed.
+#
+# The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
+# a collaborative effort of two U.S. Department of Energy organizations (Office
+# of Science and the National Nuclear Security Administration) responsible for
+# the planning and preparation of a capable exascale ecosystem, including
+# software, applications, hardware, advanced system engineering and early
+# testbed platforms, in support of the nation's exascale computing imperative.
+
+# After building the problem with make, run the convergence test with: ./conv_test.sh -d 2 (or -d 3)
+# Read the dimension from -d; 3D is the default when -d is not given, and any value other than 2 or 3 is rejected
+while getopts d: flag; do
+    case "${flag}" in
+        d) dim=${OPTARG};;
+    esac
+done
+dim=${dim:-3}
+if ! [[ "$dim" -eq 2 || "$dim" -eq 3 ]]; then
+    echo "Usage: $0 -d <2|3>" >&2
+    exit 1
+fi
+echo "Running convergence test in ${dim}D for Projection problem in H(div) space";
+
+declare -A run_flags
+    #run_flags[pc_type]=svd
+    run_flags[ceed]=/cpu/self/ref/serial
+    if [[ $dim -eq 2 ]]; then
+        run_flags[problem]=mass2d
+        run_flags[dm_plex_dim]=$dim
+        run_flags[dm_plex_box_faces]=2,2
+        run_flags[dm_plex_box_lower]=0,0
+        run_flags[dm_plex_box_upper]=1,1
+    else
+        run_flags[problem]=mass3d
+        run_flags[dm_plex_dim]=$dim
+        run_flags[dm_plex_box_faces]=2,2,2
+    fi
+
+# Mesh resolutions (elements per direction): res_start, res_start + res_stride, ... up to res_end
+declare -A test_flags
+    test_flags[res_start]=4
+    test_flags[res_stride]=2
+    test_flags[res_end]=12
+file_name=conv_test_result.csv
+echo "run,mesh_res,error_u" > "$file_name"
+i=0
+for ((res=${test_flags[res_start]}; res<=${test_flags[res_end]}; res+=${test_flags[res_stride]})); do
+    if [[ $dim -eq 2 ]]; then
+        run_flags[dm_plex_box_faces]=$res,$res
+    else
+        run_flags[dm_plex_box_faces]=$res,$res,$res
+    fi
+    args=''
+    for arg in "${!run_flags[@]}"; do
+        if ! [[ -z ${run_flags[$arg]} ]]; then
+            args="$args -$arg ${run_flags[$arg]}"
+        fi
+    done
+    # $args is left unquoted on purpose so it splits into separate options; field 6 of the matching line goes into the error_u column
+    ./main $args | grep "L2 error of u" | awk -v i="$i" -v res="$res" '{ printf "%d,%d,%e\n", i, res, $6}' >> "$file_name"
+    i=$((i+1))
+done
+python conv_rate.py -f "$file_name"
\ No newline at end of file
diff --git a/examples/Hdiv-mass/convrate_mass.png b/examples/Hdiv-mass/convrate_mass.png
new file mode 100644
index 0000000000..51859ba674
Binary files /dev/null and b/examples/Hdiv-mass/convrate_mass.png differ
diff --git a/examples/Hdiv-mass/include/cl-options.h b/examples/Hdiv-mass/include/cl-options.h
new file mode 100644
index 0000000000..79f4fb51bc
--- /dev/null
+++ b/examples/Hdiv-mass/include/cl-options.h
@@ -0,0 +1,9 @@
+#ifndef cloptions_h
+#define cloptions_h
+
+#include "structs.h"
+
+// Process general command line options
+PetscErrorCode ProcessCommandLineOptions(AppCtx app_ctx);
+
+#endif  // cloptions_h
diff --git a/examples/Hdiv-mass/include/post-processing.h b/examples/Hdiv-mass/include/post-processing.h
new file mode 100644
index 0000000000..6338a2f2c4
--- /dev/null
+++ b/examples/Hdiv-mass/include/post-processing.h
@@ -0,0 +1,12 @@
+#ifndef post_processing_h
+#define post_processing_h
+
+#include <ceed.h>
+#include <petsc.h>
+
+#include "setup-fe.h"
+#include "structs.h"
+
+PetscErrorCode PrintOutput(DM dm, Ceed ceed, AppCtx app_ctx, KSP ksp, Vec X, CeedScalar l2_error_u);
+
+#endif  // post_processing_h
\ No newline at end of file
diff --git a/examples/Hdiv-mass/include/register-problem.h b/examples/Hdiv-mass/include/register-problem.h
new file mode 100644
index 0000000000..ab15dfbe50
--- /dev/null
+++ b/examples/Hdiv-mass/include/register-problem.h
@@ -0,0 +1,18 @@
+#ifndef register_problem_h
+#define register_problem_h
+
+#include "structs.h"
+
+// Register problems to be available on the command line
+PetscErrorCode RegisterProblems_Hdiv(AppCtx app_ctx);
+
+// -----------------------------------------------------------------------------
+// Set up problems function prototype
+// -----------------------------------------------------------------------------
+// 1) poisson-quad2d
+PetscErrorCode Hdiv_POISSON_MASS2D(ProblemData problem_data, void *ctx);
+
+// 2) poisson-hex3d
+PetscErrorCode Hdiv_POISSON_MASS3D(ProblemData problem_data, void *ctx);
+
+#endif  // register_problem_h
diff --git a/examples/Hdiv-mass/include/setup-dm.h b/examples/Hdiv-mass/include/setup-dm.h
new file mode 100644
index 0000000000..192db3d16d
--- /dev/null
+++ b/examples/Hdiv-mass/include/setup-dm.h
@@ -0,0 +1,16 @@
+#ifndef setupdm_h
+#define setupdm_h
+
+#include <ceed.h>
+#include <petsc.h>
+#include <petscdmplex.h>
+#include <petscsys.h>
+
+#include "structs.h"
+
+// ---------------------------------------------------------------------------
+// Create DM
+// ---------------------------------------------------------------------------
+PetscErrorCode CreateDM(MPI_Comm comm, Ceed ceed, DM *dm);
+
+#endif  // setupdm_h
diff --git a/examples/Hdiv-mass/include/setup-fe.h b/examples/Hdiv-mass/include/setup-fe.h
new file mode 100644
index 0000000000..f30914931b
--- /dev/null
+++ b/examples/Hdiv-mass/include/setup-fe.h
@@ -0,0 +1,20 @@
+#ifndef setupfe_h
+#define setupfe_h
+
+#include <ceed.h>
+#include <petsc.h>
+#include <petscdmplex.h>
+#include <petscsys.h>
+
+#include "structs.h"
+
+// ---------------------------------------------------------------------------
+// Setup H(div) FE space
+// ---------------------------------------------------------------------------
+CeedMemType      MemTypeP2C(PetscMemType mtype);
+PetscErrorCode   SetupFEHdiv(AppCtx app_ctx, ProblemData problem_data, DM dm);
+CeedElemTopology ElemTopologyP2C(DMPolytopeType cell_type);
+PetscInt         Involute(PetscInt i);
+PetscErrorCode   CreateRestrictionFromPlex(Ceed ceed, DM dm, CeedInt height, DMLabel domain_label, CeedInt value, CeedElemRestriction *elem_restr);
+PetscErrorCode   CreateRestrictionFromPlexOriented(Ceed ceed, DM dm, CeedInt P, CeedElemRestriction *elem_restr_u, CeedElemRestriction *elem_restr_p);
+#endif  // setupfe_h
diff --git a/examples/Hdiv-mass/include/setup-libceed.h b/examples/Hdiv-mass/include/setup-libceed.h
new file mode 100644
index 0000000000..c28c543108
--- /dev/null
+++ b/examples/Hdiv-mass/include/setup-libceed.h
@@ -0,0 +1,10 @@
+#ifndef setuplibceed_h
+#define setuplibceed_h
+
+#include "setup-fe.h"
+#include "structs.h"
+
+// Set up and destroy libCEED objects
+PetscErrorCode CeedDataDestroy(CeedData ceed_data);
+PetscErrorCode SetupLibceed(DM dm, Ceed ceed, AppCtx app_ctx, ProblemData problem_data, CeedData ceed_data, CeedVector rhs_ceed);
+#endif  // setuplibceed_h
diff --git a/examples/Hdiv-mass/include/setup-matops.h b/examples/Hdiv-mass/include/setup-matops.h
new file mode 100644
index 0000000000..4240f28ef5
--- /dev/null
+++ b/examples/Hdiv-mass/include/setup-matops.h
@@ -0,0 +1,13 @@
+#ifndef setup_matops_h
+#define setup_matops_h
+
+#include <ceed.h>
+#include <petsc.h>
+
+#include "setup-fe.h"
+#include "structs.h"
+
+PetscErrorCode ApplyLocalCeedOp(Vec X, Vec Y, OperatorApplyContext op_apply_ctx);
+PetscErrorCode ApplyAddLocalCeedOp(Vec X, Vec Y, OperatorApplyContext op_apply_ctx);
+
+#endif  // setup_matops_h
\ No newline at end of file
diff --git a/examples/Hdiv-mass/include/setup-solvers.h b/examples/Hdiv-mass/include/setup-solvers.h
new file mode 100644
index 0000000000..71110cb53b
--- /dev/null
+++ b/examples/Hdiv-mass/include/setup-solvers.h
@@ -0,0 +1,16 @@
+#ifndef setup_solvers_h
+#define setup_solvers_h
+
+#include <ceed.h>
+#include <petsc.h>
+
+#include "petscvec.h"
+#include "structs.h"
+
+PetscErrorCode SetupResidualOperatorCtx(MPI_Comm comm, DM dm, Ceed ceed, CeedData ceed_data, OperatorApplyContext ctx_residual);
+PetscErrorCode SetupErrorOperatorCtx(MPI_Comm comm, DM dm, Ceed ceed, CeedData ceed_data, OperatorApplyContext ctx_error_u);
+PetscErrorCode ApplyMatOp(Mat A, Vec X, Vec Y);
+PetscErrorCode PDESolver(CeedData ceed_data, AppCtx app_ctx, KSP ksp, Vec rhs, Vec *X);
+PetscErrorCode ComputeL2Error(Vec X, PetscScalar *l2_error, OperatorApplyContext op_error_ctx);
+PetscErrorCode CtxVecDestroy(ProblemData problem_data, AppCtx app_ctx);
+#endif  // setup_solvers_h
\ No newline at end of file
diff --git a/examples/Hdiv-mass/include/structs.h b/examples/Hdiv-mass/include/structs.h
new file mode 100644
index 0000000000..a39fb829b0
--- /dev/null
+++ b/examples/Hdiv-mass/include/structs.h
@@ -0,0 +1,51 @@
+#ifndef structs_h
+#define structs_h
+
+#include <ceed.h>
+#include <petsc.h>
+
+// PETSc operator contexts
+typedef struct OperatorApplyContext_ *OperatorApplyContext;
+struct OperatorApplyContext_ {
+  MPI_Comm     comm;
+  DM           dm;
+  Vec          X_loc, Y_loc;    // presumably local work vectors for operator input/output - TODO confirm
+  CeedVector   x_ceed, y_ceed;  // presumably libCEED counterparts of X_loc/Y_loc - TODO confirm
+  CeedOperator op_apply;        // libCEED operator carried by this context
+  Ceed         ceed;
+};
+
+// Application context from user command line options
+typedef struct AppCtx_ *AppCtx;
+struct AppCtx_ {
+  char     ceed_resource[PETSC_MAX_PATH_LEN];  // libCEED backend
+  MPI_Comm comm;                               // communicator; main() sets it to PETSC_COMM_WORLD
+  // libCEED arguments
+  PetscInt degree;
+  PetscInt q_extra;
+  // Problem type arguments
+  PetscFunctionList    problems;                          // problem setup functions, registered by name
+  char                 problem_name[PETSC_MAX_PATH_LEN];  // name looked up in `problems` to pick the problem
+  OperatorApplyContext ctx_residual, ctx_error_u;         // contexts for the residual and for the error computation
+};
+
+// libCEED data struct
+typedef struct CeedData_ *CeedData;
+struct CeedData_ {  // NOTE(review): presumably filled by SetupLibceed() and released by CeedDataDestroy() - confirm
+  CeedBasis           basis_x, basis_u, basis_p;
+  CeedElemRestriction elem_restr_x, elem_restr_u, elem_restr_u_i, elem_restr_p;
+  CeedQFunction       qf_residual, qf_error;  // QFunctions for the residual and for the error
+  CeedOperator        op_residual, op_error;  // operators for the residual and for the error
+  CeedVector          x_ceed, y_ceed;
+};
+
+// Problem specific data
+typedef struct ProblemData_ *ProblemData;
+struct ProblemData_ {
+  CeedQFunctionUser setup_rhs, residual, setup_error;                 // QFunctions, e.g. SetupRhs2D, SetupMass2D, SetupError2D
+  const char       *setup_rhs_loc, *residual_loc, *setup_error_loc;  // matching *_loc strings of the QFunctions above
+  CeedQuadMode      quadrature_mode;                                  // CEED_GAUSS for both registered problems
+  CeedInt           elem_node, dim;                                   // 4 and 2 for mass2d, 8 and 3 for mass3d
+};
+
+#endif  // structs_h
\ No newline at end of file
diff --git a/examples/Hdiv-mass/main.c b/examples/Hdiv-mass/main.c
new file mode 100644
index 0000000000..65a1cb75a8
--- /dev/null
+++ b/examples/Hdiv-mass/main.c
@@ -0,0 +1,171 @@
+// Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
+// the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
+// reserved. See files LICENSE and NOTICE for details.
+//
+// This file is part of CEED, a collection of benchmarks, miniapps, software
+// libraries and APIs for efficient high-order finite element and spectral
+// element discretizations for exascale applications. For more information and
+// source code availability see http://github.com/ceed.
+//
+// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
+// a collaborative effort of two U.S. Department of Energy organizations (Office
+// of Science and the National Nuclear Security Administration) responsible for
+// the planning and preparation of a capable exascale ecosystem, including
+// software, applications, hardware, advanced system engineering and early
+// testbed platforms, in support of the nation's exascale computing imperative.
+
+//                        libCEED + PETSc Example: Projection problem in H(div) space
+//
+// The code uses higher level communication protocols in DMPlex.
+//
+// Build with: make
+// Run with:
+//          ./main pc_type svd
+//          ./main -pc_type svd -problem mass2d -dm_plex_dim 2 -dm_plex_box_faces 4,4
+//          ./main -pc_type svd -problem mass3d -dm_plex_dim 3 -dm_plex_box_faces 4,4,4
+
+#include <stdio.h>
+const char help[] = "Solve Projection problem in H(div) space using PETSc and libCEED\n";
+
+#include "main.h"
+
+int main(int argc, char **argv) {
+  // ---------------------------------------------------------------------------
+  // Initialize PETSc
+  // ---------------------------------------------------------------------------
+  PetscCall(PetscInitialize(&argc, &argv, NULL, help));
+  MPI_Comm comm = PETSC_COMM_WORLD;
+
+  // ---------------------------------------------------------------------------
+  // Create structs
+  // ---------------------------------------------------------------------------
+  AppCtx app_ctx;
+  PetscCall(PetscCalloc1(1, &app_ctx));
+
+  ProblemData problem_data = NULL;
+  PetscCall(PetscCalloc1(1, &problem_data));
+
+  CeedData ceed_data;
+  PetscCall(PetscCalloc1(1, &ceed_data));
+
+  OperatorApplyContext ctx_residual, ctx_error_u;
+  PetscCall(PetscCalloc1(1, &ctx_residual));
+  PetscCall(PetscCalloc1(1, &ctx_error_u));
+  // Context for residual
+  app_ctx->ctx_residual = ctx_residual;
+  // Context for computing error
+  app_ctx->ctx_error_u = ctx_error_u;
+  app_ctx->comm        = comm;
+
+  // ---------------------------------------------------------------------------
+  // Process command line options
+  // ---------------------------------------------------------------------------
+  PetscCall(ProcessCommandLineOptions(app_ctx));
+
+  // ---------------------------------------------------------------------------
+  // Initialize libCEED
+  // ---------------------------------------------------------------------------
+  // -- Initialize backend
+  Ceed ceed;
+  CeedInit(app_ctx->ceed_resource, &ceed);
+
+  // ---------------------------------------------------------------------------
+  // Choose the problem from the list of registered problems
+  // ---------------------------------------------------------------------------
+  PetscCall(RegisterProblems_Hdiv(app_ctx));
+  {
+    PetscErrorCode (*p)(ProblemData, void *);
+    PetscCall(PetscFunctionListFind(app_ctx->problems, app_ctx->problem_name, &p));
+    if (!p) SETERRQ(PETSC_COMM_SELF, 1, "Problem '%s' not found", app_ctx->problem_name);
+    PetscCall((*p)(problem_data, &app_ctx));
+  }
+
+  // ---------------------------------------------------------------------------
+  // Create DM and Setup FE space
+  // ---------------------------------------------------------------------------
+  DM dm;
+  PetscCall(CreateDM(comm, ceed, &dm));
+  PetscCall(SetupFEHdiv(app_ctx, problem_data, dm));
+
+  // ---------------------------------------------------------------------------
+  //  Create zero rhs local vector
+  // ---------------------------------------------------------------------------
+  CeedVector   rhs_ceed;
+  Vec          rhs_loc;
+  PetscScalar *r;
+  PetscMemType mem_type;
+  PetscInt     rhs_l_size;
+  // Create the zeroed local rhs vector and let rhs_ceed use its array directly (CEED_USE_POINTER)
+  PetscCall(DMCreateLocalVector(dm, &rhs_loc));
+  PetscCall(VecGetSize(rhs_loc, &rhs_l_size));
+  PetscCall(VecZeroEntries(rhs_loc));
+  PetscCall(VecGetArrayAndMemType(rhs_loc, &r, &mem_type));
+  CeedVectorCreate(ceed, rhs_l_size, &rhs_ceed);
+  CeedVectorSetArray(rhs_ceed, MemTypeP2C(mem_type), CEED_USE_POINTER, r);
+
+  // ---------------------------------------------------------------------------
+  // Setup libCEED qfunctions and operators
+  // ---------------------------------------------------------------------------
+  PetscCall(SetupLibceed(dm, ceed, app_ctx, problem_data, ceed_data, rhs_ceed));
+
+  // ---------------------------------------------------------------------------
+  // Setup rhs global vector entries with the computed rhs_ceed
+  // ---------------------------------------------------------------------------
+  Vec rhs;
+  PetscCall(DMCreateGlobalVector(dm, &rhs));
+  CeedVectorTakeArray(rhs_ceed, MemTypeP2C(mem_type), NULL);
+  PetscCall(VecRestoreArrayAndMemType(rhs_loc, &r));
+  PetscCall(VecZeroEntries(rhs));
+  PetscCall(DMLocalToGlobal(dm, rhs_loc, ADD_VALUES, rhs));
+  CeedVectorDestroy(&rhs_ceed);
+
+  // ---------------------------------------------------------------------------
+  // Solve A*X=rhs; setup-solver.c
+  // ---------------------------------------------------------------------------
+  Vec X;
+  KSP ksp;
+  PetscCall(SetupResidualOperatorCtx(app_ctx->comm, dm, ceed, ceed_data, app_ctx->ctx_residual));
+  // Create the global solution vector and the KSP, then solve
+  PetscCall(DMCreateGlobalVector(dm, &X));
+  PetscCall(KSPCreate(app_ctx->comm, &ksp));
+  PetscCall(KSPSetDM(ksp, dm));
+  PetscCall(KSPSetDMActive(ksp, PETSC_FALSE));
+  PetscCall(PDESolver(ceed_data, app_ctx, ksp, rhs, &X));
+
+  // ---------------------------------------------------------------------------
+  // Compute L2 error of mms problem; setup-solver.c
+  // ---------------------------------------------------------------------------
+  CeedScalar l2_error_u = 0.0;
+  PetscCall(SetupErrorOperatorCtx(app_ctx->comm, dm, ceed, ceed_data, app_ctx->ctx_error_u));
+  PetscCall(ComputeL2Error(X, &l2_error_u, app_ctx->ctx_error_u));
+
+  // ---------------------------------------------------------------------------
+  // Print solver iterations and final norms; post-processing
+  // ---------------------------------------------------------------------------
+  PetscCall(PrintOutput(dm, ceed, app_ctx, ksp, X, l2_error_u));
+
+  // ---------------------------------------------------------------------------
+  // Free objects
+  // ---------------------------------------------------------------------------
+
+  // Free PETSc objects
+  PetscCall(DMDestroy(&dm));
+  PetscCall(VecDestroy(&X));
+  PetscCall(VecDestroy(&rhs));
+  PetscCall(VecDestroy(&rhs_loc));
+  PetscCall(KSPDestroy(&ksp));
+  PetscCall(CtxVecDestroy(problem_data, app_ctx));
+  // -- Function list
+  PetscCall(PetscFunctionListDestroy(&app_ctx->problems));
+  // -- Structs
+  PetscCall(CeedDataDestroy(ceed_data));
+  PetscCall(PetscFree(app_ctx));
+  PetscCall(PetscFree(ctx_residual));
+  PetscCall(PetscFree(ctx_error_u));
+  PetscCall(PetscFree(problem_data));
+  // Free libCEED objects. NOTE(review): rhs_ceed was already destroyed after DMLocalToGlobal above; this second call looks redundant - confirm
+  CeedVectorDestroy(&rhs_ceed);
+  CeedDestroy(&ceed);
+
+  return PetscFinalize();
+}
diff --git a/examples/Hdiv-mass/main.h b/examples/Hdiv-mass/main.h
new file mode 100644
index 0000000000..f433f47a27
--- /dev/null
+++ b/examples/Hdiv-mass/main.h
@@ -0,0 +1,14 @@
+
+#ifndef MAIN_H
+#define MAIN_H
+
+#include "include/cl-options.h"
+#include "include/post-processing.h"
+#include "include/register-problem.h"
+#include "include/setup-dm.h"
+#include "include/setup-fe.h"
+#include "include/setup-libceed.h"
+#include "include/setup-matops.h"
+#include "include/setup-solvers.h"
+
+#endif  // MAIN_H
\ No newline at end of file
diff --git a/examples/Hdiv-mass/problems/mass2d.c b/examples/Hdiv-mass/problems/mass2d.c
new file mode 100644
index 0000000000..0cdef98f85
--- /dev/null
+++ b/examples/Hdiv-mass/problems/mass2d.c
@@ -0,0 +1,44 @@
+// Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
+// the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
+// reserved. See files LICENSE and NOTICE for details.
+//
+// This file is part of CEED, a collection of benchmarks, miniapps, software
+// libraries and APIs for efficient high-order finite element and spectral
+// element discretizations for exascale applications. For more information and
+// source code availability see http://github.com/ceed.
+//
+// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
+// a collaborative effort of two U.S. Department of Energy organizations (Office
+// of Science and the National Nuclear Security Administration) responsible for
+// the planning and preparation of a capable exascale ecosystem, including
+// software, applications, hardware, advanced system engineering and early
+// testbed platforms, in support of the nation's exascale computing imperative.
+
+/// @file
+/// Utility functions for setting up POISSON_QUAD2D
+
+#include "../include/register-problem.h"
+#include "../include/setup-libceed.h"
+#include "../qfunctions/poisson-error2d.h"
+#include "../qfunctions/poisson-mass2d.h"
+#include "../qfunctions/poisson-rhs2d.h"
+
+// Hdiv_POISSON_MASS2D is registered in problems/register-problem.c
+PetscErrorCode Hdiv_POISSON_MASS2D(ProblemData problem_data, void *ctx) {
+  PetscFunctionBeginUser;
+
+  // Discretization: dim = 2, elem_node = 4, Gauss quadrature (ctx is not used here)
+  problem_data->quadrature_mode = CEED_GAUSS;
+  problem_data->elem_node       = 4;
+  problem_data->dim             = 2;
+
+  // QFunctions and their *_loc strings for the error, the mass residual and the rhs
+  problem_data->setup_error     = SetupError2D;
+  problem_data->setup_error_loc = SetupError2D_loc;
+  problem_data->residual        = SetupMass2D;
+  problem_data->residual_loc    = SetupMass2D_loc;
+  problem_data->setup_rhs       = SetupRhs2D;
+  problem_data->setup_rhs_loc   = SetupRhs2D_loc;
+
+  PetscFunctionReturn(0);
+}
diff --git a/examples/Hdiv-mass/problems/mass3d.c b/examples/Hdiv-mass/problems/mass3d.c
new file mode 100644
index 0000000000..b2b847d19f
--- /dev/null
+++ b/examples/Hdiv-mass/problems/mass3d.c
@@ -0,0 +1,42 @@
+// Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
+// the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
+// reserved. See files LICENSE and NOTICE for details.
+//
+// This file is part of CEED, a collection of benchmarks, miniapps, software
+// libraries and APIs for efficient high-order finite element and spectral
+// element discretizations for exascale applications. For more information and
+// source code availability see http://github.com/ceed.
+//
+// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
+// a collaborative effort of two U.S. Department of Energy organizations (Office
+// of Science and the National Nuclear Security Administration) responsible for
+// the planning and preparation of a capable exascale ecosystem, including
+// software, applications, hardware, advanced system engineering and early
+// testbed platforms, in support of the nation's exascale computing imperative.
+
+/// @file
+
+#include "../include/register-problem.h"
+#include "../include/setup-libceed.h"
+#include "../qfunctions/poisson-error3d.h"
+#include "../qfunctions/poisson-mass3d.h"
+#include "../qfunctions/poisson-rhs3d.h"
+
+PetscErrorCode Hdiv_POISSON_MASS3D(ProblemData problem_data, void *ctx) {
+  PetscFunctionBeginUser;
+
+  // Discretization: dim = 3, elem_node = 8, Gauss quadrature (ctx is not used here)
+  problem_data->quadrature_mode = CEED_GAUSS;
+  problem_data->elem_node       = 8;
+  problem_data->dim             = 3;
+
+  // QFunctions and their *_loc strings for the error, the mass residual and the rhs
+  problem_data->setup_error     = SetupError3D;
+  problem_data->setup_error_loc = SetupError3D_loc;
+  problem_data->residual        = SetupMass3D;
+  problem_data->residual_loc    = SetupMass3D_loc;
+  problem_data->setup_rhs       = SetupRhs3D;
+  problem_data->setup_rhs_loc   = SetupRhs3D_loc;
+
+  PetscFunctionReturn(0);
+}
diff --git a/examples/Hdiv-mass/problems/register-problem.c b/examples/Hdiv-mass/problems/register-problem.c
new file mode 100644
index 0000000000..0e66ad3a15
--- /dev/null
+++ b/examples/Hdiv-mass/problems/register-problem.c
@@ -0,0 +1,33 @@
+// Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
+// the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
+// reserved. See files LICENSE and NOTICE for details.
+//
+// This file is part of CEED, a collection of benchmarks, miniapps, software
+// libraries and APIs for efficient high-order finite element and spectral
+// element discretizations for exascale applications. For more information and
+// source code availability see http://github.com/ceed.
+//
+// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
+// a collaborative effort of two U.S. Department of Energy organizations (Office
+// of Science and the National Nuclear Security Administration) responsible for
+// the planning and preparation of a capable exascale ecosystem, including
+// software, applications, hardware, advanced system engineering and early
+// testbed platforms, in support of the nation's exascale computing imperative.
+
+/// @file
+/// Problem registration for the H(div) mass example using PETSc
+
+#include "../include/register-problem.h"
+
+// Register problems to be available on the command line
+PetscErrorCode RegisterProblems_Hdiv(AppCtx app_ctx) {
+  PetscFunctionBeginUser;  // PETSc expects this to be the first executable line
+  // Start from an empty list; PetscFunctionListAdd() creates it when needed
+  app_ctx->problems = NULL;
+  // 1) "mass2d": Hdiv_POISSON_MASS2D, defined in problems/mass2d.c
+  PetscCall(PetscFunctionListAdd(&app_ctx->problems, "mass2d", Hdiv_POISSON_MASS2D));
+  // 2) "mass3d": Hdiv_POISSON_MASS3D, defined in problems/mass3d.c
+  PetscCall(PetscFunctionListAdd(&app_ctx->problems, "mass3d", Hdiv_POISSON_MASS3D));
+
+  PetscFunctionReturn(0);
+}
diff --git a/examples/Hdiv-mass/qfunctions/poisson-error2d.h b/examples/Hdiv-mass/qfunctions/poisson-error2d.h
new file mode 100644
index 0000000000..e7f19719dc
--- /dev/null
+++ b/examples/Hdiv-mass/qfunctions/poisson-error2d.h
@@ -0,0 +1,58 @@
+// Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
+// the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
+// reserved. See files LICENSE and NOTICE for details.
+//
+// This file is part of CEED, a collection of benchmarks, miniapps, software
+// libraries and APIs for efficient high-order finite element and spectral
+// element discretizations for exascale applications. For more information and
+// source code availability see http://github.com/ceed.
+//
+// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
+// a collaborative effort of two U.S. Department of Energy organizations (Office
+// of Science and the National Nuclear Security Administration) responsible for
+// the planning and preparation of a capable exascale ecosystem, including
+// software, applications, hardware, advanced system engineering and early
+// testbed platforms, in support of the nation's exascale computing imperative.
+
+/// @file
+/// Compute pointwise error of the H(div) example using PETSc
+
+#ifndef ERROR2D_H
+#define ERROR2D_H
+
+#include <math.h>
+
+#include "utils.h"
+// -----------------------------------------------------------------------------
+// Compute error
+// -----------------------------------------------------------------------------
+CEED_QFUNCTION(SetupError2D)(void *ctx, const CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) {
+  // Inputs: dx/dX, computed field u, target values and quadrature weights
+  const CeedScalar(*dxdX)[2][CEED_Q_VLA] = (const CeedScalar(*)[2][CEED_Q_VLA])in[0];
+  const CeedScalar(*u)[CEED_Q_VLA]       = (const CeedScalar(*)[CEED_Q_VLA])in[1];
+  const CeedScalar *target               = in[2];
+  const CeedScalar *w                    = in[3];
+  // Output: one weighted squared-error value per quadrature point
+  CeedScalar *error = out[0];
+
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) {
+    // J = dx/dX at quadrature point i
+    const CeedScalar J[2][2] = {
+        {dxdX[0][0][i], dxdX[1][0][i]},
+        {dxdX[0][1][i], dxdX[1][1][i]}
+    };
+    const CeedScalar det_J = MatDet2x2(J);
+    // Piola map of the field: uh = J*u/detJ
+    CeedScalar uh[2], u1[2] = {u[0][i], u[1][i]};
+    AlphaMatVecMult2x2(1 / det_J, J, u1, uh);
+    // Squared difference to the target, component by component
+    const CeedScalar diff[2] = {uh[0] - target[i + 0 * Q], uh[1] - target[i + 1 * Q]};
+    const CeedScalar err2[2] = {diff[0] * diff[0], diff[1] * diff[1]};
+    error[i + 0 * Q]         = (err2[0] + err2[1]) * w[i] * det_J;
+  }
+
+  return 0;
+}
+// -----------------------------------------------------------------------------
+
+#endif  // End ERROR2D_H
diff --git a/examples/Hdiv-mass/qfunctions/poisson-error3d.h b/examples/Hdiv-mass/qfunctions/poisson-error3d.h
new file mode 100644
index 0000000000..b8b09d5025
--- /dev/null
+++ b/examples/Hdiv-mass/qfunctions/poisson-error3d.h
@@ -0,0 +1,60 @@
+// Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
+// the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
+// reserved. See files LICENSE and NOTICE for details.
+//
+// This file is part of CEED, a collection of benchmarks, miniapps, software
+// libraries and APIs for efficient high-order finite element and spectral
+// element discretizations for exascale applications. For more information and
+// source code availability see http://github.com/ceed.
+//
+// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
+// a collaborative effort of two U.S. Department of Energy organizations (Office
+// of Science and the National Nuclear Security Administration) responsible for
+// the planning and preparation of a capable exascale ecosystem, including
+// software, applications, hardware, advanced system engineering and early
+// testbed platforms, in support of the nation's exascale computing imperative.
+
+/// @file
+/// Compute pointwise error of the H(div) example using PETSc
+
+#ifndef ERROR3D_H
+#define ERROR3D_H
+
+#include <math.h>
+
+#include "utils.h"
+// -----------------------------------------------------------------------------
+// Compute error
+// -----------------------------------------------------------------------------
+CEED_QFUNCTION(SetupError3D)(void *ctx, const CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) {
+  // Inputs: dx/dX, computed field u, target values and quadrature weights
+  const CeedScalar(*dxdX)[3][CEED_Q_VLA] = (const CeedScalar(*)[3][CEED_Q_VLA])in[0];
+  const CeedScalar(*u)[CEED_Q_VLA]       = (const CeedScalar(*)[CEED_Q_VLA])in[1];
+  const CeedScalar *target               = in[2];
+  const CeedScalar *w                    = in[3];
+  // Output: one weighted squared-error value per quadrature point
+  CeedScalar *error = out[0];
+
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) {
+    // J = dx/dX at quadrature point i
+    const CeedScalar J[3][3] = {
+        {dxdX[0][0][i], dxdX[1][0][i], dxdX[2][0][i]},
+        {dxdX[0][1][i], dxdX[1][1][i], dxdX[2][1][i]},
+        {dxdX[0][2][i], dxdX[1][2][i], dxdX[2][2][i]}
+    };
+    const CeedScalar det_J = MatDet3x3(J);
+    // Piola map of the field: uh = J*u/detJ
+    CeedScalar uh[3], u1[3] = {u[0][i], u[1][i], u[2][i]};
+    AlphaMatVecMult3x3(1 / det_J, J, u1, uh);
+    // Squared difference to the target, component by component
+    const CeedScalar diff[3] = {uh[0] - target[i + 0 * Q], uh[1] - target[i + 1 * Q], uh[2] - target[i + 2 * Q]};
+    const CeedScalar err2[3] = {diff[0] * diff[0], diff[1] * diff[1], diff[2] * diff[2]};
+
+    error[i + 0 * Q] = (err2[0] + err2[1] + err2[2]) * w[i] * det_J;
+  }
+
+  return 0;
+}
+// -----------------------------------------------------------------------------
+
+#endif  // End ERROR3D_H
diff --git a/examples/Hdiv-mass/qfunctions/poisson-mass2d.h b/examples/Hdiv-mass/qfunctions/poisson-mass2d.h
new file mode 100644
index 0000000000..f9293c39ed
--- /dev/null
+++ b/examples/Hdiv-mass/qfunctions/poisson-mass2d.h
@@ -0,0 +1,78 @@
+// Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
+// the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
+// reserved. See files LICENSE and NOTICE for details.
+//
+// This file is part of CEED, a collection of benchmarks, miniapps, software
+// libraries and APIs for efficient high-order finite element and spectral
+// element discretizations for exascale applications. For more information and
+// source code availability see http://github.com/ceed.
+//
+// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
+// a collaborative effort of two U.S. Department of Energy organizations (Office
+// of Science and the National Nuclear Security Administration) responsible for
+// the planning and preparation of a capable exascale ecosystem, including
+// software, applications, hardware, advanced system engineering and early
+// testbed platforms, in support of the nation's exascale computing imperative.
+
+/// @file
+/// Mixed poisson 2D quad element using PETSc
+
+#ifndef POISSON_MASS2D_H
+#define POISSON_MASS2D_H
+
+#include <math.h>
+
+#include "utils.h"
+// -----------------------------------------------------------------------------
+// This QFunction applies the mass operator for a vector field of 2 components.
+//
+// Inputs:
+//   w     - weight of quadrature
+//   J     - dx/dX. x physical coordinate, X reference coordinate [-1,1]^dim
+//   u     - Input basis at quadrature points
+//
+// Output:
+//   v     - Output vector (test functions) at quadrature points
+// Note we need to apply Piola map on the basis, which is J*u/detJ
+// So (v,u) = \int (v^T * u detJ*w) ==> \int (v^T J^T*J*u*w/detJ)
+// -----------------------------------------------------------------------------
+CEED_QFUNCTION(SetupMass2D)(void *ctx, CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) {
+  // Inputs: quadrature weights, dx/dX and the input field u at the quadrature points
+  const CeedScalar *w                    = in[0];
+  const CeedScalar(*dxdX)[2][CEED_Q_VLA] = (const CeedScalar(*)[2][CEED_Q_VLA])in[1];
+  const CeedScalar(*u)[CEED_Q_VLA]       = (const CeedScalar(*)[CEED_Q_VLA])in[2];
+  // Output: v, one value per component and quadrature point
+  CeedScalar(*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0];
+
+  // At every quadrature point i this evaluates
+  //   v = (w / detJ) * J^T * J * u
+  // in three steps: build J and detJ, form J^T * J, then apply the scaled
+  // product to u and store the result.
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) {
+    // J = dx/dX and its determinant
+    const CeedScalar J[2][2] = {
+        {dxdX[0][0][i], dxdX[1][0][i]},
+        {dxdX[0][1][i], dxdX[1][1][i]}
+    };
+    const CeedScalar det_J = MatDet2x2(J);
+    const CeedScalar scale = w[i] / det_J;
+
+    // J^T * J
+    CeedScalar JT_J[2][2];
+    AlphaMatTransposeMatMult2x2(1., J, J, JT_J);
+
+    // v1 = scale * (J^T * J) * u
+    CeedScalar v1[2], u1[2] = {u[0][i], u[1][i]};
+    AlphaMatVecMult2x2(scale, JT_J, u1, v1);
+
+    // Copy the result into the output
+    v[0][i] = v1[0];
+    v[1][i] = v1[1];
+  }
+
+  return 0;
+}
+
+// -----------------------------------------------------------------------------
+
+#endif  // End of POISSON_MASS2D_H
diff --git a/examples/Hdiv-mass/qfunctions/poisson-mass3d.h b/examples/Hdiv-mass/qfunctions/poisson-mass3d.h
new file mode 100644
index 0000000000..1cfe39b8d0
--- /dev/null
+++ b/examples/Hdiv-mass/qfunctions/poisson-mass3d.h
@@ -0,0 +1,77 @@
+// Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
+// the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
+// reserved. See files LICENSE and NOTICE for details.
+//
+// This file is part of CEED, a collection of benchmarks, miniapps, software
+// libraries and APIs for efficient high-order finite element and spectral
+// element discretizations for exascale applications. For more information and
+// source code availability see http://github.com/ceed.
+//
+// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
+// a collaborative effort of two U.S. Department of Energy organizations (Office
+// of Science and the National Nuclear Security Administration) responsible for
+// the planning and preparation of a capable exascale ecosystem, including
+// software, applications, hardware, advanced system engineering and early
+// testbed platforms, in support of the nation's exascale computing imperative.
+
+/// @file
+/// Mixed poisson 3D hex element using PETSc
+
+#ifndef POISSON_MASS3D_H
+#define POISSON_MASS3D_H
+
+#include <math.h>
+
+#include "utils.h"
+// -----------------------------------------------------------------------------
+// This QFunction applies the mass operator for a vector field of 3 components.
+//
+// Inputs:
+//   w     - weight of quadrature
+//   J     - dx/dX. x physical coordinate, X reference coordinate [-1,1]^dim
+//   u     - Input basis at quadrature points
+//
+// Output:
+//   v     - Output vector (test functions) at quadrature points
+// Note we need to apply Piola map on the basis, which is J*u/detJ
+// So (v,u) = \int (v^T * u detJ*w) ==> \int (v^T J^T*J*u*w/detJ)
+// -----------------------------------------------------------------------------
+CEED_QFUNCTION(SetupMass3D)(void *ctx, CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) {
+  // Inputs: quadrature weights, dx/dX and the input field u at the quadrature points
+  const CeedScalar *w                    = in[0];
+  const CeedScalar(*dxdX)[3][CEED_Q_VLA] = (const CeedScalar(*)[3][CEED_Q_VLA])in[1];
+  const CeedScalar(*u)[CEED_Q_VLA]       = (const CeedScalar(*)[CEED_Q_VLA])in[2];
+  // Output: v, one value per component and quadrature point
+  CeedScalar(*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0];
+
+  // At every quadrature point i this evaluates v = (w / detJ) * J^T * J * u:
+  // build J and detJ, form J^T * J, then apply the scaled product to u.
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) {
+    // J = dx/dX and its determinant
+    const CeedScalar J[3][3] = {
+        {dxdX[0][0][i], dxdX[1][0][i], dxdX[2][0][i]},
+        {dxdX[0][1][i], dxdX[1][1][i], dxdX[2][1][i]},
+        {dxdX[0][2][i], dxdX[1][2][i], dxdX[2][2][i]}
+    };
+    const CeedScalar det_J = MatDet3x3(J);
+    const CeedScalar scale = w[i] / det_J;
+
+    // J^T * J
+    CeedScalar JT_J[3][3];
+    AlphaMatTransposeMatMult3x3(1., J, J, JT_J);
+
+    // v1 = scale * (J^T * J) * u
+    CeedScalar v1[3], u1[3] = {u[0][i], u[1][i], u[2][i]};
+    AlphaMatVecMult3x3(scale, JT_J, u1, v1);
+    // Copy the result into the output
+    v[0][i] = v1[0];
+    v[1][i] = v1[1];
+    v[2][i] = v1[2];
+  }
+
+  return 0;
+}
+
+// -----------------------------------------------------------------------------
+
+#endif  // End of POISSON_MASS3D_H
diff --git a/examples/Hdiv-mass/qfunctions/poisson-rhs2d.h b/examples/Hdiv-mass/qfunctions/poisson-rhs2d.h
new file mode 100644
index 0000000000..f3b7e32a74
--- /dev/null
+++ b/examples/Hdiv-mass/qfunctions/poisson-rhs2d.h
@@ -0,0 +1,78 @@
+// Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
+// the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
+// reserved. See files LICENSE and NOTICE for details.
+//
+// This file is part of CEED, a collection of benchmarks, miniapps, software
+// libraries and APIs for efficient high-order finite element and spectral
+// element discretizations for exascale applications. For more information and
+// source code availability see http://github.com/ceed.
+//
+// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
+// a collaborative effort of two U.S. Department of Energy organizations (Office
+// of Science and the National Nuclear Security Administration) responsible for
+// the planning and preparation of a capable exascale ecosystem, including
+// software, applications, hardware, advanced system engineering and early
+// testbed platforms, in support of the nation's exascale computing imperative.
+
+/// @file
+/// Mixed poisson 2D quad element using PETSc
+
+#ifndef POISSON_RHS2D_H
+#define POISSON_RHS2D_H
+
+#include <math.h>
+
+#include "utils.h"
+
+#ifndef M_PI
+#define M_PI 3.14159265358979323846
+#endif
+// -----------------------------------------------------------------------------
+// This QFunction sets up the rhs and true solution for the problem
+// Inputs:
+//   x     - interpolation of the physical coordinate
+//   w     - weight of quadrature
+//   J     - dx/dX. x physical coordinate, X reference coordinate [-1,1]^dim
+//
+// Output:
+//   true_soln - True solution that is used in poisson-error2d.h
+//               to compute pointwise max error
+//   rhs       - Output vector (test functions) at quadrature points
+// Note we need to apply Piola map on the basis, which is J*u/detJ
+// So (v,ue) = \int (v^T * ue detJ*w) ==> \int (v^T J^T* ue * w)
+// -----------------------------------------------------------------------------
+CEED_QFUNCTION(SetupRhs2D)(void *ctx, const CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) {
+  // Inputs: physical coordinates, quadrature weights and dx/dX
+  const CeedScalar *coords               = in[0];
+  const CeedScalar *w                    = in[1];
+  const CeedScalar(*dxdX)[2][CEED_Q_VLA] = (const CeedScalar(*)[2][CEED_Q_VLA])in[2];
+  // Outputs: true solution and rhs, both indexed as [i + component * Q]
+  CeedScalar *true_soln = out[0];
+  CeedScalar *rhs       = out[1];
+
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) {
+    // Physical coordinates (x, y) of quadrature point i
+    const CeedScalar x = coords[i + 0 * Q], y = coords[i + 1 * Q];
+    // J = dx/dX
+    const CeedScalar J[2][2] = {
+        {dxdX[0][0][i], dxdX[1][0][i]},
+        {dxdX[0][1][i], dxdX[1][1][i]}
+    };
+    // True solution ue evaluated at (x, y)
+    CeedScalar ue[2] = {-M_PI * cos(M_PI * x) * sin(M_PI * y), -M_PI * sin(M_PI * x) * cos(M_PI * y)};
+    // rhs1 = J^T * ue
+    CeedScalar rhs1[2];
+    AlphaMatTransposeVecMult2x2(1, J, ue, rhs1);
+
+    // Store both components of the true solution and of the weighted rhs
+    for (CeedInt k = 0; k < 2; k++) {
+      true_soln[i + k * Q] = ue[k];
+      rhs[i + k * Q]       = rhs1[k] * w[i];
+    }
+  }
+
+  return 0;
+}
+// -----------------------------------------------------------------------------
+
+#endif  // End of POISSON_RHS2D_H
diff --git a/examples/Hdiv-mass/qfunctions/poisson-rhs3d.h b/examples/Hdiv-mass/qfunctions/poisson-rhs3d.h
new file mode 100644
index 0000000000..73e539f80b
--- /dev/null
+++ b/examples/Hdiv-mass/qfunctions/poisson-rhs3d.h
@@ -0,0 +1,80 @@
+// Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
+// the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
+// reserved. See files LICENSE and NOTICE for details.
+//
+// This file is part of CEED, a collection of benchmarks, miniapps, software
+// libraries and APIs for efficient high-order finite element and spectral
+// element discretizations for exascale applications. For more information and
+// source code availability see http://github.com/ceed.
+//
+// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
+// a collaborative effort of two U.S. Department of Energy organizations (Office
+// of Science and the National Nuclear Security Administration) responsible for
+// the planning and preparation of a capable exascale ecosystem, including
+// software, applications, hardware, advanced system engineering and early
+// testbed platforms, in support of the nation's exascale computing imperative.
+
+/// @file
+/// Mixed poisson 3D Hex element using PETSc
+
+#ifndef POISSON_RHS3D_H
+#define POISSON_RHS3D_H
+
+#include <math.h>
+
+#include "utils.h"
+#ifndef M_PI
+#define M_PI 3.14159265358979323846
+#endif
+// -----------------------------------------------------------------------------
+// This QFunction sets up the rhs and true solution for the problem
+// Inputs:
+//   x     - interpolation of the physical coordinate
+//   w     - weight of quadrature
+//   J     - dx/dX. x physical coordinate, X reference coordinate [-1,1]^dim
+//
+// Output:
+//   true_soln - True solution, used in poisson-error2d.h
+//               to compute the pointwise max error
+//   rhs       - Output vector (test functions) at quadrature points
+// Note we need to apply Piola map on the basis, which is J*u/detJ
+// So (v,ue) = \int (v^T * ue detJ*w) ==> \int (v^T J^T* ue * w)
+// -----------------------------------------------------------------------------
+CEED_QFUNCTION(SetupRhs3D)(void *ctx, const CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) {
+  // *INDENT-OFF*
+  // Inputs: coordinates (component-major), quadrature weights, Jacobian dx/dX
+  const CeedScalar *coords               = in[0];
+  const CeedScalar *w                    = in[1];
+  const CeedScalar(*dxdX)[3][CEED_Q_VLA] = (const CeedScalar(*)[3][CEED_Q_VLA])in[2];
+  // Outputs: true solution and weighted rhs, each stored component-major
+  CeedScalar *true_soln = out[0];
+  CeedScalar *rhs       = out[1];
+
+  CeedPragmaSIMD for (CeedInt q = 0; q < Q; q++) {
+    // Physical location of this quadrature point
+    const CeedScalar x = coords[q + 0 * Q], y = coords[q + 1 * Q], z = coords[q + 2 * Q];
+    // J[j][k] is read from dxdX[k][j][q]
+    const CeedScalar J[3][3] = {
+        {dxdX[0][0][q], dxdX[1][0][q], dxdX[2][0][q]},
+        {dxdX[0][1][q], dxdX[1][1][q], dxdX[2][1][q]},
+        {dxdX[0][2][q], dxdX[1][2][q], dxdX[2][2][q]}
+    };
+    // *INDENT-ON*
+    // True solution ue at (x, y, z). NOTE(review): ue[2] uses sin(M_PI * z); confirm cos was not intended
+    const CeedScalar ue[3] = {-M_PI * cos(M_PI * x) * sin(M_PI * y) * sin(M_PI * z), -M_PI * sin(M_PI * x) * cos(M_PI * y) * sin(M_PI * z),
+                              -M_PI * sin(M_PI * x) * sin(M_PI * y) * sin(M_PI * z)};
+    // J_t_ue = J^T * ue
+    CeedScalar J_t_ue[3];
+    AlphaMatTransposeVecMult3x3(1, J, ue, J_t_ue);
+
+    // Store all three components; component c lives at offset c * Q
+    for (CeedInt c = 0; c < 3; c++) {
+      true_soln[q + c * Q] = ue[c];
+      rhs[q + c * Q]       = J_t_ue[c] * w[q];
+    }
+  }  // End of quadrature point loop
+  return 0;
+}
+// -----------------------------------------------------------------------------
+
+#endif  // End of POISSON_RHS3D_H
diff --git a/examples/Hdiv-mass/qfunctions/utils.h b/examples/Hdiv-mass/qfunctions/utils.h
new file mode 100644
index 0000000000..918d9ff953
--- /dev/null
+++ b/examples/Hdiv-mass/qfunctions/utils.h
@@ -0,0 +1,192 @@
+/// @file
+/// Utility helpers QFunction source
+
+#ifndef utils_qf_h
+#define utils_qf_h
+
+#include <math.h>
+
+#include "ceed/ceed-f64.h"
+
+#define PI_DOUBLE 3.14159265358979323846
+
+// -----------------------------------------------------------------------------
+// Scaled 3x3 matrix-matrix product: C = alpha * A * B
+// -----------------------------------------------------------------------------
+CEED_QFUNCTION_HELPER int AlphaMatMatMult3x3(const CeedScalar alpha, const CeedScalar A[3][3], const CeedScalar B[3][3], CeedScalar C[3][3]) {
+  // C[row][col] accumulates alpha * A[row][i] * B[i][col] over i
+  for (CeedInt row = 0; row < 3; row++) {
+    for (CeedInt col = 0; col < 3; col++) {
+      C[row][col] = 0;
+      for (CeedInt i = 0; i < 3; i++) C[row][col] += alpha * A[row][i] * B[i][col];
+    }
+  }
+
+  // Always reports success
+  return 0;
+}
+
+// -----------------------------------------------------------------------------
+// Scaled 3x3 product with the first factor transposed: C = alpha * A^T * B
+// -----------------------------------------------------------------------------
+CEED_QFUNCTION_HELPER int AlphaMatTransposeMatMult3x3(const CeedScalar alpha, const CeedScalar A[3][3], const CeedScalar B[3][3],
+                                                      CeedScalar C[3][3]) {
+  // C[row][col] accumulates alpha * A[i][row] * B[i][col] over i
+  for (CeedInt row = 0; row < 3; row++) {
+    for (CeedInt col = 0; col < 3; col++) {
+      C[row][col] = 0;
+      for (CeedInt i = 0; i < 3; i++) C[row][col] += alpha * A[i][row] * B[i][col];
+    }
+  }
+
+  // Always reports success
+  return 0;
+}
+
+// -----------------------------------------------------------------------------
+// Determinant of a 3x3 matrix by cofactor expansion along the first column
+// -----------------------------------------------------------------------------
+CEED_QFUNCTION_HELPER CeedScalar MatDet3x3(const CeedScalar A[3][3]) {
+  // Signed cofactors belonging to A[0][0], A[1][0] and A[2][0]
+  const CeedScalar cof_00 = A[1][1] * A[2][2] - A[1][2] * A[2][1];
+  const CeedScalar cof_10 = A[0][2] * A[2][1] - A[0][1] * A[2][2];
+  const CeedScalar cof_20 = A[0][1] * A[1][2] - A[0][2] * A[1][1];
+  return A[0][0] * cof_00 + A[1][0] * cof_10 + A[2][0] * cof_20;
+}
+
+// -----------------------------------------------------------------------------
+// Compute inverse of a general 3x3 matrix (A does not have to be symmetric)
+//   A     - matrix to invert
+//   det_A - determinant of A supplied by the caller; not checked, must be nonzero
+//   A_inv - output, A_inv = adj(A) / det_A
+//   Returns 0
+// -----------------------------------------------------------------------------
+CEED_QFUNCTION_HELPER int MatInverse3x3(const CeedScalar A[3][3], const CeedScalar det_A, CeedScalar A_inv[3][3]) {
+  // Adjugate of A: adj[j][k] is the signed cofactor of A[k][j].
+  // All entries are formed before A_inv is written.
+  const CeedScalar adj[3][3] = {
+      {A[1][1] * A[2][2] - A[1][2] * A[2][1],
+       A[0][2] * A[2][1] - A[0][1] * A[2][2],
+       A[0][1] * A[1][2] - A[0][2] * A[1][1]},
+      {A[1][2] * A[2][0] - A[1][0] * A[2][2],
+       A[0][0] * A[2][2] - A[0][2] * A[2][0],
+       A[0][2] * A[1][0] - A[0][0] * A[1][2]},
+      {A[1][0] * A[2][1] - A[1][1] * A[2][0],
+       A[0][1] * A[2][0] - A[0][0] * A[2][1],
+       A[0][0] * A[1][1] - A[0][1] * A[1][0]}
+  };
+  // Scale the adjugate by 1 / det_A entry by entry
+  for (CeedInt j = 0; j < 3; j++) {
+    for (CeedInt k = 0; k < 3; k++) A_inv[j][k] = adj[j][k] / det_A;
+  }
+
+  return 0;
+}
+
+// -----------------------------------------------------------------------------
+// Scaled 3x3 matrix-vector product: v = alpha * A * u
+// -----------------------------------------------------------------------------
+CEED_QFUNCTION_HELPER int AlphaMatVecMult3x3(const CeedScalar alpha, const CeedScalar A[3][3], const CeedScalar u[3], CeedScalar v[3]) {
+  for (CeedInt row = 0; row < 3; row++) {
+    // v[row] accumulates A[row][col] * u[col] * alpha over col
+    v[row] = 0;
+    for (CeedInt col = 0; col < 3; col++) v[row] += A[row][col] * u[col] * alpha;
+  }
+
+  return 0;
+}
+
+// -----------------------------------------------------------------------------
+// Scaled 3x3 transposed matrix-vector product: v = alpha * A^T * u
+// -----------------------------------------------------------------------------
+CEED_QFUNCTION_HELPER int AlphaMatTransposeVecMult3x3(const CeedScalar alpha, const CeedScalar A[3][3], const CeedScalar u[3], CeedScalar v[3]) {
+  for (CeedInt col = 0; col < 3; col++) {
+    // v[col] accumulates A[row][col] * u[row] * alpha over row
+    v[col] = 0;
+    for (CeedInt row = 0; row < 3; row++) v[col] += A[row][col] * u[row] * alpha;
+  }
+
+  return 0;
+}
+
+// -----------------------------------------------------------------------------
+// Scaled 2x2 matrix-matrix product: C = alpha * A * B
+// -----------------------------------------------------------------------------
+CEED_QFUNCTION_HELPER int AlphaMatMatMult2x2(const CeedScalar alpha, const CeedScalar A[2][2], const CeedScalar B[2][2], CeedScalar C[2][2]) {
+  // C[row][col] accumulates alpha * A[row][i] * B[i][col] over i
+  for (CeedInt row = 0; row < 2; row++) {
+    for (CeedInt col = 0; col < 2; col++) {
+      C[row][col] = 0;
+      for (CeedInt i = 0; i < 2; i++) C[row][col] += alpha * A[row][i] * B[i][col];
+    }
+  }
+
+  // Always reports success
+  return 0;
+}
+
+// -----------------------------------------------------------------------------
+// Scaled 2x2 product with the first factor transposed: C = alpha * A^T * B
+// -----------------------------------------------------------------------------
+CEED_QFUNCTION_HELPER int AlphaMatTransposeMatMult2x2(const CeedScalar alpha, const CeedScalar A[2][2], const CeedScalar B[2][2],
+                                                      CeedScalar C[2][2]) {
+  // C[row][col] accumulates alpha * A[i][row] * B[i][col] over i
+  for (CeedInt row = 0; row < 2; row++) {
+    for (CeedInt col = 0; col < 2; col++) {
+      C[row][col] = 0;
+      for (CeedInt i = 0; i < 2; i++) C[row][col] += alpha * A[i][row] * B[i][col];
+    }
+  }
+
+  // Always reports success
+  return 0;
+}
+
+// -----------------------------------------------------------------------------
+// Determinant of a 2x2 matrix: product of the diagonal minus product of the off-diagonal
+// -----------------------------------------------------------------------------
+CEED_QFUNCTION_HELPER CeedScalar MatDet2x2(const CeedScalar A[2][2]) {
+  const CeedScalar det_A = A[0][0] * A[1][1] - A[1][0] * A[0][1];
+  return det_A;
+}
+
+// -----------------------------------------------------------------------------
+// Compute inverse of a 2x2 matrix from its precomputed determinant det_A
+// -----------------------------------------------------------------------------
+CEED_QFUNCTION_HELPER int MatInverse2x2(const CeedScalar A[2][2], const CeedScalar det_A, CeedScalar A_inv[2][2]) {
+  // A_inv = adj(A) / det_A: diagonal entries swap places, off-diagonal entries change sign
+  A_inv[0][0] = A[1][1] / det_A;
+  A_inv[1][1] = A[0][0] / det_A;
+  A_inv[0][1] = -A[0][1] / det_A;
+  A_inv[1][0] = -A[1][0] / det_A;
+
+  return 0;
+}
+
+// -----------------------------------------------------------------------------
+// Scaled 2x2 matrix-vector product: v = alpha * A * u
+// -----------------------------------------------------------------------------
+CEED_QFUNCTION_HELPER int AlphaMatVecMult2x2(const CeedScalar alpha, const CeedScalar A[2][2], const CeedScalar u[2], CeedScalar v[2]) {
+  for (CeedInt row = 0; row < 2; row++) {
+    // v[row] accumulates A[row][col] * u[col] * alpha over col
+    v[row] = 0;
+    for (CeedInt col = 0; col < 2; col++) v[row] += A[row][col] * u[col] * alpha;
+  }
+
+  return 0;
+}
+
+// -----------------------------------------------------------------------------
+// Scaled 2x2 transposed matrix-vector product: v = alpha * A^T * u
+// -----------------------------------------------------------------------------
+CEED_QFUNCTION_HELPER int AlphaMatTransposeVecMult2x2(const CeedScalar alpha, const CeedScalar A[2][2], const CeedScalar u[2], CeedScalar v[2]) {
+  for (CeedInt col = 0; col < 2; col++) {
+    // v[col] accumulates A[row][col] * u[row] * alpha over row
+    v[col] = 0;
+    for (CeedInt row = 0; row < 2; row++) v[col] += A[row][col] * u[row] * alpha;
+  }
+
+  return 0;
+}
+
+#endif  // utils_qf_h
diff --git a/examples/Hdiv-mass/src/cl-options.c b/examples/Hdiv-mass/src/cl-options.c
new file mode 100644
index 0000000000..4b785b430a
--- /dev/null
+++ b/examples/Hdiv-mass/src/cl-options.c
@@ -0,0 +1,57 @@
+// Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
+// the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
+// reserved. See files LICENSE and NOTICE for details.
+//
+// This file is part of CEED, a collection of benchmarks, miniapps, software
+// libraries and APIs for efficient high-order finite element and spectral
+// element discretizations for exascale applications. For more information and
+// source code availability see http://github.com/ceed.
+//
+// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
+// a collaborative effort of two U.S. Department of Energy organizations (Office
+// of Science and the National Nuclear Security Administration) responsible for
+// the planning and preparation of a capable exascale ecosystem, including
+// software, applications, hardware, advanced system engineering and early
+// testbed platforms, in support of the nation's exascale computing imperative.
+
+/// @file
+/// Command line option processing for H(div) example using PETSc
+
+#include "../include/cl-options.h"
+
+// Process general command line options; values not given on the command line fall back to defaults
+PetscErrorCode ProcessCommandLineOptions(AppCtx app_ctx) {
+  PetscBool problem_flag = PETSC_FALSE;
+  PetscBool ceed_flag    = PETSC_FALSE;
+  PetscFunctionBeginUser;
+
+  PetscOptionsBegin(app_ctx->comm, NULL, "H(div) examples in PETSc with libCEED", NULL);
+
+  PetscCall(PetscOptionsString("-ceed", "CEED resource specifier", NULL, app_ctx->ceed_resource, app_ctx->ceed_resource,
+                               sizeof(app_ctx->ceed_resource), &ceed_flag));
+
+  // Default ceed resource if not specified; the copy is bounded by the destination size and null-terminated
+  if (!ceed_flag) {
+    PetscCall(PetscStrncpy(app_ctx->ceed_resource, "/cpu/self", sizeof(app_ctx->ceed_resource)));
+  }
+
+  PetscCall(PetscOptionsFList("-problem", "Problem to solve", NULL, app_ctx->problems, app_ctx->problem_name, app_ctx->problem_name,
+                              sizeof(app_ctx->problem_name), &problem_flag));
+
+  // Polynomial degree, default 1
+  app_ctx->degree = 1;
+  PetscCall(PetscOptionsInt("-degree", "Polynomial degree of finite elements", NULL, app_ctx->degree, &app_ctx->degree, NULL));
+
+  // Extra quadrature points, default 0
+  app_ctx->q_extra = 0;
+  PetscCall(PetscOptionsInt("-q_extra", "Number of extra quadrature points", NULL, app_ctx->q_extra, &app_ctx->q_extra, NULL));
+
+  // Default problem if not specified; the copy is bounded by the destination size and null-terminated
+  if (!problem_flag) {
+    PetscCall(PetscStrncpy(app_ctx->problem_name, "mass2d", sizeof(app_ctx->problem_name)));
+  }
+
+  PetscOptionsEnd();
+
+  PetscFunctionReturn(0);
+}
diff --git a/examples/Hdiv-mass/src/post-processing.c b/examples/Hdiv-mass/src/post-processing.c
new file mode 100644
index 0000000000..c1281a1146
--- /dev/null
+++ b/examples/Hdiv-mass/src/post-processing.c
@@ -0,0 +1,77 @@
+#include "../include/post-processing.h"
+
+// -----------------------------------------------------------------------------
+// Print a run summary (MPI, libCEED, PETSc, mesh, KSP, L2 error) on app_ctx->comm; X is currently unused
+// -----------------------------------------------------------------------------
+PetscErrorCode PrintOutput(DM dm, Ceed ceed, AppCtx app_ctx, KSP ksp, Vec X, CeedScalar l2_error_u) {
+  PetscFunctionBeginUser;
+
+  // Resource string and preferred memory type reported by libCEED
+  const char *used_resource;
+  CeedMemType mem_type_backend;
+  CeedGetResource(ceed, &used_resource);
+  CeedGetPreferredMemType(ceed, &mem_type_backend);
+  char hostname[PETSC_MAX_PATH_LEN];
+  PetscCall(PetscGetHostName(hostname, sizeof hostname));
+  // MPI calls return MPI error codes, so they are checked with PetscCallMPI
+  PetscMPIInt comm_size;
+  PetscCallMPI(MPI_Comm_size(app_ctx->comm, &comm_size));
+  PetscCall(PetscPrintf(app_ctx->comm,
+                        "\n-- Mixed-Elasticity Example - libCEED + PETSc --\n"
+                        "  MPI:\n"
+                        "    Hostname                                : %s\n"
+                        "    Total ranks                             : %d\n"
+                        "  libCEED:\n"
+                        "    libCEED Backend                         : %s\n"
+                        "    libCEED Backend MemType                 : %s\n",
+                        hostname, comm_size, used_resource, CeedMemTypes[mem_type_backend]));
+
+  MatType mat_type;
+  VecType vec_type;
+  PetscCall(DMGetMatType(dm, &mat_type));
+  PetscCall(DMGetVecType(dm, &vec_type));
+  PetscCall(PetscPrintf(app_ctx->comm,
+                        "  PETSc:\n"
+                        "    DM MatType                              : %s\n"
+                        "    DM VecType                              : %s\n",
+                        mat_type, vec_type));
+  // The type of the first local cell is reported as the element topology
+  PetscInt c_start, c_end;
+  PetscCall(DMPlexGetHeightStratum(dm, 0, &c_start, &c_end));
+  DMPolytopeType cell_type;
+  PetscCall(DMPlexGetCellType(dm, c_start, &cell_type));
+  CeedElemTopology elem_topo = ElemTopologyP2C(cell_type);
+  PetscCall(PetscPrintf(app_ctx->comm,
+                        "  Problem:\n"
+                        "    Problem Name                            : %s\n"
+                        "  Mesh:\n"
+                        "    Local Elements                          : %" PetscInt_FMT "\n"
+                        "    Element topology                        : %s\n",
+                        app_ctx->problem_name, c_end - c_start, CeedElemTopologies[elem_topo]));
+  PetscInt           ksp_its;
+  KSPType            ksp_type;
+  KSPConvergedReason ksp_reason;
+  PetscReal          ksp_rnorm;
+  PC                 pc;
+  PCType             pc_type;
+  PetscCall(KSPGetPC(ksp, &pc));
+  PetscCall(PCGetType(pc, &pc_type));
+  PetscCall(KSPGetType(ksp, &ksp_type));
+  PetscCall(KSPGetConvergedReason(ksp, &ksp_reason));
+  PetscCall(KSPGetIterationNumber(ksp, &ksp_its));
+  PetscCall(KSPGetResidualNorm(ksp, &ksp_rnorm));
+  PetscCall(PetscPrintf(app_ctx->comm,
+                        "  KSP:\n"
+                        "    KSP Type                                : %s\n"
+                        "    PC Type                                 : %s\n"
+                        "    KSP Convergence                         : %s\n"
+                        "    Total KSP Iterations                    : %" PetscInt_FMT "\n"
+                        "    Final rnorm                             : %e\n",
+                        ksp_type, pc_type, KSPConvergedReasons[ksp_reason], ksp_its, (double)ksp_rnorm));
+
+  PetscCall(PetscPrintf(app_ctx->comm,
+                        "  L2 Error (MMS):\n"
+                        "    L2 error of u                           : %e\n",
+                        (double)l2_error_u));
+  PetscFunctionReturn(0);
+}
\ No newline at end of file
diff --git a/examples/Hdiv-mass/src/setup-dm.c b/examples/Hdiv-mass/src/setup-dm.c
new file mode 100644
index 0000000000..ce925c71a8
--- /dev/null
+++ b/examples/Hdiv-mass/src/setup-dm.c
@@ -0,0 +1,45 @@
+#include "../include/setup-dm.h"
+
+#include "petscerror.h"
+
+// ---------------------------------------------------------------------------
+// Create a DMPlex whose Vec and Mat types follow the preferred memory type of the libCEED backend
+// ---------------------------------------------------------------------------
+PetscErrorCode CreateDM(MPI_Comm comm, Ceed ceed, DM *dm) {
+  PetscFunctionBeginUser;
+
+  CeedMemType mem_type_backend;
+  CeedGetPreferredMemType(ceed, &mem_type_backend);
+  VecType vec_type = VECSTANDARD;  // host default, so strstr below never receives NULL
+  MatType mat_type = NULL;
+  switch (mem_type_backend) {
+    case CEED_MEM_DEVICE: {
+      const char *resolved;
+      CeedGetResource(ceed, &resolved);
+      if (strstr(resolved, "/gpu/cuda")) vec_type = VECCUDA;
+      else if (strstr(resolved, "/gpu/hip/occa")) vec_type = VECSTANDARD;  // https://github.com/CEED/libCEED/issues/678
+      else if (strstr(resolved, "/gpu/hip")) vec_type = VECHIP;
+      else vec_type = VECSTANDARD;
+    } break;
+    case CEED_MEM_HOST:
+    default:
+      vec_type = VECSTANDARD;
+      break;
+  }
+  if (strstr(vec_type, VECCUDA)) mat_type = MATAIJCUSPARSE;
+  else if (strstr(vec_type, VECKOKKOS)) mat_type = MATAIJKOKKOS;
+  else mat_type = MATAIJ;
+
+  // Create DMPLEX
+  PetscCall(DMCreate(comm, dm));
+  PetscCall(DMSetType(*dm, DMPLEX));
+  PetscCall(DMSetMatType(*dm, mat_type));
+  PetscCall(DMSetVecType(*dm, vec_type));
+  // Set Tensor elements
+  PetscCall(PetscOptionsSetValue(NULL, "-dm_plex_simplex", "0"));
+  // Set CL options
+  PetscCall(DMSetFromOptions(*dm));
+  PetscCall(DMViewFromOptions(*dm, NULL, "-dm_view"));
+
+  PetscFunctionReturn(0);
+}
\ No newline at end of file
diff --git a/examples/Hdiv-mass/src/setup-fe.c b/examples/Hdiv-mass/src/setup-fe.c
new file mode 100644
index 0000000000..09bf4d78cd
--- /dev/null
+++ b/examples/Hdiv-mass/src/setup-fe.c
@@ -0,0 +1,149 @@
+#include "../include/setup-fe.h"
+
+#include "petscerror.h"
+// -----------------------------------------------------------------------------
+// Map a PETSc MemType to a libCEED MemType: device types give CEED_MEM_DEVICE, anything else CEED_MEM_HOST
+// -----------------------------------------------------------------------------
+CeedMemType MemTypeP2C(PetscMemType mem_type) { return !PetscMemTypeDevice(mem_type) ? CEED_MEM_HOST : CEED_MEM_DEVICE; }
+
+// ---------------------------------------------------------------------------
+// Build the local section for the H(div) space: one "Velocity" field with dofs attached to faces
+// ---------------------------------------------------------------------------
+PetscErrorCode SetupFEHdiv(AppCtx app_ctx, ProblemData problem_data, DM dm) {
+  PetscSection section;
+  PetscInt     chart_start, chart_end;
+  PetscInt     cell_start, cell_end;
+  PetscInt     face_start, face_end;
+  PetscInt     vertex_start, vertex_end;
+
+  PetscFunctionBeginUser;
+
+  // Point ranges of the plex: whole chart, height 0, height 1 and depth 0
+  PetscCall(DMPlexGetChart(dm, &chart_start, &chart_end));
+  PetscCall(DMPlexGetHeightStratum(dm, 0, &cell_start, &cell_end));
+  PetscCall(DMPlexGetHeightStratum(dm, 1, &face_start, &face_end));
+  PetscCall(DMPlexGetDepthStratum(dm, 0, &vertex_start, &vertex_end));
+  // Single-field, single-component section over the whole chart
+  PetscCall(PetscSectionCreate(app_ctx->comm, &section));
+  PetscCall(PetscSectionSetNumFields(section, 1));
+  PetscCall(PetscSectionSetFieldName(section, 0, "Velocity"));
+  PetscCall(PetscSectionSetFieldComponents(section, 0, 1));
+  PetscCall(PetscSectionSetChart(section, chart_start, chart_end));
+  // Each face receives as many dofs as its cone size
+  for (PetscInt face = face_start; face < face_end; face++) {
+    PetscInt num_face_dofs;
+    PetscCall(DMPlexGetConeSize(dm, face, &num_face_dofs));
+    PetscCall(PetscSectionSetFieldDof(section, face, 0, num_face_dofs));
+    PetscCall(PetscSectionSetDof(section, face, num_face_dofs));
+  }
+  PetscCall(PetscSectionSetUp(section));
+  PetscCall(DMSetSection(dm, section));
+  PetscCall(PetscSectionDestroy(&section));
+
+  PetscFunctionReturn(0);
+}
+
+// -----------------------------------------------------------------------------
+// Utility function - translate a DMPolytopeType into the matching CeedElemTopology
+// -----------------------------------------------------------------------------
+CeedElemTopology ElemTopologyP2C(DMPolytopeType cell_type) {
+  // Cell types other than the four handled below yield 0
+  CeedElemTopology topology = 0;
+  if (cell_type == DM_POLYTOPE_TRIANGLE) {
+    topology = CEED_TOPOLOGY_TRIANGLE;
+  } else if (cell_type == DM_POLYTOPE_QUADRILATERAL) {
+    topology = CEED_TOPOLOGY_QUAD;
+  } else if (cell_type == DM_POLYTOPE_TETRAHEDRON) {
+    topology = CEED_TOPOLOGY_TET;
+  } else if (cell_type == DM_POLYTOPE_HEXAHEDRON) {
+    topology = CEED_TOPOLOGY_HEX;
+  }
+  return topology;
+}
+
+// -----------------------------------------------------------------------------
+// Utility function - decode closure indices: negative i (essential BC dof, stored as -(loc+1)) maps to -(i+1)
+// -----------------------------------------------------------------------------
+PetscInt Involute(PetscInt i) { return i < 0 ? -(i + 1) : i; }
+
+// -----------------------------------------------------------------------------
+// Build a CEED element restriction from the local offsets DMPlex reports for (domain_label, value, height)
+// -----------------------------------------------------------------------------
+PetscErrorCode CreateRestrictionFromPlex(Ceed ceed, DM dm, CeedInt height, DMLabel domain_label, CeedInt value, CeedElemRestriction *elem_restr) {
+  PetscInt  num_elem, elem_size, num_comp, num_dof;
+  PetscInt *offsets;
+
+  PetscFunctionBeginUser;
+  // Offsets are handed to libCEED with CEED_COPY_VALUES and then released with PetscFree
+  PetscCall(DMPlexGetLocalOffsets(dm, domain_label, value, height, 0, &num_elem, &elem_size, &num_comp, &num_dof, &offsets));
+  CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp, 1, num_dof, CEED_MEM_HOST, CEED_COPY_VALUES, offsets, elem_restr);
+  PetscCall(PetscFree(offsets));
+  PetscFunctionReturn(0);
+}
+
+// -----------------------------------------------------------------------------
+// Get oriented CEED restriction (elem_restr_u) and one-dof-per-cell restriction (elem_restr_p) from DMPlex
+// -----------------------------------------------------------------------------
+PetscErrorCode CreateRestrictionFromPlexOriented(Ceed ceed, DM dm, CeedInt P, CeedElemRestriction *elem_restr_u, CeedElemRestriction *elem_restr_p) {
+  PetscSection    section;
+  PetscInt        p, num_elem, num_dof, *restr_indices_u, *restr_indices_p, elem_offset, num_fields, dim, c_start, c_end;
+  Vec             U_loc;
+  const PetscInt *ornt;  // this is for orientation of dof
+
+  PetscFunctionBeginUser;
+
+  PetscCall(DMGetDimension(dm, &dim));
+  PetscCall(DMGetLocalSection(dm, &section));
+  PetscCall(PetscSectionGetNumFields(section, &num_fields));
+  PetscInt num_comp[num_fields], field_offsets[num_fields + 1];
+  field_offsets[0] = 0;
+  for (PetscInt f = 0; f < num_fields; f++) {
+    PetscCall(PetscSectionGetFieldComponents(section, f, &num_comp[f]));
+    field_offsets[f + 1] = field_offsets[f] + num_comp[f];
+  }
+  PetscCall(DMPlexGetHeightStratum(dm, 0, &c_start, &c_end));
+  num_elem = c_end - c_start;
+  PetscCall(PetscMalloc1(num_elem * dim * PetscPowInt(P, dim), &restr_indices_u));
+  PetscCall(PetscMalloc1(num_elem, &restr_indices_p));
+  bool *orient_indices;  // to flip the dof
+  PetscCall(PetscMalloc1(num_elem * dim * PetscPowInt(P, dim), &orient_indices));
+
+  for (p = 0, elem_offset = 0; p < num_elem; p++) {
+    // Mesh point of the p-th cell; cells occupy [c_start, c_end), so the loop counter alone is not the point
+    const PetscInt cell = c_start + p;
+    restr_indices_p[p]  = p;  // each cell has one P0 dof
+    PetscInt num_indices, *indices, faces_per_elem, dofs_per_face;
+    PetscCall(DMPlexGetClosureIndices(dm, section, section, cell, PETSC_TRUE, &num_indices, &indices, NULL, NULL));
+    PetscCall(DMPlexGetConeOrientation(dm, cell, &ornt));
+    PetscCall(DMPlexGetConeSize(dm, cell, &faces_per_elem));
+    // faces_per_elem - 2 gives 2 dofs per face for a 4-face cell and 4 for a 6-face cell
+    dofs_per_face = faces_per_elem - 2;
+    for (PetscInt f = 0; f < faces_per_elem; f++) {
+      for (PetscInt i = 0; i < dofs_per_face; i++) {
+        PetscInt ii = dofs_per_face * f + i;
+        // Essential boundary conditions are encoded as -(loc+1), but we don't care so we decode.
+        PetscInt loc                 = Involute(indices[ii * num_comp[0]]);
+        restr_indices_u[elem_offset] = loc;
+        // Set orientation
+        orient_indices[elem_offset] = ornt[f] < 0;
+        elem_offset++;
+      }
+    }
+    PetscCall(DMPlexRestoreClosureIndices(dm, section, section, cell, PETSC_TRUE, &num_indices, &indices, NULL, NULL));
+  }
+  // NOTE(review): elem_offset is expected to equal num_elem * dim * P^dim here, the size allocated above.
+  // The size check that used to guard this is disabled, so the assumption is not verified at run time.
+  // TODO: restore the check once the supported degrees are confirmed.
+  PetscCall(DMGetLocalVector(dm, &U_loc));
+  PetscCall(VecGetLocalSize(U_loc, &num_dof));
+  PetscCall(DMRestoreLocalVector(dm, &U_loc));
+  // dof per element in Hdiv is dim*P^dim, for linear element P=2
+  CeedElemRestrictionCreateOriented(ceed, num_elem, dim * PetscPowInt(P, dim), field_offsets[num_fields], 1, num_dof, CEED_MEM_HOST, CEED_COPY_VALUES,
+                                    restr_indices_u, orient_indices, elem_restr_u);
+  CeedElemRestrictionCreate(ceed, num_elem, 1, 1, 1, num_dof, CEED_MEM_HOST, CEED_COPY_VALUES, restr_indices_p, elem_restr_p);
+  PetscCall(PetscFree(restr_indices_u));
+  PetscCall(PetscFree(orient_indices));
+  PetscCall(PetscFree(restr_indices_p));
+
+  PetscFunctionReturn(0);
+}
\ No newline at end of file
diff --git a/examples/Hdiv-mass/src/setup-libceed.c b/examples/Hdiv-mass/src/setup-libceed.c
new file mode 100644
index 0000000000..82ac374751
--- /dev/null
+++ b/examples/Hdiv-mass/src/setup-libceed.c
@@ -0,0 +1,188 @@
+#include "../include/setup-libceed.h"
+
+#include <stdio.h>
+
+#include "../basis/Hdiv-hex.h"
+#include "../basis/Hdiv-quad.h"
+#include "../basis/L2-P0.h"
+
+// -----------------------------------------------------------------------------
+// Destroy every libCEED object held by ceed_data, then free ceed_data itself
+// -----------------------------------------------------------------------------
+PetscErrorCode CeedDataDestroy(CeedData ceed_data) {
+  PetscFunctionBeginUser;
+
+  // Vectors
+  CeedVectorDestroy(&ceed_data->x_ceed);
+  CeedVectorDestroy(&ceed_data->y_ceed);
+  // Restrictions
+  CeedElemRestrictionDestroy(&ceed_data->elem_restr_x);
+  CeedElemRestrictionDestroy(&ceed_data->elem_restr_u);
+  CeedElemRestrictionDestroy(&ceed_data->elem_restr_u_i);
+  CeedElemRestrictionDestroy(&ceed_data->elem_restr_p);
+  // Bases
+  CeedBasisDestroy(&ceed_data->basis_x);
+  CeedBasisDestroy(&ceed_data->basis_u);
+  CeedBasisDestroy(&ceed_data->basis_p);
+  // QFunctions
+  CeedQFunctionDestroy(&ceed_data->qf_residual);
+  CeedQFunctionDestroy(&ceed_data->qf_error);
+  // Operators
+  CeedOperatorDestroy(&ceed_data->op_residual);
+  CeedOperatorDestroy(&ceed_data->op_error);
+  PetscCall(PetscFree(ceed_data));
+
+  PetscFunctionReturn(0);
+}
+
+// -----------------------------------------------------------------------------
+// Create the libCEED bases, restrictions, QFunctions and operators; also assembles the RHS into rhs_ceed
+// -----------------------------------------------------------------------------
+PetscErrorCode SetupLibceed(DM dm, Ceed ceed, AppCtx app_ctx, ProblemData problem_data, CeedData ceed_data, CeedVector rhs_ceed) {
+  CeedInt P = app_ctx->degree + 1;
+  // Number of quadrature points in 1D; q_extra is set in cl-options.c
+  CeedInt            Q = P + 1 + app_ctx->q_extra;
+  CeedInt            dim, num_comp_x, num_comp_u;
+  DM                 dm_coord;
+  Vec                coords;
+  PetscInt           c_start, c_end, num_elem;
+  const PetscScalar *coordArray;
+  CeedVector         x_coord;
+  CeedQFunction      qf_setup_rhs, qf_residual, qf_error;
+  CeedOperator       op_setup_rhs, op_residual, op_error;
+
+  PetscFunctionBeginUser;
+  // ---------------------------------------------------------------------------
+  // libCEED bases: H(div) basis_u, one-node basis_p and Lagrange basis_x
+  // ---------------------------------------------------------------------------
+  dim        = problem_data->dim;
+  num_comp_x = dim;
+  num_comp_u = 1;  // one vector dof
+  // Number of quadrature points per element
+  CeedInt    num_qpts = PetscPowInt(Q, dim);
+  CeedInt    P_u      = dim * PetscPowInt(P, dim);  // dof per element
+  CeedScalar q_ref[dim * num_qpts], q_weights[num_qpts];
+  CeedScalar div[P_u * num_qpts], interp[dim * P_u * num_qpts], interp_p[num_qpts], *grad = NULL;
+
+  if (dim == 2) {
+    HdivBasisQuad(Q, q_ref, q_weights, interp, div, problem_data->quadrature_mode);
+    CeedBasisCreateHdiv(ceed, CEED_TOPOLOGY_QUAD, num_comp_u, P_u, num_qpts, interp, div, q_ref, q_weights, &ceed_data->basis_u);
+    L2BasisP0(dim, Q, q_ref, q_weights, interp_p, problem_data->quadrature_mode);
+    CeedBasisCreateH1(ceed, CEED_TOPOLOGY_QUAD, 1, 1, num_qpts, interp_p, grad, q_ref, q_weights, &ceed_data->basis_p);
+  } else {
+    HdivBasisHex(Q, q_ref, q_weights, interp, div, problem_data->quadrature_mode);
+    CeedBasisCreateHdiv(ceed, CEED_TOPOLOGY_HEX, num_comp_u, P_u, num_qpts, interp, div, q_ref, q_weights, &ceed_data->basis_u);
+    L2BasisP0(dim, Q, q_ref, q_weights, interp_p, problem_data->quadrature_mode);
+    CeedBasisCreateH1(ceed, CEED_TOPOLOGY_HEX, 1, 1, num_qpts, interp_p, grad, q_ref, q_weights, &ceed_data->basis_p);
+  }
+  CeedBasisCreateTensorH1Lagrange(ceed, dim, num_comp_x, 2, Q, problem_data->quadrature_mode, &ceed_data->basis_x);
+
+  // ---------------------------------------------------------------------------
+  // libCEED restrictions
+  // ---------------------------------------------------------------------------
+  PetscCall(DMGetCoordinateDM(dm, &dm_coord));
+  PetscCall(DMPlexSetClosurePermutationTensor(dm_coord, PETSC_DETERMINE, NULL));
+
+  CeedInt  height       = 0;  // 0 means no boundary conditions
+  DMLabel  domain_label = 0;
+  PetscInt value        = 0;
+  // -- Coordinate restriction
+  PetscCall(CreateRestrictionFromPlex(ceed, dm_coord, height, domain_label, value, &ceed_data->elem_restr_x));
+  // -- Solution restriction (elem_restr_u) and error restriction (elem_restr_p)
+  PetscCall(CreateRestrictionFromPlexOriented(ceed, dm, P, &ceed_data->elem_restr_u, &ceed_data->elem_restr_p));
+  PetscCall(DMPlexGetHeightStratum(dm, 0, &c_start, &c_end));
+  num_elem = c_end - c_start;
+  // -- Target restriction for MMS: strided, dim components at each quadrature point
+  CeedElemRestrictionCreateStrided(ceed, num_elem, num_qpts, dim, dim * num_elem * num_qpts, CEED_STRIDES_BACKEND, &ceed_data->elem_restr_u_i);
+  // ---------------------------------------------------------------------------
+  // Element coordinates: the local PETSc coordinates are copied into x_coord
+  // ---------------------------------------------------------------------------
+  PetscCall(DMGetCoordinatesLocal(dm, &coords));
+  PetscCall(VecGetArrayRead(coords, &coordArray));
+  CeedElemRestrictionCreateVector(ceed_data->elem_restr_x, &x_coord, NULL);
+  CeedVectorSetArray(x_coord, CEED_MEM_HOST, CEED_COPY_VALUES, (PetscScalar *)coordArray);
+  PetscCall(VecRestoreArrayRead(coords, &coordArray));
+
+  // ---------------------------------------------------------------------------
+  // Setup RHS and true solution
+  // ---------------------------------------------------------------------------
+  CeedVector target;
+  CeedVectorCreate(ceed, num_elem * num_qpts * dim, &target);
+  // Create the q-function that sets up the RHS and true solution
+  CeedQFunctionCreateInterior(ceed, 1, problem_data->setup_rhs, problem_data->setup_rhs_loc, &qf_setup_rhs);
+  CeedQFunctionAddInput(qf_setup_rhs, "x", num_comp_x, CEED_EVAL_INTERP);
+  CeedQFunctionAddInput(qf_setup_rhs, "weight", 1, CEED_EVAL_WEIGHT);
+  CeedQFunctionAddInput(qf_setup_rhs, "dx", dim * dim, CEED_EVAL_GRAD);
+  CeedQFunctionAddOutput(qf_setup_rhs, "true_soln", dim, CEED_EVAL_NONE);
+  CeedQFunctionAddOutput(qf_setup_rhs, "rhs", dim, CEED_EVAL_INTERP);
+  // Create the operator that builds the RHS and true solution
+  CeedOperatorCreate(ceed, qf_setup_rhs, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, &op_setup_rhs);
+  CeedOperatorSetField(op_setup_rhs, "x", ceed_data->elem_restr_x, ceed_data->basis_x, CEED_VECTOR_ACTIVE);
+  CeedOperatorSetField(op_setup_rhs, "weight", CEED_ELEMRESTRICTION_NONE, ceed_data->basis_x, CEED_VECTOR_NONE);
+  CeedOperatorSetField(op_setup_rhs, "dx", ceed_data->elem_restr_x, ceed_data->basis_x, CEED_VECTOR_ACTIVE);
+  CeedOperatorSetField(op_setup_rhs, "true_soln", ceed_data->elem_restr_u_i, CEED_BASIS_COLLOCATED, target);
+  CeedOperatorSetField(op_setup_rhs, "rhs", ceed_data->elem_restr_u, ceed_data->basis_u, CEED_VECTOR_ACTIVE);
+
+  // Apply once: x_coord is the active input, rhs_ceed the active output; "true_soln" is written to target
+  CeedOperatorApply(op_setup_rhs, x_coord, rhs_ceed, CEED_REQUEST_IMMEDIATE);
+
+  // ---------------------------------------------------------------------------
+  // Persistent libCEED vectors
+  // ---------------------------------------------------------------------------
+  // -- Operator action variables: we use them in setup-solvers.c
+  CeedElemRestrictionCreateVector(ceed_data->elem_restr_u, &ceed_data->x_ceed, NULL);
+  CeedElemRestrictionCreateVector(ceed_data->elem_restr_u, &ceed_data->y_ceed, NULL);
+
+  // Local residual evaluator
+  // ---------------------------------------------------------------------------
+  // Create the QFunction and Operator that computes the residual of the PDE.
+  // ---------------------------------------------------------------------------
+  // -- QFunction
+  CeedQFunctionCreateInterior(ceed, 1, problem_data->residual, problem_data->residual_loc, &qf_residual);
+  CeedQFunctionAddInput(qf_residual, "weight", 1, CEED_EVAL_WEIGHT);
+  CeedQFunctionAddInput(qf_residual, "dx", dim * dim, CEED_EVAL_GRAD);
+  CeedQFunctionAddInput(qf_residual, "u", dim, CEED_EVAL_INTERP);
+  CeedQFunctionAddOutput(qf_residual, "v", dim, CEED_EVAL_INTERP);
+
+  // -- Operator
+  CeedOperatorCreate(ceed, qf_residual, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, &op_residual);
+  CeedOperatorSetField(op_residual, "weight", CEED_ELEMRESTRICTION_NONE, ceed_data->basis_x, CEED_VECTOR_NONE);
+  CeedOperatorSetField(op_residual, "dx", ceed_data->elem_restr_x, ceed_data->basis_x, x_coord);
+  CeedOperatorSetField(op_residual, "u", ceed_data->elem_restr_u, ceed_data->basis_u, CEED_VECTOR_ACTIVE);
+  CeedOperatorSetField(op_residual, "v", ceed_data->elem_restr_u, ceed_data->basis_u, CEED_VECTOR_ACTIVE);
+
+  // -- Keep the residual QFunction/operator in ceed_data; the operator is applied in setup-matops.c
+  ceed_data->qf_residual = qf_residual;
+  ceed_data->op_residual = op_residual;
+
+  // ---------------------------------------------------------------------------
+  // Setup Error Qfunction
+  // ---------------------------------------------------------------------------
+  // Create the q-function that sets up the error
+  CeedQFunctionCreateInterior(ceed, 1, problem_data->setup_error, problem_data->setup_error_loc, &qf_error);
+  CeedQFunctionAddInput(qf_error, "dx", dim * dim, CEED_EVAL_GRAD);
+  CeedQFunctionAddInput(qf_error, "u", dim, CEED_EVAL_INTERP);
+  CeedQFunctionAddInput(qf_error, "true_soln", dim, CEED_EVAL_NONE);
+  CeedQFunctionAddInput(qf_error, "weight", 1, CEED_EVAL_WEIGHT);
+  // CeedQFunctionAddOutput(qf_error, "error", 1, CEED_EVAL_NONE);
+  CeedQFunctionAddOutput(qf_error, "error", 1, CEED_EVAL_INTERP);
+  // Create the operator that builds the error
+  CeedOperatorCreate(ceed, qf_error, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, &op_error);
+  CeedOperatorSetField(op_error, "dx", ceed_data->elem_restr_x, ceed_data->basis_x, x_coord);
+  CeedOperatorSetField(op_error, "u", ceed_data->elem_restr_u, ceed_data->basis_u, CEED_VECTOR_ACTIVE);
+  CeedOperatorSetField(op_error, "true_soln", ceed_data->elem_restr_u_i, CEED_BASIS_COLLOCATED, target);
+  CeedOperatorSetField(op_error, "weight", CEED_ELEMRESTRICTION_NONE, ceed_data->basis_x, CEED_VECTOR_NONE);
+  // CeedOperatorSetField(op_error, "error", ceed_data->elem_restr_e_i, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE);
+  CeedOperatorSetField(op_error, "error", ceed_data->elem_restr_p, ceed_data->basis_p, CEED_VECTOR_ACTIVE);
+  // -- Keep the error QFunction/operator in ceed_data; the operator is applied in setup-matops.c
+  ceed_data->qf_error = qf_error;
+  ceed_data->op_error = op_error;
+
+  CeedQFunctionDestroy(&qf_setup_rhs);
+  CeedOperatorDestroy(&op_setup_rhs);
+  CeedVectorDestroy(&x_coord);
+  CeedVectorDestroy(&target);
+
+  PetscFunctionReturn(0);
+};
+// -----------------------------------------------------------------------------
\ No newline at end of file
diff --git a/examples/Hdiv-mass/src/setup-matops.c b/examples/Hdiv-mass/src/setup-matops.c
new file mode 100644
index 0000000000..27fb84f3ba
--- /dev/null
+++ b/examples/Hdiv-mass/src/setup-matops.c
@@ -0,0 +1,51 @@
+#include "../include/setup-matops.h"
+
+#include <stdio.h>
+
+// -----------------------------------------------------------------------------
+// Apply the local action of a libCEED operator and store the result in a PETSc vector,
+// i.e. compute Y = A X; Y is overwritten (zeroed first, then the operator action is added)
+// -----------------------------------------------------------------------------
+PetscErrorCode ApplyLocalCeedOp(Vec X, Vec Y, OperatorApplyContext op_apply_ctx) {
+  PetscFunctionBeginUser;
+
+  // Zero target vector
+  PetscCall(VecZeroEntries(Y));
+
+  // Sum into target vector
+  PetscCall(ApplyAddLocalCeedOp(X, Y, op_apply_ctx));
+
+  PetscFunctionReturn(0);
+}
+
+PetscErrorCode ApplyAddLocalCeedOp(Vec X, Vec Y, OperatorApplyContext op_apply_ctx) {
+  PetscScalar *x, *y;
+  PetscMemType x_mem_type, y_mem_type;
+
+  PetscFunctionBeginUser;
+  // Computes Y += A X, where A is op_apply_ctx->op_apply; Y itself is not zeroed here
+  // Global-to-local: scatter X into the local work vector X_loc
+  PetscCall(DMGlobalToLocal(op_apply_ctx->dm, X, INSERT_VALUES, op_apply_ctx->X_loc));
+
+  // Hand the PETSc local arrays to the libCEED vectors without copying (CEED_USE_POINTER)
+  PetscCall(VecGetArrayReadAndMemType(op_apply_ctx->X_loc, (const PetscScalar **)&x, &x_mem_type));
+  PetscCall(VecGetArrayAndMemType(op_apply_ctx->Y_loc, &y, &y_mem_type));
+  CeedVectorSetArray(op_apply_ctx->x_ceed, MemTypeP2C(x_mem_type), CEED_USE_POINTER, x);
+  CeedVectorSetArray(op_apply_ctx->y_ceed, MemTypeP2C(y_mem_type), CEED_USE_POINTER, y);
+
+  // Apply libCEED operator
+  CeedOperatorApply(op_apply_ctx->op_apply, op_apply_ctx->x_ceed, op_apply_ctx->y_ceed, CEED_REQUEST_IMMEDIATE);
+
+  // Take the borrowed arrays back from libCEED before restoring them to PETSc
+  CeedVectorTakeArray(op_apply_ctx->x_ceed, MemTypeP2C(x_mem_type), NULL);
+  CeedVectorTakeArray(op_apply_ctx->y_ceed, MemTypeP2C(y_mem_type), NULL);
+  PetscCall(VecRestoreArrayReadAndMemType(op_apply_ctx->X_loc, (const PetscScalar **)&x));
+  PetscCall(VecRestoreArrayAndMemType(op_apply_ctx->Y_loc, &y));
+
+  // Local-to-global: accumulate the local result into Y (ADD_VALUES)
+  PetscCall(DMLocalToGlobal(op_apply_ctx->dm, op_apply_ctx->Y_loc, ADD_VALUES, Y));
+
+  PetscFunctionReturn(0);
+}
+
+// -----------------------------------------------------------------------------
\ No newline at end of file
diff --git a/examples/Hdiv-mass/src/setup-solvers.c b/examples/Hdiv-mass/src/setup-solvers.c
new file mode 100644
index 0000000000..fac3e91970
--- /dev/null
+++ b/examples/Hdiv-mass/src/setup-solvers.c
@@ -0,0 +1,123 @@
+#include "../include/setup-solvers.h"
+
+#include "../include/setup-libceed.h"
+#include "../include/setup-matops.h"
+#include "petscvec.h"
+
+// -----------------------------------------------------------------------------
+// Populate the apply-context used for the residual operator (op_residual)
+// -----------------------------------------------------------------------------
+PetscErrorCode SetupResidualOperatorCtx(MPI_Comm comm, DM dm, Ceed ceed, CeedData ceed_data, OperatorApplyContext ctx_residual) {
+  PetscFunctionBeginUser;
+  ctx_residual->dm   = dm;
+  ctx_residual->comm = comm;
+  // PETSc local work vectors; Y_loc is a duplicate of X_loc
+  PetscCall(DMCreateLocalVector(dm, &ctx_residual->X_loc));
+  PetscCall(VecDuplicate(ctx_residual->X_loc, &ctx_residual->Y_loc));
+  // libCEED handles; the vectors and the operator are taken from ceed_data
+  ctx_residual->ceed     = ceed;
+  ctx_residual->op_apply = ceed_data->op_residual;
+  ctx_residual->x_ceed   = ceed_data->x_ceed;
+  ctx_residual->y_ceed   = ceed_data->y_ceed;
+  PetscFunctionReturn(0);
+}
+
+PetscErrorCode SetupErrorOperatorCtx(MPI_Comm comm, DM dm, Ceed ceed, CeedData ceed_data, OperatorApplyContext ctx_error_u) {
+  PetscFunctionBeginUser;
+  // Populate the apply-context used for the error operator (op_error)
+  ctx_error_u->dm   = dm;
+  ctx_error_u->comm = comm;
+  PetscCall(DMCreateLocalVector(dm, &ctx_error_u->X_loc));
+  PetscCall(VecDuplicate(ctx_error_u->X_loc, &ctx_error_u->Y_loc));
+  // libCEED handles; the vectors and the operator are taken from ceed_data
+  ctx_error_u->ceed     = ceed;
+  ctx_error_u->op_apply = ceed_data->op_error;
+  ctx_error_u->x_ceed   = ceed_data->x_ceed;
+  ctx_error_u->y_ceed   = ceed_data->y_ceed;
+  PetscFunctionReturn(0);
+}
+
+// -----------------------------------------------------------------------------
+// MatShell multiply callback: computes Y = A X with the libCEED operator held in the shell context
+// -----------------------------------------------------------------------------
+PetscErrorCode ApplyMatOp(Mat A, Vec X, Vec Y) {
+  OperatorApplyContext op_apply_ctx;
+
+  PetscFunctionBeginUser;
+  // Retrieve the operator context attached to the shell matrix
+  PetscCall(MatShellGetContext(A, &op_apply_ctx));
+
+  // Overwrite Y with the action of the libCEED operator on X
+  PetscCall(ApplyLocalCeedOp(X, Y, op_apply_ctx));
+
+  PetscFunctionReturn(0);
+}
+
+// ---------------------------------------------------------------------------
+// Solve the linear system for *X with a KSP acting on a matrix-free MatShell built from app_ctx->ctx_residual
+// ---------------------------------------------------------------------------
+PetscErrorCode PDESolver(CeedData ceed_data, AppCtx app_ctx, KSP ksp, Vec rhs, Vec *X) {
+  PetscInt X_l_size, X_g_size;
+
+  PetscFunctionBeginUser;
+  // NOTE: ceed_data is not used here; the operator comes from app_ctx->ctx_residual
+  // Global size of the solution vector
+  PetscCall(VecGetSize(*X, &X_g_size));
+  // Local size for the MatShell
+  PetscCall(VecGetLocalSize(*X, &X_l_size));
+
+  // ---------------------------------------------------------------------------
+  // Setup KSP
+  // ---------------------------------------------------------------------------
+  // Operator
+  Mat     mat_op;
+  VecType vec_type;
+  PetscCall(DMGetVecType(app_ctx->ctx_residual->dm, &vec_type));
+  // -- Matrix-free operator: MatMult is forwarded to ApplyMatOp
+  PetscCall(MatCreateShell(app_ctx->comm, X_l_size, X_l_size, X_g_size, X_g_size, app_ctx->ctx_residual, &mat_op));
+  PetscCall(MatShellSetOperation(mat_op, MATOP_MULT, (void (*)(void))ApplyMatOp));
+  PetscCall(MatShellSetVecType(mat_op, vec_type));
+
+  PC pc;
+  PetscCall(KSPGetPC(ksp, &pc));
+  PetscCall(PCSetType(pc, PCNONE));
+  PetscCall(KSPSetType(ksp, KSPCG));
+  // Defaults: unpreconditioned CG with rtol 1e-10; KSPSetFromOptions below may override them
+  PetscCall(KSPSetTolerances(ksp, 1e-10, PETSC_DEFAULT, PETSC_DEFAULT, PETSC_DEFAULT));
+
+  PetscCall(KSPSetOperators(ksp, mat_op, mat_op));
+  PetscCall(KSPSetFromOptions(ksp));
+  PetscCall(VecZeroEntries(*X));
+  PetscCall(KSPSolve(ksp, rhs, *X));
+
+  PetscCall(MatDestroy(&mat_op));
+
+  PetscFunctionReturn(0);
+}
+
+// -----------------------------------------------------------------------------
+// Compute the error of X: apply the error operator, sum its output and store the square root in *l2_error
+// -----------------------------------------------------------------------------
+PetscErrorCode ComputeL2Error(Vec X, PetscScalar *l2_error, OperatorApplyContext op_error_ctx) {
+  Vec E;
+  PetscFunctionBeginUser;
+
+  PetscCall(VecDuplicate(X, &E));  // E receives the output of the error operator
+  PetscCall(ApplyLocalCeedOp(X, E, op_error_ctx));
+  PetscScalar error_sq = 0.0;  // overwritten by VecSum
+  PetscCall(VecSum(E, &error_sq));
+  *l2_error = PetscSqrtScalar(error_sq);  // scalar-type aware square root
+  PetscCall(VecDestroy(&E));
+  PetscFunctionReturn(0);
+}
+
+PetscErrorCode CtxVecDestroy(ProblemData problem_data, AppCtx app_ctx) {
+  PetscFunctionBeginUser;
+  // Destroy the PETSc work vectors of both operator contexts (problem_data is not used)
+  PetscCall(VecDestroy(&app_ctx->ctx_residual->Y_loc));
+  PetscCall(VecDestroy(&app_ctx->ctx_residual->X_loc));
+  PetscCall(VecDestroy(&app_ctx->ctx_error_u->Y_loc));
+  PetscCall(VecDestroy(&app_ctx->ctx_error_u->X_loc));
+
+  PetscFunctionReturn(0);
+}
\ No newline at end of file
diff --git a/examples/Hdiv-mixed/Makefile b/examples/Hdiv-mixed/Makefile
new file mode 100644
index 0000000000..6cfc950525
--- /dev/null
+++ b/examples/Hdiv-mixed/Makefile
@@ -0,0 +1,82 @@
+# Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
+# Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
+# All Rights reserved. See files LICENSE and NOTICE for details.
+#
+# This file is part of CEED, a collection of benchmarks, miniapps, software
+# libraries and APIs for efficient high-order finite element and spectral
+# element discretizations for exascale applications. For more information and
+# source code availability see http://github.com/ceed.
+#
+# The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
+# a collaborative effort of two U.S. Department of Energy organizations (Office
+# of Science and the National Nuclear Security Administration) responsible for
+# the planning and preparation of a capable exascale ecosystem, including
+# software, applications, hardware, advanced system engineering and early
+# testbed platforms, in support of the nation's exascale computing imperative.
+
+COMMON ?= ../../common.mk
+-include $(COMMON)
+
+# Note: PETSC_ARCH can be undefined or empty for installations which do not use
+#       PETSC_ARCH - for example when using PETSc installed through Spack.
+PETSc.pc := $(PETSC_DIR)/$(PETSC_ARCH)/lib/pkgconfig/PETSc.pc
+CEED_DIR ?= ../..
+ceed.pc := $(CEED_DIR)/lib/pkgconfig/ceed.pc
+
+CC = $(call pkgconf, --variable=ccompiler $(PETSc.pc) $(ceed.pc))
+CFLAGS = -std=c99 \
+  $(call pkgconf, --variable=cflags_extra $(PETSc.pc)) \
+  $(call pkgconf, --cflags-only-other $(PETSc.pc)) \
+  $(OPT)
+CPPFLAGS = $(call pkgconf, --cflags-only-I $(PETSc.pc) $(ceed.pc)) \
+  $(call pkgconf, --variable=cflags_dep $(PETSc.pc))
+LDFLAGS = $(call pkgconf, --libs-only-L --libs-only-other $(PETSc.pc) $(ceed.pc))
+LDFLAGS += $(patsubst -L%, $(call pkgconf, --variable=ldflag_rpath $(PETSc.pc))%, $(call pkgconf, --libs-only-L $(PETSc.pc) $(ceed.pc)))
+LDLIBS = $(call pkgconf, --libs-only-l $(PETSc.pc) $(ceed.pc)) -lm
+
+OBJDIR := build
+SRCDIR := src
+PROBLEMDIR := problems
+
+src.c := main.c $(sort $(wildcard $(PROBLEMDIR)/*.c)) $(sort $(wildcard $(SRCDIR)/*.c))
+src.o = $(src.c:%.c=$(OBJDIR)/%.o)
+
+all: main
+
+main: $(src.o) | $(PETSc.pc) $(ceed.pc)
+	$(call quiet,LINK.o) $(CEED_LDFLAGS) $^ $(LOADLIBES) $(LDLIBS) -o $@
+
+.SECONDEXPANSION: # to expand $$(@D)/.DIR
+%/.DIR :
+	@mkdir -p $(@D)
+	@touch $@
+
+# Output hook: unless overridden, "call quiet,X" simply expands to the variable X (e.g. CC or LINK.o)
+quiet ?= $($(1))
+
+$(OBJDIR)/%.o : %.c | $$(@D)/.DIR
+	$(call quiet,CC) $(CPPFLAGS) $(CFLAGS) -c -o $@ $(abspath $<)
+
+# Rules for building the examples
+#%: %.c
+
+print: $(PETSc.pc) $(ceed.pc)
+	$(info CC      : $(CC))
+	$(info CFLAGS  : $(CFLAGS))
+	$(info CPPFLAGS: $(CPPFLAGS))
+	$(info LDFLAGS : $(LDFLAGS))
+	$(info LDLIBS  : $(LDLIBS))
+	@true
+
+clean:
+	$(RM) -r $(OBJDIR) main *.vtu
+# Stop with an explicit error message when the PETSc pkg-config file does not exist
+$(PETSc.pc):
+	$(if $(wildcard $@),,$(error \
+	  PETSc config not found at $@. Please set PETSC_DIR and PETSC_ARCH))
+
+.PHONY: all print clean
+# pkgconf: run pkg-config with the given arguments and strip a leading/trailing double quote from its output
+pkgconf = $(shell pkg-config $1 | sed -e 's/^"//g' -e 's/"$$//g')
+# Include the per-object .d files if they exist (the leading '-' ignores missing files)
+-include $(src.o:%.o=%.d)
diff --git a/examples/Hdiv-mixed/basis/Hdiv-hex.h b/examples/Hdiv-mixed/basis/Hdiv-hex.h
new file mode 100644
index 0000000000..1777981e11
--- /dev/null
+++ b/examples/Hdiv-mixed/basis/Hdiv-hex.h
@@ -0,0 +1,160 @@
+#ifndef Hdiv_hex_h
+#define Hdiv_hex_h
+// Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
+// All Rights reserved. See files LICENSE and NOTICE for details.
+//
+// This file is part of CEED, a collection of benchmarks, miniapps, software
+// libraries and APIs for efficient high-order finite element and spectral
+// element discretizations for exascale applications. For more information and
+// source code availability see http://github.com/ceed.
+//
+// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
+// a collaborative effort of two U.S. Department of Energy organizations (Office
+// of Science and the National Nuclear Security Administration) responsible for
+// the planning and preparation of a capable exascale ecosystem, including
+// software, applications, hardware, advanced system engineering and early
+// testbed platforms, in support of the nation's exascale computing imperative.
+
+// Evaluate the 24 H(div) hex nodal basis functions at reference point x[0..2]; components go to Bx, By, Bz (24 each).
+// Always returns 0. For the construction of the nodal basis see https://github.com/rezgarshakeri/H-div-Tests
+static int NodalHdivBasisHex(const CeedScalar *x, CeedScalar *Bx, CeedScalar *By, CeedScalar *Bz) {
+  Bx[0] = 0.0625 * x[0] * x[0] - 0.0625;
+  By[0] = -0.0625 * x[0] * x[1] * x[1] + 0.0625 * x[0] + 0.0625 * x[1] * x[1] - 0.0625;
+  Bz[0] = 0.125 * x[0] * x[1] * x[2] - 0.125 * x[0] * x[1] - 0.125 * x[0] * x[2] + 0.125 * x[0] - 0.125 * x[1] * x[2] + 0.125 * x[1] + 0.125 * x[2] -
+          0.125;
+  Bx[1] = 0.0625 - 0.0625 * x[0] * x[0];
+  By[1] = 0.0625 * x[0] * x[1] * x[1] - 0.0625 * x[0] + 0.0625 * x[1] * x[1] - 0.0625;
+  Bz[1] = -0.125 * x[0] * x[1] * x[2] + 0.125 * x[0] * x[1] + 0.125 * x[0] * x[2] - 0.125 * x[0] - 0.125 * x[1] * x[2] + 0.125 * x[1] + 0.125 * x[2] -
+          0.125;
+  Bx[2] = 0.0625 * x[0] * x[0] - 0.0625;
+  By[2] = 0.0625 * x[0] * x[1] * x[1] - 0.0625 * x[0] - 0.0625 * x[1] * x[1] + 0.0625;
+  Bz[2] = -0.125 * x[0] * x[1] * x[2] + 0.125 * x[0] * x[1] - 0.125 * x[0] * x[2] + 0.125 * x[0] + 0.125 * x[1] * x[2] - 0.125 * x[1] + 0.125 * x[2] -
+          0.125;
+  Bx[3] = 0.0625 - 0.0625 * x[0] * x[0];
+  By[3] = -0.0625 * x[0] * x[1] * x[1] + 0.0625 * x[0] - 0.0625 * x[1] * x[1] + 0.0625;
+  Bz[3] = 0.125 * x[0] * x[1] * x[2] - 0.125 * x[0] * x[1] + 0.125 * x[0] * x[2] - 0.125 * x[0] + 0.125 * x[1] * x[2] - 0.125 * x[1] + 0.125 * x[2] -
+          0.125;
+  Bx[4] = 0.0625 * x[0] * x[0] - 0.0625;
+  By[4] = -0.0625 * x[0] * x[1] * x[1] + 0.0625 * x[0] + 0.0625 * x[1] * x[1] - 0.0625;
+  Bz[4] = 0.125 * x[0] * x[1] * x[2] + 0.125 * x[0] * x[1] - 0.125 * x[0] * x[2] - 0.125 * x[0] - 0.125 * x[1] * x[2] - 0.125 * x[1] + 0.125 * x[2] +
+          0.125;
+  Bx[5] = 0.0625 - 0.0625 * x[0] * x[0];
+  By[5] = 0.0625 * x[0] * x[1] * x[1] - 0.0625 * x[0] + 0.0625 * x[1] * x[1] - 0.0625;
+  Bz[5] = -0.125 * x[0] * x[1] * x[2] - 0.125 * x[0] * x[1] + 0.125 * x[0] * x[2] + 0.125 * x[0] - 0.125 * x[1] * x[2] - 0.125 * x[1] + 0.125 * x[2] +
+          0.125;
+  Bx[6] = 0.0625 * x[0] * x[0] - 0.0625;
+  By[6] = 0.0625 * x[0] * x[1] * x[1] - 0.0625 * x[0] - 0.0625 * x[1] * x[1] + 0.0625;
+  Bz[6] = -0.125 * x[0] * x[1] * x[2] - 0.125 * x[0] * x[1] - 0.125 * x[0] * x[2] - 0.125 * x[0] + 0.125 * x[1] * x[2] + 0.125 * x[1] + 0.125 * x[2] +
+          0.125;
+  Bx[7] = 0.0625 - 0.0625 * x[0] * x[0];
+  By[7] = -0.0625 * x[0] * x[1] * x[1] + 0.0625 * x[0] - 0.0625 * x[1] * x[1] + 0.0625;
+  Bz[7] = 0.125 * x[0] * x[1] * x[2] + 0.125 * x[0] * x[1] + 0.125 * x[0] * x[2] + 0.125 * x[0] + 0.125 * x[1] * x[2] + 0.125 * x[1] + 0.125 * x[2] +
+          0.125;
+  Bx[8] = 0.0625 * x[0] * x[0] * x[2] - 0.0625 * x[0] * x[0] - 0.0625 * x[2] + 0.0625;
+  By[8] = -0.125 * x[0] * x[1] * x[2] + 0.125 * x[0] * x[1] + 0.125 * x[0] * x[2] - 0.125 * x[0] - 0.125 * x[1] * x[2] + 0.125 * x[1] + 0.125 * x[2] -
+          0.125;
+  Bz[8] = 0.0625 * x[2] * x[2] - 0.0625;
+  Bx[9] = -0.0625 * x[0] * x[0] * x[2] + 0.0625 * x[0] * x[0] + 0.0625 * x[2] - 0.0625;
+  By[9] = 0.125 * x[0] * x[1] * x[2] - 0.125 * x[0] * x[1] - 0.125 * x[0] * x[2] + 0.125 * x[0] - 0.125 * x[1] * x[2] + 0.125 * x[1] + 0.125 * x[2] -
+          0.125;
+  Bz[9]  = 0.0625 * x[2] * x[2] - 0.0625;
+  Bx[10] = -0.0625 * x[0] * x[0] * x[2] - 0.0625 * x[0] * x[0] + 0.0625 * x[2] + 0.0625;
+  By[10] = 0.125 * x[0] * x[1] * x[2] + 0.125 * x[0] * x[1] - 0.125 * x[0] * x[2] - 0.125 * x[0] + 0.125 * x[1] * x[2] + 0.125 * x[1] - 0.125 * x[2] -
+           0.125;
+  Bz[10] = 0.0625 - 0.0625 * x[2] * x[2];
+  Bx[11] = 0.0625 * x[0] * x[0] * x[2] + 0.0625 * x[0] * x[0] - 0.0625 * x[2] - 0.0625;
+  By[11] = -0.125 * x[0] * x[1] * x[2] - 0.125 * x[0] * x[1] + 0.125 * x[0] * x[2] + 0.125 * x[0] + 0.125 * x[1] * x[2] + 0.125 * x[1] -
+           0.125 * x[2] - 0.125;
+  Bz[11] = 0.0625 - 0.0625 * x[2] * x[2];
+  Bx[12] = 0.0625 * x[0] * x[0] * x[2] - 0.0625 * x[0] * x[0] - 0.0625 * x[2] + 0.0625;
+  By[12] = -0.125 * x[0] * x[1] * x[2] + 0.125 * x[0] * x[1] - 0.125 * x[0] * x[2] + 0.125 * x[0] - 0.125 * x[1] * x[2] + 0.125 * x[1] -
+           0.125 * x[2] + 0.125;
+  Bz[12] = 0.0625 * x[2] * x[2] - 0.0625;
+  Bx[13] = -0.0625 * x[0] * x[0] * x[2] + 0.0625 * x[0] * x[0] + 0.0625 * x[2] - 0.0625;
+  By[13] = 0.125 * x[0] * x[1] * x[2] - 0.125 * x[0] * x[1] + 0.125 * x[0] * x[2] - 0.125 * x[0] - 0.125 * x[1] * x[2] + 0.125 * x[1] - 0.125 * x[2] +
+           0.125;
+  Bz[13] = 0.0625 * x[2] * x[2] - 0.0625;
+  Bx[14] = -0.0625 * x[0] * x[0] * x[2] - 0.0625 * x[0] * x[0] + 0.0625 * x[2] + 0.0625;
+  By[14] = 0.125 * x[0] * x[1] * x[2] + 0.125 * x[0] * x[1] + 0.125 * x[0] * x[2] + 0.125 * x[0] + 0.125 * x[1] * x[2] + 0.125 * x[1] + 0.125 * x[2] +
+           0.125;
+  Bz[14] = 0.0625 - 0.0625 * x[2] * x[2];
+  Bx[15] = 0.0625 * x[0] * x[0] * x[2] + 0.0625 * x[0] * x[0] - 0.0625 * x[2] - 0.0625;
+  By[15] = -0.125 * x[0] * x[1] * x[2] - 0.125 * x[0] * x[1] - 0.125 * x[0] * x[2] - 0.125 * x[0] + 0.125 * x[1] * x[2] + 0.125 * x[1] +
+           0.125 * x[2] + 0.125;
+  Bz[15] = 0.0625 - 0.0625 * x[2] * x[2];
+  Bx[16] = 0.125 * x[0] * x[1] * x[2] - 0.125 * x[0] * x[1] - 0.125 * x[0] * x[2] + 0.125 * x[0] + 0.125 * x[1] * x[2] - 0.125 * x[1] - 0.125 * x[2] +
+           0.125;
+  By[16] = 0.0625 * x[1] * x[1] - 0.0625;
+  Bz[16] = -0.0625 * x[1] * x[2] * x[2] + 0.0625 * x[1] + 0.0625 * x[2] * x[2] - 0.0625;
+  Bx[17] = -0.125 * x[0] * x[1] * x[2] + 0.125 * x[0] * x[1] - 0.125 * x[0] * x[2] + 0.125 * x[0] - 0.125 * x[1] * x[2] + 0.125 * x[1] -
+           0.125 * x[2] + 0.125;
+  By[17] = 0.0625 - 0.0625 * x[1] * x[1];
+  Bz[17] = 0.0625 * x[1] * x[2] * x[2] - 0.0625 * x[1] + 0.0625 * x[2] * x[2] - 0.0625;
+  Bx[18] = -0.125 * x[0] * x[1] * x[2] - 0.125 * x[0] * x[1] + 0.125 * x[0] * x[2] + 0.125 * x[0] - 0.125 * x[1] * x[2] - 0.125 * x[1] +
+           0.125 * x[2] + 0.125;
+  By[18] = 0.0625 * x[1] * x[1] - 0.0625;
+  Bz[18] = 0.0625 * x[1] * x[2] * x[2] - 0.0625 * x[1] - 0.0625 * x[2] * x[2] + 0.0625;
+  Bx[19] = 0.125 * x[0] * x[1] * x[2] + 0.125 * x[0] * x[1] + 0.125 * x[0] * x[2] + 0.125 * x[0] + 0.125 * x[1] * x[2] + 0.125 * x[1] + 0.125 * x[2] +
+           0.125;
+  By[19] = 0.0625 - 0.0625 * x[1] * x[1];
+  Bz[19] = -0.0625 * x[1] * x[2] * x[2] + 0.0625 * x[1] - 0.0625 * x[2] * x[2] + 0.0625;
+  Bx[20] = 0.125 * x[0] * x[1] * x[2] - 0.125 * x[0] * x[1] - 0.125 * x[0] * x[2] + 0.125 * x[0] - 0.125 * x[1] * x[2] + 0.125 * x[1] + 0.125 * x[2] -
+           0.125;
+  By[20] = 0.0625 * x[1] * x[1] - 0.0625;
+  Bz[20] = -0.0625 * x[1] * x[2] * x[2] + 0.0625 * x[1] + 0.0625 * x[2] * x[2] - 0.0625;
+  Bx[21] = -0.125 * x[0] * x[1] * x[2] + 0.125 * x[0] * x[1] - 0.125 * x[0] * x[2] + 0.125 * x[0] + 0.125 * x[1] * x[2] - 0.125 * x[1] +
+           0.125 * x[2] - 0.125;
+  By[21] = 0.0625 - 0.0625 * x[1] * x[1];
+  Bz[21] = 0.0625 * x[1] * x[2] * x[2] - 0.0625 * x[1] + 0.0625 * x[2] * x[2] - 0.0625;
+  Bx[22] = -0.125 * x[0] * x[1] * x[2] - 0.125 * x[0] * x[1] + 0.125 * x[0] * x[2] + 0.125 * x[0] + 0.125 * x[1] * x[2] + 0.125 * x[1] -
+           0.125 * x[2] - 0.125;
+  By[22] = 0.0625 * x[1] * x[1] - 0.0625;
+  Bz[22] = 0.0625 * x[1] * x[2] * x[2] - 0.0625 * x[1] - 0.0625 * x[2] * x[2] + 0.0625;
+  Bx[23] = 0.125 * x[0] * x[1] * x[2] + 0.125 * x[0] * x[1] + 0.125 * x[0] * x[2] + 0.125 * x[0] - 0.125 * x[1] * x[2] - 0.125 * x[1] - 0.125 * x[2] -
+           0.125;
+  By[23] = 0.0625 - 0.0625 * x[1] * x[1];
+  Bz[23] = -0.0625 * x[1] * x[2] * x[2] + 0.0625 * x[1] - 0.0625 * x[2] * x[2] + 0.0625;
+  return 0;
+}
+// Tabulate the 24-dof H(div) hex basis at the Q^3 tensor-product quadrature points (x index fastest, then y, then z).
+// Layouts written here: q_ref[3][Q^3], q_weights[Q^3], interp[3][Q^3][24], div[Q^3][24].
+static void HdivBasisHex(CeedInt Q, CeedScalar *q_ref, CeedScalar *q_weights, CeedScalar *interp, CeedScalar *div, CeedQuadMode quad_mode) {
+  // Get 1D quadrature on [-1,1]; any mode other than Gauss-Lobatto uses Gauss so the rule is always initialized
+  CeedScalar q_ref_1d[Q], q_weight_1d[Q];
+  if (quad_mode == CEED_GAUSS_LOBATTO) {
+    CeedLobattoQuadrature(Q, q_ref_1d, q_weight_1d);
+  } else {
+    CeedGaussQuadrature(Q, q_ref_1d, q_weight_1d);
+  }
+
+  const CeedInt num_qpts = Q * Q * Q;  // stride between coordinate components in q_ref and interp
+  // Divergence operator; Divergence of nodal basis for ref element
+  CeedScalar D = 0.125;
+  // Loop over quadrature points
+  CeedScalar Bx[24], By[24], Bz[24];
+  CeedScalar x[3];
+  for (CeedInt k = 0; k < Q; k++) {
+    for (CeedInt i = 0; i < Q; i++) {
+      for (CeedInt j = 0; j < Q; j++) {
+        CeedInt k1               = Q * Q * k + Q * i + j;
+        q_ref[k1 + 0 * num_qpts] = q_ref_1d[j];
+        q_ref[k1 + 1 * num_qpts] = q_ref_1d[i];
+        q_ref[k1 + 2 * num_qpts] = q_ref_1d[k];
+        q_weights[k1]            = q_weight_1d[j] * q_weight_1d[i] * q_weight_1d[k];
+        x[0]                     = q_ref_1d[j];
+        x[1]                     = q_ref_1d[i];
+        x[2]                     = q_ref_1d[k];
+        NodalHdivBasisHex(x, Bx, By, Bz);
+        for (CeedInt d = 0; d < 24; d++) {
+          interp[k1 * 24 + d]                 = Bx[d];
+          interp[k1 * 24 + d + 24 * num_qpts] = By[d];
+          interp[k1 * 24 + d + 48 * num_qpts] = Bz[d];
+          div[k1 * 24 + d]                    = D;
+        }
+      }
+    }
+  }
+}
+
+#endif  // Hdiv_hex_h
\ No newline at end of file
diff --git a/examples/Hdiv-mixed/basis/Hdiv-quad.h b/examples/Hdiv-mixed/basis/Hdiv-quad.h
new file mode 100644
index 0000000000..842e1910b5
--- /dev/null
+++ b/examples/Hdiv-mixed/basis/Hdiv-quad.h
@@ -0,0 +1,86 @@
+#ifndef Hdiv_quad_h
+#define Hdiv_quad_h
+// Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
+// All Rights reserved. See files LICENSE and NOTICE for details.
+//
+// This file is part of CEED, a collection of benchmarks, miniapps, software
+// libraries and APIs for efficient high-order finite element and spectral
+// element discretizations for exascale applications. For more information and
+// source code availability see http://github.com/ceed.
+//
+// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
+// a collaborative effort of two U.S. Department of Energy organizations (Office
+// of Science and the National Nuclear Security Administration) responsible for
+// the planning and preparation of a capable exascale ecosystem, including
+// software, applications, hardware, advanced system engineering and early
+// testbed platforms, in support of the nation's exascale computing imperative.
+
+// Hdiv basis for quadrilateral element in 2D
+// Local numbering is as follows (each edge has 2 vector dofs)
+//     b4     b5
+//    2---------3
+//  b7|         |b3
+//    |         |
+//  b6|         |b2
+//    0---------1
+//     b0     b1
+// Bx[0-->7] = b0_x-->b7_x, By[0-->7] = b0_y-->b7_y
+// To see how the nodal basis is constructed visit:
+// https://github.com/rezgarshakeri/H-div-Tests
+static int NodalHdivBasisQuad(CeedScalar *x, CeedScalar *Bx, CeedScalar *By) {  // static: defined in a header, so each including file gets its own copy
+  Bx[0] = 0.125 * x[0] * x[0] - 0.125;  // (Bx[n], By[n]) is basis vector bn evaluated at the reference point (x[0], x[1])
+  By[0] = -0.25 * x[0] * x[1] + 0.25 * x[0] + 0.25 * x[1] - 0.25;  // b0, b1: bottom edge (vertices 0-1)
+  Bx[1] = 0.125 - 0.125 * x[0] * x[0];
+  By[1] = 0.25 * x[0] * x[1] - 0.25 * x[0] + 0.25 * x[1] - 0.25;
+  Bx[2] = -0.25 * x[0] * x[1] + 0.25 * x[0] - 0.25 * x[1] + 0.25;  // b2, b3: right edge (vertices 1-3)
+  By[2] = 0.125 * x[1] * x[1] - 0.125;
+  Bx[3] = 0.25 * x[0] * x[1] + 0.25 * x[0] + 0.25 * x[1] + 0.25;
+  By[3] = 0.125 - 0.125 * x[1] * x[1];
+  Bx[4] = 0.125 * x[0] * x[0] - 0.125;  // b4, b5: top edge (vertices 2-3)
+  By[4] = -0.25 * x[0] * x[1] - 0.25 * x[0] + 0.25 * x[1] + 0.25;
+  Bx[5] = 0.125 - 0.125 * x[0] * x[0];
+  By[5] = 0.25 * x[0] * x[1] + 0.25 * x[0] + 0.25 * x[1] + 0.25;
+  Bx[6] = -0.25 * x[0] * x[1] + 0.25 * x[0] + 0.25 * x[1] - 0.25;  // b6, b7: left edge (vertices 0-2)
+  By[6] = 0.125 * x[1] * x[1] - 0.125;
+  Bx[7] = 0.25 * x[0] * x[1] + 0.25 * x[0] - 0.25 * x[1] - 0.25;
+  By[7] = 0.125 - 0.125 * x[1] * x[1];
+  return 0;  // no failure path: the status is always 0
+}
+static void HdivBasisQuad(CeedInt Q, CeedScalar *q_ref, CeedScalar *q_weights, CeedScalar *interp, CeedScalar *div, CeedQuadMode quad_mode) {
+  // Tabulates the 8-dof quadrilateral H(div) basis on a Q x Q tensor-product quadrature grid.
+  // Outputs: q_ref (all first coordinates, then all second coordinates), q_weights, interp (Bx block, then By block) and div.
+  CeedScalar pts_1d[Q], wts_1d[Q];
+
+  // 1D rule on [-1,1]; quad_mode selects Gauss or Gauss-Lobatto points
+  if (quad_mode == CEED_GAUSS) {
+    CeedGaussQuadrature(Q, pts_1d, wts_1d);
+  } else if (quad_mode == CEED_GAUSS_LOBATTO) {
+    CeedLobattoQuadrature(Q, pts_1d, wts_1d);
+  }
+
+  const CeedInt    num_qpts = Q * Q;
+  const CeedScalar div_ref  = 0.25;  // one divergence value, stored for every basis function at every point
+  CeedScalar       Bx[8], By[8], pt[2];
+
+  // Row-major sweep: `row` indexes the second coordinate, `col` the first
+  for (CeedInt row = 0; row < Q; row++) {
+    for (CeedInt col = 0; col < Q; col++) {
+      const CeedInt q = Q * row + col;
+      pt[0]               = pts_1d[col];
+      pt[1]               = pts_1d[row];
+      q_ref[q]            = pt[0];
+      q_ref[q + num_qpts] = pt[1];
+      q_weights[q]        = wts_1d[col] * wts_1d[row];
+
+      NodalHdivBasisQuad(pt, Bx, By);
+      for (CeedInt b = 0; b < 8; b++) {
+        interp[q * 8 + b]                = Bx[b];
+        interp[q * 8 + b + 8 * num_qpts] = By[b];
+        div[q * 8 + b]                   = div_ref;
+      }
+    }
+  }
+}
+
+#endif  // Hdiv_quad_h
diff --git a/examples/Hdiv-mixed/basis/L2-P0.h b/examples/Hdiv-mixed/basis/L2-P0.h
new file mode 100644
index 0000000000..6149d4a34d
--- /dev/null
+++ b/examples/Hdiv-mixed/basis/L2-P0.h
@@ -0,0 +1,58 @@
+// Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
+// All Rights reserved. See files LICENSE and NOTICE for details.
+//
+// This file is part of CEED, a collection of benchmarks, miniapps, software
+// libraries and APIs for efficient high-order finite element and spectral
+// element discretizations for exascale applications. For more information and
+// source code availability see http://github.com/ceed.
+//
+// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
+// a collaborative effort of two U.S. Department of Energy organizations (Office
+// of Science and the National Nuclear Security Administration) responsible for
+// the planning and preparation of a capable exascale ecosystem, including
+// software, applications, hardware, advanced system engineering and early
+// testbed platforms, in support of the nation's exascale computing imperative.
+
+// Build L2 constant basis
+
+static void L2BasisP0(CeedInt dim, CeedInt Q, CeedScalar *q_ref, CeedScalar *q_weights, CeedScalar *interp, CeedQuadMode quad_mode) {
+  // Get 1D quadrature on [-1,1]; the element rule is its tensor product (Q^2 points when dim == 2, otherwise Q^3 points)
+  CeedScalar q_ref_1d[Q], q_weight_1d[Q];
+  switch (quad_mode) {
+    case CEED_GAUSS:
+      CeedGaussQuadrature(Q, q_ref_1d, q_weight_1d);
+      break;
+    case CEED_GAUSS_LOBATTO:
+      CeedLobattoQuadrature(Q, q_ref_1d, q_weight_1d);
+      break;
+  }
+
+  // P0 L2 basis is just a constant
+  CeedScalar P0 = 1.0;
+  // Loop over quadrature points; q_ref holds one block of coordinates per direction, one entry per point in each block
+  if (dim == 2) {
+    for (CeedInt i = 0; i < Q; i++) {
+      for (CeedInt j = 0; j < Q; j++) {
+        CeedInt k1        = Q * i + j;
+        q_ref[k1]         = q_ref_1d[j];
+        q_ref[k1 + Q * Q] = q_ref_1d[i];
+        q_weights[k1]     = q_weight_1d[j] * q_weight_1d[i];
+        interp[k1]        = P0;
+      }
+    }
+  } else {
+    for (CeedInt k = 0; k < Q; k++) {
+      for (CeedInt i = 0; i < Q; i++) {
+        for (CeedInt j = 0; j < Q; j++) {
+          CeedInt k1                = Q * Q * k + Q * i + j;
+          q_ref[k1 + 0 * Q * Q * Q] = q_ref_1d[j];  // block stride is the point count Q^3; a Q * Q stride makes the blocks overlap
+          q_ref[k1 + 1 * Q * Q * Q] = q_ref_1d[i];
+          q_ref[k1 + 2 * Q * Q * Q] = q_ref_1d[k];
+          q_weights[k1]             = q_weight_1d[j] * q_weight_1d[i] * q_weight_1d[k];
+          interp[k1]                = P0;
+        }
+      }
+    }
+  }
+}
diff --git a/examples/Hdiv-mixed/conv_plot.py b/examples/Hdiv-mixed/conv_plot.py
new file mode 100644
index 0000000000..d4bfcff84e
--- /dev/null
+++ b/examples/Hdiv-mixed/conv_plot.py
@@ -0,0 +1,82 @@
+#!/usr/bin/env python3
+
+# Copyright (c) 2017, Lawrence Livermore National Security, LLC.
+# Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
+# All Rights reserved. See files LICENSE and NOTICE for details.
+#
+# This file is part of CEED, a collection of benchmarks, miniapps, software
+# libraries and APIs for efficient high-order finite element and spectral
+# element discretizations for exascale applications. For more information and
+# source code availability see http://github.com/ceed.
+#
+# The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
+# a collaborative effort of two U.S. Department of Energy organizations (Office
+# of Science and the National Nuclear Security Administration) responsible for
+# the planning and preparation of a capable exascale ecosystem, including
+# software, applications, hardware, advanced system engineering and early
+# testbed platforms, in support of the nation's exascale computing imperative.
+
+# After ./conv_test.sh you can plot using
+# python conv_plot.py -f conv_test_result.csv
+
+import pandas as pd
+import argparse
+from pylab import *
+from matplotlib import use
+
+
+def plot():
+    # Define argparse for the input variables
+    parser = argparse.ArgumentParser(description='Get input arguments')
+    parser.add_argument('-f',
+                        dest='conv_result_file',
+                        type=str,
+                        required=True,
+                        help='Path to the CSV file')
+    args = parser.parse_args()
+    conv_result_file = args.conv_result_file
+
+    # Load the data
+    data = pd.read_csv(conv_result_file)
+    fig, ax = plt.subplots()
+
+    data = data.sort_values('run')
+
+    E_u = data['error_u']
+    E_p = data['error_p']
+    #E_hdiv = data['error_hdiv']
+    h = 1/data['mesh_res']
+    N = data['mesh_res']
+    H1 =  amin(E_p)* (h/amin(h)) # H = C h^1
+    H2 =  amin(E_u)* (h/amin(h))**2  # H = C h^2
+
+    ax.loglog(h, E_p, 'o', color='blue', label='Pressure')
+    ax.loglog(h, E_u, 'o', color='black', label = 'Velocity')
+    #ax.loglog(h, E_hdiv, '*', color='red', label = 'Velocity in H(div)')
+    ax.loglog(h, H1, '--', color='blue', label='O(h)')
+    ax.loglog(h, H2, '--', color='black', label='O(h$^2$)')
+
+    ax.legend(loc='upper left')
+    ax.set_xlabel('h')
+    ax.set_ylabel('L2 Error')
+    ax.set_title('Convergence by h Refinement')
+    #xlim(.06, .3)
+    fig.tight_layout()
+    plt.savefig('convrate_mixed.png', bbox_inches='tight')
+
+    conv_u = []
+    conv_p = []
+    conv_u.append(0)
+    conv_p.append(0)
+    for i in range(1,len(E_u)):
+        conv_u.append(log10(E_u[i]/E_u[i-1])/log10(h[i]/h[i-1]))
+        conv_p.append(log10(E_p[i]/E_p[i-1])/log10(h[i]/h[i-1]))
+
+    result = {'Number of element':N, 'order of u':conv_u, 'order of p':conv_p}
+    df = pd.DataFrame(result)
+    print(df)
+    
+
+
+if __name__ == "__main__":
+    plot()
diff --git a/examples/Hdiv-mixed/conv_test.sh b/examples/Hdiv-mixed/conv_test.sh
new file mode 100755
index 0000000000..f44774abce
--- /dev/null
+++ b/examples/Hdiv-mixed/conv_test.sh
@@ -0,0 +1,76 @@
+#!/bin/bash
+
+# Copyright (c) 2017, Lawrence Livermore National Security, LLC.
+# Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
+# All Rights reserved. See files LICENSE and NOTICE for details.
+#
+# This file is part of CEED, a collection of benchmarks, miniapps, software
+# libraries and APIs for efficient high-order finite element and spectral
+# element discretizations for exascale applications. For more information and
+# source code availability see http://github.com/ceed.
+#
+# The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
+# a collaborative effort of two U.S. Department of Energy organizations (Office
+# of Science and the National Nuclear Security Administration) responsible for
+# the planning and preparation of a capable exascale ecosystem, including
+# software, applications, hardware, advanced system engineering and early
+# testbed platforms, in support of the nation's exascale computing imperative.
+
+# After building the problem with make, you can run the convergence test with:
+#./conv_test.sh -d 2 (or -d 3)
+
+# Read the command line with getopts: the only option is -d <dim>, stored in $dim
+while getopts d: flag
+do
+    case "${flag}" in
+        d) dim=${OPTARG};;
+    esac
+done
+echo "Running convergence test in ${dim}D for Darcy problem";
+
+declare -A run_flags  # option name -> value; each non-empty entry becomes "-name value" on the ./main command line
+    run_flags[pc_type]=svd
+    run_flags[ceed]=/cpu/self/ref/serial
+    if [[ $dim -eq 2 ]];  # when this test fails (including when -d was not given) the 3D settings below are used
+    then
+        run_flags[problem]=darcy2d
+        run_flags[dm_plex_dim]=$dim
+        run_flags[dm_plex_box_faces]=2,2  # initial value only; overwritten with the current resolution in the loop below
+        run_flags[dm_plex_box_lower]=0,0
+        run_flags[dm_plex_box_upper]=1,1
+    else
+        run_flags[problem]=darcy3d
+        run_flags[dm_plex_dim]=$dim
+        run_flags[dm_plex_box_faces]=2,2,2  # initial value only; overwritten with the current resolution in the loop below
+        run_flags[dm_plex_box_lower]=0,0,0
+        run_flags[dm_plex_box_upper]=1,1,1
+    fi
+
+declare -A test_flags  # mesh resolutions to run: res_start, res_start + res_stride, ... up to and including res_end
+    test_flags[res_start]=4
+    test_flags[res_stride]=2
+    test_flags[res_end]=12
+
+file_name=conv_test_result.csv
+
+echo "run,mesh_res,error_u,error_p" > $file_name  # (re)create the CSV with its header row
+
+i=0  # run counter written to the "run" column
+
+for ((res=${test_flags[res_start]}; res<=${test_flags[res_end]}; res+=${test_flags[res_stride]})); do
+    if [[ $dim -eq 2 ]]; then
+        run_flags[dm_plex_box_faces]=$res,$res
+    else
+        run_flags[dm_plex_box_faces]=$res,$res,$res
+    fi
+    args=''  # rebuilt from run_flags on every iteration
+    for arg in "${!run_flags[@]}"; do
+        if ! [[ -z ${run_flags[$arg]} ]]; then  # flags whose value is empty are left off the command line
+            args="$args -$arg ${run_flags[$arg]}"
+        fi
+    done  # next line: fields 8 and 9 of the output line containing "L2 error of u and p" become error_u and error_p
+    ./main $args | grep "L2 error of u and p" | awk -v i="$i" -v res="$res" '{ printf "%d,%d,%.5f,%.5f\n", i, res, $8, $9}' >> $file_name
+    i=$((i+1))
+done
+
+python conv_plot.py -f conv_test_result.csv
\ No newline at end of file
diff --git a/examples/Hdiv-mixed/conv_test_result.csv b/examples/Hdiv-mixed/conv_test_result.csv
new file mode 100644
index 0000000000..074a2faf64
--- /dev/null
+++ b/examples/Hdiv-mixed/conv_test_result.csv
@@ -0,0 +1,6 @@
+run,mesh_res,error_u,error_p
+0,4,26.30005,0.03133
+1,6,11.97420,0.01464
+2,8,6.79226,0.00838
+3,10,4.36393,0.00540
+4,12,3.03689,0.00377
diff --git a/examples/Hdiv-mixed/convrate_mixed.png b/examples/Hdiv-mixed/convrate_mixed.png
new file mode 100644
index 0000000000..fbe94c5133
Binary files /dev/null and b/examples/Hdiv-mixed/convrate_mixed.png differ
diff --git a/examples/Hdiv-mixed/include/cl-options.h b/examples/Hdiv-mixed/include/cl-options.h
new file mode 100644
index 0000000000..79f4fb51bc
--- /dev/null
+++ b/examples/Hdiv-mixed/include/cl-options.h
@@ -0,0 +1,9 @@
+#ifndef cloptions_h
+#define cloptions_h
+
+#include "structs.h"
+
+// Process general command line options
+PetscErrorCode ProcessCommandLineOptions(AppCtx app_ctx);
+
+#endif  // cloptions_h
diff --git a/examples/Hdiv-mixed/include/petsc-macros.h b/examples/Hdiv-mixed/include/petsc-macros.h
new file mode 100644
index 0000000000..f8c63ebdc4
--- /dev/null
+++ b/examples/Hdiv-mixed/include/petsc-macros.h
@@ -0,0 +1,17 @@
+#ifndef petsc_macros
+#define petsc_macros
+
+#if PETSC_VERSION_LT(3, 14, 0)  // PETSc older than 3.14: forward 7 of the 9 arguments (e and h are dropped)
+#define DMPlexGetClosureIndices(a, b, c, d, e, f, g, h, i) DMPlexGetClosureIndices(a, b, c, d, f, g, i)
+#define DMPlexRestoreClosureIndices(a, b, c, d, e, f, g, h, i) DMPlexRestoreClosureIndices(a, b, c, d, f, g, i)
+#endif
+
+#if PETSC_VERSION_LT(3, 14, 0)  // DMAddBoundary takes 14 arguments here; PETSc older than 3.14 receives 11 of them, reordered
+#define DMAddBoundary(a, b, c, d, e, f, g, h, i, j, k, l, m, n) DMAddBoundary(a, b, c, e, h, i, j, k, f, g, m)
+#elif PETSC_VERSION_LT(3, 16, 0)  // 3.14 <= PETSc < 3.16: same reordering plus argument l (12 arguments)
+#define DMAddBoundary(a, b, c, d, e, f, g, h, i, j, k, l, m, n) DMAddBoundary(a, b, c, e, h, i, j, k, l, f, g, m)
+#else  // PETSc 3.16 and newer: arguments keep their order and only e is dropped (13 arguments)
+#define DMAddBoundary(a, b, c, d, e, f, g, h, i, j, k, l, m, n) DMAddBoundary(a, b, c, d, f, g, h, i, j, k, l, m, n)
+#endif
+
+#endif
diff --git a/examples/Hdiv-mixed/include/post-processing.h b/examples/Hdiv-mixed/include/post-processing.h
new file mode 100644
index 0000000000..5bd940be39
--- /dev/null
+++ b/examples/Hdiv-mixed/include/post-processing.h
@@ -0,0 +1,15 @@
+#ifndef post_processing_h
+#define post_processing_h
+
+#include <ceed.h>
+#include <petsc.h>
+
+#include "../include/setup-libceed.h"
+#include "structs.h"
+PetscErrorCode PrintOutput(DM dm, Ceed ceed, AppCtx app_ctx, PetscBool has_ts, TS ts, SNES snes, KSP ksp, Vec U, CeedScalar l2_error_u,
+                           CeedScalar l2_error_p);
+PetscErrorCode SetupProjectVelocityCtx_Hdiv(MPI_Comm comm, DM dm, Ceed ceed, CeedData ceed_data, OperatorApplyContext ctx_Hdiv);
+PetscErrorCode SetupProjectVelocityCtx_H1(MPI_Comm comm, DM dm_H1, Ceed ceed, CeedData ceed_data, OperatorApplyContext ctx_H1);
+PetscErrorCode ProjectVelocity(AppCtx app_ctx, Vec U, Vec *U_H1);
+PetscErrorCode CtxVecDestroy(AppCtx app_ctx);
+#endif  // post_processing_h
diff --git a/examples/Hdiv-mixed/include/register-problem.h b/examples/Hdiv-mixed/include/register-problem.h
new file mode 100644
index 0000000000..88e4b2a34a
--- /dev/null
+++ b/examples/Hdiv-mixed/include/register-problem.h
@@ -0,0 +1,26 @@
+#ifndef register_problems_h
+#define register_problems_h
+
+#include "structs.h"
+
+// Register problems to be available on the command line
+PetscErrorCode RegisterProblems_Hdiv(AppCtx app_ctx);
+// -----------------------------------------------------------------------------
+// Set up problems function prototype
+// -----------------------------------------------------------------------------
+// 1) darcy2d
+PetscErrorCode Hdiv_DARCY2D(Ceed ceed, ProblemData problem_data, DM dm, void *ctx);
+
+// 2) darcy3d
+PetscErrorCode Hdiv_DARCY3D(Ceed ceed, ProblemData problem_data, DM dm, void *ctx);
+
+// 3) darcy3dprism
+
+// 4) richard
+PetscErrorCode Hdiv_RICHARD2D(Ceed ceed, ProblemData problem_data, DM dm, void *ctx);
+
+PetscErrorCode Hdiv_RICHARD3D(Ceed ceed, ProblemData problem_data, DM dm, void *ctx);
+
+extern int FreeContextPetsc(void *);
+
+#endif  // register_problems_h
diff --git a/examples/Hdiv-mixed/include/setup-boundary.h b/examples/Hdiv-mixed/include/setup-boundary.h
new file mode 100644
index 0000000000..c62fb92982
--- /dev/null
+++ b/examples/Hdiv-mixed/include/setup-boundary.h
@@ -0,0 +1,22 @@
+#ifndef setup_boundary_h
+#define setup_boundary_h
+
+#include <ceed.h>
+#include <petsc.h>
+#include <petscdmplex.h>
+#include <petscsys.h>
+
+#include "structs.h"
+
+// ---------------------------------------------------------------------------
+// Create boundary label
+// ---------------------------------------------------------------------------
+PetscErrorCode CreateBCLabel(DM dm, const char name[]);
+
+// ---------------------------------------------------------------------------
+// Add Dirichlet boundaries to DM
+// ---------------------------------------------------------------------------
+PetscErrorCode DMAddBoundariesDirichlet(DM dm);
+PetscErrorCode BoundaryDirichletMMS(PetscInt dim, PetscReal t, const PetscReal coords[], PetscInt num_comp_u, PetscScalar *u, void *ctx);
+PetscErrorCode DMAddBoundariesPressure(Ceed ceed, CeedData ceed_data, AppCtx app_ctx, ProblemData problem_data, DM dm, CeedVector bc_pressure);
+#endif  // setup_boundary_h
diff --git a/examples/Hdiv-mixed/include/setup-dm.h b/examples/Hdiv-mixed/include/setup-dm.h
new file mode 100644
index 0000000000..775b685b92
--- /dev/null
+++ b/examples/Hdiv-mixed/include/setup-dm.h
@@ -0,0 +1,18 @@
+#ifndef setupdm_h
+#define setupdm_h
+
+#include <ceed.h>
+#include <petsc.h>
+#include <petscdmplex.h>
+#include <petscsys.h>
+
+#include "structs.h"
+
+// ---------------------------------------------------------------------------
+// Setup DM
+// ---------------------------------------------------------------------------
+PetscErrorCode CreateDM(MPI_Comm comm, Ceed ceed, DM *dm);
+PetscErrorCode PerturbVerticesSmooth(DM dm);
+PetscErrorCode PerturbVerticesRandom(DM dm);
+
+#endif  // setupdm_h
diff --git a/examples/Hdiv-mixed/include/setup-fe.h b/examples/Hdiv-mixed/include/setup-fe.h
new file mode 100644
index 0000000000..8836489f8b
--- /dev/null
+++ b/examples/Hdiv-mixed/include/setup-fe.h
@@ -0,0 +1,23 @@
+#ifndef setupfe_h
+#define setupfe_h
+
+#include <ceed.h>
+#include <petsc.h>
+#include <petscdmplex.h>
+#include <petscsys.h>
+
+#include "structs.h"
+
+// ---------------------------------------------------------------------------
+// Setup FE
+// ---------------------------------------------------------------------------
+CeedMemType    MemTypeP2C(PetscMemType mtype);
+PetscErrorCode SetupFEHdiv(MPI_Comm comm, DM dm, DM dm_u0, DM dm_p0);
+PetscErrorCode SetupFEH1(ProblemData problem_data, AppCtx app_ctx, DM dm_H1);
+PetscInt       Involute(PetscInt i);
+PetscErrorCode CreateRestrictionFromPlex(Ceed ceed, DM dm, CeedInt height, DMLabel domain_label, CeedInt value, CeedElemRestriction *elem_restr);
+// Utility function to create local CEED Oriented restriction from DMPlex
+PetscErrorCode CreateRestrictionFromPlexOriented(Ceed ceed, DM dm, DM dm_u0, DM dm_p0, CeedInt P, CeedElemRestriction *elem_restr_u,
+                                                 CeedElemRestriction *elem_restr_p, CeedElemRestriction *elem_restr_u0,
+                                                 CeedElemRestriction *elem_restr_p0);
+#endif  // setupfe_h
diff --git a/examples/Hdiv-mixed/include/setup-libceed.h b/examples/Hdiv-mixed/include/setup-libceed.h
new file mode 100644
index 0000000000..d04c3b1fa5
--- /dev/null
+++ b/examples/Hdiv-mixed/include/setup-libceed.h
@@ -0,0 +1,10 @@
+#ifndef setuplibceed_h
+#define setuplibceed_h
+
+#include "setup-fe.h"
+#include "structs.h"
+
+// Destroy libCEED objects
+PetscErrorCode CeedDataDestroy(CeedData ceed_data, ProblemData problem_data);
+PetscErrorCode SetupLibceed(DM dm, DM dm_u0, DM dm_p0, DM dm_H1, Ceed ceed, AppCtx app_ctx, ProblemData problem_data, CeedData ceed_data);
+#endif  // setuplibceed_h
diff --git a/examples/Hdiv-mixed/include/setup-matops.h b/examples/Hdiv-mixed/include/setup-matops.h
new file mode 100644
index 0000000000..51bb686bcf
--- /dev/null
+++ b/examples/Hdiv-mixed/include/setup-matops.h
@@ -0,0 +1,13 @@
+#ifndef setup_matops_h
+#define setup_matops_h
+
+#include <ceed.h>
+#include <petsc.h>
+
+#include "structs.h"
+
+PetscErrorCode ApplyLocalCeedOp(Vec X, Vec Y, OperatorApplyContext op_apply_ctx);
+PetscErrorCode ApplyAddLocalCeedOp(Vec X, Vec Y, OperatorApplyContext op_apply_ctx);
+PetscErrorCode GetDiagonal(Mat A, Vec D);
+
+#endif  // setup_matops_h
diff --git a/examples/Hdiv-mixed/include/setup-solvers.h b/examples/Hdiv-mixed/include/setup-solvers.h
new file mode 100644
index 0000000000..36d50a210d
--- /dev/null
+++ b/examples/Hdiv-mixed/include/setup-solvers.h
@@ -0,0 +1,19 @@
+#ifndef setup_solvers_h
+#define setup_solvers_h
+
+#include <ceed.h>
+#include <petsc.h>
+
+#include "petscvec.h"
+#include "structs.h"
+
+PetscErrorCode SetupJacobianOperatorCtx(DM dm, Ceed ceed, CeedData ceed_data, OperatorApplyContext ctx_jacobian);
+PetscErrorCode SetupResidualOperatorCtx(DM dm, Ceed ceed, CeedData ceed_data, OperatorApplyContext ctx_residual);
+PetscErrorCode SetupErrorOperatorCtx(DM dm, Ceed ceed, CeedData ceed_data, OperatorApplyContext ctx_error);
+PetscErrorCode ApplyMatOp(Mat A, Vec X, Vec Y);
+PetscErrorCode SNESFormResidual(SNES snes, Vec X, Vec Y, void *ctx);
+PetscErrorCode SNESFormJacobian(SNES snes, Vec U, Mat J, Mat J_pre, void *ctx);
+PetscErrorCode PDESolver(CeedData ceed_data, AppCtx app_ctx, SNES snes, KSP ksp, Vec *U);
+PetscErrorCode ComputeL2Error(CeedData ceed_data, AppCtx app_ctx, Vec U, CeedScalar *l2_error_u, CeedScalar *l2_error_p);
+
+#endif  // setup_solvers_h
diff --git a/examples/Hdiv-mixed/include/setup-ts.h b/examples/Hdiv-mixed/include/setup-ts.h
new file mode 100644
index 0000000000..9db7f31203
--- /dev/null
+++ b/examples/Hdiv-mixed/include/setup-ts.h
@@ -0,0 +1,16 @@
+#ifndef setup_ts_h
+#define setup_ts_h
+
+#include <ceed.h>
+#include <petsc.h>
+
+#include "structs.h"
+
+PetscErrorCode CreateInitialConditions(CeedData ceed_data, AppCtx app_ctx, Vec U);
+PetscErrorCode SetupResidualOperatorCtx_Ut(MPI_Comm comm, DM dm, Ceed ceed, CeedData ceed_data, OperatorApplyContext ctx_residual_ut);
+PetscErrorCode SetupResidualOperatorCtx_U0(MPI_Comm comm, DM dm, Ceed ceed, CeedData ceed_data, OperatorApplyContext ctx_initial_u0);
+PetscErrorCode SetupResidualOperatorCtx_P0(MPI_Comm comm, DM dm, Ceed ceed, CeedData ceed_data, OperatorApplyContext ctx_initial_p0);
+PetscErrorCode TSFormIResidual(TS ts, PetscReal time, Vec X, Vec X_t, Vec Y, void *ctx_residual_ut);
+PetscErrorCode TSSolveRichard(CeedData ceed_data, AppCtx app_ctx, TS ts, Vec *U);
+
+#endif  // setup_ts_h
diff --git a/examples/Hdiv-mixed/include/structs.h b/examples/Hdiv-mixed/include/structs.h
new file mode 100644
index 0000000000..1c70274d64
--- /dev/null
+++ b/examples/Hdiv-mixed/include/structs.h
@@ -0,0 +1,67 @@
+#ifndef structs_h
+#define structs_h
+
+#include <ceed.h>
+#include <petsc.h>
+
+// PETSc operator context: the PETSc (DM, Vec) and libCEED (Ceed, CeedVector, CeedOperator) handles kept together for one operator
+typedef struct OperatorApplyContext_ *OperatorApplyContext;  // pointer handle; this is the type the example's prototypes take
+struct OperatorApplyContext_ {
+  MPI_Comm              comm;
+  Vec                   X_loc, Y_loc, X_t_loc;  // presumably local input / output / time-derivative work vectors -- confirm in the setup code
+  CeedVector            x_ceed, y_ceed, x_t_ceed, x_coord, rhs_ceed_H1;
+  CeedOperator          op_apply, op_rhs_H1;
+  DM                    dm;
+  Ceed                  ceed;
+  CeedScalar            t, dt;
+  CeedContextFieldLabel solution_time_label, final_time_label, timestep_label;  // QFunction context field labels; by name: time, final time, step size
+  CeedElemRestriction   elem_restr_u_H1;
+  VecType               vec_type;
+};
+
+// libCEED data struct: the bases, element restrictions, QFunctions, operators and vectors used by the example
+typedef struct CeedData_ *CeedData;  // pointer handle; this is the type the example's prototypes take
+struct CeedData_ {
+  CeedBasis           basis_x, basis_u, basis_p, basis_u_face;
+  CeedElemRestriction elem_restr_x, elem_restr_u, elem_restr_U_i, elem_restr_p, elem_restr_p_i, elem_restr_u0, elem_restr_p0, elem_restr_u_H1;
+  CeedQFunction       qf_residual, qf_jacobian, qf_error, qf_ics_u, qf_ics_p, qf_rhs_u0, qf_rhs_p0, qf_rhs_H1, qf_post_mass;
+  CeedOperator        op_residual, op_jacobian, op_error, op_ics_u, op_ics_p, op_rhs_u0, op_rhs_p0, op_rhs_H1, op_post_mass;
+  CeedVector x_ceed, y_ceed, x_coord, x_t_ceed, rhs_u0_ceed, u0_ceed, v0_ceed, rhs_p0_ceed, p0_ceed, q0_ceed, rhs_ceed_H1, u_ceed, up_ceed, vp_ceed;
+  CeedInt    num_elem;  // presumably the number of mesh elements -- confirm where it is set
+};
+
+// Application context from user command line options
+typedef struct AppCtx_ *AppCtx;  // pointer handle; this is the type the example's prototypes take
+struct AppCtx_ {
+  char     ceed_resource[PETSC_MAX_PATH_LEN];  // libCEED backend
+  MPI_Comm comm;
+  // Degree of polynomial (1 only), extra quadrature pts
+  PetscInt degree;
+  PetscInt q_extra;
+  // For applying pressure BCs; the two arrays hold up to 16 entries each
+  PetscInt    bc_pressure_count;
+  PetscInt    bc_faces[16];  // face ID
+  PetscScalar bc_pressure_value[16];
+  // Problem type arguments
+  PetscFunctionList    problems;
+  char                 problem_name[PETSC_MAX_PATH_LEN];
+  CeedScalar           t_final, t;
+  PetscBool            view_solution, quartic;
+  char                 output_dir[PETSC_MAX_PATH_LEN];
+  PetscInt             output_freq;
+  OperatorApplyContext ctx_residual, ctx_jacobian, ctx_error, ctx_residual_ut, ctx_initial_u0, ctx_initial_p0, ctx_Hdiv, ctx_H1;
+};
+
+// Problem specific data: QFunction callbacks, their *_loc strings, QFunction contexts and discretization parameters
+typedef struct ProblemData_ *ProblemData;  // pointer handle; this is the type the example's prototypes take
+struct ProblemData_ {
+  CeedQFunctionUser true_solution, residual, jacobian, error, ics_u, ics_p, bc_pressure, rhs_u0, rhs_p0, post_rhs, post_mass;
+  const char       *true_solution_loc, *residual_loc, *jacobian_loc, *error_loc, *bc_pressure_loc, *ics_u_loc, *ics_p_loc, *rhs_u0_loc, *rhs_p0_loc,
+      *post_rhs_loc, *post_mass_loc;  // one string per callback above; presumably the QFunction source locations -- confirm at the call sites
+  CeedQuadMode         quadrature_mode;
+  CeedInt              elem_node, dim, q_data_size_face;
+  CeedQFunctionContext true_qfunction_ctx, error_qfunction_ctx, residual_qfunction_ctx, jacobian_qfunction_ctx, rhs_u0_qfunction_ctx;
+  PetscBool            has_ts, view_solution, quartic;
+};
+
+#endif  // structs_h
\ No newline at end of file
diff --git a/examples/Hdiv-mixed/main.c b/examples/Hdiv-mixed/main.c
new file mode 100644
index 0000000000..e701a25381
--- /dev/null
+++ b/examples/Hdiv-mixed/main.c
@@ -0,0 +1,283 @@
+// Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
+// the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
+// reserved. See files LICENSE and NOTICE for details.
+//
+// This file is part of CEED, a collection of benchmarks, miniapps, software
+// libraries and APIs for efficient high-order finite element and spectral
+// element discretizations for exascale applications. For more information and
+// source code availability see http://github.com/ceed.
+//
+// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
+// a collaborative effort of two U.S. Department of Energy organizations (Office
+// of Science and the National Nuclear Security Administration) responsible for
+// the planning and preparation of a capable exascale ecosystem, including
+// software, applications, hardware, advanced system engineering and early
+// testbed platforms, in support of the nation's exascale computing imperative.
+
+//                        libCEED + PETSc Example: Mixed-Poisson in H(div) space
+//
+// This example demonstrates a simple usage of libCEED with PETSc to solve
+//   mixed Darcy and Richards problems in H(div).
+//
+// The code uses higher level communication protocols in DMPlex.
+//
+// Build with: make
+// Run with:
+//   ./main -pc_type svd -problem darcy2d -dm_plex_dim 2 -dm_plex_box_faces 4,4
+//   ./main -pc_type none -problem darcy2d -dm_plex_dim 2 -dm_plex_box_faces 4,4 -ksp_type minres
+//   ./main -pc_type svd -problem darcy3d -dm_plex_dim 3 -dm_plex_box_faces 4,4,4
+//   ./main -pc_type svd -problem darcy3d -dm_plex_filename /path to the mesh file
+//   ./main -pc_type svd -problem richard2d -dm_plex_dim 2 -dm_plex_box_faces 4,4
+// (boundary is not working)
+//   ./main -pc_type svd -problem darcy2d -dm_plex_dim 2 -dm_plex_box_faces 4,4 -bc_pressure 1
+//   ./main -pc_type svd -problem darcy2d -dm_plex_dim 2 -dm_plex_box_faces 4,4 -bc_pressure 1,2,3,4
+//   ./main -pc_type svd -problem darcy3d -dm_plex_dim 3 -dm_plex_box_faces 4,4,4 -view_solution -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 1,0.1,1
+const char help[] = "Solve H(div)-mixed problem using PETSc and libCEED\n";
+
+#include "main.h"
+
+int main(int argc, char **argv) {
+  // ---------------------------------------------------------------------------
+  // Initialize PETSc
+  // ---------------------------------------------------------------------------
+  PetscCall(PetscInitialize(&argc, &argv, NULL, help));
+  MPI_Comm comm = PETSC_COMM_WORLD;
+
+  // ---------------------------------------------------------------------------
+  // Create structs
+  // ---------------------------------------------------------------------------
+  AppCtx app_ctx;
+  PetscCall(PetscCalloc1(1, &app_ctx));
+
+  ProblemData problem_data = NULL;
+  PetscCall(PetscCalloc1(1, &problem_data));
+
+  CeedData ceed_data;
+  PetscCall(PetscCalloc1(1, &ceed_data));
+
+  OperatorApplyContext ctx_jacobian, ctx_residual, ctx_residual_ut, ctx_initial_u0, ctx_initial_p0, ctx_error, ctx_Hdiv, ctx_H1;
+  PetscCall(PetscCalloc1(1, &ctx_jacobian));
+  PetscCall(PetscCalloc1(1, &ctx_residual));
+  PetscCall(PetscCalloc1(1, &ctx_residual_ut));
+  PetscCall(PetscCalloc1(1, &ctx_initial_u0));
+  PetscCall(PetscCalloc1(1, &ctx_initial_p0));
+  PetscCall(PetscCalloc1(1, &ctx_error));
+  PetscCall(PetscCalloc1(1, &ctx_Hdiv));
+  PetscCall(PetscCalloc1(1, &ctx_H1));
+  // Context for Darcy problem
+  app_ctx->ctx_residual = ctx_residual;
+  app_ctx->ctx_jacobian = ctx_jacobian;
+  // Context for Richards problem
+  app_ctx->ctx_residual_ut = ctx_residual_ut;
+  // Context for initial velocity
+  app_ctx->ctx_initial_u0 = ctx_initial_u0;
+  // Context for initial pressure
+  app_ctx->ctx_initial_p0 = ctx_initial_p0;
+  // Context for MMS
+  app_ctx->ctx_error = ctx_error;
+  // Context for post-processing
+  app_ctx->ctx_Hdiv = ctx_Hdiv;
+  app_ctx->ctx_H1   = ctx_H1;
+  app_ctx->comm     = comm;
+
+  // ---------------------------------------------------------------------------
+  // Process command line options
+  // ---------------------------------------------------------------------------
+  PetscCall(ProcessCommandLineOptions(app_ctx));
+
+  // ---------------------------------------------------------------------------
+  // Initialize libCEED
+  // ---------------------------------------------------------------------------
+  // -- Initialize backend
+  Ceed ceed;
+  CeedInit(app_ctx->ceed_resource, &ceed);
+
+  // -- Process general command line options
+  // ---------------------------------------------------------------------------
+  // Create DM
+  // ---------------------------------------------------------------------------
+  DM dm, dm_u0, dm_p0, dm_H1;
+  // DM for mixed problem
+  PetscCall(CreateDM(app_ctx->comm, ceed, &dm));
+  // DM for projecting initial velocity to Hdiv space
+  PetscCall(CreateDM(app_ctx->comm, ceed, &dm_u0));
+  // DM for projecting initial pressure in L2
+  PetscCall(CreateDM(app_ctx->comm, ceed, &dm_p0));
+  // DM for projecting solution U into H1 space for PetscViewer
+  PetscCall(CreateDM(app_ctx->comm, ceed, &dm_H1));
+  // TODO: add mesh option
+  // perturb dm to have smooth random mesh
+  // PetscCall( PerturbVerticesSmooth(dm) );
+  // PetscCall( PerturbVerticesSmooth(dm_H1) );
+
+  // perturb dm to have random mesh
+  // PetscCall(PerturbVerticesRandom(dm) );
+  // PetscCall(PerturbVerticesRandom(dm_H1) );
+
+  // ---------------------------------------------------------------------------
+  // Choose the problem from the list of registered problems
+  // ---------------------------------------------------------------------------
+  PetscCall(RegisterProblems_Hdiv(app_ctx));
+  {
+    PetscErrorCode (*p)(Ceed, ProblemData, DM, void *);
+    PetscCall(PetscFunctionListFind(app_ctx->problems, app_ctx->problem_name, &p));
+    if (!p) SETERRQ(PETSC_COMM_SELF, 1, "Problem '%s' not found", app_ctx->problem_name);
+    PetscCall((*p)(ceed, problem_data, dm, &app_ctx));
+  }
+
+  // ---------------------------------------------------------------------------
+  // Setup FE for H(div) mixed-problem and H1 projection in post-processing.c
+  // ---------------------------------------------------------------------------
+  PetscCall(SetupFEHdiv(app_ctx->comm, dm, dm_u0, dm_p0));
+  PetscCall(SetupFEH1(problem_data, app_ctx, dm_H1));
+
+  // ---------------------------------------------------------------------------
+  // Create global unknown solution
+  // ---------------------------------------------------------------------------
+  Vec U;  // U=[p,u]
+  PetscCall(DMCreateGlobalVector(dm, &U));
+
+  // ---------------------------------------------------------------------------
+  // Setup libCEED
+  // ---------------------------------------------------------------------------
+  // -- Set up libCEED objects
+  PetscCall(SetupLibceed(dm, dm_u0, dm_p0, dm_H1, ceed, app_ctx, problem_data, ceed_data));
+
+  // ---------------------------------------------------------------------------
+  // Setup pressure boundary conditions (not working)
+  // ---------------------------------------------------------------------------
+  // --Create empty local vector for libCEED
+  // Vec          P_loc;
+  // PetscInt     P_loc_size;
+  // CeedScalar  *p0;
+  // CeedVector   P_ceed;
+  // PetscMemType pressure_mem_type;
+  // PetscCall(DMCreateLocalVector(dm, &P_loc));
+  // PetscCall(VecGetSize(P_loc, &P_loc_size));
+  // PetscCall(VecZeroEntries(P_loc));
+  // PetscCall(VecGetArrayAndMemType(P_loc, &p0, &pressure_mem_type));
+  // CeedVectorCreate(ceed, P_loc_size, &P_ceed);
+  // CeedVectorSetArray(P_ceed, MemTypeP2C(pressure_mem_type), CEED_USE_POINTER, p0);
+  //// -- Apply operator to create local pressure vector on boundary
+  // PetscCall(DMAddBoundariesPressure(ceed, ceed_data, app_ctx, problem_data, dm, P_ceed));
+  //// CeedVectorView(P_ceed, "%12.8f", stdout);
+  ////  -- Map local to global
+  // Vec P;
+  // CeedVectorTakeArray(P_ceed, MemTypeP2C(pressure_mem_type), NULL);
+  // PetscCall(VecRestoreArrayAndMemType(P_loc, &p0));
+  // PetscCall(DMCreateGlobalVector(dm, &P));
+  // PetscCall(VecZeroEntries(P));
+  // PetscCall(DMLocalToGlobal(dm, P_loc, ADD_VALUES, P));
+
+  // ---------------------------------------------------------------------------
+  // Setup context for projection problem; post-processing.c
+  // ---------------------------------------------------------------------------
+  PetscCall(SetupProjectVelocityCtx_Hdiv(app_ctx->comm, dm, ceed, ceed_data, app_ctx->ctx_Hdiv));
+  PetscCall(SetupProjectVelocityCtx_H1(app_ctx->comm, dm_H1, ceed, ceed_data, app_ctx->ctx_H1));
+
+  // ---------------------------------------------------------------------------
+  // Setup TSSolve for Richard problem
+  // ---------------------------------------------------------------------------
+  TS ts = NULL;  // created only when has_ts; NULL otherwise so PrintOutput never receives an indeterminate handle
+  if (problem_data->has_ts) {
+    // ---------------------------------------------------------------------------
+    // Setup context for initial conditions
+    // ---------------------------------------------------------------------------
+    PetscCall(SetupResidualOperatorCtx_U0(app_ctx->comm, dm_u0, ceed, ceed_data, app_ctx->ctx_initial_u0));
+    PetscCall(SetupResidualOperatorCtx_P0(app_ctx->comm, dm_p0, ceed, ceed_data, app_ctx->ctx_initial_p0));
+    PetscCall(SetupResidualOperatorCtx_Ut(app_ctx->comm, dm, ceed, ceed_data, app_ctx->ctx_residual_ut));
+    PetscCall(CreateInitialConditions(ceed_data, app_ctx, U));
+    // VecView(U, PETSC_VIEWER_STDOUT_WORLD);
+    //  Solve Richards problem
+    PetscCall(TSCreate(app_ctx->comm, &ts));
+    PetscCall(VecZeroEntries(app_ctx->ctx_residual_ut->X_loc));
+    PetscCall(VecZeroEntries(app_ctx->ctx_residual_ut->X_t_loc));
+    PetscCall(TSSolveRichard(ceed_data, app_ctx, ts, &U));
+    // VecView(U, PETSC_VIEWER_STDOUT_WORLD);
+  }
+
+  // ---------------------------------------------------------------------------
+  // Setup SNES for Darcy problem
+  // ---------------------------------------------------------------------------
+  SNES snes = NULL;  // created only when !has_ts; NULL otherwise (see PrintOutput call below)
+  KSP  ksp  = NULL;  // borrowed from snes via SNESGetKSP, never destroyed separately here
+  if (!problem_data->has_ts) {
+    PetscCall(SetupJacobianOperatorCtx(dm, ceed, ceed_data, app_ctx->ctx_jacobian));
+    PetscCall(SetupResidualOperatorCtx(dm, ceed, ceed_data, app_ctx->ctx_residual));
+    // Create SNES
+    PetscCall(SNESCreate(app_ctx->comm, &snes));
+    PetscCall(SNESGetKSP(snes, &ksp));
+    PetscCall(PDESolver(ceed_data, app_ctx, snes, ksp, &U));
+    // VecView(U, PETSC_VIEWER_STDOUT_WORLD);
+  }
+
+  // ---------------------------------------------------------------------------
+  // Compute L2 error of mms problem
+  // ---------------------------------------------------------------------------
+  PetscCall(SetupErrorOperatorCtx(dm, ceed, ceed_data, app_ctx->ctx_error));
+  CeedScalar l2_error_u, l2_error_p;
+  PetscCall(ComputeL2Error(ceed_data, app_ctx, U, &l2_error_u, &l2_error_p));
+
+  // ---------------------------------------------------------------------------
+  // Print solver iterations and final norms
+  // ---------------------------------------------------------------------------
+  PetscCall(PrintOutput(dm, ceed, app_ctx, problem_data->has_ts, ts, snes, ksp, U, l2_error_u, l2_error_p));
+
+  // ---------------------------------------------------------------------------
+  // Save solution (paraview)
+  // ---------------------------------------------------------------------------
+  if (app_ctx->view_solution) {
+    PetscViewer viewer_p;
+    PetscCall(PetscViewerVTKOpen(app_ctx->comm, "darcy_pressure.vtu", FILE_MODE_WRITE, &viewer_p));
+    PetscCall(VecView(U, viewer_p));
+    PetscCall(PetscViewerDestroy(&viewer_p));
+
+    Vec U_H1;  // velocity in H1 space for post-processing
+    PetscCall(DMCreateGlobalVector(dm_H1, &U_H1));
+    PetscCall(ProjectVelocity(app_ctx, U, &U_H1));
+
+    PetscViewer viewer_u;
+    PetscCall(PetscViewerVTKOpen(app_ctx->comm, "darcy_velocity.vtu", FILE_MODE_WRITE, &viewer_u));
+    PetscCall(VecView(U_H1, viewer_u));
+    PetscCall(PetscViewerDestroy(&viewer_u));
+    PetscCall(VecDestroy(&U_H1));
+  }
+  // ---------------------------------------------------------------------------
+  // Free objects
+  // ---------------------------------------------------------------------------
+
+  // Free PETSc objects
+  PetscCall(DMDestroy(&dm));
+  PetscCall(DMDestroy(&dm_u0));
+  PetscCall(DMDestroy(&dm_p0));
+  PetscCall(DMDestroy(&dm_H1));
+  PetscCall(VecDestroy(&U));
+  PetscCall(CtxVecDestroy(app_ctx));
+  if (problem_data->has_ts) {
+    PetscCall(TSDestroy(&ts));
+  } else {
+    PetscCall(SNESDestroy(&snes));
+  }
+  PetscCall(CeedDataDestroy(ceed_data, problem_data));
+
+  // -- Function list
+  PetscCall(PetscFunctionListDestroy(&app_ctx->problems));
+
+  // -- Structs
+  PetscCall(PetscFree(app_ctx));
+  PetscCall(PetscFree(problem_data));
+  PetscCall(PetscFree(ctx_initial_u0));
+  PetscCall(PetscFree(ctx_initial_p0));
+  PetscCall(PetscFree(ctx_residual_ut));
+  PetscCall(PetscFree(ctx_residual));
+  PetscCall(PetscFree(ctx_jacobian));
+  PetscCall(PetscFree(ctx_error));
+  PetscCall(PetscFree(ctx_H1));
+  PetscCall(PetscFree(ctx_Hdiv));
+
+  // Free libCEED objects
+  // CeedVectorDestroy(&bc_pressure);
+  CeedDestroy(&ceed);
+
+  return PetscFinalize();
+}
diff --git a/examples/Hdiv-mixed/main.h b/examples/Hdiv-mixed/main.h
new file mode 100644
index 0000000000..8f3e6abe05
--- /dev/null
+++ b/examples/Hdiv-mixed/main.h
@@ -0,0 +1,16 @@
+
+#ifndef MAIN_H
+#define MAIN_H
+
+#include "include/cl-options.h"
+#include "include/post-processing.h"
+#include "include/register-problem.h"
+#include "include/setup-boundary.h"
+#include "include/setup-dm.h"
+#include "include/setup-fe.h"
+#include "include/setup-libceed.h"
+#include "include/setup-matops.h"
+#include "include/setup-solvers.h"
+#include "include/setup-ts.h"
+
+#endif  // MAIN_H
\ No newline at end of file
diff --git a/examples/Hdiv-mixed/problems/darcy2d.c b/examples/Hdiv-mixed/problems/darcy2d.c
new file mode 100644
index 0000000000..e5e535c117
--- /dev/null
+++ b/examples/Hdiv-mixed/problems/darcy2d.c
@@ -0,0 +1,107 @@
+// Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
+// the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
+// reserved. See files LICENSE and NOTICE for details.
+//
+// This file is part of CEED, a collection of benchmarks, miniapps, software
+// libraries and APIs for efficient high-order finite element and spectral
+// element discretizations for exascale applications. For more information and
+// source code availability see http://github.com/ceed.
+//
+// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
+// a collaborative effort of two U.S. Department of Energy organizations (Office
+// of Science and the National Nuclear Security Administration) responsible for
+// the planning and preparation of a capable exascale ecosystem, including
+// software, applications, hardware, advanced system engineering and early
+// testbed platforms, in support of the nation's exascale computing imperative.
+
+/// @file
+/// Utility functions for setting up Darcy problem in 2D
+
+#include "../include/register-problem.h"
+#include "../qfunctions/darcy-error2d.h"
+#include "../qfunctions/darcy-system-quartic2d.h"
+#include "../qfunctions/darcy-system2d.h"
+#include "../qfunctions/darcy-true-quartic2d.h"
+#include "../qfunctions/darcy-true2d.h"
+#include "../qfunctions/post-processing2d.h"
+// #include "../qfunctions/pressure-boundary2d.h"
+
+// Fill problem_data and build the shared QFunction context for the 2D Darcy problem; ctx must point to an AppCtx.
+PetscErrorCode Hdiv_DARCY2D(Ceed ceed, ProblemData problem_data, DM dm, void *ctx) {
+  AppCtx               app_ctx = *(AppCtx *)ctx;
+  DARCYContext         darcy_ctx;
+  CeedQFunctionContext darcy_context;
+
+  PetscFunctionBeginUser;
+
+  PetscCall(PetscCalloc1(1, &darcy_ctx));
+
+  // ------------------------------------------------------
+  //               SET UP DARCY2D
+  // ------------------------------------------------------
+  problem_data->dim               = 2;
+  problem_data->elem_node         = 4;
+  problem_data->q_data_size_face  = 3;
+  problem_data->quadrature_mode   = CEED_GAUSS;
+  problem_data->true_solution     = DarcyTrue2D;
+  problem_data->true_solution_loc = DarcyTrue2D_loc;
+  problem_data->residual          = DarcySystem2D;
+  problem_data->residual_loc      = DarcySystem2D_loc;
+  problem_data->jacobian          = JacobianDarcySystem2D;
+  problem_data->jacobian_loc      = JacobianDarcySystem2D_loc;
+  problem_data->error             = DarcyError2D;
+  problem_data->error_loc         = DarcyError2D_loc;
+  // bc_pressure / bc_pressure_loc are intentionally left unset (BCPressure2D is disabled)
+  problem_data->post_rhs      = PostProcessingRhs2D;
+  problem_data->post_rhs_loc  = PostProcessingRhs2D_loc;
+  problem_data->post_mass     = PostProcessingMass2D;
+  problem_data->post_mass_loc = PostProcessingMass2D_loc;
+  problem_data->has_ts        = PETSC_FALSE;
+  problem_data->view_solution = app_ctx->view_solution;
+  problem_data->quartic       = app_ctx->quartic;
+
+  if (app_ctx->quartic) {
+    problem_data->true_solution     = DarcyTrueQuartic2D;
+    problem_data->true_solution_loc = DarcyTrueQuartic2D_loc;
+    problem_data->residual          = DarcySystemQuartic2D;
+    problem_data->residual_loc      = DarcySystemQuartic2D_loc;
+    problem_data->jacobian          = JacobianDarcySystemQuartic2D;
+    problem_data->jacobian_loc      = JacobianDarcySystemQuartic2D_loc;
+  }
+
+  // ------------------------------------------------------
+  //              Command line Options
+  // ------------------------------------------------------
+  CeedScalar kappa = 10., rho_a0 = 998.2, g = 9.8, alpha_a = 1., b_a = 10.;
+  PetscOptionsBegin(app_ctx->comm, NULL, "Options for Hdiv-mixed problem", NULL);
+  PetscCall(PetscOptionsScalar("-kappa", "Hydraulic Conductivity", NULL, kappa, &kappa, NULL));
+  PetscCall(PetscOptionsScalar("-rho_a0", "Density at p0", NULL, rho_a0, &rho_a0, NULL));
+  PetscCall(PetscOptionsScalar("-alpha_a", "Parameter for relative permeability", NULL, alpha_a, &alpha_a, NULL));
+  PetscCall(PetscOptionsScalar("-b_a", "Parameter for relative permeability", NULL, b_a, &b_a, NULL));
+  PetscOptionsEnd();
+
+  // DMGetBoundingBox writes one entry per coordinate dimension of dm, so leave room for a 3D mesh
+  PetscReal domain_min[3] = {0.}, domain_max[3] = {0.}, domain_size[2];
+  PetscCall(DMGetBoundingBox(dm, domain_min, domain_max));
+  for (PetscInt i = 0; i < 2; i++) domain_size[i] = domain_max[i] - domain_min[i];
+
+  darcy_ctx->kappa   = kappa;
+  darcy_ctx->rho_a0  = rho_a0;
+  darcy_ctx->g       = g;
+  darcy_ctx->alpha_a = alpha_a;
+  darcy_ctx->b_a     = b_a;
+  darcy_ctx->lx      = domain_size[0];
+  darcy_ctx->ly      = domain_size[1];
+
+  CeedQFunctionContextCreate(ceed, &darcy_context);
+  CeedQFunctionContextSetData(darcy_context, CEED_MEM_HOST, CEED_COPY_VALUES, sizeof(*darcy_ctx), darcy_ctx);
+  // CEED_COPY_VALUES: the context keeps its own copy, so darcy_ctx is released below without a destroy callback
+  problem_data->true_qfunction_ctx = darcy_context;
+  CeedQFunctionContextReferenceCopy(darcy_context, &problem_data->residual_qfunction_ctx);
+  CeedQFunctionContextReferenceCopy(darcy_context, &problem_data->jacobian_qfunction_ctx);
+  CeedQFunctionContextReferenceCopy(darcy_context, &problem_data->error_qfunction_ctx);
+
+  PetscCall(PetscFree(darcy_ctx));
+
+  PetscFunctionReturn(0);
+}
diff --git a/examples/Hdiv-mixed/problems/darcy3d.c b/examples/Hdiv-mixed/problems/darcy3d.c
new file mode 100644
index 0000000000..52e101931c
--- /dev/null
+++ b/examples/Hdiv-mixed/problems/darcy3d.c
@@ -0,0 +1,95 @@
+// Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
+// the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
+// reserved. See files LICENSE and NOTICE for details.
+//
+// This file is part of CEED, a collection of benchmarks, miniapps, software
+// libraries and APIs for efficient high-order finite element and spectral
+// element discretizations for exascale applications. For more information and
+// source code availability see http://github.com/ceed.
+//
+// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
+// a collaborative effort of two U.S. Department of Energy organizations (Office
+// of Science and the National Nuclear Security Administration) responsible for
+// the planning and preparation of a capable exascale ecosystem, including
+// software, applications, hardware, advanced system engineering and early
+// testbed platforms, in support of the nation's exascale computing imperative.
+
+/// @file
+/// Utility functions for setting up Darcy problem in 3D
+
+#include "../include/register-problem.h"
+#include "../qfunctions/darcy-error3d.h"
+#include "../qfunctions/darcy-system3d.h"
+#include "../qfunctions/darcy-true3d.h"
+#include "../qfunctions/post-processing3d.h"
+// #include "../qfunctions/pressure-boundary3d.h"
+
+PetscErrorCode Hdiv_DARCY3D(Ceed ceed, ProblemData problem_data, DM dm, void *ctx) {
+  AppCtx               user = *(AppCtx *)ctx;
+  DARCYContext         params;
+  CeedQFunctionContext qf_ctx;
+  PetscReal            box_min[3], box_max[3];
+  CeedScalar           kappa = 1., rho_a0 = 998.2, g = 9.8, alpha_a = 1., b_a = 10.;
+
+  PetscFunctionBeginUser;
+  PetscCall(PetscCalloc1(1, &params));
+
+  // ------------------------------------------------------
+  //        Element sizes and qfunctions (3D Darcy)
+  // ------------------------------------------------------
+  problem_data->has_ts            = PETSC_FALSE;
+  problem_data->view_solution     = user->view_solution;
+  problem_data->quadrature_mode   = CEED_GAUSS;
+  problem_data->q_data_size_face  = 4;
+  problem_data->elem_node         = 8;
+  problem_data->dim               = 3;
+  // -- True solution and error
+  problem_data->true_solution_loc = DarcyTrue3D_loc;
+  problem_data->true_solution     = DarcyTrue3D;
+  problem_data->error_loc         = DarcyError3D_loc;
+  problem_data->error             = DarcyError3D;
+  // -- Residual and Jacobian (bc_pressure stays unset)
+  problem_data->residual_loc      = DarcySystem3D_loc;
+  problem_data->residual          = DarcySystem3D;
+  problem_data->jacobian_loc      = JacobianDarcySystem3D_loc;
+  problem_data->jacobian          = JacobianDarcySystem3D;
+  // -- Post-processing
+  problem_data->post_mass_loc     = PostProcessingMass3D_loc;
+  problem_data->post_mass         = PostProcessingMass3D;
+  problem_data->post_rhs_loc      = PostProcessingRhs3D_loc;
+  problem_data->post_rhs          = PostProcessingRhs3D;
+
+  // ------------------------------------------------------
+  //        Physical parameters from the command line
+  // ------------------------------------------------------
+  PetscOptionsBegin(user->comm, NULL, "Options for Hdiv-mixed problem", NULL);
+  PetscCall(PetscOptionsScalar("-kappa", "Hydraulic Conductivity", NULL, kappa, &kappa, NULL));
+  PetscCall(PetscOptionsScalar("-rho_a0", "Density at p0", NULL, rho_a0, &rho_a0, NULL));
+  PetscCall(PetscOptionsScalar("-alpha_a", "Parameter for relative permeability", NULL, alpha_a, &alpha_a, NULL));
+  PetscCall(PetscOptionsScalar("-b_a", "Parameter for relative permeability", NULL, b_a, &b_a, NULL));
+  PetscOptionsEnd();
+
+  // Domain extents along each axis, taken from the mesh bounding box
+  PetscCall(DMGetBoundingBox(dm, box_min, box_max));
+  params->lx = box_max[0] - box_min[0];
+  params->ly = box_max[1] - box_min[1];
+  params->lz = box_max[2] - box_min[2];
+
+  // Option values (g has no command line option)
+  params->b_a     = b_a;
+  params->alpha_a = alpha_a;
+  params->g       = g;
+  params->rho_a0  = rho_a0;
+  params->kappa   = kappa;
+
+  // One context, copied by value, shared by the true-solution, residual, Jacobian and error qfunctions
+  CeedQFunctionContextCreate(ceed, &qf_ctx);
+  CeedQFunctionContextSetData(qf_ctx, CEED_MEM_HOST, CEED_COPY_VALUES, sizeof(*params), params);
+  problem_data->true_qfunction_ctx = qf_ctx;
+  CeedQFunctionContextReferenceCopy(qf_ctx, &problem_data->residual_qfunction_ctx);
+  CeedQFunctionContextReferenceCopy(qf_ctx, &problem_data->jacobian_qfunction_ctx);
+  CeedQFunctionContextReferenceCopy(qf_ctx, &problem_data->error_qfunction_ctx);
+  PetscCall(PetscFree(params));
+
+  PetscFunctionReturn(0);
+}
diff --git a/examples/Hdiv-mixed/problems/register-problem.c b/examples/Hdiv-mixed/problems/register-problem.c
new file mode 100644
index 0000000000..994e4f4605
--- /dev/null
+++ b/examples/Hdiv-mixed/problems/register-problem.c
@@ -0,0 +1,42 @@
+// Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
+// the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
+// reserved. See files LICENSE and NOTICE for details.
+//
+// This file is part of CEED, a collection of benchmarks, miniapps, software
+// libraries and APIs for efficient high-order finite element and spectral
+// element discretizations for exascale applications. For more information and
+// source code availability see http://github.com/ceed.
+//
+// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
+// a collaborative effort of two U.S. Department of Energy organizations (Office
+// of Science and the National Nuclear Security Administration) responsible for
+// the planning and preparation of a capable exascale ecosystem, including
+// software, applications, hardware, advanced system engineering and early
+// testbed platforms, in support of the nation's exascale computing imperative.
+
+/// @file
+/// Problem registration for H(div) example using PETSc
+
+#include "../include/register-problem.h"
+
+// Register the problems selectable with -problem; resets app_ctx->problems and fills it with the setup routines
+PetscErrorCode RegisterProblems_Hdiv(AppCtx app_ctx) {
+  PetscFunctionBeginUser;  // must be the first executable statement of the routine
+  app_ctx->problems = NULL;
+  // 1) darcy2d (Hdiv_DARCY2D is created in darcy2d.c)
+  PetscCall(PetscFunctionListAdd(&app_ctx->problems, "darcy2d", Hdiv_DARCY2D));
+  // 2) darcy3d (Hdiv_DARCY3D is created in darcy3d.c)
+  PetscCall(PetscFunctionListAdd(&app_ctx->problems, "darcy3d", Hdiv_DARCY3D));
+  // 3) darcy3d-prism (not registered yet)
+
+  // 4) richard2d / richard3d (Hdiv_RICHARD2D and Hdiv_RICHARD3D)
+  PetscCall(PetscFunctionListAdd(&app_ctx->problems, "richard2d", Hdiv_RICHARD2D));
+  PetscCall(PetscFunctionListAdd(&app_ctx->problems, "richard3d", Hdiv_RICHARD3D));
+  PetscFunctionReturn(0);
+}
+
+// Free a plain data context that was allocated using PETSc; returning libCEED error codes
+int FreeContextPetsc(void *data) {
+  if (PetscFree(data)) return CeedError(NULL, CEED_ERROR_ACCESS, "PetscFree failed");
+  return CEED_ERROR_SUCCESS;
+}
\ No newline at end of file
diff --git a/examples/Hdiv-mixed/problems/richard2d.c b/examples/Hdiv-mixed/problems/richard2d.c
new file mode 100644
index 0000000000..27b88300f0
--- /dev/null
+++ b/examples/Hdiv-mixed/problems/richard2d.c
@@ -0,0 +1,116 @@
+// Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
+// the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
+// reserved. See files LICENSE and NOTICE for details.
+//
+// This file is part of CEED, a collection of benchmarks, miniapps, software
+// libraries and APIs for efficient high-order finite element and spectral
+// element discretizations for exascale applications. For more information and
+// source code availability see http://github.com/ceed.
+//
+// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
+// a collaborative effort of two U.S. Department of Energy organizations (Office
+// of Science and the National Nuclear Security Administration) responsible for
+// the planning and preparation of a capable exascale ecosystem, including
+// software, applications, hardware, advanced system engineering and early
+// testbed platforms, in support of the nation's exascale computing imperative.
+
+/// @file
+/// Utility functions for setting up Richard problem in 2D
+
+#include "../include/register-problem.h"
+#include "../qfunctions/darcy-error2d.h"
+#include "../qfunctions/post-processing2d.h"
+#include "../qfunctions/richard-ics2d.h"
+#include "../qfunctions/richard-system2d.h"
+#include "../qfunctions/richard-true2d.h"
+// #include "../qfunctions/pressure-boundary2d.h"
+#include "petscsystypes.h"
+
+// Fill problem_data and build the shared QFunction context for the 2D Richards problem; ctx must point to an AppCtx.
+PetscErrorCode Hdiv_RICHARD2D(Ceed ceed, ProblemData problem_data, DM dm, void *ctx) {
+  AppCtx               app_ctx = *(AppCtx *)ctx;
+  RICHARDContext       richard_ctx;
+  CeedQFunctionContext richard_context;
+
+  PetscFunctionBeginUser;
+
+  PetscCall(PetscCalloc1(1, &richard_ctx));
+
+  // ------------------------------------------------------
+  //               SET UP RICHARD2D
+  // ------------------------------------------------------
+  problem_data->dim               = 2;
+  problem_data->elem_node         = 4;
+  problem_data->q_data_size_face  = 3;
+  problem_data->quadrature_mode   = CEED_GAUSS;
+  problem_data->true_solution     = RichardTrue2D;
+  problem_data->true_solution_loc = RichardTrue2D_loc;
+  problem_data->rhs_u0            = RichardRhsU02D;
+  problem_data->rhs_u0_loc        = RichardRhsU02D_loc;
+  problem_data->ics_u             = RichardICsU2D;
+  problem_data->ics_u_loc         = RichardICsU2D_loc;
+  problem_data->rhs_p0            = RichardRhsP02D;
+  problem_data->rhs_p0_loc        = RichardRhsP02D_loc;
+  problem_data->ics_p             = RichardICsP2D;
+  problem_data->ics_p_loc         = RichardICsP2D_loc;
+  problem_data->residual          = RichardSystem2D;
+  problem_data->residual_loc      = RichardSystem2D_loc;
+  // jacobian / jacobian_loc are intentionally left unset (JacobianRichardSystem2D is disabled)
+  problem_data->error     = DarcyError2D;
+  problem_data->error_loc = DarcyError2D_loc;
+  // bc_pressure / bc_pressure_loc are intentionally left unset (BCPressure2D is disabled)
+  problem_data->post_rhs      = PostProcessingRhs2D;
+  problem_data->post_rhs_loc  = PostProcessingRhs2D_loc;
+  problem_data->post_mass     = PostProcessingMass2D;
+  problem_data->post_mass_loc = PostProcessingMass2D_loc;
+  problem_data->has_ts        = PETSC_TRUE;
+  problem_data->view_solution = app_ctx->view_solution;
+
+  // ------------------------------------------------------
+  //              Command line Options
+  // ------------------------------------------------------
+  CeedScalar kappa = 10., alpha_a = 1., b_a = 10., rho_a0 = 998.2, beta = 0., g = 9.8, p0 = 101325;
+
+  PetscOptionsBegin(app_ctx->comm, NULL, "Options for Hdiv-mixed problem", NULL);
+  PetscCall(PetscOptionsScalar("-kappa", "Hydraulic Conductivity", NULL, kappa, &kappa, NULL));
+  PetscCall(PetscOptionsScalar("-alpha_a", "Parameter for relative permeability", NULL, alpha_a, &alpha_a, NULL));
+  PetscCall(PetscOptionsScalar("-b_a", "Parameter for relative permeability", NULL, b_a, &b_a, NULL));
+  PetscCall(PetscOptionsScalar("-rho_a0", "Density at p0", NULL, rho_a0, &rho_a0, NULL));
+  PetscCall(PetscOptionsScalar("-beta", "Water compressibility", NULL, beta, &beta, NULL));
+  app_ctx->t_final = 0.5;
+  PetscCall(PetscOptionsScalar("-t_final", "End time", NULL, app_ctx->t_final, &app_ctx->t_final, NULL));
+  PetscOptionsEnd();
+
+  // DMGetBoundingBox writes one entry per coordinate dimension of dm, so leave room for a 3D mesh
+  PetscReal domain_min[3] = {0.}, domain_max[3] = {0.}, domain_size[2];
+  PetscCall(DMGetBoundingBox(dm, domain_min, domain_max));
+  for (PetscInt i = 0; i < 2; i++) domain_size[i] = domain_max[i] - domain_min[i];
+
+  richard_ctx->kappa   = kappa;
+  richard_ctx->alpha_a = alpha_a;
+  richard_ctx->b_a     = b_a;
+  richard_ctx->rho_a0  = rho_a0;
+  richard_ctx->beta    = beta;
+  richard_ctx->g       = g;
+  richard_ctx->p0      = p0;
+  richard_ctx->gamma   = 5.;
+  richard_ctx->t       = 0.;
+  richard_ctx->t_final = app_ctx->t_final;
+  richard_ctx->lx      = domain_size[0];
+  richard_ctx->ly      = domain_size[1];
+
+  CeedQFunctionContextCreate(ceed, &richard_context);
+  CeedQFunctionContextSetData(richard_context, CEED_MEM_HOST, CEED_COPY_VALUES, sizeof(*richard_ctx), richard_ctx);
+  // CEED_COPY_VALUES: the context keeps its own copy, so richard_ctx is released below without a destroy callback
+  // dt is never assigned here and keeps the zero from PetscCalloc1; it is exposed as "time_step" below
+  CeedQFunctionContextRegisterDouble(richard_context, "time", offsetof(struct RICHARDContext_, t), 1, "current solver time");
+  CeedQFunctionContextRegisterDouble(richard_context, "final_time", offsetof(struct RICHARDContext_, t_final), 1, "final time");
+  CeedQFunctionContextRegisterDouble(richard_context, "time_step", offsetof(struct RICHARDContext_, dt), 1, "time step");
+  problem_data->true_qfunction_ctx = richard_context;
+  CeedQFunctionContextReferenceCopy(richard_context, &problem_data->rhs_u0_qfunction_ctx);
+  CeedQFunctionContextReferenceCopy(richard_context, &problem_data->residual_qfunction_ctx);
+  CeedQFunctionContextReferenceCopy(richard_context, &problem_data->error_qfunction_ctx);
+  PetscCall(PetscFree(richard_ctx));
+
+  PetscFunctionReturn(0);
+}
diff --git a/examples/Hdiv-mixed/problems/richard3d.c b/examples/Hdiv-mixed/problems/richard3d.c
new file mode 100644
index 0000000000..438723d934
--- /dev/null
+++ b/examples/Hdiv-mixed/problems/richard3d.c
@@ -0,0 +1,117 @@
+// Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
+// the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
+// reserved. See files LICENSE and NOTICE for details.
+//
+// This file is part of CEED, a collection of benchmarks, miniapps, software
+// libraries and APIs for efficient high-order finite element and spectral
+// element discretizations for exascale applications. For more information and
+// source code availability see http://github.com/ceed.
+//
+// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
+// a collaborative effort of two U.S. Department of Energy organizations (Office
+// of Science and the National Nuclear Security Administration) responsible for
+// the planning and preparation of a capable exascale ecosystem, including
+// software, applications, hardware, advanced system engineering and early
+// testbed platforms, in support of the nation's exascale computing imperative.
+
+/// @file
+/// Utility functions for setting up Richard problem in 3D
+
+#include "../include/register-problem.h"
+#include "../qfunctions/darcy-error3d.h"
+#include "../qfunctions/post-processing3d.h"
+#include "../qfunctions/richard-ics3d.h"
+#include "../qfunctions/richard-system3d.h"
+#include "../qfunctions/richard-true3d.h"
+// #include "../qfunctions/pressure-boundary2d.h"
+#include "petscsystypes.h"
+
+PetscErrorCode Hdiv_RICHARD3D(Ceed ceed, ProblemData problem_data, DM dm, void *ctx) {  // Fills problem_data for the 3D Richard problem and builds its QFunction context
+  AppCtx               app_ctx = *(AppCtx *)ctx;  // ctx is read as a pointer to an AppCtx handle
+  RICHARDContext       richard_ctx;               // host-side staging struct; freed before this function returns
+  CeedQFunctionContext richard_context;           // libCEED context that receives a copy of richard_ctx
+
+  PetscFunctionBeginUser;
+
+  PetscCall(PetscCalloc1(1, &richard_ctx));  // zeroed allocation: fields not assigned below (e.g. dt) start at 0
+
+  // ------------------------------------------------------
+  //   SET UP RICHARD3D: sizes, QFunction pointers and paths
+  // ------------------------------------------------------
+  problem_data->dim               = 3;
+  problem_data->elem_node         = 8;
+  problem_data->q_data_size_face  = 4;
+  problem_data->quadrature_mode   = CEED_GAUSS;
+  problem_data->true_solution     = RichardTrue3D;
+  problem_data->true_solution_loc = RichardTrue3D_loc;
+  problem_data->rhs_u0            = RichardRhsU03D;
+  problem_data->rhs_u0_loc        = RichardRhsU03D_loc;
+  problem_data->ics_u             = RichardICsU3D;
+  problem_data->ics_u_loc         = RichardICsU3D_loc;
+  problem_data->rhs_p0            = RichardRhsP03D;
+  problem_data->rhs_p0_loc        = RichardRhsP03D_loc;
+  problem_data->ics_p             = RichardICsP3D;
+  problem_data->ics_p_loc         = RichardICsP3D_loc;
+  problem_data->residual          = RichardSystem3D;
+  problem_data->residual_loc      = RichardSystem3D_loc;
+  // problem_data->jacobian                = JacobianRichardSystem2D;
+  // problem_data->jacobian_loc            = JacobianRichardSystem2D_loc;
+  problem_data->error     = DarcyError3D;  // the Darcy error QFunction is reused for the Richard problem
+  problem_data->error_loc = DarcyError3D_loc;
+  // problem_data->bc_pressure             = BCPressure2D;
+  // problem_data->bc_pressure_loc         = BCPressure2D_loc;
+  problem_data->post_rhs      = PostProcessingRhs3D;
+  problem_data->post_rhs_loc  = PostProcessingRhs3D_loc;
+  problem_data->post_mass     = PostProcessingMass3D;
+  problem_data->post_mass_loc = PostProcessingMass3D_loc;
+  problem_data->has_ts        = PETSC_TRUE;
+  problem_data->view_solution = app_ctx->view_solution;
+
+  // ------------------------------------------------------
+  //   Command line options (defaults are the initializers)
+  // ------------------------------------------------------
+  CeedScalar kappa = 10., alpha_a = 1., b_a = 10., rho_a0 = 998.2, beta = 0., g = 9.8, p0 = 101325;  // g and p0 have no option registered below
+
+  PetscOptionsBegin(app_ctx->comm, NULL, "Options for Hdiv-mixed problem", NULL);
+  PetscCall(PetscOptionsScalar("-kappa", "Hydraulic Conductivity", NULL, kappa, &kappa, NULL));
+  PetscCall(PetscOptionsScalar("-alpha_a", "Parameter for relative permeability", NULL, alpha_a, &alpha_a, NULL));
+  PetscCall(PetscOptionsScalar("-b_a", "Parameter for relative permeability", NULL, b_a, &b_a, NULL));
+  PetscCall(PetscOptionsScalar("-rho_a0", "Density at p0", NULL, rho_a0, &rho_a0, NULL));
+  PetscCall(PetscOptionsScalar("-beta", "Water compressibility", NULL, beta, &beta, NULL));
+  app_ctx->t_final = 0.5;  // overwrites any earlier value; passed as the current value of -t_final below
+  PetscCall(PetscOptionsScalar("-t_final", "End time", NULL, app_ctx->t_final, &app_ctx->t_final, NULL));
+  PetscOptionsEnd();
+
+  PetscReal domain_min[3], domain_max[3], domain_size[3];  // domain_size[i] = max - min of the DM bounding box along axis i
+  PetscCall(DMGetBoundingBox(dm, domain_min, domain_max));
+  for (PetscInt i = 0; i < 3; i++) domain_size[i] = domain_max[i] - domain_min[i];
+
+  richard_ctx->kappa   = kappa;
+  richard_ctx->alpha_a = alpha_a;
+  richard_ctx->b_a     = b_a;
+  richard_ctx->rho_a0  = rho_a0;
+  richard_ctx->beta    = beta;
+  richard_ctx->g       = g;
+  richard_ctx->p0      = p0;
+  richard_ctx->gamma   = 5.;  // hard-coded; no command-line option is registered for it
+  richard_ctx->t       = 0.;
+  richard_ctx->t_final = app_ctx->t_final;
+  richard_ctx->lx      = domain_size[0];
+  richard_ctx->ly      = domain_size[1];
+  richard_ctx->lz      = domain_size[2];
+
+  CeedQFunctionContextCreate(ceed, &richard_context);
+  CeedQFunctionContextSetData(richard_context, CEED_MEM_HOST, CEED_COPY_VALUES, sizeof(*richard_ctx), richard_ctx);  // CEED_COPY_VALUES: data is copied, so richard_ctx is freed below
+  // CeedQFunctionContextSetDataDestroy(richard_context, CEED_MEM_HOST,
+  //                                    FreeContextPetsc);
+  CeedQFunctionContextRegisterDouble(richard_context, "time", offsetof(struct RICHARDContext_, t), 1, "current solver time");  // registers field t by byte offset under a name
+  CeedQFunctionContextRegisterDouble(richard_context, "final_time", offsetof(struct RICHARDContext_, t_final), 1, "final time");
+  CeedQFunctionContextRegisterDouble(richard_context, "time_step", offsetof(struct RICHARDContext_, dt), 1, "time step");  // dt is registered here but never assigned in this function
+  problem_data->true_qfunction_ctx = richard_context;  // takes over the reference created above
+  CeedQFunctionContextReferenceCopy(richard_context, &problem_data->rhs_u0_qfunction_ctx);  // NOTE(review): assumes the target fields are NULL or valid contexts on entry - confirm
+  CeedQFunctionContextReferenceCopy(richard_context, &problem_data->residual_qfunction_ctx);
+  CeedQFunctionContextReferenceCopy(richard_context, &problem_data->error_qfunction_ctx);
+  PetscCall(PetscFree(richard_ctx));
+
+  PetscFunctionReturn(0);
+}
diff --git a/examples/Hdiv-mixed/qfunctions/darcy-error2d.h b/examples/Hdiv-mixed/qfunctions/darcy-error2d.h
new file mode 100644
index 0000000000..8e5c6de83f
--- /dev/null
+++ b/examples/Hdiv-mixed/qfunctions/darcy-error2d.h
@@ -0,0 +1,76 @@
+// Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
+// the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
+// reserved. See files LICENSE and NOTICE for details.
+//
+// This file is part of CEED, a collection of benchmarks, miniapps, software
+// libraries and APIs for efficient high-order finite element and spectral
+// element discretizations for exascale applications. For more information and
+// source code availability see http://github.com/ceed.
+//
+// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
+// a collaborative effort of two U.S. Department of Energy organizations (Office
+// of Science and the National Nuclear Security Administration) responsible for
+// the planning and preparation of a capable exascale ecosystem, including
+// software, applications, hardware, advanced system engineering and early
+// testbed platforms, in support of the nation's exascale computing imperative.
+
+/// @file
+/// Compute pointwise error of the H(div) example using PETSc
+
+#ifndef DARCY_ERROR2D_H
+#define DARCY_ERROR2D_H
+
+#include <ceed.h>
+#include <math.h>
+
+#include "utils.h"
+
+// -----------------------------------------------------------------------------
+// Compute error
+// -----------------------------------------------------------------------------
+#ifndef DARCY_CTX
+#define DARCY_CTX
+typedef struct DARCYContext_ *DARCYContext;  // pointer handle; DarcyError2D casts its void *ctx to this
+struct DARCYContext_ {  // NOTE(review): darcy-error3d.h defines this under the same DARCY_CTX guard with an extra lz - confirm include order
+  CeedScalar kappa;         // not read by DarcyError2D (its use there is commented out)
+  CeedScalar g;             // divides p together with rho_a0: psi = p / (rho_a0 * g)
+  CeedScalar rho_a0;        // see g
+  CeedScalar alpha_a, b_a;  // not read in this header
+  CeedScalar lx, ly;        // not read in this header; presumably domain extents - TODO confirm
+};
+#endif
+CEED_QFUNCTION(DarcyError2D)(void *ctx, const CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) {  // Weighted squared error of psi and the mapped flux at each quadrature point
+  // Inputs: w = in[0], dxdX = in[1], u = in[2], p = in[3], true_soln = in[4] (three Q-length fields: psi, u_x, u_y)
+  const CeedScalar(*w) = in[0], (*dxdX)[2][CEED_Q_VLA] = (const CeedScalar(*)[2][CEED_Q_VLA])in[1],
+        (*u)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2], (*p) = (const CeedScalar(*))in[3], (*true_soln) = in[4];
+  // Outputs: error = out[0], three Q-length fields in the same order as true_soln
+  CeedScalar(*error) = out[0];
+  // Context: only rho_a0 and g are read
+  DARCYContext context = (DARCYContext)ctx;
+  // const CeedScalar kappa    = context->kappa;
+  const CeedScalar rho_a0 = context->rho_a0;
+  const CeedScalar g      = context->g;
+  // Quadrature Point Loop
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) {
+    // Setup, J = dx/dX, stored as J[k][j] = dxdX[j][k][i]
+    const CeedScalar J[2][2] = {
+        {dxdX[0][0][i], dxdX[1][0][i]},
+        {dxdX[0][1][i], dxdX[1][1][i]}
+    };
+    const CeedScalar det_J = MatDet2x2(J);
+    // Compute Piola map: uh = J*u/detJ
+    CeedScalar u1[2] = {u[0][i], u[1][i]}, uh[2];
+    AlphaMatVecMult2x2(1 / det_J, J, u1, uh);
+
+    // Squared differences scaled by w*det_J; the pressure is first rescaled to psi = p / (rho_a0 * g)
+    CeedScalar psi   = p[i] / (rho_a0 * g);
+    error[i + 0 * Q] = (psi - true_soln[i + 0 * Q]) * (psi - true_soln[i + 0 * Q]) * w[i] * det_J;
+    error[i + 1 * Q] = (uh[0] - true_soln[i + 1 * Q]) * (uh[0] - true_soln[i + 1 * Q]) * w[i] * det_J;
+    error[i + 2 * Q] = (uh[1] - true_soln[i + 2 * Q]) * (uh[1] - true_soln[i + 2 * Q]) * w[i] * det_J;
+  }  // End of Quadrature Point Loop
+
+  return 0;
+}
+// -----------------------------------------------------------------------------
+
+#endif  // End DARCY_ERROR2D_H
diff --git a/examples/Hdiv-mixed/qfunctions/darcy-error3d.h b/examples/Hdiv-mixed/qfunctions/darcy-error3d.h
new file mode 100644
index 0000000000..d0d82093ed
--- /dev/null
+++ b/examples/Hdiv-mixed/qfunctions/darcy-error3d.h
@@ -0,0 +1,78 @@
+// Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
+// the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
+// reserved. See files LICENSE and NOTICE for details.
+//
+// This file is part of CEED, a collection of benchmarks, miniapps, software
+// libraries and APIs for efficient high-order finite element and spectral
+// element discretizations for exascale applications. For more information and
+// source code availability see http://github.com/ceed.
+//
+// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
+// a collaborative effort of two U.S. Department of Energy organizations (Office
+// of Science and the National Nuclear Security Administration) responsible for
+// the planning and preparation of a capable exascale ecosystem, including
+// software, applications, hardware, advanced system engineering and early
+// testbed platforms, in support of the nation's exascale computing imperative.
+
+/// @file
+/// Compute pointwise error of the H(div) example using PETSc
+
+#ifndef DARCY_ERROR3D_H
+#define DARCY_ERROR3D_H
+
+#include <ceed.h>
+#include <math.h>
+
+#include "utils.h"
+
+// -----------------------------------------------------------------------------
+// Compute error
+// -----------------------------------------------------------------------------
+#ifndef DARCY_CTX
+#define DARCY_CTX
+typedef struct DARCYContext_ *DARCYContext;  // pointer handle; DarcyError3D casts its void *ctx to this
+struct DARCYContext_ {  // NOTE(review): the 2D headers define this under the same DARCY_CTX guard without lz - confirm include order
+  CeedScalar kappa;         // not read by DarcyError3D (its use there is commented out)
+  CeedScalar g;             // divides p together with rho_a0: psi = p / (rho_a0 * g)
+  CeedScalar rho_a0;        // see g
+  CeedScalar alpha_a, b_a;  // not read in this header
+  CeedScalar lx, ly, lz;    // not read in this header; presumably domain extents - TODO confirm
+};
+#endif
+CEED_QFUNCTION(DarcyError3D)(void *ctx, const CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) {  // Weighted squared error of psi and the mapped flux at each quadrature point
+  // Inputs: w = in[0], dxdX = in[1], u = in[2], p = in[3], true_soln = in[4] (four Q-length fields: psi, u_x, u_y, u_z)
+  const CeedScalar(*w) = in[0], (*dxdX)[3][CEED_Q_VLA] = (const CeedScalar(*)[3][CEED_Q_VLA])in[1],
+        (*u)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2], (*p) = (const CeedScalar(*))in[3], (*true_soln) = in[4];
+  // Outputs: error = out[0], four Q-length fields in the same order as true_soln
+  CeedScalar(*error) = out[0];
+  // Context: only rho_a0 and g are read
+  DARCYContext context = (DARCYContext)ctx;
+  // const CeedScalar    kappa   = context->kappa;
+  const CeedScalar rho_a0 = context->rho_a0;
+  const CeedScalar g      = context->g;
+  // Quadrature Point Loop
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) {
+    // Setup, J = dx/dX, stored as J[k][j] = dxdX[j][k][i]
+    const CeedScalar J[3][3] = {
+        {dxdX[0][0][i], dxdX[1][0][i], dxdX[2][0][i]},
+        {dxdX[0][1][i], dxdX[1][1][i], dxdX[2][1][i]},
+        {dxdX[0][2][i], dxdX[1][2][i], dxdX[2][2][i]}
+    };
+    const CeedScalar det_J = MatDet3x3(J);
+    // Compute Piola map: uh = J*u/detJ
+    CeedScalar u1[3] = {u[0][i], u[1][i], u[2][i]}, uh[3];
+    AlphaMatVecMult3x3(1 / det_J, J, u1, uh);
+
+    // Squared differences scaled by w*det_J; the pressure is first rescaled to psi = p / (rho_a0 * g)
+    CeedScalar psi   = p[i] / (rho_a0 * g);
+    error[i + 0 * Q] = (psi - true_soln[i + 0 * Q]) * (psi - true_soln[i + 0 * Q]) * w[i] * det_J;
+    error[i + 1 * Q] = (uh[0] - true_soln[i + 1 * Q]) * (uh[0] - true_soln[i + 1 * Q]) * w[i] * det_J;
+    error[i + 2 * Q] = (uh[1] - true_soln[i + 2 * Q]) * (uh[1] - true_soln[i + 2 * Q]) * w[i] * det_J;
+    error[i + 3 * Q] = (uh[2] - true_soln[i + 3 * Q]) * (uh[2] - true_soln[i + 3 * Q]) * w[i] * det_J;
+  }  // End of Quadrature Point Loop
+
+  return 0;
+}
+// -----------------------------------------------------------------------------
+
+#endif  // End DARCY_ERROR3D_H
diff --git a/examples/Hdiv-mixed/qfunctions/darcy-system-quartic2d.h b/examples/Hdiv-mixed/qfunctions/darcy-system-quartic2d.h
new file mode 100644
index 0000000000..4f57c0ad91
--- /dev/null
+++ b/examples/Hdiv-mixed/qfunctions/darcy-system-quartic2d.h
@@ -0,0 +1,153 @@
+// Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
+// the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
+// reserved. See files LICENSE and NOTICE for details.
+//
+// This file is part of CEED, a collection of benchmarks, miniapps, software
+// libraries and APIs for efficient high-order finite element and spectral
+// element discretizations for exascale applications. For more information and
+// source code availability see http://github.com/ceed.
+//
+// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
+// a collaborative effort of two U.S. Department of Energy organizations (Office
+// of Science and the National Nuclear Security Administration) responsible for
+// the planning and preparation of a capable exascale ecosystem, including
+// software, applications, hardware, advanced system engineering and early
+// testbed platforms, in support of the nation's exascale computing imperative.
+
+/// @file
+/// Darcy problem 2D (quad element) using PETSc
+
+#ifndef DARCY_SYSTEM_QUARTIC2D_H
+#define DARCY_SYSTEM_QUARTIC2D_H
+
+#include <ceed.h>
+#include <math.h>
+
+#include "utils.h"
+
+// -----------------------------------------------------------------------------
+// See Matthew Farthing, Christopher Kees, Cass Miller (2003)
+// https://www.sciencedirect.com/science/article/pii/S0309170802001872
+// -----------------------------------------------------------------------------
+// Strong form:
+//  u        = -grad(\psi)   in \Omega
+//  -\div(u) = -f                                   in \Omega
+//  p        = p_b                                  on \Gamma_D
+//  u.n      = u_b                                  on \Gamma_N
+//
+// Weak form: Find (u, \psi) \in VxQ (V=H(div), Q=L^2) on \Omega
+//  (v, u) -(\div(v), \psi) = -<v, p_b*n>_{\Gamma_D}
+// -(q, \div(u))  + (q, f)  = 0
+//
+// This QFunction sets up the mixed form of the above equation
+// Inputs:
+//   w     : weight of quadrature
+//   J     : dx/dX. x physical coordinate, X reference coordinate [-1,1]^dim
+//   u     : basis_u at quadrature points
+// div(u)  : divergence of basis_u at quadrature points
+//   p     : basis_p at quadrature points
+//   f     : force vector created in true qfunction
+//
+// Output:
+//   v     : (v, u) = \int (v^T * u detJ*w)dX ==> \int (v^T J^T*J*u*w/detJ)dX
+//           (this variant has no K, rho, k_r or gravity term -(v, rho*g_u); see the weak form above)
+// div(v)  : -(\div(v), \psi) = -\int (div(v)^T * \psi *w) dX
+//   q     : -(q, \div(u)) = -\int (q^T * div(u) * w) dX
+//            (q, f)       = \int( q^T * f * w*detJ )dX
+// -----------------------------------------------------------------------------
+#ifndef DARCY_CTX
+#define DARCY_CTX
+typedef struct DARCYContext_ *DARCYContext;  // pointer handle for the context struct below
+struct DARCYContext_ {  // NOTE(review): no QFunction in this header reads the context; darcy-error3d.h has a variant with lz under the same guard
+  CeedScalar kappa;         // not read in this header
+  CeedScalar g;             // not read in this header
+  CeedScalar rho_a0;        // not read in this header
+  CeedScalar alpha_a, b_a;  // not read in this header
+  CeedScalar lx, ly;        // not read in this header; presumably domain extents - TODO confirm
+};
+#endif
+// -----------------------------------------------------------------------------
+// Residual evaluation for Darcy problem
+// -----------------------------------------------------------------------------
+CEED_QFUNCTION(DarcySystemQuartic2D)(void *ctx, CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) {  // Residual terms at each quadrature point; ctx is not read
+  // Inputs: w = in[0], dxdX = in[1], u = in[2], div_u = in[3], p = in[4], f = in[5] (only its first Q entries are read)
+  const CeedScalar(*w) = in[0], (*dxdX)[2][CEED_Q_VLA] = (const CeedScalar(*)[2][CEED_Q_VLA])in[1],
+        (*u)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2], (*div_u) = (const CeedScalar(*))in[3], (*p) = (const CeedScalar(*))in[4],
+        (*f) = in[5];
+
+  // Outputs: v = out[0] (2 components), div_v = out[1], q = out[2]
+  CeedScalar(*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0], (*div_v) = (CeedScalar(*))out[1], (*q) = (CeedScalar(*))out[2];
+  // Quadrature Point Loop
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) {
+    // Setup, J = dx/dX, stored as J[k][j] = dxdX[j][k][i]
+    const CeedScalar J[2][2] = {
+        {dxdX[0][0][i], dxdX[1][0][i]},
+        {dxdX[0][1][i], dxdX[1][1][i]}
+    };
+    const CeedScalar det_J = MatDet2x2(J);
+
+    // (v, u): v = J^T*J*u*w/detJ
+    // 1) J^T*J
+    CeedScalar JT_J[2][2];
+    AlphaMatTransposeMatMult2x2(1, J, J, JT_J);
+
+    // 2) Compute v1 = (J^T*J)*u*w/detJ
+    CeedScalar u1[2] = {u[0][i], u[1][i]}, v1[2];
+    AlphaMatVecMult2x2(w[i] / det_J, JT_J, u1, v1);
+
+    // Output at quadrature points: (v, u)
+    for (CeedInt k = 0; k < 2; k++) {
+      v[k][i] = v1[k];
+    }
+    // Output at quadrature points: -(\div(v), \psi); p is used directly as \psi here (no rho_a0*g scaling)
+    div_v[i] = -p[i] * w[i];
+    // Output at quadrature points: -(q, \div(u)) + (q, f); only the forcing term carries det_J
+    q[i] = -div_u[i] * w[i] + f[i + 0 * Q] * w[i] * det_J;
+  }  // End of Quadrature Point Loop
+
+  return 0;
+}
+
+// -----------------------------------------------------------------------------
+// Jacobian evaluation for Darcy problem
+// -----------------------------------------------------------------------------
+CEED_QFUNCTION(JacobianDarcySystemQuartic2D)(void *ctx, CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) {  // Same terms as the residual applied to (du, dp), without the forcing; ctx is not read
+  // Inputs: w = in[0], dxdX = in[1], du = in[2], div_du = in[3], dp = in[4]
+  const CeedScalar(*w) = in[0], (*dxdX)[2][CEED_Q_VLA] = (const CeedScalar(*)[2][CEED_Q_VLA])in[1],
+        (*du)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2], (*div_du) = (const CeedScalar(*))in[3], (*dp) = (const CeedScalar(*))in[4];
+
+  // Outputs: dv = out[0] (2 components), div_dv = out[1], dq = out[2]
+  CeedScalar(*dv)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0], (*div_dv) = (CeedScalar(*))out[1], (*dq) = (CeedScalar(*))out[2];
+  // Quadrature Point Loop
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) {
+    // Setup, J = dx/dX, stored as J[k][j] = dxdX[j][k][i]
+    const CeedScalar J[2][2] = {
+        {dxdX[0][0][i], dxdX[1][0][i]},
+        {dxdX[0][1][i], dxdX[1][1][i]}
+    };
+    const CeedScalar det_J = MatDet2x2(J);
+
+    // 1) Compute J^T *J
+    CeedScalar JT_J[2][2];
+    AlphaMatTransposeMatMult2x2(1, J, J, JT_J);
+
+    // 2) Compute dv1 = J^T *J*du*w/detJ
+    CeedScalar du1[2] = {du[0][i], du[1][i]}, dv1[2];
+    AlphaMatVecMult2x2(w[i] / det_J, JT_J, du1, dv1);
+
+    // Output at quadrature points: (dv, du)
+    for (CeedInt k = 0; k < 2; k++) {
+      dv[k][i] = dv1[k];
+    }
+    // Output at quadrature points: -(\div(dv), d\psi); dp is used directly as d\psi here
+    div_dv[i] = -dp[i] * w[i];
+    // Output at quadrature points: -(dq, \div(du))
+    dq[i] = -div_du[i] * w[i];
+  }  // End of Quadrature Point Loop
+
+  return 0;
+}
+
+// -----------------------------------------------------------------------------
+
+#endif  // End of DARCY_SYSTEM_QUARTIC2D_H
diff --git a/examples/Hdiv-mixed/qfunctions/darcy-system2d.h b/examples/Hdiv-mixed/qfunctions/darcy-system2d.h
new file mode 100644
index 0000000000..8d50f973e7
--- /dev/null
+++ b/examples/Hdiv-mixed/qfunctions/darcy-system2d.h
@@ -0,0 +1,221 @@
+// Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
+// the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
+// reserved. See files LICENSE and NOTICE for details.
+//
+// This file is part of CEED, a collection of benchmarks, miniapps, software
+// libraries and APIs for efficient high-order finite element and spectral
+// element discretizations for exascale applications. For more information and
+// source code availability see http://github.com/ceed.
+//
+// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
+// a collaborative effort of two U.S. Department of Energy organizations (Office
+// of Science and the National Nuclear Security Administration) responsible for
+// the planning and preparation of a capable exascale ecosystem, including
+// software, applications, hardware, advanced system engineering and early
+// testbed platforms, in support of the nation's exascale computing imperative.
+
+/// @file
+/// Darcy problem 2D (quad element) using PETSc
+
+#ifndef DARCY_SYSTEM2D_H
+#define DARCY_SYSTEM2D_H
+
+#include <ceed.h>
+#include <math.h>
+
+#include "ceed/ceed-f64.h"
+#include "utils.h"
+
+// -----------------------------------------------------------------------------
+// See Matthew Farthing, Christopher Kees, Cass Miller (2003)
+// https://www.sciencedirect.com/science/article/pii/S0309170802001872
+// -----------------------------------------------------------------------------
+// Strong form:
+//  u        = -rho*k_r*K *[grad(\psi) - rho*g_u]   in \Omega
+//  -\div(u) = -f                                   in \Omega
+//  p        = p_b                                  on \Gamma_D
+//  u.n      = u_b                                  on \Gamma_N
+//
+//  Where rho = rho_a/rho_a0, rho_a = rho_a0*exp(\beta * (p - p0)), p0 = 101325 Pa is atmospheric pressure
+//  rho_a0 is the density at p_0, g_u = g/norm(g) where g is gravity.
+//  k_r = b_a + alpha_a * (\psi - x2), where \psi = p / (rho_a0 * norm(g)) and x2 is vertical axis
+//
+// Weak form: Find (u, \psi) \in VxQ (V=H(div), Q=L^2) on \Omega
+//  (v, K^{-1}/rho*k_r * u) -(v, rho*g_u) -(\div(v), \psi) = -<v, p_b*n>_{\Gamma_D}
+// -(q, \div(u))  + (q, f)                                 = 0
+//
+// We solve MMS for K = kappa*I and beta=0 ==> rho=1 and \theta = alpha_a*\psi.
+//
+// This QFunction sets up the mixed form of the above equation
+// Inputs:
+//   w     : weight of quadrature
+//   J     : dx/dX. x physical coordinate, X reference coordinate [-1,1]^dim
+//   u     : basis_u at quadrature points
+// div(u)  : divergence of basis_u at quadrature points
+//   p     : basis_p at quadrature points
+//   f     : force vector created in true qfunction
+//
+// Output:
+//   v     : (v, K^{-1}/rho*k_r u) = \int (v^T * K^{-1}/rho*k_r*u detJ*w)dX ==> \int (v^T J^T * K^{-1}/rho*k_r *J*u*w/detJ)dX
+//           -(v, rho*g_u)     = \int (v^T * rho*g_u detJ*w)dX ==> \int (v^T J^T * rho*g_u*w) dX
+// div(v)  : -(\div(v), \psi) = -\int (div(v)^T * \psi *w) dX
+//   q     : -(q, \div(u)) = -\int (q^T * div(u) * w) dX
+//            (q, f)       = \int( q^T * f * w*detJ )dX
+// -----------------------------------------------------------------------------
+#ifndef DARCY_CTX
+#define DARCY_CTX
+typedef struct DARCYContext_ *DARCYContext;  // pointer handle; the QFunctions below cast their void *ctx to this
+struct DARCYContext_ {  // NOTE(review): darcy-error3d.h defines this under the same DARCY_CTX guard with an extra lz - confirm include order
+  CeedScalar kappa;         // diagonal entry of K = kappa*I
+  CeedScalar g;             // divides p together with rho_a0: psi = p / (rho_a0 * g)
+  CeedScalar rho_a0;        // density at p0 (per the header comment above)
+  CeedScalar alpha_a, b_a;  // coefficients of k_r = b_a + alpha_a * (...)
+  CeedScalar lx, ly;        // not read in this header; presumably domain extents - TODO confirm
+};
+#endif
+// -----------------------------------------------------------------------------
+// Residual evaluation for Darcy problem
+// -----------------------------------------------------------------------------
+CEED_QFUNCTION(DarcySystem2D)(void *ctx, CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) {  // Residual terms of the mixed Darcy system at each quadrature point
+  // Inputs: w = in[0], dxdX = in[1], u = in[2], div_u = in[3], p = in[4], f = in[5] (first Q entries read), coords = in[6] (x then y)
+  const CeedScalar(*w) = in[0], (*dxdX)[2][CEED_Q_VLA] = (const CeedScalar(*)[2][CEED_Q_VLA])in[1],
+        (*u)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2], (*div_u) = (const CeedScalar(*))in[3], (*p) = (const CeedScalar(*))in[4],
+        (*f) = in[5], (*coords) = in[6];
+
+  // Outputs: v = out[0] (2 components), div_v = out[1], q = out[2]
+  CeedScalar(*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0], (*div_v) = (CeedScalar(*))out[1], (*q) = (CeedScalar(*))out[2];
+  // Context
+  DARCYContext     context = (DARCYContext)ctx;
+  const CeedScalar kappa   = context->kappa;
+  const CeedScalar rho_a0  = context->rho_a0;
+  const CeedScalar g       = context->g;
+  const CeedScalar alpha_a = context->alpha_a;
+  const CeedScalar b_a     = context->b_a;
+
+  // Quadrature Point Loop
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) {
+    // Setup, J = dx/dX, stored as J[k][j] = dxdX[j][k][i]
+    CeedScalar       x = coords[i + 0 * Q], y = coords[i + 1 * Q];
+    const CeedScalar J[2][2] = {
+        {dxdX[0][0][i], dxdX[1][0][i]},
+        {dxdX[0][1][i], dxdX[1][1][i]}
+    };
+    const CeedScalar det_J = MatDet2x2(J);
+
+    // k_r = b_a + alpha_a * (\psi - x2); evaluated with (1 - x*y) in place of (\psi - x2) - NOTE(review): presumably the manufactured solution, confirm
+    CeedScalar k_r = b_a + alpha_a * (1 - x * y);
+    // rho = rho_a/rho_a0, fixed to 1 here (the beta = 0 case from the header comment)
+    CeedScalar rho = 1.0;
+    // (v, K^{-1}/(rho*k_r) u): v = J^T* (K^{-1}/(rho*k_r)) *J*u*w/detJ
+    // 1) Compute K^{-1}, note K = kappa*I
+    CeedScalar K[2][2] = {
+        {kappa, 0.   },
+        {0.,    kappa}
+    };
+    const CeedScalar det_K = MatDet2x2(K);
+    CeedScalar       K_inv[2][2];
+    MatInverse2x2(K, det_K, K_inv);
+
+    // 2) (K^{-1}/(rho*k_r)) *J
+    CeedScalar Kinv_J[2][2];
+    AlphaMatMatMult2x2(1 / (rho * k_r), K_inv, J, Kinv_J);
+
+    // 3) Compute J^T* (K^{-1}/(rho*k_r)) *J
+    CeedScalar JT_Kinv_J[2][2];
+    AlphaMatTransposeMatMult2x2(1, J, Kinv_J, JT_Kinv_J);
+
+    // 4) Compute v1 = J^T* (K^{-1}/(rho*k_r)) *J*u*w/detJ
+    CeedScalar u1[2] = {u[0][i], u[1][i]}, v1[2];
+    AlphaMatVecMult2x2(w[i] / det_J, JT_Kinv_J, u1, v1);
+
+    // 5) -(v, rho*g_u): v2 = -J^T*rho*g_u*w, g_u = g/norm(g); g_u is hard-coded to (0, 1)
+    CeedScalar g_u[2] = {0., 1.}, v2[2];
+    AlphaMatTransposeVecMult2x2(-rho * w[i], J, g_u, v2);
+
+    // Output at quadrature points: (v, K^{-1}/(rho*k_r) u) -(v, rho*g_u)
+    for (CeedInt k = 0; k < 2; k++) {
+      v[k][i] = v1[k] + v2[k];
+    }
+    // Output at quadrature points: -(\div(v), \psi), with \psi = p / (rho_a0 * g)
+    CeedScalar psi = p[i] / (rho_a0 * g);
+    div_v[i]       = -psi * w[i];
+    // Output at quadrature points: -(q, \div(u)) + (q, f); only the forcing term carries det_J
+    q[i] = -div_u[i] * w[i] + f[i + 0 * Q] * w[i] * det_J;
+  }  // End of Quadrature Point Loop
+
+  return 0;
+}
+
+// -----------------------------------------------------------------------------
+// Jacobian evaluation for Darcy problem
+// -----------------------------------------------------------------------------
+CEED_QFUNCTION(JacobianDarcySystem2D)(void *ctx, CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) {  // Same operator as the residual applied to (du, dp), without gravity and forcing terms
+  // Inputs: w = in[0], dxdX = in[1], du = in[2], div_du = in[3], dp = in[4], coords = in[5] (x then y)
+  const CeedScalar(*w) = in[0], (*dxdX)[2][CEED_Q_VLA] = (const CeedScalar(*)[2][CEED_Q_VLA])in[1],
+        (*du)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2], (*div_du) = (const CeedScalar(*))in[3], (*dp) = (const CeedScalar(*))in[4],
+        (*coords) = in[5];
+
+  // Outputs: dv = out[0] (2 components), div_dv = out[1], dq = out[2]
+  CeedScalar(*dv)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0], (*div_dv) = (CeedScalar(*))out[1], (*dq) = (CeedScalar(*))out[2];
+
+  DARCYContext     context = (DARCYContext)ctx;
+  const CeedScalar kappa   = context->kappa;
+  const CeedScalar rho_a0  = context->rho_a0;
+  const CeedScalar g       = context->g;
+  const CeedScalar alpha_a = context->alpha_a;
+  const CeedScalar b_a     = context->b_a;
+  // Quadrature Point Loop
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) {
+    // Setup, J = dx/dX, stored as J[k][j] = dxdX[j][k][i]
+    CeedScalar       x = coords[i + 0 * Q], y = coords[i + 1 * Q];
+    const CeedScalar J[2][2] = {
+        {dxdX[0][0][i], dxdX[1][0][i]},
+        {dxdX[0][1][i], dxdX[1][1][i]}
+    };
+    const CeedScalar det_J = MatDet2x2(J);
+
+    // k_r = b_a + alpha_a * (\psi - x2); evaluated with (1 - x*y), so it does not depend on dp
+    CeedScalar k_r = b_a + alpha_a * (1 - x * y);
+    // rho = rho_a/rho_a0, fixed to 1 here (the beta = 0 case from the header comment)
+    CeedScalar rho = 1.0;
+    // (dv, K^{-1}/(rho*k_r) du): dv = J^T* (K^{-1}/(rho*k_r)) *J*du*w/detJ
+    // 1) Compute K^{-1}, note K = kappa*I
+    CeedScalar K[2][2] = {
+        {kappa, 0.   },
+        {0.,    kappa}
+    };
+    const CeedScalar det_K = MatDet2x2(K);
+    CeedScalar       K_inv[2][2];
+    MatInverse2x2(K, det_K, K_inv);
+
+    // 2) (K^{-1}/(rho*k_r)) *J
+    CeedScalar Kinv_J[2][2];
+    AlphaMatMatMult2x2(1 / (rho * k_r), K_inv, J, Kinv_J);
+
+    // 3) Compute J^T* (K^{-1}/(rho*k_r)) *J
+    CeedScalar JT_Kinv_J[2][2];
+    AlphaMatTransposeMatMult2x2(1, J, Kinv_J, JT_Kinv_J);
+
+    // 4) Compute dv1 = J^T* (K^{-1}/(rho*k_r)) *J*du*w/detJ
+    CeedScalar du1[2] = {du[0][i], du[1][i]}, dv1[2];
+    AlphaMatVecMult2x2(w[i] / det_J, JT_Kinv_J, du1, dv1);
+
+    // 5) -(dv, rho*g_u): dv2 = 0 (the gravity term has no dependence on du or dp)
+
+    // Output at quadrature points: (dv, K^{-1}/(rho*k_r) du); the gravity contribution is zero
+    for (CeedInt k = 0; k < 2; k++) {
+      dv[k][i] = dv1[k];
+    }
+    // Output at quadrature points: -(\div(dv), d\psi), with d\psi = dp / (rho_a0 * g)
+    CeedScalar dpsi = dp[i] / (rho_a0 * g);
+    div_dv[i]       = -dpsi * w[i];
+    // Output at quadrature points: -(dq, \div(du))
+    dq[i] = -div_du[i] * w[i];
+  }  // End of Quadrature Point Loop
+
+  return 0;
+}
+
+// -----------------------------------------------------------------------------
+
+#endif  // End of DARCY_SYSTEM2D_H
diff --git a/examples/Hdiv-mixed/qfunctions/darcy-system3d.h b/examples/Hdiv-mixed/qfunctions/darcy-system3d.h
new file mode 100644
index 0000000000..840eb22946
--- /dev/null
+++ b/examples/Hdiv-mixed/qfunctions/darcy-system3d.h
@@ -0,0 +1,225 @@
+// Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
+// the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
+// reserved. See files LICENSE and NOTICE for details.
+//
+// This file is part of CEED, a collection of benchmarks, miniapps, software
+// libraries and APIs for efficient high-order finite element and spectral
+// element discretizations for exascale applications. For more information and
+// source code availability see http://github.com/ceed.
+//
+// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
+// a collaborative effort of two U.S. Department of Energy organizations (Office
+// of Science and the National Nuclear Security Administration) responsible for
+// the planning and preparation of a capable exascale ecosystem, including
+// software, applications, hardware, advanced system engineering and early
+// testbed platforms, in support of the nation's exascale computing imperative.
+
+/// @file
+/// Darcy problem 3D (hex element) using PETSc
+
+#ifndef DARCY_SYSTEM3D_H
+#define DARCY_SYSTEM3D_H
+
+#include <ceed.h>
+#include <math.h>
+
+#include "utils.h"
+
+// -----------------------------------------------------------------------------
+// See Matthew Farthing, Christopher Kees, Cass Miller (2003)
+// https://www.sciencedirect.com/science/article/pii/S0309170802001872
+// -----------------------------------------------------------------------------
+// Strong form:
+//  u        = -rho*k_r*K *[grad(\psi) - rho*g_u]   in \Omega
+//  -\div(u) = -f                                   in \Omega
+//  p        = p_b                                  on \Gamma_D
+//  u.n      = u_b                                  on \Gamma_N
+//
+//  Where rho = rho_a/rho_a0, rho_a = rho_a0*exp(\beta * (p - p0)), p0 = 101325 Pa is atmospheric pressure
+//  rho_a0 is the density at p_0, g_u = g/norm(g) where g is gravity.
+//  k_r = b_a + alpha_a * (\psi - x2), where \psi = p / (rho_a0 * norm(g)) and x2 is vertical axis
+//
+// Weak form: Find (u, \psi) \in VxQ (V=H(div), Q=L^2) on \Omega
+//  (v, K^{-1}/rho*k_r * u) -(v, rho*g_u) -(\div(v), \psi) = -<v, p_b*n>_{\Gamma_D}
+// -(q, \div(u))  + (q, f)                                 = 0
+//
+// We solve MMS for  K = kappa*I and beta=0 ==> rho=1
+//
+// This QFunction sets up the mixed form of the above equations
+// Inputs:
+//   w     : weight of quadrature
+//   J     : dx/dX. x physical coordinate, X reference coordinate [-1,1]^dim
+//   u     : basis_u at quadrature points
+// div(u)  : divergence of basis_u at quadrature points
+//   p     : basis_p at quadrature points
+//   f     : force vector created in true qfunction
+//
+// Output:
+//   v     : (v, K^{-1}/rho*k_r u) = \int (v^T * K^{-1}/rho*k_r*u detJ*w)dX ==> \int (v^T J^T * K^{-1}/rho*k_r *J*u*w/detJ)dX
+//           -(v, rho*g_u)     = -\int (v^T * rho*g_u detJ*w)dX ==> -\int (v^T J^T * rho*g_u*w) dX
+// div(v)  : -(\div(v), \psi) = -\int (div(v)^T * \psi *w) dX
+//   q     : -(q, \div(u)) = -\int (q^T * div(u) * w) dX
+//            (q, f)       = \int( q^T * f * w*detJ )dX
+// -----------------------------------------------------------------------------
+#ifndef DARCY_CTX  // Guard: the context struct is defined at most once per translation unit
+#define DARCY_CTX
+typedef struct DARCYContext_ *DARCYContext;
+struct DARCYContext_ {
+  CeedScalar kappa;  // Scalar of the tensor K = kappa*I built in the QFunctions below
+  CeedScalar g;  // Scales the pressure together with rho_a0: \psi = p / (rho_a0 * g)
+  CeedScalar rho_a0;  // Reference density (described above as the density at p_0)
+  CeedScalar alpha_a, b_a;  // Coefficients of k_r = b_a + alpha_a * (...)
+  CeedScalar lx, ly, lz;  // Not read by the QFunctions in this header; presumably domain lengths -- TODO confirm
+};
+#endif
+// -----------------------------------------------------------------------------
+// Residual evaluation for Darcy problem: in = [w, dxdX, u, div_u, p, f, coords], out = [v, div_v, q]
+// -----------------------------------------------------------------------------
+CEED_QFUNCTION(DarcySystem3D)(void *ctx, CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) {
+  // Inputs; for dxdX and u the last index is the quadrature point
+  const CeedScalar(*w) = in[0], (*dxdX)[3][CEED_Q_VLA] = (const CeedScalar(*)[3][CEED_Q_VLA])in[1],
+        (*u)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2], (*div_u) = (const CeedScalar(*))in[3], (*p) = (const CeedScalar(*))in[4],
+        (*f) = in[5], (*coords) = in[6];
+
+  // Outputs; for v the last index is the quadrature point
+  CeedScalar(*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0], (*div_v) = (CeedScalar(*))out[1], (*q) = (CeedScalar(*))out[2];
+  // Context (lx, ly, lz are not read here)
+  DARCYContext     context = (DARCYContext)ctx;
+  const CeedScalar kappa   = context->kappa;
+  const CeedScalar rho_a0  = context->rho_a0;
+  const CeedScalar g       = context->g;
+  const CeedScalar alpha_a = context->alpha_a;
+  const CeedScalar b_a     = context->b_a;
+
+  // Quadrature Point Loop
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) {
+    // Setup, J = dx/dX; x, y, z are read component-major from coords (component c at i + c * Q)
+    CeedScalar       x = coords[i + 0 * Q], y = coords[i + 1 * Q], z = coords[i + 2 * Q];
+    const CeedScalar J[3][3] = {
+        {dxdX[0][0][i], dxdX[1][0][i], dxdX[2][0][i]},
+        {dxdX[0][1][i], dxdX[1][1][i], dxdX[2][1][i]},
+        {dxdX[0][2][i], dxdX[1][2][i], dxdX[2][2][i]}
+    };
+    const CeedScalar det_J = MatDet3x3(J);
+
+    // k_r: the model form is b_a + alpha_a * (\psi - x2); this code evaluates b_a + alpha_a * (1 - x*y*z)
+    CeedScalar k_r = b_a + alpha_a * (1 - x * y * z);
+    // rho = rho_a/rho_a0, fixed to 1 here
+    CeedScalar rho = 1.0;
+    // (v, K^{-1}/(rho*k_r) u): v = J^T * K^{-1}/(rho*k_r) * J*u*w/detJ
+    // 1) Compute K^{-1}, note K = kappa*I
+    CeedScalar K[3][3] = {
+        {kappa, 0.,    0.   },
+        {0.,    kappa, 0.   },
+        {0.,    0.,    kappa}
+    };
+    const CeedScalar det_K = MatDet3x3(K);
+    CeedScalar       K_inv[3][3];
+    MatInverse3x3(K, det_K, K_inv);
+
+    // 2) K^{-1}/(rho*k_r) * J; the factor 1/(rho*k_r) is passed as the scalar argument
+    CeedScalar Kinv_J[3][3];
+    AlphaMatMatMult3x3(1 / (rho * k_r), K_inv, J, Kinv_J);
+
+    // 3) Compute J^T * K^{-1}/(rho*k_r) * J
+    CeedScalar JT_Kinv_J[3][3];
+    AlphaMatTransposeMatMult3x3(1, J, Kinv_J, JT_Kinv_J);
+
+    // 4) Compute v1 = J^T * K^{-1}/(rho*k_r) * J*u*w/detJ; w/detJ is passed as the scalar argument
+    CeedScalar u1[3] = {u[0][i], u[1][i], u[2][i]}, v1[3];
+    AlphaMatVecMult3x3(w[i] / det_J, JT_Kinv_J, u1, v1);
+
+    // 5) -(v, rho*g_u): v2 = -J^T*rho*g_u*w, g_u = g/norm(g), taken here as the unit vector (0, 0, 1)
+    CeedScalar g_u[3] = {0., 0., 1.}, v2[3];
+    AlphaMatTransposeVecMult3x3(-rho * w[i], J, g_u, v2);
+
+    // Output at quadrature points: (v, K^{-1}/(rho*k_r) u) -(v, rho*g_u)
+    for (CeedInt k = 0; k < 3; k++) {
+      v[k][i] = v1[k] + v2[k];
+    }
+    // Output at quadrature points: -(\div(v), \psi), with \psi = p / (rho_a0 * g)
+    CeedScalar psi = p[i] / (rho_a0 * g);
+    div_v[i]       = -psi * w[i];
+    // Output at quadrature points:-(q, \div(u))  + (q, f); only the forcing term carries det_J
+    q[i] = -div_u[i] * w[i] + f[i + 0 * Q] * w[i] * det_J;
+  }  // End of Quadrature Point Loop
+
+  return 0;
+}
+
+// -----------------------------------------------------------------------------
+// Jacobian evaluation for Darcy problem: in = [w, dxdX, du, div_du, dp, coords], out = [dv, div_dv, dq]
+// -----------------------------------------------------------------------------
+CEED_QFUNCTION(JacobianDarcySystem3D)(void *ctx, CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) {
+  // Inputs; for dxdX and du the last index is the quadrature point
+  const CeedScalar(*w) = in[0], (*dxdX)[3][CEED_Q_VLA] = (const CeedScalar(*)[3][CEED_Q_VLA])in[1],
+        (*du)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2], (*div_du) = (const CeedScalar(*))in[3], (*dp) = (const CeedScalar(*))in[4],
+        (*coords) = in[5];
+
+  // Outputs; for dv the last index is the quadrature point
+  CeedScalar(*dv)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0], (*div_dv) = (CeedScalar(*))out[1], (*dq) = (CeedScalar(*))out[2];
+  // Context (lx, ly, lz are not read here)
+  DARCYContext     context = (DARCYContext)ctx;
+  const CeedScalar kappa   = context->kappa;
+  const CeedScalar rho_a0  = context->rho_a0;
+  const CeedScalar g       = context->g;
+  const CeedScalar alpha_a = context->alpha_a;
+  const CeedScalar b_a     = context->b_a;
+
+  // Quadrature Point Loop
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) {
+    // Setup, J = dx/dX; x, y, z are read component-major from coords (component c at i + c * Q)
+    CeedScalar       x = coords[i + 0 * Q], y = coords[i + 1 * Q], z = coords[i + 2 * Q];
+    const CeedScalar J[3][3] = {
+        {dxdX[0][0][i], dxdX[1][0][i], dxdX[2][0][i]},
+        {dxdX[0][1][i], dxdX[1][1][i], dxdX[2][1][i]},
+        {dxdX[0][2][i], dxdX[1][2][i], dxdX[2][2][i]}
+    };
+    const CeedScalar det_J = MatDet3x3(J);
+
+    // k_r: the model form is b_a + alpha_a * (\psi - x2); this code evaluates b_a + alpha_a * (1 - x*y*z)
+    CeedScalar k_r = b_a + alpha_a * (1 - x * y * z);
+    // rho = rho_a/rho_a0, fixed to 1 here
+    CeedScalar rho = 1.0;
+    // (dv, K^{-1}/(rho*k_r) du): dv = J^T * K^{-1}/(rho*k_r) * J*du*w/detJ
+    // 1) Compute K^{-1}, note K = kappa*I
+    CeedScalar K[3][3] = {
+        {kappa, 0.,    0.   },
+        {0.,    kappa, 0.   },
+        {0.,    0.,    kappa}
+    };
+    const CeedScalar det_K = MatDet3x3(K);
+    CeedScalar       K_inv[3][3];
+    MatInverse3x3(K, det_K, K_inv);
+
+    // 2) K^{-1}/(rho*k_r) * J; the factor 1/(rho*k_r) is passed as the scalar argument
+    CeedScalar Kinv_J[3][3];
+    AlphaMatMatMult3x3(1 / (rho * k_r), K_inv, J, Kinv_J);
+
+    // 3) Compute J^T * K^{-1}/(rho*k_r) * J
+    CeedScalar JT_Kinv_J[3][3];
+    AlphaMatTransposeMatMult3x3(1, J, Kinv_J, JT_Kinv_J);
+
+    // 4) Compute dv1 = J^T * K^{-1}/(rho*k_r) * J*du*w/detJ; w/detJ is passed as the scalar argument
+    CeedScalar du1[3] = {du[0][i], du[1][i], du[2][i]}, dv1[3];
+    AlphaMatVecMult3x3(w[i] / det_J, JT_Kinv_J, du1, dv1);
+
+    // 5) -(dv, rho*g_u): dv2 = 0, so no gravity contribution is added here (rho is the constant 1 above)
+
+    // Output at quadrature points: (dv, K^{-1}/(rho*k_r) du) -(dv, rho*g_u)
+    for (CeedInt k = 0; k < 3; k++) {
+      dv[k][i] = dv1[k];
+    }
+    // Output at quadrature points: -(\div(dv), d\psi), with d\psi = dp / (rho_a0 * g)
+    CeedScalar dpsi = dp[i] / (rho_a0 * g);
+    div_dv[i]       = -dpsi * w[i];
+    // Output at quadrature points:-(dq, \div(du))
+    dq[i] = -div_du[i] * w[i];
+  }  // End of Quadrature Point Loop
+
+  return 0;
+}
+
+// -----------------------------------------------------------------------------
+
+#endif  // End of DARCY_SYSTEM3D_H
diff --git a/examples/Hdiv-mixed/qfunctions/darcy-true-quartic2d.h b/examples/Hdiv-mixed/qfunctions/darcy-true-quartic2d.h
new file mode 100644
index 0000000000..81e154d510
--- /dev/null
+++ b/examples/Hdiv-mixed/qfunctions/darcy-true-quartic2d.h
@@ -0,0 +1,95 @@
+// Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
+// the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
+// reserved. See files LICENSE and NOTICE for details.
+//
+// This file is part of CEED, a collection of benchmarks, miniapps, software
+// libraries and APIs for efficient high-order finite element and spectral
+// element discretizations for exascale applications. For more information and
+// source code availability see http://github.com/ceed.
+//
+// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
+// a collaborative effort of two U.S. Department of Energy organizations (Office
+// of Science and the National Nuclear Security Administration) responsible for
+// the planning and preparation of a capable exascale ecosystem, including
+// software, applications, hardware, advanced system engineering and early
+// testbed platforms, in support of the nation's exascale computing imperative.
+
+/// @file
+/// Compute true solution of the H(div) example using PETSc
+
+#ifndef DARCY_TRUE_QUARTIC2D_H
+#define DARCY_TRUE_QUARTIC2D_H
+
+#include <ceed.h>
+#include <math.h>
+
+#include "utils.h"
+
+// -----------------------------------------------------------------------------
+// See Matthew Farthing, Christopher Kees, Cass Miller (2003)
+// https://www.sciencedirect.com/science/article/pii/S0309170802001872
+// -----------------------------------------------------------------------------
+// Strong form:
+//  u       = -\grad(psi)  on \Omega
+//  \div(u) = f              on \Omega
+//  p = p0                   on \Gamma_D
+//  u.n = g                  on \Gamma_N
+// Weak form: Find (u,p) \in VxQ (V=H(div), Q=L^2) on \Omega
+//  (v, u) - (\div(v), psi) = -<v, p0 n>_{\Gamma_D}
+// -(q, \div(u))            = -(q, f)
+//
+// This QFunction sets up the true solution and forcing f of the above equation
+// Inputs:
+//   coords: physical coordinates
+//
+// Output:
+//   true_force     : = div(u)
+//   true_solution  : = [\psi, u] where \psi, u are the exact solution
+// -----------------------------------------------------------------------------
+#ifndef DARCY_CTX  // Guard shared with the other Darcy headers: only the first definition seen is used
+#define DARCY_CTX
+typedef struct DARCYContext_ *DARCYContext;
+struct DARCYContext_ {
+  CeedScalar kappa;  // Not read by the QFunction in this header
+  CeedScalar g;  // Not read by the QFunction in this header
+  CeedScalar rho_a0;  // Not read by the QFunction in this header
+  CeedScalar alpha_a, b_a;  // Not read by the QFunction in this header
+  CeedScalar lx, ly, lz;  // lx, ly: lengths in psi = x*(lx - x)*y*(ly - y); lz is unused in 2D but keeps every DARCY_CTX-guarded copy identical
+};
+#endif
+CEED_QFUNCTION(DarcyTrueQuartic2D)(void *ctx, const CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) {
+  // Context: only the lengths lx, ly are read
+  const struct DARCYContext_ *darcy = (const struct DARCYContext_ *)ctx;
+  const CeedScalar            lx = darcy->lx, ly = darcy->ly;
+  // Inputs: component-major coordinates, x at [q], y at [q + Q]
+  const CeedScalar *coords = in[0];
+  // Outputs: forcing (1 component) and [\psi, u_x, u_y] (3 components, component-major)
+  CeedScalar *true_force    = out[0];
+  CeedScalar *true_solution = out[1];
+
+  // Quadrature Point Loop
+  CeedPragmaSIMD for (CeedInt q = 0; q < Q; q++) {
+    const CeedScalar x = coords[q], y = coords[q + Q];
+    // Factors (lx - x) and (ly - y); \psi vanishes at x = 0, lx and y = 0, ly
+    const CeedScalar dx = lx - x, dy = ly - y;
+
+    // \psi and its first / second partial derivatives
+    const CeedScalar psi    = x * dx * y * dy;
+    const CeedScalar psi_x  = (lx - 2 * x) * y * dy;
+    const CeedScalar psi_xx = -2 * y * dy;
+    const CeedScalar psi_y  = x * dx * (ly - 2 * y);
+    const CeedScalar psi_yy = -2 * x * dx;
+
+    // True Force: f = \div(u) with u = -grad(\psi)
+    true_force[q] = -psi_xx - psi_yy;
+    // True Solution: [\psi, u_x, u_y]
+    true_solution[q]         = psi;
+    true_solution[q + Q]     = -psi_x;
+    true_solution[q + 2 * Q] = -psi_y;
+  }  // End of Quadrature Point Loop
+
+  return 0;
+}
+// -----------------------------------------------------------------------------
+
+#endif  // End DARCY_TRUE_QUARTIC2D_H
diff --git a/examples/Hdiv-mixed/qfunctions/darcy-true2d.h b/examples/Hdiv-mixed/qfunctions/darcy-true2d.h
new file mode 100644
index 0000000000..b9ae00777a
--- /dev/null
+++ b/examples/Hdiv-mixed/qfunctions/darcy-true2d.h
@@ -0,0 +1,105 @@
+// Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
+// the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
+// reserved. See files LICENSE and NOTICE for details.
+//
+// This file is part of CEED, a collection of benchmarks, miniapps, software
+// libraries and APIs for efficient high-order finite element and spectral
+// element discretizations for exascale applications. For more information and
+// source code availability see http://github.com/ceed.
+//
+// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
+// a collaborative effort of two U.S. Department of Energy organizations (Office
+// of Science and the National Nuclear Security Administration) responsible for
+// the planning and preparation of a capable exascale ecosystem, including
+// software, applications, hardware, advanced system engineering and early
+// testbed platforms, in support of the nation's exascale computing imperative.
+
+/// @file
+/// Compute true solution of the H(div) example using PETSc
+
+#ifndef DARCY_TRUE2D_H
+#define DARCY_TRUE2D_H
+
+#include <ceed.h>
+#include <math.h>
+
+#include "utils.h"
+
+// -----------------------------------------------------------------------------
+// See Matthew Farthing, Christopher Kees, Cass Miller (2003)
+// https://www.sciencedirect.com/science/article/pii/S0309170802001872
+// -----------------------------------------------------------------------------
+// Strong form:
+//  u        = -rho*k_r*K *[grad(\psi) - rho*g_u]   in \Omega
+//  -\div(u) = -f                                   in \Omega
+//  p        = p_b                                  on \Gamma_D
+//  u.n      = u_b                                  on \Gamma_N
+//
+//  Where rho = rho_a/rho_a0, rho_a = rho_a0*exp(\beta * (p - p0)), p0 = 101325 Pa is atmospheric pressure
+//  rho_a0 is the density at p_0, g_u = g/norm(g) where g is gravity.
+//  k_r = b_a + alpha_a * (\psi - x2), where \psi = p / (rho_a0 * norm(g)) and x2 is vertical axis
+//
+// We solve MMS for  K = kappa*I and beta=0 ==> rho=1 and \theta = alpha_a*\psi, so
+//
+// This QFunction sets up the true solution and forcing f of the above equation
+// Inputs:
+//   coords: physical coordinates
+//
+// Output:
+//   true_force     : = div(u)
+//   true_solution  : = [\psi, u] where \psi, u are the exact solution
+// -----------------------------------------------------------------------------
+#ifndef DARCY_CTX  // Guard shared with the other Darcy headers: only the first definition seen is used
+#define DARCY_CTX
+typedef struct DARCYContext_ *DARCYContext;
+struct DARCYContext_ {
+  CeedScalar kappa;  // Scalar of K = kappa*I in u = -rho*k_r*K*[grad(\psi) - rho*g_u]
+  CeedScalar g;  // Not read by the QFunction in this header
+  CeedScalar rho_a0;  // Not read by the QFunction in this header
+  CeedScalar alpha_a, b_a;  // Coefficients of k_r = b_a + alpha_a * (1 - x*y)
+  CeedScalar lx, ly, lz;  // lx, ly: lengths in sin(pi*x/lx), sin(pi*y/ly); lz is unused in 2D but keeps every DARCY_CTX-guarded copy identical
+};
+#endif
+CEED_QFUNCTION(DarcyTrue2D)(void *ctx, const CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) {
+  // Context
+  const struct DARCYContext_ *darcy = (const struct DARCYContext_ *)ctx;
+  const CeedScalar            kappa = darcy->kappa, alpha_a = darcy->alpha_a, b_a = darcy->b_a;
+  const CeedScalar            lx = darcy->lx, ly = darcy->ly;
+  // Inputs: component-major coordinates, x at [q], y at [q + Q]
+  const CeedScalar *coords = in[0];
+  // Outputs: forcing (1 component) and [\psi, u_x, u_y] (3 components, component-major)
+  CeedScalar *true_force    = out[0];
+  CeedScalar *true_solution = out[1];
+
+  // Quadrature Point Loop
+  CeedPragmaSIMD for (CeedInt q = 0; q < Q; q++) {
+    const CeedScalar x = coords[q], y = coords[q + Q];
+    // \psi = sin(pi*x/lx) * sin(pi*y/ly) and its first / second partial derivatives
+    const CeedScalar psi    = sin(PI_DOUBLE * x / lx) * sin(PI_DOUBLE * y / ly);
+    const CeedScalar psi_x  = (PI_DOUBLE / lx) * cos(PI_DOUBLE * x / lx) * sin(PI_DOUBLE * y / ly);
+    const CeedScalar psi_xx = -(PI_DOUBLE / lx) * (PI_DOUBLE / lx) * psi;
+    const CeedScalar psi_y  = (PI_DOUBLE / ly) * sin(PI_DOUBLE * x / lx) * cos(PI_DOUBLE * y / ly);
+    const CeedScalar psi_yy = -(PI_DOUBLE / ly) * (PI_DOUBLE / ly) * psi;
+    // k_r = b_a + alpha_a * (1 - x*y) and its partial derivatives
+    const CeedScalar k_r  = b_a + alpha_a * (1 - x * y);
+    const CeedScalar k_rx = -alpha_a * y;
+    const CeedScalar k_ry = -alpha_a * x;
+    // rho = rho_a/rho_a0
+    const CeedScalar rho = 1.;
+    // u = -rho*k_r*K *[grad(\psi) - rho*g_u]; head_y is the y entry of the bracket, gravity term entered as 1
+    const CeedScalar head_y        = psi_y - 1;
+    const CeedScalar neg_rho_kappa = -rho * kappa;
+    const CeedScalar flux_coef     = neg_rho_kappa * k_r;
+    // True Force: f = \div(u)
+    true_force[q] = neg_rho_kappa * (k_rx * psi_x + k_r * psi_xx + k_ry * head_y + k_r * psi_yy);
+    // True Solution: [\psi, u_x, u_y]
+    true_solution[q]         = psi;
+    true_solution[q + Q]     = flux_coef * psi_x;
+    true_solution[q + 2 * Q] = flux_coef * head_y;
+  }  // End of Quadrature Point Loop
+
+  return 0;
+}
+// -----------------------------------------------------------------------------
+
+#endif  // End DARCY_TRUE2D_H
diff --git a/examples/Hdiv-mixed/qfunctions/darcy-true3d.h b/examples/Hdiv-mixed/qfunctions/darcy-true3d.h
new file mode 100644
index 0000000000..7206c636ac
--- /dev/null
+++ b/examples/Hdiv-mixed/qfunctions/darcy-true3d.h
@@ -0,0 +1,110 @@
+// Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
+// the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
+// reserved. See files LICENSE and NOTICE for details.
+//
+// This file is part of CEED, a collection of benchmarks, miniapps, software
+// libraries and APIs for efficient high-order finite element and spectral
+// element discretizations for exascale applications. For more information and
+// source code availability see http://github.com/ceed.
+//
+// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
+// a collaborative effort of two U.S. Department of Energy organizations (Office
+// of Science and the National Nuclear Security Administration) responsible for
+// the planning and preparation of a capable exascale ecosystem, including
+// software, applications, hardware, advanced system engineering and early
+// testbed platforms, in support of the nation's exascale computing imperative.
+
+/// @file
+/// Compute true solution of the H(div) example using PETSc
+
+#ifndef DARCY_TRUE3D_H
+#define DARCY_TRUE3D_H
+
+#include <ceed.h>
+#include <math.h>
+
+#include "utils.h"
+
+// -----------------------------------------------------------------------------
+// See Matthew Farthing, Christopher Kees, Cass Miller (2003)
+// https://www.sciencedirect.com/science/article/pii/S0309170802001872
+// -----------------------------------------------------------------------------
+// Strong form:
+//  u        = -rho*k_r*K *[grad(\psi) - rho*g_u]   in \Omega
+//  -\div(u) = -f                                   in \Omega
+//  p        = p_b                                  on \Gamma_D
+//  u.n      = u_b                                  on \Gamma_N
+//
+//  Where rho = rho_a/rho_a0, rho_a = rho_a0*exp(\beta * (p - p0)), p0 = 101325 Pa is atmospheric pressure
+//  rho_a0 is the density at p_0, g_u = g/norm(g) where g is gravity.
+//  k_r = b_a + alpha_a * (\psi - x2), where \psi = p / (rho_a0 * norm(g)) and x2 is vertical axis
+//
+// We solve MMS for  K = kappa*I and beta=0 ==> rho=1
+//
+// This QFunction sets up the true solution and forcing f of the above equation
+// Inputs:
+//   coords: physical coordinates
+//
+// Output:
+//   true_force     : = div(u)
+//   true_solution  : = [\psi, u] where \psi, u are the exact solution
+// -----------------------------------------------------------------------------
+#ifndef DARCY_CTX  // Guard: the context struct is defined at most once per translation unit
+#define DARCY_CTX
+typedef struct DARCYContext_ *DARCYContext;
+struct DARCYContext_ {
+  CeedScalar kappa;  // Scalar of K = kappa*I in u = -rho*k_r*K*[grad(\psi) - rho*g_u]
+  CeedScalar g;  // Not read by the QFunction in this header
+  CeedScalar rho_a0;  // Not read by the QFunction in this header
+  CeedScalar alpha_a, b_a;  // Coefficients of k_r = b_a + alpha_a * (1 - x*y*z)
+  CeedScalar lx, ly, lz;  // Lengths in sin(pi*x/lx), sin(pi*y/ly), sin(pi*z/lz); presumably domain extents -- TODO confirm
+};
+#endif
+CEED_QFUNCTION(DarcyTrue3D)(void *ctx, const CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) {
+  // Context
+  const struct DARCYContext_ *darcy = (const struct DARCYContext_ *)ctx;
+  const CeedScalar            kappa = darcy->kappa, alpha_a = darcy->alpha_a, b_a = darcy->b_a;
+  const CeedScalar            lx = darcy->lx, ly = darcy->ly, lz = darcy->lz;
+  // Inputs: component-major coordinates, x at [q], y at [q + Q], z at [q + 2 * Q]
+  const CeedScalar *coords = in[0];
+  // Outputs: forcing (1 component) and [\psi, u_x, u_y, u_z] (4 components, component-major)
+  CeedScalar *true_force = out[0];
+  CeedScalar *true_soln  = out[1];
+
+  // Quadrature Point Loop
+  CeedPragmaSIMD for (CeedInt q = 0; q < Q; q++) {
+    const CeedScalar x = coords[q], y = coords[q + Q], z = coords[q + 2 * Q];
+    // \psi = sin(pi*x/lx) * sin(pi*y/ly) * sin(pi*z/lz) and its first / second partial derivatives
+    const CeedScalar psi    = sin(PI_DOUBLE * x / lx) * sin(PI_DOUBLE * y / ly) * sin(PI_DOUBLE * z / lz);
+    const CeedScalar psi_x  = (PI_DOUBLE / lx) * cos(PI_DOUBLE * x / lx) * sin(PI_DOUBLE * y / ly) * sin(PI_DOUBLE * z / lz);
+    const CeedScalar psi_xx = -(PI_DOUBLE / lx) * (PI_DOUBLE / lx) * psi;
+    const CeedScalar psi_y  = (PI_DOUBLE / ly) * sin(PI_DOUBLE * x / lx) * cos(PI_DOUBLE * y / ly) * sin(PI_DOUBLE * z / lz);
+    const CeedScalar psi_yy = -(PI_DOUBLE / ly) * (PI_DOUBLE / ly) * psi;
+    const CeedScalar psi_z  = (PI_DOUBLE / lz) * sin(PI_DOUBLE * x / lx) * sin(PI_DOUBLE * y / ly) * cos(PI_DOUBLE * z / lz);
+    const CeedScalar psi_zz = -(PI_DOUBLE / lz) * (PI_DOUBLE / lz) * psi;
+
+    // k_r = b_a + alpha_a * (1 - x*y*z) and its partial derivatives
+    const CeedScalar k_r  = b_a + alpha_a * (1 - x * y * z);
+    const CeedScalar k_rx = -alpha_a * y * z;
+    const CeedScalar k_ry = -alpha_a * x * z;
+    const CeedScalar k_rz = -alpha_a * x * y;
+    // rho = rho_a/rho_a0
+    const CeedScalar rho = 1.;
+    // u = -rho*k_r*K *[grad(\psi) - rho*g_u]; head_z is the z entry of the bracket, gravity term entered as 1
+    const CeedScalar head_z        = psi_z - 1;
+    const CeedScalar neg_rho_kappa = -rho * kappa;
+    const CeedScalar flux_coef     = neg_rho_kappa * k_r;
+
+    // True Force: f = \div(u)
+    true_force[q] = neg_rho_kappa * (k_rx * psi_x + k_r * psi_xx + k_ry * psi_y + k_r * psi_yy + k_rz * head_z + k_r * psi_zz);
+    // True Solution: [\psi, u_x, u_y, u_z]
+    true_soln[q]         = psi;
+    true_soln[q + Q]     = flux_coef * psi_x;
+    true_soln[q + 2 * Q] = flux_coef * psi_y;
+    true_soln[q + 3 * Q] = flux_coef * head_z;
+  }  // End of Quadrature Point Loop
+  return 0;
+}
+// -----------------------------------------------------------------------------
+
+#endif  // End of DARCY_TRUE3D_H
diff --git a/examples/Hdiv-mixed/qfunctions/post-processing2d.h b/examples/Hdiv-mixed/qfunctions/post-processing2d.h
new file mode 100644
index 0000000000..b3b0404541
--- /dev/null
+++ b/examples/Hdiv-mixed/qfunctions/post-processing2d.h
@@ -0,0 +1,89 @@
+// Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
+// the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
+// reserved. See files LICENSE and NOTICE for details.
+//
+// This file is part of CEED, a collection of benchmarks, miniapps, software
+// libraries and APIs for efficient high-order finite element and spectral
+// element discretizations for exascale applications. For more information and
+// source code availability see http://github.com/ceed.
+//
+// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
+// a collaborative effort of two U.S. Department of Energy organizations (Office
+// of Science and the National Nuclear Security Administration) responsible for
+// the planning and preparation of a capable exascale ecosystem, including
+// software, applications, hardware, advanced system engineering and early
+// testbed platforms, in support of the nation's exascale computing imperative.
+
+/// @file
+/// Post-processing QFunctions for the H(div) example in 2D (L2 projection of the H(div) field) using PETSc
+
+#ifndef POST_PROCESSING2D_H
+#define POST_PROCESSING2D_H
+
+#include <ceed.h>
+#include <math.h>
+
+#include "ceed/ceed-f64.h"
+#include "utils.h"
+
+// -----------------------------------------------------------------------------
+// L2 projection of the H(div) field: solve (v, u) = (v, uh)
+// This QFunction evaluates the right-hand side post_rhs = (v, uh)
+// -----------------------------------------------------------------------------
+CEED_QFUNCTION(PostProcessingRhs2D)(void *ctx, const CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) {
+  // Inputs: quadrature weights, dx/dX, and the field u before the Piola map
+  const CeedScalar(*q_weight)           = in[0];
+  const CeedScalar(*jac)[2][CEED_Q_VLA] = (const CeedScalar(*)[2][CEED_Q_VLA])in[1];
+  const CeedScalar(*u_ref)[CEED_Q_VLA]  = (const CeedScalar(*)[CEED_Q_VLA])in[2];
+  // Outputs: component-major, component c of point q is stored at q + c * Q
+  CeedScalar(*post_rhs) = out[0];
+
+  CeedPragmaSIMD for (CeedInt q = 0; q < Q; q++) {
+    // dx_dX[r][c] = jac[c][r][q]
+    const CeedScalar dx_dX[2][2] = {
+        {jac[0][0][q], jac[1][0][q]},
+        {jac[0][1][q], jac[1][1][q]}
+    };
+    // Piola map uh = J*u/detJ and rhs = uh*w*detJ, so detJ cancels: rhs = J*u*w
+    CeedScalar u_q[2] = {u_ref[0][q], u_ref[1][q]}, Ju_w[2];
+    AlphaMatVecMult2x2(q_weight[q], dx_dX, u_q, Ju_w);
+
+    for (CeedInt c = 0; c < 2; c++) {
+      post_rhs[q + c * Q] = Ju_w[c];
+    }
+  }  // End of Quadrature Point Loop
+  return 0;
+}
+
+// -----------------------------------------------------------------------------
+// L2 projection of the H(div) field: solve (v, u) = (v, uh)
+// This QFunction applies the mass operator (v, u); the resulting system is
+// solved with KSP for the projected uh, which is then used for post-processing
+// -----------------------------------------------------------------------------
+CEED_QFUNCTION(PostProcessingMass2D)(void *ctx, const CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) {
+  // Inputs: quadrature weights, dx/dX, and the field u
+  const CeedScalar(*q_weight)           = in[0];
+  const CeedScalar(*jac)[2][CEED_Q_VLA] = (const CeedScalar(*)[2][CEED_Q_VLA])in[1];
+  const CeedScalar(*u)[CEED_Q_VLA]      = (const CeedScalar(*)[CEED_Q_VLA])in[2];
+
+  // Outputs
+  CeedScalar(*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0];
+
+  // Quadrature Point Loop
+  CeedPragmaSIMD for (CeedInt q = 0; q < Q; q++) {
+    // dx_dX[r][c] = jac[c][r][q]
+    const CeedScalar dx_dX[2][2] = {
+        {jac[0][0][q], jac[1][0][q]},
+        {jac[0][1][q], jac[1][1][q]}
+    };
+    const CeedScalar jac_det = MatDet2x2(dx_dX);
+
+    // (v, u): v = u*w*detJ, one statement per component
+    v[0][q] = u[0][q] * q_weight[q] * jac_det;
+    v[1][q] = u[1][q] * q_weight[q] * jac_det;
+  }  // End of Quadrature Point Loop
+  return 0;
+}
+// -----------------------------------------------------------------------------
+
+#endif  // End of POST_PROCESSING2D_H
diff --git a/examples/Hdiv-mixed/qfunctions/post-processing3d.h b/examples/Hdiv-mixed/qfunctions/post-processing3d.h
new file mode 100644
index 0000000000..0e2dcf9d33
--- /dev/null
+++ b/examples/Hdiv-mixed/qfunctions/post-processing3d.h
@@ -0,0 +1,92 @@
+// Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
+// the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
+// reserved. See files LICENSE and NOTICE for details.
+//
+// This file is part of CEED, a collection of benchmarks, miniapps, software
+// libraries and APIs for efficient high-order finite element and spectral
+// element discretizations for exascale applications. For more information and
+// source code availability see http://github.com/ceed.
+//
+// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
+// a collaborative effort of two U.S. Department of Energy organizations (Office
+// of Science and the National Nuclear Security Administration) responsible for
+// the planning and preparation of a capable exascale ecosystem, including
+// software, applications, hardware, advanced system engineering and early
+// testbed platforms, in support of the nation's exascale computing imperative.
+
+/// @file
+/// Post-processing QFunctions for the H(div) example in 3D (L2 projection of the H(div) field) using PETSc
+
+#ifndef POST_PROCESSING3D_H
+#define POST_PROCESSING3D_H
+
+#include <ceed.h>
+#include <math.h>
+
+#include "ceed/ceed-f64.h"
+#include "utils.h"
+
+// -----------------------------------------------------------------------------
+// L2 projection of the H(div) field: solve (v, u) = (v, uh)
+// This QFunction evaluates the right-hand side post_rhs = (v, uh)
+// -----------------------------------------------------------------------------
+CEED_QFUNCTION(PostProcessingRhs3D)(void *ctx, const CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) {
+  // Inputs: quadrature weights, dx/dX, and the field u before the Piola map
+  const CeedScalar(*q_weight)           = in[0];
+  const CeedScalar(*jac)[3][CEED_Q_VLA] = (const CeedScalar(*)[3][CEED_Q_VLA])in[1];
+  const CeedScalar(*u_ref)[CEED_Q_VLA]  = (const CeedScalar(*)[CEED_Q_VLA])in[2];
+  // Outputs: component-major, component c of point q is stored at q + c * Q
+  CeedScalar(*post_rhs) = out[0];
+
+  CeedPragmaSIMD for (CeedInt q = 0; q < Q; q++) {
+    // dx_dX[r][c] = jac[c][r][q]
+    const CeedScalar dx_dX[3][3] = {
+        {jac[0][0][q], jac[1][0][q], jac[2][0][q]},
+        {jac[0][1][q], jac[1][1][q], jac[2][1][q]},
+        {jac[0][2][q], jac[1][2][q], jac[2][2][q]}
+    };
+
+    // Piola map uh = J*u/detJ and rhs = uh*w*detJ, so detJ cancels: rhs = J*u*w
+    CeedScalar u_q[3] = {u_ref[0][q], u_ref[1][q], u_ref[2][q]}, Ju_w[3];
+    AlphaMatVecMult3x3(q_weight[q], dx_dX, u_q, Ju_w);
+
+    for (CeedInt c = 0; c < 3; c++) {
+      post_rhs[q + c * Q] = Ju_w[c];
+    }
+  }  // End of Quadrature Point Loop
+  return 0;
+}
+
+// -----------------------------------------------------------------------------
+// L2 projection of the H(div) field: solve (v, u) = (v, uh)
+// This QFunction applies the mass operator (v, u); the resulting system is
+// solved with KSP for the projected uh, which is then used for post-processing
+// -----------------------------------------------------------------------------
+CEED_QFUNCTION(PostProcessingMass3D)(void *ctx, const CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) {
+  // Inputs: quadrature weights, dx/dX, and the field u
+  const CeedScalar(*q_weight)           = in[0];
+  const CeedScalar(*jac)[3][CEED_Q_VLA] = (const CeedScalar(*)[3][CEED_Q_VLA])in[1];
+  const CeedScalar(*u)[CEED_Q_VLA]      = (const CeedScalar(*)[CEED_Q_VLA])in[2];
+  // Outputs
+  CeedScalar(*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0];
+
+  // Quadrature Point Loop
+  CeedPragmaSIMD for (CeedInt q = 0; q < Q; q++) {
+    // dx_dX[r][c] = jac[c][r][q]
+    const CeedScalar dx_dX[3][3] = {
+        {jac[0][0][q], jac[1][0][q], jac[2][0][q]},
+        {jac[0][1][q], jac[1][1][q], jac[2][1][q]},
+        {jac[0][2][q], jac[1][2][q], jac[2][2][q]}
+    };
+    const CeedScalar jac_det = MatDet3x3(dx_dX);
+
+    // (v, u): v = u*w*detJ, one statement per component
+    v[0][q] = u[0][q] * q_weight[q] * jac_det;
+    v[1][q] = u[1][q] * q_weight[q] * jac_det;
+    v[2][q] = u[2][q] * q_weight[q] * jac_det;
+  }  // End of Quadrature Point Loop
+  return 0;
+}
+// -----------------------------------------------------------------------------
+
+#endif  // End of POST_PROCESSING3D_H
diff --git a/examples/Hdiv-mixed/qfunctions/pressure-boundary2d.h b/examples/Hdiv-mixed/qfunctions/pressure-boundary2d.h
new file mode 100644
index 0000000000..ed2b625439
--- /dev/null
+++ b/examples/Hdiv-mixed/qfunctions/pressure-boundary2d.h
@@ -0,0 +1,59 @@
+// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
+// All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
+//
+// SPDX-License-Identifier: BSD-2-Clause
+//
+// This file is part of CEED:  http://github.com/ceed
+
+/// @file
+/// Pressure boundary conditions 2D
+
+#ifndef pressure_bc_2d_h
+#define pressure_bc_2d_h
+
+#include <ceed.h>
+#include <math.h>
+
+#include "utils.h"
+// -----------------------------------------------------------------------------
+// Strong form:
+//  u       = -\grad(p)      on \Omega
+//  \div(u) = f              on \Omega
+//  p = p0                   on \Gamma_D
+//  u.n = g                  on \Gamma_N
+// Weak form: Find (u,p) \in VxQ (V=H(div), Q=L^2) on \Omega
+//  (v, u) - (\div(v), p) = -<v, p0 n>_{\Gamma_D}
+// -(q, \div(u))          = -(q, f)
+// This QFunction sets up the pressure boundary conditions : -<v, p0 n>_{\Gamma_D}
+// Inputs:
+//   w     : weight of quadrature
+//   p0    : pressure value on the boundary
+//
+// Output:
+//   v     : p0 * N * w
+// Note that the Piola map of the H(div) basis and physical normal "n" got canceled
+// and we need to multiply by the reference normal "N" on each face
+// -----------------------------------------------------------------------------
+CEED_QFUNCTION(BCPressure2D)(void *ctx, CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) {
+  // Inputs: in[0] holds the quadrature weights
+  const CeedScalar(*w) = in[0];
+  // Outputs: out[0] holds the two components of v at each quadrature point
+  CeedScalar(*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0];
+
+  // Boundary pressure value; hard-coded here, ctx is not read
+  const CeedScalar p0 = 10.;
+
+  // Quadrature Point Loop
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) {
+    for (CeedInt k = 0; k < 2; k++) {
+      // Assign rather than accumulate: the previous contents of the output
+      // buffer must not be relied on, so "+=" would fold in stale data
+      v[k][i] = p0 * w[i];
+    }
+  }  // End of Quadrature Point Loop
+  return 0;
+}
+
+// *****************************************************************************
+
+#endif  // pressure_bc_2d_h
diff --git a/examples/Hdiv-mixed/qfunctions/pressure-boundary3d.h b/examples/Hdiv-mixed/qfunctions/pressure-boundary3d.h
new file mode 100644
index 0000000000..11f21675dc
--- /dev/null
+++ b/examples/Hdiv-mixed/qfunctions/pressure-boundary3d.h
@@ -0,0 +1,59 @@
+// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
+// All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
+//
+// SPDX-License-Identifier: BSD-2-Clause
+//
+// This file is part of CEED:  http://github.com/ceed
+
+/// @file
+/// Pressure boundary conditions 3D
+
+#ifndef pressure_bc_3d_h
+#define pressure_bc_3d_h
+
+#include <ceed.h>
+#include <math.h>
+
+#include "utils.h"
+// -----------------------------------------------------------------------------
+// Strong form:
+//  u       = -\grad(p)      on \Omega
+//  \div(u) = f              on \Omega
+//  p = p0                   on \Gamma_D
+//  u.n = g                  on \Gamma_N
+// Weak form: Find (u,p) \in VxQ (V=H(div), Q=L^2) on \Omega
+//  (v, u) - (\div(v), p) = -<v, p0 n>_{\Gamma_D}
+// -(q, \div(u))          = -(q, f)
+// This QFunction sets up the pressure boundary conditions : -<v, p0 n>_{\Gamma_D}
+// Inputs:
+//   w     : weight of quadrature
+//   p0    : pressure value on the boundary
+//
+// Output:
+//   v     : p0 * N * w
+// Note that the Piola map of the H(div) basis and physical normal "n" got canceled
+// and we need to multiply by the reference normal "N" on each face
+// -----------------------------------------------------------------------------
+CEED_QFUNCTION(BCPressure3D)(void *ctx, CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) {
+  // Inputs: in[0] holds the quadrature weights
+  const CeedScalar(*w) = in[0];
+  // Outputs: out[0] holds the three components of v at each quadrature point
+  CeedScalar(*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0];
+
+  // Boundary pressure value; hard-coded here, ctx is not read
+  const CeedScalar p0 = 200.;
+
+  // Quadrature Point Loop
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) {
+    for (CeedInt k = 0; k < 3; k++) {
+      // Assign rather than accumulate: the previous contents of the output
+      // buffer must not be relied on, so "+=" would fold in stale data
+      v[k][i] = p0 * w[i];
+    }
+  }  // End of Quadrature Point Loop
+  return 0;
+}
+
+// *****************************************************************************
+
+#endif  // pressure_bc_3d_h
diff --git a/examples/Hdiv-mixed/qfunctions/richard-ics2d.h b/examples/Hdiv-mixed/qfunctions/richard-ics2d.h
new file mode 100644
index 0000000000..9a084e9d44
--- /dev/null
+++ b/examples/Hdiv-mixed/qfunctions/richard-ics2d.h
@@ -0,0 +1,213 @@
+// Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
+// the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
+// reserved. See files LICENSE and NOTICE for details.
+//
+// This file is part of CEED, a collection of benchmarks, miniapps, software
+// libraries and APIs for efficient high-order finite element and spectral
+// element discretizations for exascale applications. For more information and
+// source code availability see http://github.com/ceed.
+//
+// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
+// a collaborative effort of two U.S. Department of Energy organizations (Office
+// of Science and the National Nuclear Security Administration) responsible for
+// the planning and preparation of a capable exascale ecosystem, including
+// software, applications, hardware, advanced system engineering and early
+// testbed platforms, in support of the nation's exascale computing imperative.
+
+/// @file
+/// Initial conditions of Richard problem 2D (quad element) using PETSc
+
+#ifndef RICHARD_ICS2D_H
+#define RICHARD_ICS2D_H
+
+#include <ceed.h>
+#include <math.h>
+
+#include "ceed/ceed-f64.h"
+#include "utils.h"
+
+// See Matthew Farthing, Christopher Kees, Cass Miller (2003)
+// https://www.sciencedirect.com/science/article/pii/S0309170802001872
+// -----------------------------------------------------------------------------
+// Strong form:
+//  u        = -rho*k_r*K *[grad(\psi) - rho*g_u]   in \Omega x [0,T]
+//  -\div(u) = -f  + d (rho*theta)/dt              in \Omega x [0,T]
+//  p        = p_b                                  on \Gamma_D x [0,T]
+//  u.n      = u_b                                  on \Gamma_N x [0,T]
+//  p        = p_0                                  in \Omega, t = 0
+//
+//  Where rho = rho_a/rho_a0, rho_a = rho_a0*exp(\beta * (p - p0)), p0 = 101325 Pa is atmospheric pressure
+//  rho_a0 is the density at p_0, g_u = g/norm(g) where g is gravity.
+//  k_r = b_a + alpha_a * (\psi - x2), where \psi = p / (rho_a0 * norm(g)) and x2 is vertical axis
+//
+// Weak form: Find (u, \psi) \in VxQ (V=H(div), Q=L^2) on \Omega
+//  (v, K^{-1}/rho*k_r * u) -(v, rho*g_u) -(\div(v), \psi) = -<v, p_b*n>_{\Gamma_D}
+// -(q, \div(u))  + (q, f) -(q, d (rho*\theta)/dt ) = 0
+//
+// We solve MMS for  K = kappa*I and beta=0 ==> rho=1 and \theta = alpha_a*\psi, so
+// -(q, d (rho*\theta)/dt ) = -alpha_a*(q, d(\psi)/dt )
+//
+// This QFunction sets up the true solution and forcing f of the above equation
+// Inputs:
+//   coords: physical coordinate
+//
+// Output:
+//   true_force     : = div(u) + d (rho*theta)/dt
+//   true_solution  : = [\psi, u] where \psi, u are the exact solution
+// -----------------------------------------------------------------------------
+// We have 3 experiment parameters as described in Table 1:P1, P2, P3
+// Matthew Farthing, Christopher Kees, Cass Miller (2003)
+// https://www.sciencedirect.com/science/article/pii/S0309170802001872
+#ifndef RICHARD_CTX
+#define RICHARD_CTX
+typedef struct RICHARDContext_ *RICHARDContext;
+struct RICHARDContext_ {
+  CeedScalar kappa;           // permeability scale, K = kappa*I
+  CeedScalar g;               // presumably the gravity magnitude norm(g) used in psi = p / (rho_a0 * norm(g)) -- confirm
+  CeedScalar rho_a0;          // density at p0
+  CeedScalar alpha_a, b_a;    // coefficients of k_r = b_a + alpha_a * (...)
+  CeedScalar beta, p0;        // rho_a = rho_a0*exp(beta * (p - p0)); p0 is the atmospheric pressure
+  CeedScalar t, t_final, dt;  // time values; presumably current time, end time and step size -- TODO confirm
+  CeedScalar gamma;           // rate in the exp(-gamma*t) time factor of psi
+  CeedScalar lx, ly;          // not read in this header; presumably domain extents -- TODO confirm
+};
+#endif
+
+// -----------------------------------------------------------------------------
+// We solve (v, u) = (v, ue) at t=0 to project ue to the Hdiv space.
+// This QFunction creates rhs_u0 = (v, ue).
+// -----------------------------------------------------------------------------
+CEED_QFUNCTION(RichardRhsU02D)(void *ctx, const CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) {
+  // in[0]: quadrature weights; in[1]: coordinates (x block, then y block); in[2]: dx/dX entries
+  const CeedScalar *weight = in[0];
+  const CeedScalar *xy = in[1];
+  const CeedScalar(*jac)[2][CEED_Q_VLA] = (const CeedScalar(*)[2][CEED_Q_VLA])in[2];
+  // out[0]: rhs_u0, first component in [0, Q), second in [Q, 2Q)
+  CeedScalar *rhs_u0 = out[0];
+  // Material parameters read from the context
+  const RICHARDContext rctx = (RICHARDContext)ctx;
+  const CeedScalar kappa = rctx->kappa, alpha_a = rctx->alpha_a, b_a = rctx->b_a;
+
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) {
+    const CeedScalar x = xy[i], y = xy[i + Q];
+    const CeedScalar J[2][2] = {
+        {jac[0][0][i], jac[1][0][i]},
+        {jac[0][1][i], jac[1][1][i]}
+    };
+    // Spatial gradient of psi = exp(-gamma*t)*sin(pi*x)*sin(pi*y) at t = 0
+    const CeedScalar dpsi_dx = PI_DOUBLE * cos(PI_DOUBLE * x) * sin(PI_DOUBLE * y);
+    const CeedScalar dpsi_dy = PI_DOUBLE * sin(PI_DOUBLE * x) * cos(PI_DOUBLE * y);
+
+    // k_r = b_a + alpha_a * (1 - x*y); rho = rho_a/rho_a0 is set to 1
+    const CeedScalar k_r = b_a + alpha_a * (1 - x * y);
+    const CeedScalar rho = 1.;
+
+    // ue = -rho*k_r*K*grad(psi); the shared factor -rho*k_r*kappa is formed once
+    const CeedScalar flux_scale = -rho * k_r * kappa;
+    CeedScalar ue[2] = {flux_scale * dpsi_dx, flux_scale * dpsi_dy};
+
+    // rhs = (v, ue) = J^T*ue*w
+    CeedScalar rhs1[2];
+    AlphaMatTransposeVecMult2x2(weight[i], J, ue, rhs1);
+    rhs_u0[i] = rhs1[0];
+    rhs_u0[i + Q] = rhs1[1];
+  }
+  return 0;
+}
+
+// -----------------------------------------------------------------------------
+// We solve (v, u) = (v, ue) at t=0 to project ue to the Hdiv space.
+// This QFunction creates the mass matrix (v, u); we then solve with KSP to obtain
+// the projected ue in the Hdiv space and use it for the initial conditions.
+// -----------------------------------------------------------------------------
+CEED_QFUNCTION(RichardICsU2D)(void *ctx, const CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) {
+  // in[0]: quadrature weights; in[1]: dx/dX entries; in[2]: the two components of u
+  const CeedScalar *weight = in[0];
+  const CeedScalar(*jac)[2][CEED_Q_VLA] = (const CeedScalar(*)[2][CEED_Q_VLA])in[1];
+  const CeedScalar(*u_q)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2];
+  // out[0]: the two components of v
+  CeedScalar(*v_q)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0];
+
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) {
+    // J = dx/dX at this quadrature point
+    const CeedScalar J[2][2] = {
+        {jac[0][0][i], jac[1][0][i]},
+        {jac[0][1][i], jac[1][1][i]}
+    };
+    const CeedScalar det_J = MatDet2x2(J);
+
+    // (v, u): v = J^T*J*u*w/detJ
+    // Step 1: form the product J^T*J
+    CeedScalar gram[2][2];
+    AlphaMatTransposeMatMult2x2(1, J, J, gram);
+
+    // Step 2: apply it to u, scaled by w/detJ
+    CeedScalar u_loc[2] = {u_q[0][i], u_q[1][i]};
+    CeedScalar v_loc[2];
+    AlphaMatVecMult2x2(weight[i] / det_J, gram, u_loc, v_loc);
+
+    // Store both components of (v, u) at this quadrature point
+    v_q[0][i] = v_loc[0];
+    v_q[1][i] = v_loc[1];
+  }
+
+  return 0;
+}
+
+// -----------------------------------------------------------------------------
+// We solve (q, p) = (q, pe) at t=0 to project pe to the L2 space.
+// This QFunction creates rhs_p0 = (q, pe).
+// -----------------------------------------------------------------------------
+CEED_QFUNCTION(RichardRhsP02D)(void *ctx, const CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) {
+  // in[0]: quadrature weights; in[1]: coordinates (x block, then y block); in[2]: dx/dX entries
+  const CeedScalar *weight = in[0];
+  const CeedScalar *xy = in[1];
+  const CeedScalar(*jac)[2][CEED_Q_VLA] = (const CeedScalar(*)[2][CEED_Q_VLA])in[2];
+  // out[0]: rhs_p0, one value per quadrature point
+  CeedScalar *rhs_p0 = out[0];
+
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) {
+    const CeedScalar x = xy[i], y = xy[i + Q];
+    const CeedScalar J[2][2] = {
+        {jac[0][0][i], jac[1][0][i]},
+        {jac[0][1][i], jac[1][1][i]}
+    };
+    const CeedScalar det_J = MatDet2x2(J);
+    // pe is psi = exp(-gamma*t)*sin(pi*x)*sin(pi*y) evaluated at t = 0
+    const CeedScalar psi_0 = sin(PI_DOUBLE * x) * sin(PI_DOUBLE * y);
+    // rhs = (q, pe) = pe*w*det_J
+    rhs_p0[i] = psi_0 * weight[i] * det_J;
+  }
+  return 0;
+}
+
+// -----------------------------------------------------------------------------
+// We solve (q, p) = (q, pe) at t=0 to project pe to the L2 space.
+// This QFunction creates the mass matrix (q, p); we then solve with KSP to obtain
+// the projected pe in the L2 space and use it for the initial conditions.
+// -----------------------------------------------------------------------------
+CEED_QFUNCTION(RichardICsP2D)(void *ctx, const CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) {
+  // in[0]: quadrature weights; in[1]: dx/dX entries; in[2]: scalar field p
+  const CeedScalar *weight = in[0];
+  const CeedScalar(*jac)[2][CEED_Q_VLA] = (const CeedScalar(*)[2][CEED_Q_VLA])in[1];
+  const CeedScalar *p_q = (const CeedScalar *)in[2];
+  // out[0]: scalar result q, one value per quadrature point
+  CeedScalar *q_out = (CeedScalar *)out[0];
+
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) {
+    // J = dx/dX at this quadrature point; only det(J) enters the result
+    const CeedScalar J[2][2] = {
+        {jac[0][0][i], jac[1][0][i]},
+        {jac[0][1][i], jac[1][1][i]}
+    };
+    const CeedScalar det_J = MatDet2x2(J);
+
+    // (q, p): p*w*detJ, multiplied left to right
+    q_out[i] = p_q[i] * weight[i] * det_J;
+  }
+
+  return 0;
+}
+// -----------------------------------------------------------------------------
+
+#endif  // End of RICHARD_ICS2D_H
diff --git a/examples/Hdiv-mixed/qfunctions/richard-ics3d.h b/examples/Hdiv-mixed/qfunctions/richard-ics3d.h
new file mode 100644
index 0000000000..a08640339f
--- /dev/null
+++ b/examples/Hdiv-mixed/qfunctions/richard-ics3d.h
@@ -0,0 +1,185 @@
+// Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
+// the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
+// reserved. See files LICENSE and NOTICE for details.
+//
+// This file is part of CEED, a collection of benchmarks, miniapps, software
+// libraries and APIs for efficient high-order finite element and spectral
+// element discretizations for exascale applications. For more information and
+// source code availability see http://github.com/ceed.
+//
+// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
+// a collaborative effort of two U.S. Department of Energy organizations (Office
+// of Science and the National Nuclear Security Administration) responsible for
+// the planning and preparation of a capable exascale ecosystem, including
+// software, applications, hardware, advanced system engineering and early
+// testbed platforms, in support of the nation's exascale computing imperative.
+
+/// @file
+/// Initial conditions of Richard problem 3D (hex element) using PETSc
+
+#ifndef RICHARD_ICS3D_H
+#define RICHARD_ICS3D_H
+
+#include <ceed.h>
+#include <math.h>
+
+#include "ceed/ceed-f64.h"
+#include "utils.h"
+
+#ifndef RICHARD_CTX
+#define RICHARD_CTX
+typedef struct RICHARDContext_ *RICHARDContext;
+struct RICHARDContext_ {
+  CeedScalar kappa;           // multiplies grad(psi) in ue = -rho*k_r*kappa*grad(psi)
+  CeedScalar g;               // not read in this header; presumably the gravity magnitude -- TODO confirm
+  CeedScalar rho_a0;          // not read in this header; presumably a reference density -- TODO confirm
+  CeedScalar alpha_a, b_a;    // coefficients of k_r = b_a + alpha_a * (...)
+  CeedScalar beta, p0;        // not read in this header -- TODO document from the host code
+  CeedScalar t, t_final, dt;  // time values; presumably current time, end time and step size -- TODO confirm
+  CeedScalar gamma;           // rate in the exp(-gamma*t) time factor of psi
+  CeedScalar lx, ly, lz;      // not read in this header; presumably domain extents -- TODO confirm
+};
+#endif
+
+// -----------------------------------------------------------------------------
+// We solve (v, u) = (v, ue) at t=0 to project ue to the Hdiv space.
+// This QFunction creates rhs_u0 = (v, ue).
+// -----------------------------------------------------------------------------
+CEED_QFUNCTION(RichardRhsU03D)(void *ctx, const CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) {
+  // in[0]: quadrature weights; in[1]: coordinates (x, y, z blocks of length Q); in[2]: dx/dX entries
+  const CeedScalar *weight = in[0];
+  const CeedScalar *xyz = in[1];
+  const CeedScalar(*jac)[3][CEED_Q_VLA] = (const CeedScalar(*)[3][CEED_Q_VLA])in[2];
+  // out[0]: rhs_u0, component c stored in [c*Q, (c+1)*Q)
+  CeedScalar *rhs_u0 = out[0];
+  // Material parameters read from the context
+  const RICHARDContext rctx = (RICHARDContext)ctx;
+  const CeedScalar kappa = rctx->kappa, alpha_a = rctx->alpha_a, b_a = rctx->b_a;
+
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) {
+    const CeedScalar x = xyz[i], y = xyz[i + Q], z = xyz[i + 2 * Q];
+    const CeedScalar J[3][3] = {
+        {jac[0][0][i], jac[1][0][i], jac[2][0][i]},
+        {jac[0][1][i], jac[1][1][i], jac[2][1][i]},
+        {jac[0][2][i], jac[1][2][i], jac[2][2][i]}
+    };
+    // Spatial gradient of sin(pi*x)*sin(pi*y)*sin(pi*z), the t = 0 form of psi
+    const CeedScalar dpsi_dx = PI_DOUBLE * cos(PI_DOUBLE * x) * sin(PI_DOUBLE * y) * sin(PI_DOUBLE * z);
+    const CeedScalar dpsi_dy = PI_DOUBLE * sin(PI_DOUBLE * x) * cos(PI_DOUBLE * y) * sin(PI_DOUBLE * z);
+    const CeedScalar dpsi_dz = PI_DOUBLE * sin(PI_DOUBLE * x) * sin(PI_DOUBLE * y) * cos(PI_DOUBLE * z);
+
+    // k_r = b_a + alpha_a * (1 - x*y*z); rho = rho_a/rho_a0 is set to 1
+    const CeedScalar k_r = b_a + alpha_a * (1 - x * y * z);
+    const CeedScalar rho = 1.;
+
+    // ue = -rho*k_r*K*grad(psi); the shared factor -rho*k_r*kappa is formed once
+    const CeedScalar flux_scale = -rho * k_r * kappa;
+    CeedScalar ue[3] = {flux_scale * dpsi_dx, flux_scale * dpsi_dy, flux_scale * dpsi_dz};
+
+    // rhs = (v, ue) = J^T*ue*w
+    CeedScalar rhs1[3];
+    AlphaMatTransposeVecMult3x3(weight[i], J, ue, rhs1);
+    rhs_u0[i] = rhs1[0];
+    rhs_u0[i + Q] = rhs1[1];
+    rhs_u0[i + 2 * Q] = rhs1[2];
+  }
+  return 0;
+}
+
+// -----------------------------------------------------------------------------
+// We solve (v, u) = (v, ue) at t=0 to project ue to the Hdiv space.
+// This QFunction creates the mass matrix (v, u); we then solve with KSP to obtain
+// the projected ue in the Hdiv space and use it for the initial conditions.
+// -----------------------------------------------------------------------------
+CEED_QFUNCTION(RichardICsU3D)(void *ctx, const CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) {
+  // in[0]: quadrature weights; in[1]: dx/dX entries; in[2]: the three components of u
+  const CeedScalar *weight = in[0];
+  const CeedScalar(*jac)[3][CEED_Q_VLA] = (const CeedScalar(*)[3][CEED_Q_VLA])in[1];
+  const CeedScalar(*u_q)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2];
+  // out[0]: the three components of v
+  CeedScalar(*v_q)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0];
+
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) {
+    // J = dx/dX at this quadrature point
+    const CeedScalar J[3][3] = {
+        {jac[0][0][i], jac[1][0][i], jac[2][0][i]},
+        {jac[0][1][i], jac[1][1][i], jac[2][1][i]},
+        {jac[0][2][i], jac[1][2][i], jac[2][2][i]}
+    };
+    const CeedScalar det_J = MatDet3x3(J);
+
+    // (v, u): v = J^T*J*u*w/detJ
+    // Step 1: form the product J^T*J
+    CeedScalar gram[3][3];
+    AlphaMatTransposeMatMult3x3(1, J, J, gram);
+
+    // Step 2: apply it to u, scaled by w/detJ
+    CeedScalar u_loc[3] = {u_q[0][i], u_q[1][i], u_q[2][i]};
+    CeedScalar v_loc[3];
+    AlphaMatVecMult3x3(weight[i] / det_J, gram, u_loc, v_loc);
+
+    // Store all three components of (v, u) at this quadrature point
+    v_q[0][i] = v_loc[0];
+    v_q[1][i] = v_loc[1];
+    v_q[2][i] = v_loc[2];
+  }
+  return 0;
+}
+
+// -----------------------------------------------------------------------------
+// We solve (q, p) = (q, pe) at t=0 to project pe to the L2 space.
+// This QFunction creates rhs_p0 = (q, pe).
+// -----------------------------------------------------------------------------
+CEED_QFUNCTION(RichardRhsP03D)(void *ctx, const CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) {
+  // in[0]: quadrature weights; in[1]: coordinates (x, y, z blocks of length Q); in[2]: dx/dX entries
+  const CeedScalar *weight = in[0];
+  const CeedScalar *xyz = in[1];
+  const CeedScalar(*jac)[3][CEED_Q_VLA] = (const CeedScalar(*)[3][CEED_Q_VLA])in[2];
+  // out[0]: rhs_p0, one value per quadrature point
+  CeedScalar *rhs_p0 = out[0];
+
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) {
+    const CeedScalar x = xyz[i], y = xyz[i + Q], z = xyz[i + 2 * Q];
+    const CeedScalar J[3][3] = {
+        {jac[0][0][i], jac[1][0][i], jac[2][0][i]},
+        {jac[0][1][i], jac[1][1][i], jac[2][1][i]},
+        {jac[0][2][i], jac[1][2][i], jac[2][2][i]}
+    };
+    const CeedScalar det_J = MatDet3x3(J);
+    // pe is sin(pi*x)*sin(pi*y)*sin(pi*z), the t = 0 form of psi
+    const CeedScalar psi_0 = sin(PI_DOUBLE * x) * sin(PI_DOUBLE * y) * sin(PI_DOUBLE * z);
+    // rhs = (q, pe) = pe*w*det_J
+    rhs_p0[i] = psi_0 * weight[i] * det_J;
+  }
+  return 0;
+}
+
+// -----------------------------------------------------------------------------
+// We solve (q, p) = (q, pe) at t=0 to project pe to the L2 space.
+// This QFunction creates the mass matrix (q, p); we then solve with KSP to obtain
+// the projected pe in the L2 space and use it for the initial conditions.
+// -----------------------------------------------------------------------------
+CEED_QFUNCTION(RichardICsP3D)(void *ctx, const CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) {
+  // in[0]: quadrature weights; in[1]: dx/dX entries; in[2]: scalar field p
+  const CeedScalar *weight = in[0];
+  const CeedScalar(*jac)[3][CEED_Q_VLA] = (const CeedScalar(*)[3][CEED_Q_VLA])in[1];
+  const CeedScalar *p_q = (const CeedScalar *)in[2];
+  // out[0]: scalar result q, one value per quadrature point
+  CeedScalar *q_out = (CeedScalar *)out[0];
+
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) {
+    // J = dx/dX at this quadrature point; only det(J) enters the result
+    const CeedScalar J[3][3] = {
+        {jac[0][0][i], jac[1][0][i], jac[2][0][i]},
+        {jac[0][1][i], jac[1][1][i], jac[2][1][i]},
+        {jac[0][2][i], jac[1][2][i], jac[2][2][i]}
+    };
+    const CeedScalar det_J = MatDet3x3(J);
+    // (q, p): p*w*detJ, multiplied left to right
+    q_out[i] = p_q[i] * weight[i] * det_J;
+  }
+  return 0;
+}
+// -----------------------------------------------------------------------------
+
+#endif  // End of RICHARD_ICS3D_H
diff --git a/examples/Hdiv-mixed/qfunctions/richard-system2d.h b/examples/Hdiv-mixed/qfunctions/richard-system2d.h
new file mode 100644
index 0000000000..7cfa233361
--- /dev/null
+++ b/examples/Hdiv-mixed/qfunctions/richard-system2d.h
@@ -0,0 +1,264 @@
+// Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
+// the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
+// reserved. See files LICENSE and NOTICE for details.
+//
+// This file is part of CEED, a collection of benchmarks, miniapps, software
+// libraries and APIs for efficient high-order finite element and spectral
+// element discretizations for exascale applications. For more information and
+// source code availability see http://github.com/ceed.
+//
+// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
+// a collaborative effort of two U.S. Department of Energy organizations (Office
+// of Science and the National Nuclear Security Administration) responsible for
+// the planning and preparation of a capable exascale ecosystem, including
+// software, applications, hardware, advanced system engineering and early
+// testbed platforms, in support of the nation's exascale computing imperative.
+
+/// @file
+/// Richard problem 2D (quad element) using PETSc
+
+#ifndef RICHARD_SYSTEM2D_H
+#define RICHARD_SYSTEM2D_H
+
+#include <ceed.h>
+#include <math.h>
+
+#include "ceed/ceed-f64.h"
+#include "utils.h"
+
+// See Matthew Farthing, Christopher Kees, Cass Miller (2003)
+// https://www.sciencedirect.com/science/article/pii/S0309170802001872
+// -----------------------------------------------------------------------------
+// Strong form:
+//  u        = -rho*k_r*K *[grad(\psi) - rho*g_u]   in \Omega x [0,T]
+//  -\div(u) = -f  + d (rho*\theta)/dt              in \Omega x [0,T]
+//  p        = p_b                                  on \Gamma_D x [0,T]
+//  u.n      = u_b                                  on \Gamma_N x [0,T]
+//  p        = p_0                                  in \Omega, t = 0
+//
+//  Where rho = rho_a/rho_a0, rho_a = rho_a0*exp(\beta * (p - p0)), p0 = 101325 Pa is atmospheric pressure
+//  rho_a0 is the density at p_0, g_u = g/norm(g) where g is gravity.
+//  k_r = b_a + alpha_a * (\psi - x2), where \psi = p / (rho_a0 * norm(g)) and x2 is vertical axis
+//
+// Weak form: Find (u, \psi) \in VxQ (V=H(div), Q=L^2) on \Omega
+//  (v, K^{-1}/rho*k_r * u) -(v, rho*g_u) -(\div(v), \psi) = -<v, p_b*n>_{\Gamma_D}
+// -(q, \div(u))  + (q, f) -(q, d (rho*\theta)/dt ) = 0
+//
+// We solve MMS for  K = kappa*I and beta=0 ==> rho=1 and \theta = alpha_a*\psi, so
+// -(q, d (rho*\theta)/dt ) = -alpha_a*(q, d(\psi)/dt )
+//
+// This QFunction sets up the mixed form of the above equation
+// Inputs:
+//   w     : weight of quadrature
+//   J     : dx/dX. x physical coordinate, X reference coordinate [-1,1]^dim
+//   u     : basis_u at quadrature points
+// div(u)  : divergence of basis_u at quadrature points
+//   p     : basis_p at quadrature points
+//   U_t   : time derivative of U = [p, u]
+//
+// Output:
+//   v     : (v, K^{-1}/rho*k_r u) = \int (v^T * K^{-1}/rho*k_r*u detJ*w)dX ==> \int (v^T J^T * K^{-1}/rho*k_r *J*u*w/detJ)dX
+//           -(v, rho*g_u)     = -\int (v^T * rho*g_u detJ*w)dX ==> -\int (v^T J^T * rho*g_u*w) dX
+// div(v)  : -(\div(v), \psi) = -\int (div(v)^T * \psi *w) dX
+//   q     : -(q, \div(u)) = -\int (q^T * div(u) * w) dX
+//            (q, f)       = \int( q^T * f * w*detJ )dX
+//            -alpha_a*(q, d\psi/dt) = -alpha_a \int (q^T * \psi_t*w*detJ)dX
+//
+// -----------------------------------------------------------------------------
+#ifndef RICHARD_CTX
+#define RICHARD_CTX
+typedef struct RICHARDContext_ *RICHARDContext;
+struct RICHARDContext_ {
+  CeedScalar kappa;           // permeability scale, K = kappa*I
+  CeedScalar g;               // gravity magnitude, the norm(g) in psi = p / (rho_a0 * norm(g))
+  CeedScalar rho_a0;          // density at p0
+  CeedScalar alpha_a, b_a;    // coefficients of k_r = b_a + alpha_a * (...); alpha_a also scales the d(psi)/dt term
+  CeedScalar beta, p0;        // rho_a = rho_a0*exp(beta * (p - p0)); p0 is the atmospheric pressure
+  CeedScalar t, t_final, dt;  // t enters exp(-gamma*t); t_final and dt are presumably end time and step size -- TODO confirm
+  CeedScalar gamma;           // rate in the exp(-gamma*t) factor applied to the forcing
+  CeedScalar lx, ly;          // not read in this header; presumably domain extents -- TODO confirm
+};
+#endif
+// -----------------------------------------------------------------------------
+// Residual evaluation for Richard problem
+// -----------------------------------------------------------------------------
+CEED_QFUNCTION(RichardSystem2D)(void *ctx, CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) {
+  // Inputs: weights, dx/dX, u, div(u), p, forcing f, physical coordinates, time derivative of p
+  const CeedScalar *weight = in[0];
+  const CeedScalar(*jac)[2][CEED_Q_VLA] = (const CeedScalar(*)[2][CEED_Q_VLA])in[1];
+  const CeedScalar(*u_q)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2];
+  const CeedScalar *div_u = (const CeedScalar *)in[3];
+  const CeedScalar *p_q = (const CeedScalar *)in[4];
+  const CeedScalar *force = in[5];
+  const CeedScalar *xy = in[6];
+  const CeedScalar *p_t = (const CeedScalar *)in[7];
+
+  // Outputs: v (two components), div(v) and q
+  CeedScalar(*v_q)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0];
+  CeedScalar *div_v = (CeedScalar *)out[1];
+  CeedScalar *q_out = (CeedScalar *)out[2];
+
+  // Problem parameters read from the context (beta, p0 and dt are not used here)
+  const RICHARDContext rctx = (RICHARDContext)ctx;
+  const CeedScalar kappa = rctx->kappa;
+  const CeedScalar rho_a0 = rctx->rho_a0;
+  const CeedScalar g = rctx->g;
+  const CeedScalar alpha_a = rctx->alpha_a;
+  const CeedScalar b_a = rctx->b_a;
+  const CeedScalar gamma = rctx->gamma;
+  const CeedScalar t = rctx->t;
+
+  // Point-independent factors: rho_a0*norm(g) and the forcing's time factor exp(-gamma*t)
+  const CeedScalar head_scale = rho_a0 * g;
+  const CeedScalar decay = exp(-gamma * (t));
+
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) {
+    const CeedScalar x = xy[i], y = xy[i + Q];
+    const CeedScalar J[2][2] = {
+        {jac[0][0][i], jac[1][0][i]},
+        {jac[0][1][i], jac[1][1][i]}
+    };
+    const CeedScalar det_J = MatDet2x2(J);
+
+    // psi = p / (rho_a0 * norm(g))
+    const CeedScalar psi = p_q[i] / head_scale;
+    // k_r as evaluated here: b_a + alpha_a * (1 - x*y)
+    const CeedScalar k_r = b_a + alpha_a * (1 - x * y);
+    // rho = rho_a/rho_a0, fixed to 1 (the beta = 0 case)
+    const CeedScalar rho = 1.;
+
+    // (v, K^{-1}/(rho*k_r) u): v = J^T*(K^{-1}/(rho*k_r))*J*u*w/detJ
+    // Step 1: K = kappa*I and its inverse
+    CeedScalar K[2][2] = {
+        {kappa, 0.   },
+        {0.,    kappa}
+    };
+    const CeedScalar det_K = MatDet2x2(K);
+    CeedScalar K_inv[2][2];
+    MatInverse2x2(K, det_K, K_inv);
+
+    // Step 2: (K^{-1}/(rho*k_r))*J
+    CeedScalar Kinv_J[2][2];
+    AlphaMatMatMult2x2(1 / (rho * k_r), K_inv, J, Kinv_J);
+
+    // Step 3: J^T*(K^{-1}/(rho*k_r))*J
+    CeedScalar JT_Kinv_J[2][2];
+    AlphaMatTransposeMatMult2x2(1, J, Kinv_J, JT_Kinv_J);
+
+    // Step 4: apply the product to u, scaled by w/detJ
+    CeedScalar u_loc[2] = {u_q[0][i], u_q[1][i]};
+    CeedScalar v_loc[2];
+    AlphaMatVecMult2x2(weight[i] / det_J, JT_Kinv_J, u_loc, v_loc);
+
+    // v output; the gravity term -(v, rho*g_u) is not added here
+    v_q[0][i] = v_loc[0];
+    v_q[1][i] = v_loc[1];
+
+    // -(div(v), psi)
+    div_v[i] = -psi * weight[i];
+
+    // -(q, div(u)) + (q, f) - alpha_a*(q, d(psi)/dt)
+    const CeedScalar dpsi_dt = p_t[i] / head_scale;
+    q_out[i] = -div_u[i] * weight[i] + decay * force[i] * weight[i] * det_J - alpha_a * dpsi_dt * weight[i] * det_J;
+  }
+  return 0;
+}
+
+/*
+// -----------------------------------------------------------------------------
+// Jacobian evaluation for Richard problem
+// -----------------------------------------------------------------------------
+CEED_QFUNCTION(JacobianRichardSystem2D)(void *ctx, CeedInt Q,
+                                        const CeedScalar *const *in,
+                                        CeedScalar *const *out) {
+  // Inputs
+  const CeedScalar (*w) = in[0],
+                   (*dxdX)[2][CEED_Q_VLA] = (const CeedScalar(*)[2][CEED_Q_VLA])in[1],
+                   (*du)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2],
+                   (*div_du) = (const CeedScalar(*))in[3],
+                   (*dp) = (const CeedScalar(*))in[4],
+                   (*coords) = in[5],
+                   (*u)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[6],
+                   (*p) = (const CeedScalar(*))in[7];
+
+  // Outputs
+  CeedScalar (*dv)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0],
+             (*div_dv) = (CeedScalar(*))out[1],
+             (*dq) = (CeedScalar(*))out[2];
+  // Context
+  RICHARDContext  context = (RICHARDContext)ctx;
+  const CeedScalar kappa  = context->kappa;
+  const CeedScalar alpha_a = context->alpha_a;
+  const CeedScalar b_a     = context->b_a;
+  const CeedScalar rho_a0   = context->rho_a0;
+  const CeedScalar beta    = context->beta;
+  const CeedScalar g       = context->g;
+  const CeedScalar p0      = context->p0;// atmospheric pressure
+
+  // Quadrature Point Loop
+  CeedPragmaSIMD
+  for (CeedInt i=0; i<Q; i++) {
+    // Setup, J = dx/dX
+    CeedScalar y = coords[i+1*Q];
+    const CeedScalar J[2][2] = {{dxdX[0][0][i], dxdX[1][0][i]},
+                                {dxdX[0][1][i], dxdX[1][1][i]}};
+    const CeedScalar det_J = MatDet2x2(J);
+
+    // psi = p / (rho_a0 * norm(g))
+    CeedScalar psi = p[i] / (rho_a0 * g);
+    // k_r = b_a + alpha_a * (psi - x2)
+    CeedScalar k_r = b_a + alpha_a * (psi - y);
+    // rho = rho_a0*exp(beta * (p - p0))
+    CeedScalar rho = rho_a0 * exp(beta * (p[i] - p0));
+    //k = rho_a0^2*norm(g)/(rho*k_r)
+    CeedScalar k = rho_a0 * rho_a0 * g / (rho * k_r);
+
+    // Piola map: J^T*k*K^{-1}*J*u*w/detJ
+    // The jacobian term
+    // dv = J^T* (k*K^{-1}) *J*du*w/detJ - [(rho*k_r),p*dp/(rho*k_r)]*J^T*(k*K^{-1}) *J*u*w/detJ
+    //      -J^T * (beta*rho*g)*dp
+    // 1) Compute K^{-1}, note K = kappa*I
+    CeedScalar K[2][2] = {{kappa, 0.},{0., kappa}};
+    const CeedScalar det_K = MatDet2x2(K);
+    CeedScalar K_inv[2][2];
+    MatInverse2x2(K, det_K, K_inv);
+
+    // 2) Compute k*K^{-1}*J
+    CeedScalar kKinv_J[2][2];
+    AlphaMatMatMult2x2(k, K_inv, J, kKinv_J);
+
+    // 3) Compute J^T * (k*K^{-1}*J)
+    CeedScalar JT_kKinv_J[2][2];
+    AlphaMatTransposeMatMult2x2(1, J, kKinv_J, JT_kKinv_J);
+
+    // 4) Compute (J^T*k*K^{-1}*J) * du * w /detJ
+    CeedScalar du1[2] = {du[0][i], du[1][i]}, dv1[2];
+    AlphaMatVecMult2x2(w[i]/det_J, JT_kKinv_J, du1, dv1);
+
+    // 5) Compute -(rho*k_r),p*dp/(rho*k_r))
+    // (rho*k_r),p*dp = beta*rho*dp*k_r + rho*alpha*dp/(rho_a0*norm(g))
+    CeedScalar d_rhokr_dp = -(beta + alpha_a/(rho_a0*g*k_r))*dp[i];
+
+    // 6) -[(rho*k_r),p*dp/(rho*k_r)]*J^T*(k*K^{-1}) *J*u*w/detJ
+    CeedScalar u1[2] = {u[0][i], u[1][i]}, dv2[2];
+    AlphaMatVecMult2x2((d_rhokr_dp*w[i])/det_J, JT_kKinv_J, u1, dv2);
+
+    // 7) -(v, rho*g): dv = -J^T * (beta*rho*g*dp)*w
+    CeedScalar drho_g_dp[2] = {0., beta *rho *g *dp[i]}, dv3[2];
+    AlphaMatTransposeVecMult2x2(-w[i], J, drho_g_dp, dv3);
+
+    // Output at quadrature points
+    for (CeedInt k = 0; k < 2; k++) {
+      dv[k][i] = dv1[k] + dv2[k] + dv3[k];
+    }
+
+    div_dv[i] = -dp[i] * w[i];
+    dq[i] = -div_du[i] * w[i];
+  } // End of Quadrature Point Loop
+
+  return 0;
+}
+*/
+// -----------------------------------------------------------------------------
+
+#endif  // End of RICHARD_SYSTEM2D_H
diff --git a/examples/Hdiv-mixed/qfunctions/richard-system3d.h b/examples/Hdiv-mixed/qfunctions/richard-system3d.h
new file mode 100644
index 0000000000..b459e47525
--- /dev/null
+++ b/examples/Hdiv-mixed/qfunctions/richard-system3d.h
@@ -0,0 +1,266 @@
+// Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
+// the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
+// reserved. See files LICENSE and NOTICE for details.
+//
+// This file is part of CEED, a collection of benchmarks, miniapps, software
+// libraries and APIs for efficient high-order finite element and spectral
+// element discretizations for exascale applications. For more information and
+// source code availability see http://github.com/ceed.
+//
+// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
+// a collaborative effort of two U.S. Department of Energy organizations (Office
+// of Science and the National Nuclear Security Administration) responsible for
+// the planning and preparation of a capable exascale ecosystem, including
+// software, applications, hardware, advanced system engineering and early
+// testbed platforms, in support of the nation's exascale computing imperative.
+
+/// @file
+/// Richard problem 3D (hex element) using PETSc
+
+#ifndef RICHARD_SYSTEM3D_H
+#define RICHARD_SYSTEM3D_H
+
+#include <ceed.h>
+#include <math.h>
+
+#include "ceed/ceed-f64.h"
+#include "utils.h"
+
+// See Matthew Farthing, Christopher Kees, Cass Miller (2003)
+// https://www.sciencedirect.com/science/article/pii/S0309170802001872
+// -----------------------------------------------------------------------------
+// Strong form:
+//  u        = -rho*k_r*K *[grad(\psi) - rho*g_u]   in \Omega x [0,T]
+//  -\div(u) = -f  + d (rho*\theta)/dt              in \Omega x [0,T]
+//  p        = p_b                                  on \Gamma_D x [0,T]
+//  u.n      = u_b                                  on \Gamma_N x [0,T]
+//  p        = p_0                                  in \Omega, t = 0
+//
+//  Where rho = rho_a/rho_a0, rho_a = rho_a0*exp(\beta * (p - p0)), p0 = 101325 Pa is atmospheric pressure
+//  rho_a0 is the density at p_0, g_u = g/norm(g) where g is gravity.
+//  k_r = b_a + alpha_a * (\psi - x2), where \psi = p / (rho_a0 * norm(g)) and x2 is vertical axis
+//
+// Weak form: Find (u, \psi) \in VxQ (V=H(div), Q=L^2) on \Omega
+//  (v, K^{-1}/rho*k_r * u) -(v, rho*g_u) -(\div(v), \psi) = -<v, p_b*n>_{\Gamma_D}
+// -(q, \div(u))  + (q, f) -(q, d (rho*\theta)/dt ) = 0
+//
+// We solve MMS for  K = kappa*I and beta=0 ==> rho=1 and \theta = alpha_a*\psi, so
+// -(q, d (rho*\theta)/dt ) = -alpha_a*(q, d(\psi)/dt )
+//
+// This QFunction sets up the mixed form of the above equation
+// Inputs:
+//   w     : weight of quadrature
+//   J     : dx/dX. x physical coordinate, X reference coordinate [-1,1]^dim
+//   u     : basis_u at quadrature points
+// div(u)  : divergence of basis_u at quadrature points
+//   p     : basis_p at quadrature points
+//   U_t   : time derivative of U = [p, u]
+//
+// Output:
+//   v     : (v, K^{-1}/rho*k_r u) = \int (v^T * K^{-1}/rho*k_r*u detJ*w)dX ==> \int (v^T J^T * K^{-1}/rho*k_r *J*u*w/detJ)dX
+//           -(v, rho*g_u)     = -\int (v^T * rho*g_u detJ*w)dX ==> -\int (v^T J^T * rho*g_u*w) dX
+// div(v)  : -(\div(v), \psi) = -\int (div(v)^T * \psi *w) dX
+//   q     : -(q, \div(u)) = -\int (q^T * div(u) * w) dX
+//            (q, f)       = \int( q^T * f * w*detJ )dX
+//            -alpha_a*(q, d\psi/dt) = -alpha_a \int (q^T * \psi_t*w*detJ)dX
+//
+// -----------------------------------------------------------------------------
+#ifndef RICHARD_CTX
+#define RICHARD_CTX
+typedef struct RICHARDContext_ *RICHARDContext;
+struct RICHARDContext_ {
+  CeedScalar kappa;           // permeability scale; the residual uses K = kappa*I
+  CeedScalar g;               // magnitude of gravity, norm(g), used in \psi = p / (rho_a0 * norm(g))
+  CeedScalar rho_a0;          // density at pressure p0
+  CeedScalar alpha_a, b_a;    // coefficients of k_r = b_a + alpha_a * (...)
+  CeedScalar beta, p0;        // rho_a = rho_a0*exp(beta * (p - p0)); p0 is the atmospheric pressure
+  CeedScalar t, t_final, dt;  // t is read by the residual for exp(-gamma*t); presumably current time, final time, time step - confirm
+  CeedScalar gamma;           // decay rate in the exp(-gamma*t) factor applied to the forcing
+  CeedScalar lx, ly, lz;      // presumably domain extents; not read in this header - TODO confirm
+};
+#endif
+// -----------------------------------------------------------------------------
+// Residual evaluation for the 3D Richard MMS problem (rho = 1, gravity term disabled)
+// -----------------------------------------------------------------------------
+CEED_QFUNCTION(RichardSystem3D)(void *ctx, CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) {
+  // Inputs: quadrature weights, dx/dX, u, div(u), p, forcing f, physical coordinates, time derivative of p
+  const CeedScalar(*w) = in[0], (*dxdX)[3][CEED_Q_VLA] = (const CeedScalar(*)[3][CEED_Q_VLA])in[1],
+        (*u)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2], (*div_u) = (const CeedScalar(*))in[3], (*p) = (const CeedScalar(*))in[4],
+        (*f) = in[5], (*coords) = in[6], (*p_t) = (const CeedScalar(*))in[7];
+
+  // Outputs: v (3 components), div_v and q
+  CeedScalar(*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0], (*div_v) = (CeedScalar(*))out[1], (*q) = (CeedScalar(*))out[2];
+  // Context
+  RICHARDContext   context = (RICHARDContext)ctx;
+  const CeedScalar kappa   = context->kappa;
+  const CeedScalar rho_a0  = context->rho_a0;
+  const CeedScalar g       = context->g;
+  const CeedScalar alpha_a = context->alpha_a;
+  const CeedScalar b_a     = context->b_a;
+  // const CeedScalar beta     = context->beta;
+  // const CeedScalar p0       = context->p0; // atmospheric pressure
+  const CeedScalar gamma = context->gamma;
+  CeedScalar       t     = context->t;
+  // CeedScalar dt              = context->dt;
+
+  // Quadrature Point Loop
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) {
+    // Setup: physical coordinates and J = dx/dX, with J[r][c] = dxdX[c][r][i]
+    CeedScalar       x = coords[i + 0 * Q], y = coords[i + 1 * Q], z = coords[i + 2 * Q];
+    const CeedScalar J[3][3] = {
+        {dxdX[0][0][i], dxdX[1][0][i], dxdX[2][0][i]},
+        {dxdX[0][1][i], dxdX[1][1][i], dxdX[2][1][i]},
+        {dxdX[0][2][i], dxdX[1][2][i], dxdX[2][2][i]}
+    };
+    const CeedScalar det_J = MatDet3x3(J);
+
+    // \psi = p / (rho_a0 * norm(g))
+    CeedScalar psi = p[i] / (rho_a0 * g);
+    // MMS choice: k_r = b_a + alpha_a * (1 - x*y*z), used instead of k_r = b_a + alpha_a * (\psi - x2)
+    CeedScalar k_r = b_a + alpha_a * (1 - x * y * z);
+    // rho_a = rho_a0*exp(beta * (p - p0))
+    // CeedScalar rho_a = rho_a0 * exp(beta * (p[i] - p0));
+    // rho = rho_a/rho_a0, fixed to 1 here (the MMS takes beta = 0)
+    CeedScalar rho = 1.;
+
+    // (v, K^{-1}/(rho*k_r) u): v = J^T* (K^{-1}/(rho*k_r)) *J*u*w/detJ
+    // 1) Compute K^{-1}, note K = kappa*I
+    CeedScalar K[3][3] = {
+        {kappa, 0.,    0.   },
+        {0.,    kappa, 0.   },
+        {0.,    0.,    kappa}
+    };
+    const CeedScalar det_K = MatDet3x3(K);
+    CeedScalar       K_inv[3][3];
+    MatInverse3x3(K, det_K, K_inv);
+
+    // 2) Compute (K^{-1}/(rho*k_r)) *J
+    CeedScalar Kinv_J[3][3];
+    AlphaMatMatMult3x3(1 / (rho * k_r), K_inv, J, Kinv_J);
+
+    // 3) Compute J^T* (K^{-1}/(rho*k_r)) *J
+    CeedScalar JT_Kinv_J[3][3];
+    AlphaMatTransposeMatMult3x3(1, J, Kinv_J, JT_Kinv_J);
+
+    // 4) Compute v1 = J^T* (K^{-1}/(rho*k_r)) *J*u*w/detJ
+    CeedScalar u1[3] = {u[0][i], u[1][i], u[2][i]}, v1[3];
+    AlphaMatVecMult3x3(w[i] / det_J, JT_Kinv_J, u1, v1);
+
+    // 5) -(v, rho*g_u): v2 = -J^T*rho*g_u*w (disabled; the two lines below are the 2D form)
+    // CeedScalar g_u[2] = {0., 1.}, v2[2];
+    // AlphaMatTransposeVecMult2x2(-rho*w[i], J, g_u, v2);
+
+    // Output at quadrature points: (v, K^{-1}/(rho*k_r) * u); the -(v, rho*g_u) term is not added
+    for (CeedInt k = 0; k < 3; k++) {
+      v[k][i] = v1[k];  // + v2[k];
+    }
+    // Output at quadrature points: -(\div(v), \psi)
+    div_v[i] = -psi * w[i];
+
+    // Output at quadrature points (f is scaled by exp(-gamma*t) at the current context time t):
+    // -(q, \div(u)) + (q, exp(-gamma*t)*f) - alpha_a * (q, d\psi/dt)
+    CeedScalar dpsi_dt = p_t[i] / (rho_a0 * g);
+    q[i]               = -div_u[i] * w[i] + exp(-gamma * (t)) * f[i + 0 * Q] * w[i] * det_J - alpha_a * dpsi_dt * w[i] * det_J;
+  }  // End of Quadrature Point Loop
+
+  return 0;
+}
+
+/*
+// -----------------------------------------------------------------------------
+// Jacobian evaluation for Richard problem
+// -----------------------------------------------------------------------------
+CEED_QFUNCTION(JacobianRichardSystem2D)(void *ctx, CeedInt Q,
+                                        const CeedScalar *const *in,
+                                        CeedScalar *const *out) {
+  // Inputs
+  const CeedScalar (*w) = in[0],
+                   (*dxdX)[2][CEED_Q_VLA] = (const CeedScalar(*)[2][CEED_Q_VLA])in[1],
+                   (*du)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2],
+                   (*div_du) = (const CeedScalar(*))in[3],
+                   (*dp) = (const CeedScalar(*))in[4],
+                   (*coords) = in[5],
+                   (*u)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[6],
+                   (*p) = (const CeedScalar(*))in[7];
+
+  // Outputs
+  CeedScalar (*dv)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0],
+             (*div_dv) = (CeedScalar(*))out[1],
+             (*dq) = (CeedScalar(*))out[2];
+  // Context
+  RICHARDContext  context = (RICHARDContext)ctx;
+  const CeedScalar kappa  = context->kappa;
+  const CeedScalar alpha_a = context->alpha_a;
+  const CeedScalar b_a     = context->b_a;
+  const CeedScalar rho_a0   = context->rho_a0;
+  const CeedScalar beta    = context->beta;
+  const CeedScalar g       = context->g;
+  const CeedScalar p0      = context->p0;// atmospheric pressure
+
+// Quadrature Point Loop
+CeedPragmaSIMD
+for (CeedInt i=0; i<Q; i++) {
+    // Setup, J = dx/dX
+    CeedScalar y = coords[i+1*Q];
+    const CeedScalar J[2][2] = {{dxdX[0][0][i], dxdX[1][0][i]},
+                                {dxdX[0][1][i], dxdX[1][1][i]}};
+    const CeedScalar det_J = MatDet2x2(J);
+
+// psi = p / (rho_a0 * norm(g))
+CeedScalar psi = p[i] / (rho_a0 * g);
+// k_r = b_a + alpha_a * (psi - x2)
+CeedScalar k_r = b_a + alpha_a * (psi - y);
+// rho = rho_a0*exp(beta * (p - p0))
+CeedScalar rho = rho_a0 * exp(beta * (p[i] - p0));
+//k = rho_a0^2*norm(g)/(rho*k_r)
+CeedScalar k = rho_a0 * rho_a0 * g / (rho * k_r);
+
+// Piola map: J^T*k*K^{-1}*J*u*w/detJ
+// The jacobian term
+// dv = J^T* (k*K^{-1}) *J*du*w/detJ - [(rho*k_r),p*dp/(rho*k_r)]*J^T*(k*K^{-1}) *J*u*w/detJ
+//      -J^T * (beta*rho*g)*dp
+// 1) Compute K^{-1}, note K = kappa*I
+CeedScalar K[2][2] = {{kappa, 0.},{0., kappa}};
+const CeedScalar det_K = MatDet2x2(K);
+CeedScalar K_inv[2][2];
+MatInverse2x2(K, det_K, K_inv);
+
+// 2) Compute k*K^{-1}*J
+CeedScalar kKinv_J[2][2];
+AlphaMatMatMult2x2(k, K_inv, J, kKinv_J);
+
+// 3) Compute J^T * (k*K^{-1}*J)
+CeedScalar JT_kKinv_J[2][2];
+AlphaMatTransposeMatMult2x2(1, J, kKinv_J, JT_kKinv_J);
+
+// 4) Compute (J^T*k*K^{-1}*J) * du * w /detJ
+CeedScalar du1[2] = {du[0][i], du[1][i]}, dv1[2];
+AlphaMatVecMult2x2(w[i]/det_J, JT_kKinv_J, du1, dv1);
+
+// 5) Compute -(rho*k_r),p*dp/(rho*k_r))
+// (rho*k_r),p*dp = beta*rho*dp*k_r + rho*alpha*dp/(rho_a0*norm(g))
+CeedScalar d_rhokr_dp = -(beta + alpha_a/(rho_a0*g*k_r))*dp[i];
+
+// 6) -[(rho*k_r),p*dp/(rho*k_r)]*J^T*(k*K^{-1}) *J*u*w/detJ
+CeedScalar u1[2] = {u[0][i], u[1][i]}, dv2[2];
+AlphaMatVecMult2x2((d_rhokr_dp*w[i])/det_J, JT_kKinv_J, u1, dv2);
+
+// 7) -(v, rho*g): dv = -J^T * (beta*rho*g*dp)*w
+CeedScalar drho_g_dp[2] = {0., beta *rho *g *dp[i]}, dv3[2];
+AlphaMatTransposeVecMult2x2(-w[i], J, drho_g_dp, dv3);
+
+// Output at quadrature points
+for (CeedInt k = 0; k < 2; k++) {
+dv[k][i] = dv1[k] + dv2[k] + dv3[k];
+}
+
+div_dv[i] = -dp[i] * w[i];
+dq[i] = -div_du[i] * w[i];
+} // End of Quadrature Point Loop
+
+return 0;
+}
+*/
+// -----------------------------------------------------------------------------
+
+#endif  // End of RICHARD_SYSTEM3D_H
diff --git a/examples/Hdiv-mixed/qfunctions/richard-true2d.h b/examples/Hdiv-mixed/qfunctions/richard-true2d.h
new file mode 100644
index 0000000000..c93fa40fd3
--- /dev/null
+++ b/examples/Hdiv-mixed/qfunctions/richard-true2d.h
@@ -0,0 +1,126 @@
+// Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
+// the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
+// reserved. See files LICENSE and NOTICE for details.
+//
+// This file is part of CEED, a collection of benchmarks, miniapps, software
+// libraries and APIs for efficient high-order finite element and spectral
+// element discretizations for exascale applications. For more information and
+// source code availability see http://github.com/ceed.
+//
+// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
+// a collaborative effort of two U.S. Department of Energy organizations (Office
+// of Science and the National Nuclear Security Administration) responsible for
+// the planning and preparation of a capable exascale ecosystem, including
+// software, applications, hardware, advanced system engineering and early
+// testbed platforms, in support of the nation's exascale computing imperative.
+
+/// @file
+/// Force of Richard problem 2D (quad element) using PETSc
+
+#ifndef RICHARD_TRUE2D_H
+#define RICHARD_TRUE2D_H
+
+#include <ceed.h>
+#include <math.h>
+
+#include "utils.h"
+
+// See Matthew Farthing, Christopher Kees, Cass Miller (2003)
+// https://www.sciencedirect.com/science/article/pii/S0309170802001872
+// -----------------------------------------------------------------------------
+// Strong form:
+//  u        = -rho*k_r*K *[grad(\psi) - rho*g_u]   in \Omega x [0,T]
+//  -\div(u) = -f  + d (rho*theta)/dt              in \Omega x [0,T]
+//  p        = p_b                                  on \Gamma_D x [0,T]
+//  u.n      = u_b                                  on \Gamma_N x [0,T]
+//  p        = p_0                                  in \Omega, t = 0
+//
+//  Where rho = rho_a/rho_a0, rho_a = rho_a0*exp(\beta * (p - p0)), p0 = 101325 Pa is atmospheric pressure
+//  rho_a0 is the density at p_0, g_u = g/norm(g) where g is gravity.
+//  k_r = b_a + alpha_a * (\psi - x2), where \psi = p / (rho_a0 * norm(g)) and x2 is vertical axis
+//
+// Weak form: Find (u, \psi) \in VxQ (V=H(div), Q=L^2) on \Omega
+//  (v, K^{-1}/rho*k_r * u) -(v, rho*g_u) -(\div(v), \psi) = -<v, p_b*n>_{\Gamma_D}
+// -(q, \div(u))  + (q, f) -(q, d (rho*\theta)/dt ) = 0
+//
+// We solve MMS for  K = kappa*I and beta=0 ==> rho=1 and \theta = alpha_a*\psi, so
+// -(q, d (rho*\theta)/dt ) = -alpha_a*(q, d(\psi)/dt )
+//
+// This QFunction sets up the true solution and forcing f of the above equation
+// Inputs:
+//   coords: physical coordinate
+//
+// Output:
+//   true_force     : = div(u) + d (rho*theta)/dt
+//   true_solution  : = [\psi, u] where \psi, u are the exact solution
+// -----------------------------------------------------------------------------
+// We have 3 experiment parameters as described in Table 1:P1, P2, P3
+// Matthew Farthing, Christopher Kees, Cass Miller (2003)
+// https://www.sciencedirect.com/science/article/pii/S0309170802001872
+#ifndef RICHARD_CTX
+#define RICHARD_CTX
+typedef struct RICHARDContext_ *RICHARDContext;
+struct RICHARDContext_ {
+  CeedScalar kappa;           // permeability scale, K = kappa*I
+  CeedScalar g;               // magnitude of gravity, norm(g), as in \psi = p / (rho_a0 * norm(g))
+  CeedScalar rho_a0;          // density at pressure p0
+  CeedScalar alpha_a, b_a;    // coefficients of k_r = b_a + alpha_a * (...)
+  CeedScalar beta, p0;        // rho_a = rho_a0*exp(beta * (p - p0)); p0 is the atmospheric pressure
+  CeedScalar t, t_final, dt;  // the true solution is evaluated at t_final; t and dt are presumably current time and time step - confirm
+  CeedScalar gamma;           // decay rate of the exp(-gamma*t) factor in the manufactured solution
+  CeedScalar lx, ly;          // presumably domain extents; NOTE(review): the 3D headers' copy of this guarded struct also has lz
+};
+#endif
+
+// -----------------------------------------------------------------------------
+// True solution and forcing for the 2D Richard MMS problem
+// -----------------------------------------------------------------------------
+CEED_QFUNCTION(RichardTrue2D)(void *ctx, const CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) {
+  // Inputs: physical coordinates, component-major (x at [i], y at [i + Q])
+  const CeedScalar(*coords) = in[0];
+  // Outputs: forcing (1 component) and true solution [psi, u_x, u_y], component-major
+  CeedScalar(*true_force) = out[0], (*true_solution) = out[1];
+  // Context
+  RICHARDContext   context = (RICHARDContext)ctx;
+  const CeedScalar kappa   = context->kappa;
+  const CeedScalar alpha_a = context->alpha_a;
+  const CeedScalar b_a     = context->b_a;
+  const CeedScalar gamma   = context->gamma;
+  CeedScalar       t_final = context->t_final;
+
+  // Quadrature Point Loop
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) {
+    CeedScalar x = coords[i + 0 * Q], y = coords[i + 1 * Q];
+    // Full solution: psi = exp(-gamma*t)*sin(pi*x)*sin(pi*y)
+    // The exp(-gamma*t) factor is left out of psi and its derivatives below
+    CeedScalar psi    = sin(PI_DOUBLE * x) * sin(PI_DOUBLE * y);
+    CeedScalar psi_x  = PI_DOUBLE * cos(PI_DOUBLE * x) * sin(PI_DOUBLE * y);
+    CeedScalar psi_xx = -PI_DOUBLE * PI_DOUBLE * psi;
+    CeedScalar psi_y  = PI_DOUBLE * sin(PI_DOUBLE * x) * cos(PI_DOUBLE * y);
+    CeedScalar psi_yy = -PI_DOUBLE * PI_DOUBLE * psi;
+    // k_r = b_a + alpha_a * (1 - x*y) and its partial derivatives
+    CeedScalar k_r  = b_a + alpha_a * (1 - x * y);
+    CeedScalar k_rx = -alpha_a * y;
+    CeedScalar k_ry = -alpha_a * x;
+    // rho = rho_a/rho_a0, fixed to 1 for the MMS
+    CeedScalar rho = 1.;
+    // u = -rho*k_r*K *[grad(\psi)], evaluated at t = t_final
+    CeedScalar u[2] = {-rho * kappa * exp(-gamma * t_final) * k_r * psi_x, -rho * kappa * exp(-gamma * t_final) * k_r * psi_y};
+    // div_u omits the exp(-gamma*t) factor (unlike u above, which is evaluated at
+    // t = t_final); it only feeds the time-independent forcing below.
+    CeedScalar div_u = -rho * kappa * (k_rx * psi_x + k_r * psi_xx + k_ry * psi_y + k_r * psi_yy);
+    // True Force: f = \div(u) + d (rho*theta)/dt, with the common exp(-gamma*t) factor removed.
+    // The force depends on time, but this QFunction is called once and never sees the
+    // updated t, so only the time-independent part is stored here; the residual
+    // multiplies it by exp(-gamma*t), i.e. psi = exp(-gamma*t) * psi1, ...
+    true_force[i + 0 * Q] = div_u - alpha_a * gamma * psi;
+    // True solution [psi, u_x, u_y] at t = t_final
+    true_solution[i + 0 * Q] = exp(-gamma * t_final) * psi;
+    true_solution[i + 1 * Q] = u[0];
+    true_solution[i + 2 * Q] = u[1];
+  }  // End of Quadrature Point Loop
+  return 0;
+}
+// -----------------------------------------------------------------------------
+
+#endif  // End of RICHARD_TRUE2D_H
diff --git a/examples/Hdiv-mixed/qfunctions/richard-true3d.h b/examples/Hdiv-mixed/qfunctions/richard-true3d.h
new file mode 100644
index 0000000000..b11f3ae78b
--- /dev/null
+++ b/examples/Hdiv-mixed/qfunctions/richard-true3d.h
@@ -0,0 +1,131 @@
+// Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
+// the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
+// reserved. See files LICENSE and NOTICE for details.
+//
+// This file is part of CEED, a collection of benchmarks, miniapps, software
+// libraries and APIs for efficient high-order finite element and spectral
+// element discretizations for exascale applications. For more information and
+// source code availability see http://github.com/ceed.
+//
+// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
+// a collaborative effort of two U.S. Department of Energy organizations (Office
+// of Science and the National Nuclear Security Administration) responsible for
+// the planning and preparation of a capable exascale ecosystem, including
+// software, applications, hardware, advanced system engineering and early
+// testbed platforms, in support of the nation's exascale computing imperative.
+
+/// @file
+/// Force of Richard problem 3D (hex element) using PETSc
+
+#ifndef RICHARD_TRUE3D_H
+#define RICHARD_TRUE3D_H
+
+#include <ceed.h>
+#include <math.h>
+
+#include "utils.h"
+
+// See Matthew Farthing, Christopher Kees, Cass Miller (2003)
+// https://www.sciencedirect.com/science/article/pii/S0309170802001872
+// -----------------------------------------------------------------------------
+// Strong form:
+//  u        = -rho*k_r*K *[grad(\psi) - rho*g_u]   in \Omega x [0,T]
+//  -\div(u) = -f  + d (rho*theta)/dt              in \Omega x [0,T]
+//  p        = p_b                                  on \Gamma_D x [0,T]
+//  u.n      = u_b                                  on \Gamma_N x [0,T]
+//  p        = p_0                                  in \Omega, t = 0
+//
+//  Where rho = rho_a/rho_a0, rho_a = rho_a0*exp(\beta * (p - p0)), p0 = 101325 Pa is atmospheric pressure
+//  rho_a0 is the density at p_0, g_u = g/norm(g) where g is gravity.
+//  k_r = b_a + alpha_a * (\psi - x2), where \psi = p / (rho_a0 * norm(g)) and x2 is vertical axis
+//
+// Weak form: Find (u, \psi) \in VxQ (V=H(div), Q=L^2) on \Omega
+//  (v, K^{-1}/rho*k_r * u) -(v, rho*g_u) -(\div(v), \psi) = -<v, p_b*n>_{\Gamma_D}
+// -(q, \div(u))  + (q, f) -(q, d (rho*\theta)/dt ) = 0
+//
+// We solve MMS for  K = kappa*I and beta=0 ==> rho=1 and \theta = alpha_a*\psi, so
+// -(q, d (rho*\theta)/dt ) = -alpha_a*(q, d(\psi)/dt )
+//
+// This QFunction sets up the true solution and forcing f of the above equation
+// Inputs:
+//   coords: physical coordinate
+//
+// Output:
+//   true_force     : = div(u) + d (rho*theta)/dt
+//   true_solution  : = [\psi, u] where \psi, u are the exact solution
+// -----------------------------------------------------------------------------
+// We have 3 experiment parameters as described in Table 1:P1, P2, P3
+// Matthew Farthing, Christopher Kees, Cass Miller (2003)
+// https://www.sciencedirect.com/science/article/pii/S0309170802001872
+#ifndef RICHARD_CTX
+#define RICHARD_CTX
+typedef struct RICHARDContext_ *RICHARDContext;
+struct RICHARDContext_ {
+  CeedScalar kappa;           // permeability scale, K = kappa*I
+  CeedScalar g;               // magnitude of gravity, norm(g), as in \psi = p / (rho_a0 * norm(g))
+  CeedScalar rho_a0;          // density at pressure p0
+  CeedScalar alpha_a, b_a;    // coefficients of k_r = b_a + alpha_a * (...)
+  CeedScalar beta, p0;        // rho_a = rho_a0*exp(beta * (p - p0)); p0 is the atmospheric pressure
+  CeedScalar t, t_final, dt;  // the true solution is evaluated at t_final; t and dt are presumably current time and time step - confirm
+  CeedScalar gamma;           // decay rate of the exp(-gamma*t) factor in the manufactured solution
+  CeedScalar lx, ly, lz;      // presumably domain extents; not read in this header - TODO confirm
+};
+#endif
+
+// -----------------------------------------------------------------------------
+// True solution and forcing for the 3D Richard MMS problem
+// -----------------------------------------------------------------------------
+CEED_QFUNCTION(RichardTrue3D)(void *ctx, const CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) {
+  // Inputs: physical coordinates, component-major (x at [i], y at [i + Q], z at [i + 2*Q])
+  const CeedScalar(*coords) = in[0];
+  // Outputs: forcing (1 component) and true solution [psi, u_x, u_y, u_z], component-major
+  CeedScalar(*true_force) = out[0], (*true_solution) = out[1];
+  // Context
+  RICHARDContext   context = (RICHARDContext)ctx;
+  const CeedScalar kappa   = context->kappa;
+  const CeedScalar alpha_a = context->alpha_a;
+  const CeedScalar b_a     = context->b_a;
+  const CeedScalar gamma   = context->gamma;
+  CeedScalar       t_final = context->t_final;
+
+  // Quadrature Point Loop
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) {
+    CeedScalar x = coords[i + 0 * Q], y = coords[i + 1 * Q], z = coords[i + 2 * Q];
+    // Full solution: psi = exp(-gamma*t)*sin(pi*x)*sin(pi*y)*sin(pi*z)
+    // The exp(-gamma*t) factor is left out of psi and its derivatives below
+    CeedScalar psi    = sin(PI_DOUBLE * x) * sin(PI_DOUBLE * y) * sin(PI_DOUBLE * z);
+    CeedScalar psi_x  = PI_DOUBLE * cos(PI_DOUBLE * x) * sin(PI_DOUBLE * y) * sin(PI_DOUBLE * z);
+    CeedScalar psi_xx = -PI_DOUBLE * PI_DOUBLE * psi;
+    CeedScalar psi_y  = PI_DOUBLE * sin(PI_DOUBLE * x) * cos(PI_DOUBLE * y) * sin(PI_DOUBLE * z);
+    CeedScalar psi_yy = -PI_DOUBLE * PI_DOUBLE * psi;
+    CeedScalar psi_z  = PI_DOUBLE * sin(PI_DOUBLE * x) * sin(PI_DOUBLE * y) * cos(PI_DOUBLE * z);
+    CeedScalar psi_zz = -PI_DOUBLE * PI_DOUBLE * psi;
+    // k_r = b_a + alpha_a * (1 - x*y*z) and its partial derivatives
+    CeedScalar k_r  = b_a + alpha_a * (1 - x * y * z);
+    CeedScalar k_rx = -alpha_a * y * z;
+    CeedScalar k_ry = -alpha_a * x * z;
+    CeedScalar k_rz = -alpha_a * x * y;
+    // rho = rho_a/rho_a0, fixed to 1 for the MMS
+    CeedScalar rho = 1.;
+    // u = -rho*k_r*K *[grad(\psi)], evaluated at t = t_final
+    CeedScalar u[3] = {-rho * kappa * exp(-gamma * t_final) * k_r * psi_x, -rho * kappa * exp(-gamma * t_final) * k_r * psi_y,
+                       -rho * kappa * exp(-gamma * t_final) * k_r * psi_z};
+    // div_u omits the exp(-gamma*t) factor (unlike u above, which is evaluated at
+    // t = t_final); it only feeds the time-independent forcing below.
+    CeedScalar div_u = -rho * kappa * (k_rx * psi_x + k_r * psi_xx + k_ry * psi_y + k_r * psi_yy + k_rz * psi_z + k_r * psi_zz);
+    // True Force: f = \div(u) + d (rho*theta)/dt, with the common exp(-gamma*t) factor removed.
+    // The force depends on time, but this QFunction is called once and never sees the
+    // updated t, so only the time-independent part is stored here; the residual
+    // multiplies it by exp(-gamma*t), i.e. psi = exp(-gamma*t) * psi1, ...
+    true_force[i + 0 * Q] = div_u - alpha_a * gamma * psi;
+    // True solution [psi, u_x, u_y, u_z] at t = t_final
+    true_solution[i + 0 * Q] = exp(-gamma * t_final) * psi;
+    true_solution[i + 1 * Q] = u[0];
+    true_solution[i + 2 * Q] = u[1];
+    true_solution[i + 3 * Q] = u[2];
+  }  // End of Quadrature Point Loop
+  return 0;
+}
+// -----------------------------------------------------------------------------
+
+#endif  // End of RICHARD_TRUE3D_H
diff --git a/examples/Hdiv-mixed/qfunctions/utils.h b/examples/Hdiv-mixed/qfunctions/utils.h
new file mode 100644
index 0000000000..918d9ff953
--- /dev/null
+++ b/examples/Hdiv-mixed/qfunctions/utils.h
@@ -0,0 +1,192 @@
+/// @file
+/// Utility helpers QFunction source
+
+#ifndef utils_qf_h
+#define utils_qf_h
+
+#include <math.h>
+
+#include "ceed/ceed-f64.h"
+
+#define PI_DOUBLE 3.14159265358979323846
+
+// -----------------------------------------------------------------------------
+// Compute alpha * A * B = C
+// -----------------------------------------------------------------------------
+CEED_QFUNCTION_HELPER int AlphaMatMatMult3x3(const CeedScalar alpha, const CeedScalar A[3][3], const CeedScalar B[3][3], CeedScalar C[3][3]) {
+  // C = alpha * A * B, accumulated entry by entry directly in C; always returns 0
+  for (CeedInt row = 0; row < 3; row++) {
+    for (CeedInt col = 0; col < 3; col++) {
+      C[row][col] = 0;
+      // Terms (alpha * A[row][n]) * B[n][col] are added for n = 0, 1, 2 in that order
+      for (CeedInt n = 0; n < 3; n++) C[row][col] += alpha * A[row][n] * B[n][col];
+    }
+  }
+
+  return 0;
+}
+
+// -----------------------------------------------------------------------------
+// Compute alpha * A^T * B = C
+// -----------------------------------------------------------------------------
+CEED_QFUNCTION_HELPER int AlphaMatTransposeMatMult3x3(const CeedScalar alpha, const CeedScalar A[3][3], const CeedScalar B[3][3],
+                                                      CeedScalar C[3][3]) {
+  // C = alpha * A^T * B: entry (row, col) pairs column `row` of A with column `col` of B
+  for (CeedInt row = 0; row < 3; row++) {
+    for (CeedInt col = 0; col < 3; col++) {
+      C[row][col] = 0;
+      // Terms (alpha * A[n][row]) * B[n][col] are added for n = 0, 1, 2 in that order
+      for (CeedInt n = 0; n < 3; n++) C[row][col] += alpha * A[n][row] * B[n][col];
+    }
+  }
+
+  return 0;
+}
+
+// -----------------------------------------------------------------------------
+// Compute determinant of 3x3 matrix
+// -----------------------------------------------------------------------------
+CEED_QFUNCTION_HELPER CeedScalar MatDet3x3(const CeedScalar A[3][3]) {
+  // Cofactor expansion along the first column: det(A) = A00*B11 + A10*B12 + A20*B13
+  const CeedScalar B11 = A[1][1] * A[2][2] - A[1][2] * A[2][1];
+  const CeedScalar B12 = A[0][2] * A[2][1] - A[0][1] * A[2][2];
+  const CeedScalar B13 = A[0][1] * A[1][2] - A[0][2] * A[1][1];
+  return A[0][0] * B11 + A[1][0] * B12 + A[2][0] * B13;
+}
+
+// -----------------------------------------------------------------------------
+// Compute inverse of 3x3 matrix (general, not only symmetric) from its determinant
+// -----------------------------------------------------------------------------
+CEED_QFUNCTION_HELPER int MatInverse3x3(const CeedScalar A[3][3], const CeedScalar det_A, CeedScalar A_inv[3][3]) {
+  // A^(-1) = adj(A) / det_A, where adj(A) is the transposed cofactor matrix.
+  // det_A is supplied by the caller (e.g. from MatDet3x3) and must be nonzero.
+  // All nine adjugate entries are formed before A_inv is written.
+  // For symmetric A the mirrored off-diagonal entries below agree exactly, so
+  // callers passing symmetric matrices (such as K = kappa*I) get the same
+  // values as with a symmetric-only formula. Always returns 0.
+  const CeedScalar adj[3][3] = {
+      {A[1][1] * A[2][2] - A[1][2] * A[2][1],
+       A[0][2] * A[2][1] - A[0][1] * A[2][2],
+       A[0][1] * A[1][2] - A[0][2] * A[1][1]},
+      {A[1][2] * A[2][0] - A[1][0] * A[2][2],
+       A[0][0] * A[2][2] - A[0][2] * A[2][0],
+       A[0][2] * A[1][0] - A[0][0] * A[1][2]},
+      {A[1][0] * A[2][1] - A[1][1] * A[2][0],
+       A[0][1] * A[2][0] - A[0][0] * A[2][1],
+       A[0][0] * A[1][1] - A[0][1] * A[1][0]}
+  };
+
+  for (CeedInt j = 0; j < 3; j++) {
+    for (CeedInt k = 0; k < 3; k++) A_inv[j][k] = adj[j][k] / det_A;
+  }
+
+  return 0;
+}
+
+// -----------------------------------------------------------------------------
+// Compute matrix-vector product: alpha*A*u
+// -----------------------------------------------------------------------------
+CEED_QFUNCTION_HELPER int AlphaMatVecMult3x3(const CeedScalar alpha, const CeedScalar A[3][3], const CeedScalar u[3], CeedScalar v[3]) {
+  // Compute v = alpha*A*u (v is zeroed while u is still being read, so v must not alias u); always returns 0
+  for (CeedInt k = 0; k < 3; k++) {
+    v[k] = 0;
+    for (CeedInt m = 0; m < 3; m++) v[k] += A[k][m] * u[m] * alpha;
+  }
+
+  return 0;
+}
+
+// -----------------------------------------------------------------------------
+// Compute matrix-vector product: alpha*A^T*u
+// -----------------------------------------------------------------------------
+CEED_QFUNCTION_HELPER int AlphaMatTransposeVecMult3x3(const CeedScalar alpha, const CeedScalar A[3][3], const CeedScalar u[3], CeedScalar v[3]) {
+  // Compute v = alpha*A^T*u (v is zeroed while u is still being read, so v must not alias u); always returns 0
+  for (CeedInt k = 0; k < 3; k++) {
+    v[k] = 0;
+    for (CeedInt m = 0; m < 3; m++) v[k] += A[m][k] * u[m] * alpha;
+  }
+
+  return 0;
+}
+
+// -----------------------------------------------------------------------------
+// Compute alpha * A * B = C
+// -----------------------------------------------------------------------------
+CEED_QFUNCTION_HELPER int AlphaMatMatMult2x2(const CeedScalar alpha, const CeedScalar A[2][2], const CeedScalar B[2][2], CeedScalar C[2][2]) {
+  // C = alpha * A * B, accumulated entry by entry directly in C; always returns 0
+  for (CeedInt row = 0; row < 2; row++) {
+    for (CeedInt col = 0; col < 2; col++) {
+      C[row][col] = 0;
+      // Terms (alpha * A[row][n]) * B[n][col] are added for n = 0, 1 in that order
+      for (CeedInt n = 0; n < 2; n++) C[row][col] += alpha * A[row][n] * B[n][col];
+    }
+  }
+
+  return 0;
+}
+
+// -----------------------------------------------------------------------------
+// Compute alpha * A^T * B = C
+// -----------------------------------------------------------------------------
+CEED_QFUNCTION_HELPER int AlphaMatTransposeMatMult2x2(const CeedScalar alpha, const CeedScalar A[2][2], const CeedScalar B[2][2],
+                                                      CeedScalar C[2][2]) {
+  // C = alpha * A^T * B: entry (row, col) pairs column `row` of A with column `col` of B
+  for (CeedInt row = 0; row < 2; row++) {
+    for (CeedInt col = 0; col < 2; col++) {
+      C[row][col] = 0;
+      // Terms (alpha * A[n][row]) * B[n][col] are added for n = 0, 1 in that order
+      for (CeedInt n = 0; n < 2; n++) C[row][col] += alpha * A[n][row] * B[n][col];
+    }
+  }
+
+  return 0;
+}
+
+// -----------------------------------------------------------------------------
+// Compute determinant of 2x2 matrix
+// -----------------------------------------------------------------------------
+CEED_QFUNCTION_HELPER CeedScalar MatDet2x2(const CeedScalar A[2][2]) {
+  // det(A) = A00*A11 - A10*A01
+  return A[0][0] * A[1][1] - A[1][0] * A[0][1];
+}
+
+// -----------------------------------------------------------------------------
+// Compute inverse of 2x2 matrix (general, not only symmetric) from its determinant
+// -----------------------------------------------------------------------------
+CEED_QFUNCTION_HELPER int MatInverse2x2(const CeedScalar A[2][2], const CeedScalar det_A, CeedScalar A_inv[2][2]) {
+  // A^(-1) = adj(A) / det_A; det_A is supplied by the caller and must be nonzero. Always returns 0
+  A_inv[0][0] = A[1][1] / det_A;
+  A_inv[0][1] = -A[0][1] / det_A;
+  A_inv[1][0] = -A[1][0] / det_A;
+  A_inv[1][1] = A[0][0] / det_A;
+
+  return 0;
+}
+
+// -----------------------------------------------------------------------------
+// Compute matrix-vector product: alpha*A*u
+// -----------------------------------------------------------------------------
+CEED_QFUNCTION_HELPER int AlphaMatVecMult2x2(const CeedScalar alpha, const CeedScalar A[2][2], const CeedScalar u[2], CeedScalar v[2]) {
+  // Compute v = alpha*A*u (v is zeroed while u is still being read, so v must not alias u); always returns 0
+  for (CeedInt k = 0; k < 2; k++) {
+    v[k] = 0;
+    for (CeedInt m = 0; m < 2; m++) v[k] += A[k][m] * u[m] * alpha;
+  }
+
+  return 0;
+}
+
+// -----------------------------------------------------------------------------
+// Compute matrix-vector product: alpha*A^T*u
+// -----------------------------------------------------------------------------
+CEED_QFUNCTION_HELPER int AlphaMatTransposeVecMult2x2(const CeedScalar alpha, const CeedScalar A[2][2], const CeedScalar u[2], CeedScalar v[2]) {
+  // Compute v = alpha*A^T*u (v is zeroed while u is still being read, so v must not alias u); always returns 0
+  for (CeedInt k = 0; k < 2; k++) {
+    v[k] = 0;
+    for (CeedInt m = 0; m < 2; m++) v[k] += A[m][k] * u[m] * alpha;
+  }
+
+  return 0;
+}
+
+#endif  // utils_qf_h
diff --git a/examples/Hdiv-mixed/src/cl-options.c b/examples/Hdiv-mixed/src/cl-options.c
new file mode 100644
index 0000000000..0b8bf4e387
--- /dev/null
+++ b/examples/Hdiv-mixed/src/cl-options.c
@@ -0,0 +1,68 @@
+// Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
+// the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
+// reserved. See files LICENSE and NOTICE for details.
+//
+// This file is part of CEED, a collection of benchmarks, miniapps, software
+// libraries and APIs for efficient high-order finite element and spectral
+// element discretizations for exascale applications. For more information and
+// source code availability see http://github.com/ceed.
+//
+// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
+// a collaborative effort of two U.S. Department of Energy organizations (Office
+// of Science and the National Nuclear Security Administration) responsible for
+// the planning and preparation of a capable exascale ecosystem, including
+// software, applications, hardware, advanced system engineering and early
+// testbed platforms, in support of the nation's exascale computing imperative.
+
+/// @file
+/// Command line option processing for H(div) example using PETSc
+
+#include "../include/cl-options.h"
+
+// Process general command line options
+PetscErrorCode ProcessCommandLineOptions(AppCtx app_ctx) {
+  PetscBool problem_flag = PETSC_FALSE;
+  PetscBool ceed_flag    = PETSC_FALSE;
+  PetscFunctionBeginUser;
+  // Fills app_ctx from the PETSc options database; options that are not given keep the defaults assigned below
+  PetscOptionsBegin(app_ctx->comm, NULL, "H(div) mixed-problem in PETSc with libCEED", NULL);
+
+  PetscCall(PetscOptionsString("-ceed", "CEED resource specifier", NULL, app_ctx->ceed_resource, app_ctx->ceed_resource,
+                               sizeof(app_ctx->ceed_resource), &ceed_flag));
+
+  // Provide default ceed resource if not specified
+  if (!ceed_flag) {
+    const char *ceed_resource = "/cpu/self";
+    PetscCall(PetscStrncpy(app_ctx->ceed_resource, ceed_resource, sizeof(app_ctx->ceed_resource)));  // bounded by the real buffer size
+  }
+
+  PetscCall(PetscOptionsFList("-problem", "Problem to solve", NULL, app_ctx->problems, app_ctx->problem_name, app_ctx->problem_name,
+                              sizeof(app_ctx->problem_name), &problem_flag));
+  // Provide default problem if not specified
+  if (!problem_flag) {
+    const char *problem_name = "darcy2d";
+    PetscCall(PetscStrncpy(app_ctx->problem_name, problem_name, sizeof(app_ctx->problem_name)));  // bounded by the real buffer size
+  }
+  app_ctx->degree = 1;
+  PetscCall(PetscOptionsInt("-degree", "Polynomial degree of finite elements", NULL, app_ctx->degree, &app_ctx->degree, NULL));
+
+  app_ctx->q_extra = 0;
+  PetscCall(PetscOptionsInt("-q_extra", "Number of extra quadrature points", NULL, app_ctx->q_extra, &app_ctx->q_extra, NULL));
+  app_ctx->view_solution = PETSC_FALSE;
+  PetscCall(PetscOptionsBool("-view_solution", "View solution in Paraview", NULL, app_ctx->view_solution, &(app_ctx->view_solution), NULL));
+  app_ctx->quartic = PETSC_FALSE;
+  PetscCall(PetscOptionsBool("-quartic", "To test PetscViewer", NULL, app_ctx->quartic, &(app_ctx->quartic), NULL));
+
+  PetscCall(PetscStrncpy(app_ctx->output_dir, ".", 2));
+  PetscCall(PetscOptionsString("-output_dir", "Output directory", NULL, app_ctx->output_dir, app_ctx->output_dir, sizeof(app_ctx->output_dir), NULL));
+
+  app_ctx->output_freq = 10;
+  PetscCall(PetscOptionsInt("-output_freq", "Frequency of output, in number of steps", NULL, app_ctx->output_freq, &app_ctx->output_freq, NULL));
+  app_ctx->bc_pressure_count = 16;  // NOTE(review): passed as the capacity of bc_faces; confirm it matches the array size in AppCtx
+  // we can set one face by: -bc_faces 1 OR multiple faces by :-bc_faces 1,2,3
+  PetscCall(PetscOptionsIntArray("-bc_faces", "Face IDs to apply pressure BC", NULL, app_ctx->bc_faces, &app_ctx->bc_pressure_count, NULL));
+
+  PetscOptionsEnd();
+
+  PetscFunctionReturn(0);
+}
diff --git a/examples/Hdiv-mixed/src/post-processing.c b/examples/Hdiv-mixed/src/post-processing.c
new file mode 100644
index 0000000000..8bc99d0a04
--- /dev/null
+++ b/examples/Hdiv-mixed/src/post-processing.c
@@ -0,0 +1,248 @@
+#include "../include/post-processing.h"
+
+#include "../include/setup-solvers.h"
+#include "ceed/ceed.h"
+// -----------------------------------------------------------------------------
+// Print a summary of the run: MPI, libCEED, PETSc, problem size, solver status, and L2 errors
+// -----------------------------------------------------------------------------
+PetscErrorCode PrintOutput(DM dm, Ceed ceed, AppCtx app_ctx, PetscBool has_ts, TS ts, SNES snes, KSP ksp, Vec U, CeedScalar l2_error_u,
+                           CeedScalar l2_error_p) {
+  PetscFunctionBeginUser;
+  // Prints a run summary on app_ctx->comm; ts is used only when has_ts is true, ksp only when it is false
+  const char *used_resource;
+  CeedMemType mem_type_backend;
+  CeedGetResource(ceed, &used_resource);
+  CeedGetPreferredMemType(ceed, &mem_type_backend);
+  char hostname[PETSC_MAX_PATH_LEN];
+  PetscCall(PetscGetHostName(hostname, sizeof hostname));
+  PetscMPIInt comm_size;  // MPI_Comm_size writes an int; PetscInt may be 64-bit, and the value is printed with %d
+  PetscCallMPI(MPI_Comm_size(app_ctx->comm, &comm_size));
+  PetscCall(PetscPrintf(app_ctx->comm,
+                        "\n-- Mixed H(div) Example - libCEED + PETSc --\n"
+                        "  MPI:\n"
+                        "    Hostname                           : %s\n"
+                        "    Total ranks                        : %d\n"
+                        "  libCEED:\n"
+                        "    libCEED Backend                    : %s\n"
+                        "    libCEED Backend MemType            : %s\n",
+                        hostname, comm_size, used_resource, CeedMemTypes[mem_type_backend]));
+
+  MatType mat_type;
+  VecType vec_type;
+  PetscCall(DMGetMatType(dm, &mat_type));
+  PetscCall(DMGetVecType(dm, &vec_type));
+  PetscCall(PetscPrintf(app_ctx->comm,
+                        "  PETSc:\n"
+                        "    DM MatType                         : %s\n"
+                        "    DM VecType                         : %s\n",
+                        mat_type, vec_type));
+
+  PetscInt U_l_size, U_g_size;
+  PetscCall(VecGetSize(U, &U_g_size));
+  PetscCall(VecGetLocalSize(U, &U_l_size));
+  PetscCall(PetscPrintf(app_ctx->comm,
+                        "  Problem:\n"
+                        "    Problem Name                       : %s\n"
+                        "    Global nodes (u + p)               : %" PetscInt_FMT "\n"
+                        "    Owned nodes (u + p)                : %" PetscInt_FMT "\n",
+                        app_ctx->problem_name, U_g_size, U_l_size));
+  // -- TS
+  if (has_ts) {
+    PetscInt          ts_steps;
+    TSType            ts_type;
+    TSConvergedReason ts_reason;
+    PetscCall(TSGetStepNumber(ts, &ts_steps));
+    PetscCall(TSGetType(ts, &ts_type));
+    PetscCall(TSGetConvergedReason(ts, &ts_reason));
+    PetscCall(PetscPrintf(app_ctx->comm,
+                          "  TS:\n"
+                          "    TS Type                            : %s\n"
+                          "    TS Convergence                     : %s\n"
+                          "    Number of TS steps                 : %" PetscInt_FMT "\n"
+                          "    Final time                         : %g\n",
+                          ts_type, TSConvergedReasons[ts_reason], ts_steps, (double)app_ctx->t_final));
+    // With a TS, report on the SNES it owns; the snes argument is replaced
+    PetscCall(TSGetSNES(ts, &snes));
+  }
+  // -- SNES
+  PetscInt its, snes_its = 0;
+  PetscCall(SNESGetIterationNumber(snes, &its));
+  snes_its += its;
+  SNESType            snes_type;
+  SNESConvergedReason snes_reason;
+  PetscReal           snes_rnorm;
+  PetscCall(SNESGetType(snes, &snes_type));
+  PetscCall(SNESGetConvergedReason(snes, &snes_reason));
+  PetscCall(SNESGetFunctionNorm(snes, &snes_rnorm));
+  PetscCall(PetscPrintf(app_ctx->comm,
+                        "  SNES:\n"
+                        "    SNES Type                          : %s\n"
+                        "    SNES Convergence                   : %s\n"
+                        "    Total SNES Iterations              : %" PetscInt_FMT "\n"
+                        "    Final rnorm                        : %e\n",
+                        snes_type, SNESConvergedReasons[snes_reason], snes_its, (double)snes_rnorm));
+  if (!has_ts) {
+    PetscInt ksp_its = 0;
+    PetscCall(SNESGetLinearSolveIterations(snes, &its));
+    ksp_its += its;
+    KSPType            ksp_type;
+    KSPConvergedReason ksp_reason;
+    PetscReal          ksp_rnorm;
+    PC                 pc;
+    PCType             pc_type;
+    PetscCall(KSPGetPC(ksp, &pc));
+    PetscCall(PCGetType(pc, &pc_type));
+    PetscCall(KSPGetType(ksp, &ksp_type));
+    PetscCall(KSPGetConvergedReason(ksp, &ksp_reason));
+    PetscCall(KSPGetIterationNumber(ksp, &ksp_its));  // NOTE(review): overwrites the SNES total gathered above; confirm which count is intended
+    PetscCall(KSPGetResidualNorm(ksp, &ksp_rnorm));
+    PetscCall(PetscPrintf(app_ctx->comm,
+                          "  KSP:\n"
+                          "    KSP Type                           : %s\n"
+                          "    PC Type                            : %s\n"
+                          "    KSP Convergence                    : %s\n"
+                          "    Total KSP Iterations               : %" PetscInt_FMT "\n"
+                          "    Final rnorm                        : %e\n",
+                          ksp_type, pc_type, KSPConvergedReasons[ksp_reason], ksp_its, (double)ksp_rnorm));
+  }
+
+  PetscCall(PetscPrintf(app_ctx->comm,
+                        "  L2 Error (MMS):\n"
+                        "    L2 error of u and p                : %e, %e\n",
+                        (double)l2_error_u, (double)l2_error_p));
+  PetscFunctionReturn(0);
+}
+
+// -----------------------------------------------------------------------------
+// Setup operator context data for initial condition, u field
+// -----------------------------------------------------------------------------
+PetscErrorCode SetupProjectVelocityCtx_Hdiv(MPI_Comm comm, DM dm, Ceed ceed, CeedData ceed_data, OperatorApplyContext ctx_Hdiv) {
+  PetscFunctionBeginUser;
+  // Record the mesh and communicator of the H(div) solution
+  ctx_Hdiv->dm   = dm;
+  ctx_Hdiv->comm = comm;
+  // Local work vector on dm (released in CtxVecDestroy)
+  PetscCall(DMCreateLocalVector(dm, &ctx_Hdiv->X_loc));
+  // libCEED handles are stored exactly as given
+  ctx_Hdiv->ceed   = ceed;
+  ctx_Hdiv->x_ceed = ceed_data->u_ceed;
+  // Only the input side is filled in: y_ceed and op_apply are not set by this function
+  PetscFunctionReturn(0);
+}
+
+PetscErrorCode SetupProjectVelocityCtx_H1(MPI_Comm comm, DM dm_H1, Ceed ceed, CeedData ceed_data, OperatorApplyContext ctx_H1) {
+  VecType dm_vec_type;  // vector type configured on dm_H1, cached in the context below
+  PetscFunctionBeginUser;
+  PetscCall(DMGetVecType(dm_H1, &dm_vec_type));
+  ctx_H1->dm   = dm_H1;
+  ctx_H1->comm = comm;
+  // Local input vector on dm_H1 plus an output vector duplicated from it
+  PetscCall(DMCreateLocalVector(dm_H1, &ctx_H1->X_loc));
+  PetscCall(VecDuplicate(ctx_H1->X_loc, &ctx_H1->Y_loc));
+  ctx_H1->ceed            = ceed;
+  ctx_H1->vec_type        = dm_vec_type;
+  ctx_H1->x_coord         = ceed_data->x_coord;
+  ctx_H1->x_ceed          = ceed_data->up_ceed;
+  ctx_H1->y_ceed          = ceed_data->vp_ceed;
+  ctx_H1->elem_restr_u_H1 = ceed_data->elem_restr_u_H1;
+  ctx_H1->op_rhs_H1       = ceed_data->op_rhs_H1;
+  ctx_H1->op_apply        = ceed_data->op_post_mass;
+  PetscFunctionReturn(0);
+}
+// -----------------------------------------------------------------------------
+// Project the H(div) velocity solution onto the H1 space by solving M*U_H1 = rhs_H1
+// -----------------------------------------------------------------------------
+PetscErrorCode ProjectVelocity(AppCtx app_ctx, Vec U, Vec *U_H1) {
+  PetscFunctionBeginUser;
+  const PetscScalar *x;
+  PetscMemType       x_mem_type;
+  // U: global vector on ctx_Hdiv->dm (input). *U_H1: already-created Vec that is zeroed and overwritten with the KSP solution.
+  // ----------------------------------------------
+  // Create local rhs for u field
+  // ----------------------------------------------
+  Vec          rhs_loc_H1;
+  PetscScalar *ru;
+  PetscMemType ru_mem_type;
+  PetscCall(DMCreateLocalVector(app_ctx->ctx_H1->dm, &rhs_loc_H1));
+  PetscCall(VecZeroEntries(rhs_loc_H1));
+  PetscCall(VecGetArrayAndMemType(rhs_loc_H1, &ru, &ru_mem_type));
+  CeedElemRestrictionCreateVector(app_ctx->ctx_H1->elem_restr_u_H1, &app_ctx->ctx_H1->rhs_ceed_H1, NULL);
+  CeedVectorSetArray(app_ctx->ctx_H1->rhs_ceed_H1, MemTypeP2C(ru_mem_type), CEED_USE_POINTER, ru);
+  // rhs_ceed_H1 now points at the storage of rhs_loc_H1 (CEED_USE_POINTER) until the matching CeedVectorTakeArray below
+  // Global-to-local: map final U in Hdiv space to local vector
+  PetscCall(DMGlobalToLocal(app_ctx->ctx_Hdiv->dm, U, INSERT_VALUES, app_ctx->ctx_Hdiv->X_loc));
+  // Place Hdiv PETSc vectors in CEED vectors
+  PetscCall(VecGetArrayReadAndMemType(app_ctx->ctx_Hdiv->X_loc, &x, &x_mem_type));
+  CeedVectorSetArray(app_ctx->ctx_Hdiv->x_ceed, MemTypeP2C(x_mem_type), CEED_USE_POINTER, (PetscScalar *)x);
+  // NOTE(review): x_ceed is not passed to the apply call below; presumably op_rhs_H1 reads it as a passive field (confirm)
+  // Apply operator to create RHS for u field
+  CeedOperatorApply(app_ctx->ctx_H1->op_rhs_H1, app_ctx->ctx_H1->x_coord, app_ctx->ctx_H1->rhs_ceed_H1, CEED_REQUEST_IMMEDIATE);
+
+  // Restore Hdiv vector
+  CeedVectorTakeArray(app_ctx->ctx_Hdiv->x_ceed, MemTypeP2C(x_mem_type), NULL);
+  PetscCall(VecRestoreArrayReadAndMemType(app_ctx->ctx_Hdiv->X_loc, &x));
+
+  // ----------------------------------------------
+  // Create global rhs for u field
+  // ----------------------------------------------
+  Vec rhs_H1;
+  CeedVectorTakeArray(app_ctx->ctx_H1->rhs_ceed_H1, MemTypeP2C(ru_mem_type), NULL);
+  PetscCall(VecRestoreArrayAndMemType(rhs_loc_H1, &ru));
+  PetscCall(DMCreateGlobalVector(app_ctx->ctx_H1->dm, &rhs_H1));
+  PetscCall(VecZeroEntries(rhs_H1));
+  PetscCall(DMLocalToGlobal(app_ctx->ctx_H1->dm, rhs_loc_H1, ADD_VALUES, rhs_H1));
+  // ADD_VALUES sums the local contributions into the zeroed global vector
+  // ----------------------------------------------
+  // Solve for U_H1, M*U_H1 = rhs_H1
+  // ----------------------------------------------
+  PetscInt UH1_g_size, UH1_l_size;
+  PetscCall(VecGetSize(*U_H1, &UH1_g_size));
+  // Local size for matShell
+  PetscCall(VecGetLocalSize(*U_H1, &UH1_l_size));
+
+  // Operator
+  Mat mat_ksp_projection;
+  // -- Square shell matrix sized like *U_H1; its MatMult is ApplyMatOp with ctx_H1 as the shell context
+  PetscCall(MatCreateShell(app_ctx->comm, UH1_l_size, UH1_l_size, UH1_g_size, UH1_g_size, app_ctx->ctx_H1, &mat_ksp_projection));
+  PetscCall(MatShellSetOperation(mat_ksp_projection, MATOP_MULT, (void (*)(void))ApplyMatOp));
+  PetscCall(MatShellSetVecType(mat_ksp_projection, app_ctx->ctx_H1->vec_type));
+  // The shell matrix serves as both operator and preconditioning matrix; the solver is configured from the options database
+  KSP ksp_projection;
+  PetscCall(KSPCreate(app_ctx->ctx_H1->comm, &ksp_projection));
+  PetscCall(KSPSetOperators(ksp_projection, mat_ksp_projection, mat_ksp_projection));
+  PetscCall(KSPSetFromOptions(ksp_projection));
+  PetscCall(KSPSetUp(ksp_projection));
+  PetscCall(VecZeroEntries(*U_H1));
+  PetscCall(KSPSolve(ksp_projection, rhs_H1, *U_H1));
+
+  // Clean up
+  PetscCall(VecDestroy(&rhs_loc_H1));
+  PetscCall(VecDestroy(&rhs_H1));
+  PetscCall(MatDestroy(&mat_ksp_projection));
+  PetscCall(KSPDestroy(&ksp_projection));
+  CeedVectorDestroy(&app_ctx->ctx_H1->rhs_ceed_H1);
+  // Everything created in this function has been released; *U_H1 remains owned by the caller
+  PetscFunctionReturn(0);
+};
+
+PetscErrorCode CtxVecDestroy(AppCtx app_ctx) {
+  PetscFunctionBegin;
+  PetscCall(VecDestroy(&app_ctx->ctx_Hdiv->X_loc));  // H(div) context: input vector only
+  PetscCall(VecDestroy(&app_ctx->ctx_H1->X_loc));    // H1 context
+  PetscCall(VecDestroy(&app_ctx->ctx_H1->Y_loc));
+  PetscCall(VecDestroy(&app_ctx->ctx_initial_u0->X_loc));  // initial condition, u0
+  PetscCall(VecDestroy(&app_ctx->ctx_initial_u0->Y_loc));
+  PetscCall(VecDestroy(&app_ctx->ctx_initial_p0->X_loc));  // initial condition, p0
+  PetscCall(VecDestroy(&app_ctx->ctx_initial_p0->Y_loc));
+  PetscCall(VecDestroy(&app_ctx->ctx_residual_ut->X_loc));  // residual_ut context (also owns X_t_loc)
+  PetscCall(VecDestroy(&app_ctx->ctx_residual_ut->X_t_loc));
+  PetscCall(VecDestroy(&app_ctx->ctx_residual_ut->Y_loc));
+  PetscCall(VecDestroy(&app_ctx->ctx_residual->X_loc));  // residual context
+  PetscCall(VecDestroy(&app_ctx->ctx_residual->Y_loc));
+  PetscCall(VecDestroy(&app_ctx->ctx_jacobian->X_loc));  // Jacobian context
+  PetscCall(VecDestroy(&app_ctx->ctx_jacobian->Y_loc));
+  PetscCall(VecDestroy(&app_ctx->ctx_error->X_loc));  // error context
+  PetscCall(VecDestroy(&app_ctx->ctx_error->Y_loc));
+  PetscFunctionReturn(0);
+}
+// -----------------------------------------------------------------------------
diff --git a/examples/Hdiv-mixed/src/setup-boundary.c b/examples/Hdiv-mixed/src/setup-boundary.c
new file mode 100644
index 0000000000..81cd9bd42f
--- /dev/null
+++ b/examples/Hdiv-mixed/src/setup-boundary.c
@@ -0,0 +1,100 @@
+#include "../include/setup-boundary.h"
+
+// ---------------------------------------------------------------------------
+// Create boundary label
+// ---------------------------------------------------------------------------
+PetscErrorCode CreateBCLabel(DM dm, const char name[]) {
+  DMLabel bc_label;
+
+  PetscFunctionBeginUser;
+  // Create a label called `name` on dm and fetch its handle
+  PetscCall(DMCreateLabel(dm, name));
+  PetscCall(DMGetLabel(dm, name, &bc_label));
+  // Mark the boundary faces (PETSC_DETERMINE leaves the marker value to PETSc), then complete the label
+  PetscCall(DMPlexMarkBoundaryFaces(dm, PETSC_DETERMINE, bc_label));
+  PetscCall(DMPlexLabelComplete(dm, bc_label));
+  PetscFunctionReturn(0);
+}
+
+// ---------------------------------------------------------------------------
+// Add Dirichlet boundaries to DM
+// ---------------------------------------------------------------------------
+PetscErrorCode DMAddBoundariesDirichlet(DM dm) {
+  DMLabel     mms_label;
+  PetscBool   label_exists;
+  PetscInt    mms_face_ids[1] = {1};
+  const char *label_name      = "MMS Face Sets";
+
+  PetscFunctionBeginUser;
+
+  // Make sure the boundary label exists, building it on first use
+  PetscCall(DMHasLabel(dm, label_name, &label_exists));
+  if (!label_exists) PetscCall(CreateBCLabel(dm, label_name));
+
+  // Register essential BC "mms" on label value 1 of field 0, with values supplied by BoundaryDirichletMMS
+  PetscCall(DMGetLabel(dm, label_name, &mms_label));
+  PetscCall(DMAddBoundary(dm, DM_BC_ESSENTIAL, "mms", mms_label, 1, mms_face_ids, 0, 0, NULL, (void (*)(void))BoundaryDirichletMMS, NULL, NULL, NULL));
+  PetscFunctionReturn(0);
+}
+
+// ---------------------------------------------------------------------------
+// Add Neumann boundaries to DM
+// ---------------------------------------------------------------------------
+PetscErrorCode DMAddBoundariesPressure(Ceed ceed, CeedData ceed_data, AppCtx app_ctx, ProblemData problem_data, DM dm, CeedVector bc_pressure) {
+  PetscInt      dim;
+  CeedQFunction qf_pressure;
+  CeedOperator  op_pressure;
+  // Accumulates the pressure boundary QFunction (problem_data->bc_pressure) into bc_pressure; no-op when bc_pressure_count is 0
+  PetscFunctionBeginUser;
+
+  PetscCall(DMGetDimension(dm, &dim));
+  // dim is the number of components of the "v" output below
+  if (app_ctx->bc_pressure_count > 0) {
+    DMLabel domain_label;
+    PetscCall(DMGetLabel(dm, "Face Sets", &domain_label));
+    // Compute contribution on each boundary face
+    for (CeedInt i = 0; i < app_ctx->bc_pressure_count; i++) {
+      CeedQFunctionCreateInterior(ceed, 1, problem_data->bc_pressure, problem_data->bc_pressure_loc, &qf_pressure);
+      // NOTE(review): neither i, app_ctx->bc_faces nor domain_label is used in this body, so every pass builds the same operator
+      CeedQFunctionAddInput(qf_pressure, "weight", 1, CEED_EVAL_WEIGHT);
+      CeedQFunctionAddOutput(qf_pressure, "v", dim, CEED_EVAL_INTERP);
+      // -- Apply operator
+      CeedOperatorCreate(ceed, qf_pressure, NULL, NULL, &op_pressure);
+      CeedOperatorSetField(op_pressure, "weight", CEED_ELEMRESTRICTION_NONE, ceed_data->basis_x, CEED_VECTOR_NONE);
+      CeedOperatorSetField(op_pressure, "v", ceed_data->elem_restr_u, ceed_data->basis_u_face, CEED_VECTOR_ACTIVE);
+      // ---- Compute pressure on face
+      CeedOperatorApplyAdd(op_pressure, ceed_data->x_coord, bc_pressure, CEED_REQUEST_IMMEDIATE);
+      // NOTE(review): ApplyAdd sums into bc_pressure, so the same contribution is added bc_pressure_count times; confirm intent
+      // -- Cleanup
+      CeedQFunctionDestroy(&qf_pressure);
+      CeedOperatorDestroy(&op_pressure);
+    }
+  }
+  // libCEED return codes are not checked in this function
+  PetscFunctionReturn(0);
+}
+
+#ifndef M_PI
+#define M_PI 3.14159265358979323846
+#endif
+// ---------------------------------------------------------------------------
+// Boundary function for manufactured solution
+// ---------------------------------------------------------------------------
+PetscErrorCode BoundaryDirichletMMS(PetscInt dim, PetscReal t, const PetscReal coords[], PetscInt num_comp_u, PetscScalar *u, void *ctx) {
+  PetscScalar x = coords[0];
+  PetscScalar y = coords[1];
+  PetscScalar z = (dim > 2) ? coords[2] : 0.0;  // third coordinate is read only when it exists (was coords[1])
+  // Writes u[0..1] when dim == 2, otherwise u[0..2]; t, num_comp_u and ctx are not used
+  PetscFunctionBeginUser;
+  // Manufactured-solution boundary values evaluated at the point (x, y[, z])
+  if (dim == 2) {
+    u[0] = -M_PI * cos(M_PI * x) * sin(M_PI * y) - M_PI * y;
+    u[1] = -M_PI * sin(M_PI * x) * cos(M_PI * y) - M_PI * x;
+  } else {
+    u[0] = -M_PI * cos(M_PI * x) * sin(M_PI * y) * sin(M_PI * z) - M_PI * y * z;
+    u[1] = -M_PI * sin(M_PI * x) * cos(M_PI * y) * sin(M_PI * z) - M_PI * x * z;
+    u[2] = -M_PI * sin(M_PI * x) * sin(M_PI * y) * cos(M_PI * z) - M_PI * x * y;
+  }
+
+  PetscFunctionReturn(0);
+}
diff --git a/examples/Hdiv-mixed/src/setup-dm.c b/examples/Hdiv-mixed/src/setup-dm.c
new file mode 100644
index 0000000000..186dd90eca
--- /dev/null
+++ b/examples/Hdiv-mixed/src/setup-dm.c
@@ -0,0 +1,145 @@
+#include "../include/setup-dm.h"
+
+#include "petscerror.h"
+
+// ---------------------------------------------------------------------------
+// Create DM
+// ---------------------------------------------------------------------------
+PetscErrorCode CreateDM(MPI_Comm comm, Ceed ceed, DM *dm) {
+  PetscFunctionBeginUser;
+  // Creates *dm as a DMPLEX whose Vec/Mat types follow the preferred memory type of the libCEED backend
+  CeedMemType mem_type_backend;
+  CeedGetPreferredMemType(ceed, &mem_type_backend);
+  // Start from the host vector type so vec_type is never NULL when it reaches strstr() below
+  VecType vec_type = VECSTANDARD;
+  MatType mat_type = NULL;
+  switch (mem_type_backend) {
+    case CEED_MEM_HOST:
+      vec_type = VECSTANDARD;
+      break;
+    case CEED_MEM_DEVICE: {
+      const char *resolved;
+      CeedGetResource(ceed, &resolved);
+      if (strstr(resolved, "/gpu/cuda")) vec_type = VECCUDA;
+      else if (strstr(resolved, "/gpu/hip/occa")) vec_type = VECSTANDARD;  // https://github.com/CEED/libCEED/issues/678
+      else if (strstr(resolved, "/gpu/hip")) vec_type = VECHIP;
+      else vec_type = VECSTANDARD;
+    } break;
+  }
+  if (strstr(vec_type, VECCUDA)) mat_type = MATAIJCUSPARSE;
+  else if (strstr(vec_type, VECKOKKOS)) mat_type = MATAIJKOKKOS;
+  else mat_type = MATAIJ;
+
+  // Create DMPLEX
+  PetscCall(DMCreate(comm, dm));
+  PetscCall(DMSetType(*dm, DMPLEX));
+  PetscCall(DMSetMatType(*dm, mat_type));
+  PetscCall(DMSetVecType(*dm, vec_type));
+  // Set Tensor elements
+  PetscCall(PetscOptionsSetValue(NULL, "-dm_plex_simplex", "0"));
+  // Set CL options
+  PetscCall(DMSetFromOptions(*dm));
+  PetscCall(DMViewFromOptions(*dm, NULL, "-dm_view"));
+
+  PetscFunctionReturn(0);
+}
+
+PetscErrorCode PerturbVerticesSmooth(DM dm) {
+  Vec          coordinates;
+  PetscSection coordSection;
+  PetscScalar *coords;
+  PetscInt     v, vStart, vEnd, offset, dim;
+  PetscReal    x, y, z;
+  PetscReal    domain_min[3], domain_max[3], domain_size[3];
+
+  PetscFunctionBeginUser;
+  // Displaces the local vertex coordinates of dm in place with a smooth sine/cosine field (2D formula when dim == 2, else 3D)
+  PetscCall(DMGetDimension(dm, &dim));
+  PetscCall(DMGetCoordinateSection(dm, &coordSection));
+  PetscCall(DMGetCoordinatesLocal(dm, &coordinates));
+  PetscCall(DMPlexGetDepthStratum(dm, 0, &vStart, &vEnd));
+  // Measure the domain once, before any vertex moves. DMGetBoundingBox is collective, so it must not sit inside
+  // the per-vertex loop, where ranks owning different numbers of vertices would call it a different number of times.
+  PetscCall(DMGetBoundingBox(dm, domain_min, domain_max));
+  for (PetscInt i = 0; i < (dim == 2 ? 2 : 3); i++) domain_size[i] = domain_max[i] - domain_min[i];
+
+  PetscCall(VecGetArray(coordinates, &coords));
+  for (v = vStart; v < vEnd; v++) {
+    PetscCall(PetscSectionGetOffset(coordSection, v, &offset));
+    if (dim == 2) {
+      x = coords[offset];
+      y = coords[offset + 1];
+      coords[offset] =
+          x + (0.06 * domain_size[0]) * PetscSinReal(2.0 * PETSC_PI * x / domain_size[0]) * PetscSinReal(2.0 * PETSC_PI * y / domain_size[1]);
+      coords[offset + 1] =
+          y - (0.05 * domain_size[1]) * PetscSinReal(2.0 * PETSC_PI * x / domain_size[0]) * PetscSinReal(2.0 * PETSC_PI * y / domain_size[1]);
+    } else {
+      x              = coords[offset];
+      y              = coords[offset + 1];
+      z              = coords[offset + 2];
+      coords[offset] = x + (0.03 * domain_size[0]) * PetscSinReal(3 * PETSC_PI * x / domain_size[0]) *
+                               PetscCosReal(3 * PETSC_PI * y / domain_size[1]) * PetscCosReal(3 * PETSC_PI * z / domain_size[2]);
+      coords[offset + 1] = y - (0.04 * domain_size[1]) * PetscCosReal(3 * PETSC_PI * x / domain_size[0]) *
+                                   PetscSinReal(3 * PETSC_PI * y / domain_size[1]) * PetscCosReal(3 * PETSC_PI * z / domain_size[2]);
+      coords[offset + 2] = z + (0.05 * domain_size[2]) * PetscCosReal(3 * PETSC_PI * x / domain_size[0]) *
+                                   PetscCosReal(3 * PETSC_PI * y / domain_size[1]) * PetscSinReal(3 * PETSC_PI * z / domain_size[2]);
+    }
+  }
+  PetscCall(VecRestoreArray(coordinates, &coords));
+  PetscFunctionReturn(0);
+}
+
+PetscErrorCode PerturbVerticesRandom(DM dm) {
+  PetscFunctionBeginUser;
+  Vec          coordinates;
+  PetscSection coordSection;
+  PetscScalar *coords;
+  PetscInt     v, vStart, vEnd, offset, dim;
+  PetscInt     c_end, c_start, num_elem;
+  PetscReal    x, y, z;
+  PetscReal    domain_min[3], domain_max[3], h[3] = {0.0, 0.0, 0.0};
+
+  PetscCall(DMGetDimension(dm, &dim));
+  PetscCall(DMGetCoordinateSection(dm, &coordSection));
+  PetscCall(DMGetCoordinatesLocal(dm, &coordinates));
+  PetscCall(DMPlexGetDepthStratum(dm, 0, &vStart, &vEnd));
+  PetscCall(DMPlexGetHeightStratum(dm, 0, &c_start, &c_end));
+  num_elem = c_end - c_start;
+
+  // Estimate the mesh size h once, before any vertex moves: the bounding box is collective and would otherwise be
+  // re-measured on already-perturbed coordinates, a rank-dependent number of times. nx assumes the same number of
+  // local cells in every direction; h stays 0 on a rank that owns no cells.
+  const PetscInt  num_dirs = (dim == 2) ? 2 : 3;
+  const PetscReal nx       = (dim == 2) ? sqrt(num_elem) : cbrt(num_elem);
+  PetscCall(DMGetBoundingBox(dm, domain_min, domain_max));
+  for (PetscInt i = 0; i < num_dirs; i++) h[i] = (nx > 0) ? (domain_max[i] - domain_min[i]) / nx : 0.0;
+
+  PetscCall(VecGetArray(coordinates, &coords));
+  for (v = vStart; v < vEnd; v++) {
+    PetscCall(PetscSectionGetOffset(coordSection, v, &offset));
+    if (dim == 2) {
+      x = coords[offset];
+      y = coords[offset + 1];
+      // perturb randomly O(h*sqrt(2)/3)
+      PetscReal rx       = ((PetscReal)rand()) / ((PetscReal)RAND_MAX) * (h[0] * 0.471404);
+      PetscReal ry       = ((PetscReal)rand()) / ((PetscReal)RAND_MAX) * (h[1] * 0.471404);
+      PetscReal t        = ((PetscReal)rand()) / ((PetscReal)RAND_MAX) * PETSC_PI;
+      coords[offset]     = x + rx * PetscCosReal(t);
+      coords[offset + 1] = y + ry * PetscSinReal(t);
+    } else {
+      x = coords[offset];
+      y = coords[offset + 1];
+      z = coords[offset + 2];
+      // This is because 'boundary' is broken in 3D
+      PetscReal rx       = ((PetscReal)rand()) / ((PetscReal)RAND_MAX) * (h[0] * 0.471404);
+      PetscReal ry       = ((PetscReal)rand()) / ((PetscReal)RAND_MAX) * (h[1] * 0.471404);
+      PetscReal rz       = ((PetscReal)rand()) / ((PetscReal)RAND_MAX) * (h[2] * 0.471404);
+      PetscReal t        = ((PetscReal)rand()) / ((PetscReal)RAND_MAX) * PETSC_PI;
+      coords[offset]     = x + rx * PetscCosReal(t);
+      coords[offset + 1] = y + ry * PetscCosReal(t);
+      coords[offset + 2] = z + rz * PetscSinReal(t);
+    }
+  }
+  PetscCall(VecRestoreArray(coordinates, &coords));
+  PetscFunctionReturn(0);
+}
diff --git a/examples/Hdiv-mixed/src/setup-fe.c b/examples/Hdiv-mixed/src/setup-fe.c
new file mode 100644
index 0000000000..da4b000055
--- /dev/null
+++ b/examples/Hdiv-mixed/src/setup-fe.c
@@ -0,0 +1,281 @@
+#include "../include/setup-fe.h"
+
+#include "petscerror.h"
+
+// -----------------------------------------------------------------------------
+// Convert PETSc MemType to libCEED MemType: CEED_MEM_DEVICE when PetscMemTypeDevice(mem_type) holds, otherwise CEED_MEM_HOST
+// -----------------------------------------------------------------------------
+CeedMemType MemTypeP2C(PetscMemType mem_type) { return PetscMemTypeDevice(mem_type) ? CEED_MEM_DEVICE : CEED_MEM_HOST; }
+
+// ---------------------------------------------------------------------------
+// Setup FE: hand-build the PetscSections for the coupled DM (field 0 "Velocity", field 1 "Pressure")
+// and for the single-field initial-condition DMs dm_u0 ("Velocity") and dm_p0 ("Pressure").
+// Velocity dofs are attached to faces (height 1); one pressure dof is attached to each cell (height 0).
+// ---------------------------------------------------------------------------
+PetscErrorCode SetupFEHdiv(MPI_Comm comm, DM dm, DM dm_u0, DM dm_p0) {
+  PetscSection sec, sec_u0, sec_p0;
+  PetscInt     dofs_per_face;
+  PetscInt     p_start, p_end;
+  PetscInt     c_start, c_end;  // cells
+  PetscInt     f_start, f_end;  // faces
+
+  PetscFunctionBeginUser;
+
+  // Get plex limits (taken from dm and reused for dm_u0/dm_p0 - presumably all three share one mesh; verify against caller)
+  PetscCall(DMPlexGetChart(dm, &p_start, &p_end));
+  PetscCall(DMPlexGetHeightStratum(dm, 0, &c_start, &c_end));
+  PetscCall(DMPlexGetHeightStratum(dm, 1, &f_start, &f_end));
+  // Create section for coupled problem
+  PetscCall(PetscSectionCreate(comm, &sec));
+  PetscCall(PetscSectionSetNumFields(sec, 2));
+  PetscCall(PetscSectionSetFieldName(sec, 0, "Velocity"));
+  PetscCall(PetscSectionSetFieldComponents(sec, 0, 1));
+  PetscCall(PetscSectionSetFieldName(sec, 1, "Pressure"));
+  PetscCall(PetscSectionSetFieldComponents(sec, 1, 1));
+  PetscCall(PetscSectionSetChart(sec, p_start, p_end));
+  // Create section for initial conditions u0
+  PetscCall(PetscSectionCreate(comm, &sec_u0));
+  PetscCall(PetscSectionSetNumFields(sec_u0, 1));
+  PetscCall(PetscSectionSetFieldName(sec_u0, 0, "Velocity"));
+  PetscCall(PetscSectionSetFieldComponents(sec_u0, 0, 1));
+  PetscCall(PetscSectionSetChart(sec_u0, p_start, p_end));
+  // Create section for initial conditions p0
+  PetscCall(PetscSectionCreate(comm, &sec_p0));
+  PetscCall(PetscSectionSetNumFields(sec_p0, 1));
+  PetscCall(PetscSectionSetFieldName(sec_p0, 0, "Pressure"));
+  PetscCall(PetscSectionSetFieldComponents(sec_p0, 0, 1));
+  PetscCall(PetscSectionSetChart(sec_p0, p_start, p_end));
+  // Setup dofs per face for velocity field (one dof per point in the face's cone)
+  for (PetscInt f = f_start; f < f_end; f++) {
+    PetscCall(DMPlexGetConeSize(dm, f, &dofs_per_face));
+    PetscCall(PetscSectionSetFieldDof(sec, f, 0, dofs_per_face));
+    PetscCall(PetscSectionSetDof(sec, f, dofs_per_face));
+
+    PetscCall(DMPlexGetConeSize(dm_u0, f, &dofs_per_face));
+    PetscCall(PetscSectionSetFieldDof(sec_u0, f, 0, dofs_per_face));
+    PetscCall(PetscSectionSetDof(sec_u0, f, dofs_per_face));
+  }
+  // Setup 1 dof per cell for pressure field
+  for (PetscInt c = c_start; c < c_end; c++) {
+    PetscCall(PetscSectionSetFieldDof(sec, c, 1, 1));
+    PetscCall(PetscSectionSetDof(sec, c, 1));
+
+    PetscCall(PetscSectionSetFieldDof(sec_p0, c, 0, 1));
+    PetscCall(PetscSectionSetDof(sec_p0, c, 1));
+  }
+  PetscCall(PetscSectionSetUp(sec));
+  PetscCall(DMSetSection(dm, sec));
+  PetscCall(DMCreateDS(dm));
+  PetscCall(PetscSectionDestroy(&sec));
+  PetscCall(PetscSectionSetUp(sec_u0));
+  PetscCall(DMSetSection(dm_u0, sec_u0));
+  PetscCall(DMCreateDS(dm_u0));
+  PetscCall(PetscSectionDestroy(&sec_u0));
+  PetscCall(PetscSectionSetUp(sec_p0));
+  PetscCall(DMSetSection(dm_p0, sec_p0));
+  PetscCall(DMCreateDS(dm_p0));
+  PetscCall(PetscSectionDestroy(&sec_p0));
+
+  PetscFunctionReturn(0);
+}
+
+// ---------------------------------------------------------------------------
+// Set-up FE for H1 space
+// Adds a single Lagrange field with problem_data->dim components and degree app_ctx->degree to dm_H1,
+// sets the tensor closure permutation, and labels the field "Velocity" with components
+// "Velocity_X", "Velocity_Y" and, when dim != 2, "Velocity_Z".
+// ---------------------------------------------------------------------------
+PetscErrorCode SetupFEH1(ProblemData problem_data, AppCtx app_ctx, DM dm_H1) {
+  // Single vector-valued FE space (one PetscFE with dim components)
+  PetscFE fe;
+  // quadrature order handed to PetscFECreateLagrange
+  PetscInt  q_degree   = app_ctx->degree + 2 + app_ctx->q_extra;
+  PetscBool is_simplex = PETSC_TRUE;
+  PetscFunctionBeginUser;
+
+  // Check if simplex or tensor-product element
+  PetscCall(DMPlexIsSimplex(dm_H1, &is_simplex));
+  // Create FE space
+  PetscCall(PetscFECreateLagrange(app_ctx->comm, problem_data->dim, problem_data->dim, is_simplex, app_ctx->degree, q_degree, &fe));
+  PetscCall(PetscObjectSetName((PetscObject)fe, "U"));
+  PetscCall(DMAddField(dm_H1, NULL, (PetscObject)fe));
+  PetscCall(DMCreateDS(dm_H1));
+
+  {
+    // Disabled: create FE field for coordinates
+    //  PetscFE fe_coords;
+    //  PetscInt num_comp_coord;
+    //  PetscCall( DMGetCoordinateDim(dm_H1, &num_comp_coord) );
+    //  PetscCall( PetscFECreateLagrange(app_ctx->comm, problem_data->dim,
+    //                                   num_comp_coord,
+    //                                   is_simplex, 1, q_degree,
+    //                                   &fe_coords) );
+    //  PetscCall( DMProjectCoordinates(dm_H1, fe_coords) );
+    //  PetscCall( PetscFEDestroy(&fe_coords) );
+  }
+  PetscCall(DMPlexSetClosurePermutationTensor(dm_H1, PETSC_DETERMINE, NULL));
+  // Cleanup
+  PetscCall(PetscFEDestroy(&fe));
+
+  // Name the single field and its components (X and Y always; Z for every dim other than 2)
+  PetscSection section;
+  PetscCall(DMGetLocalSection(dm_H1, &section));
+  PetscCall(PetscSectionSetFieldName(section, 0, "Velocity"));
+  PetscCall(PetscSectionSetComponentName(section, 0, 0, "Velocity_X"));
+  PetscCall(PetscSectionSetComponentName(section, 0, 1, "Velocity_Y"));
+  if (problem_data->dim != 2) {
+    PetscCall(PetscSectionSetComponentName(section, 0, 2, "Velocity_Z"));
+  }
+  PetscFunctionReturn(0);
+}
+
+// -----------------------------------------------------------------------------
+// Utility function - convert from DMPolytopeType to CeedElemTopology (triangle, quad, tet, hex); any other cell type yields 0
+// -----------------------------------------------------------------------------
+CeedElemTopology ElemTopologyP2C(DMPolytopeType cell_type) {
+  switch (cell_type) {
+    case DM_POLYTOPE_TRIANGLE:
+      return CEED_TOPOLOGY_TRIANGLE;
+    case DM_POLYTOPE_QUADRILATERAL:
+      return CEED_TOPOLOGY_QUAD;
+    case DM_POLYTOPE_TETRAHEDRON:
+      return CEED_TOPOLOGY_TET;
+    case DM_POLYTOPE_HEXAHEDRON:
+      return CEED_TOPOLOGY_HEX;
+    default:
+      return 0;  // NOTE(review): 0 is used as the "unsupported" result - confirm callers treat it as invalid
+  }
+}
+
+// -----------------------------------------------------------------------------
+// Utility function - essential BC dofs are encoded in closure indices as -(i+1); decode those, return non-negative i unchanged
+// -----------------------------------------------------------------------------
+PetscInt Involute(PetscInt i) { return i >= 0 ? i : -(i + 1); }
+
+// -----------------------------------------------------------------------------
+// Get CEED restriction data from DMPlex: query local offsets for (domain_label, value, height) and build *elem_restr from them
+// -----------------------------------------------------------------------------
+PetscErrorCode CreateRestrictionFromPlex(Ceed ceed, DM dm, CeedInt height, DMLabel domain_label, CeedInt value, CeedElemRestriction *elem_restr) {
+  PetscInt num_elem, elem_size, num_dof, num_comp, *elem_restr_offsets;
+
+  PetscFunctionBeginUser;
+  // Offsets are requested for field 0 only; DMPlexGetLocalOffsets allocates elem_restr_offsets
+  PetscCall(DMPlexGetLocalOffsets(dm, domain_label, value, height, 0, &num_elem, &elem_size, &num_comp, &num_dof, &elem_restr_offsets));
+  // CEED_COPY_VALUES: the offsets are copied, so they are freed right after. NOTE(review): assumes PetscInt and CeedInt have the same width
+  CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp, 1, num_dof, CEED_MEM_HOST, CEED_COPY_VALUES, elem_restr_offsets, elem_restr);
+  PetscCall(PetscFree(elem_restr_offsets));
+
+  PetscFunctionReturn(0);
+}
+
+// -----------------------------------------------------------------------------
+// Get Oriented CEED restriction data from DMPlex: oriented velocity restrictions for dm and dm_u0, plain pressure restrictions for dm and dm_p0
+// -----------------------------------------------------------------------------
+PetscErrorCode CreateRestrictionFromPlexOriented(Ceed ceed, DM dm, DM dm_u0, DM dm_p0, CeedInt P, CeedElemRestriction *elem_restr_u,
+                                                 CeedElemRestriction *elem_restr_p, CeedElemRestriction *elem_restr_u0,
+                                                 CeedElemRestriction *elem_restr_p0) {
+  PetscSection section, section_u0, section_p0;
+  PetscInt     p, num_elem, num_dof, num_dof_u0, num_dof_p0, *restr_indices_u, *restr_indices_p, *restr_indices_u0, *restr_indices_p0, elem_offset,
+      num_fields, num_fields_u0, num_fields_p0, dim, c_start, c_end;
+  Vec             U_loc;
+  const PetscInt *ornt;  // this is for orientation of dof
+  PetscFunctionBeginUser;
+  // Section for mixed problem
+  PetscCall(DMGetDimension(dm, &dim));
+  PetscCall(DMGetLocalSection(dm, &section));
+  PetscCall(PetscSectionGetNumFields(section, &num_fields));
+  PetscInt num_comp[num_fields], field_offsets[num_fields + 1];
+  field_offsets[0] = 0;
+  for (PetscInt f = 0; f < num_fields; f++) {
+    PetscCall(PetscSectionGetFieldComponents(section, f, &num_comp[f]));
+    field_offsets[f + 1] = field_offsets[f] + num_comp[f];
+  }
+  // Section for initial conditions u0
+  PetscCall(DMGetLocalSection(dm_u0, &section_u0));
+  PetscCall(PetscSectionGetNumFields(section_u0, &num_fields_u0));
+  PetscInt num_comp_u0[num_fields_u0], field_offsets_u0[num_fields_u0 + 1];
+  field_offsets_u0[0] = 0;
+  for (PetscInt f = 0; f < num_fields_u0; f++) {
+    PetscCall(PetscSectionGetFieldComponents(section_u0, f, &num_comp_u0[f]));
+    field_offsets_u0[f + 1] = field_offsets_u0[f] + num_comp_u0[f];
+  }
+  // Section for initial conditions p0
+  PetscCall(DMGetLocalSection(dm_p0, &section_p0));
+  PetscCall(PetscSectionGetNumFields(section_p0, &num_fields_p0));
+  PetscInt num_comp_p0[num_fields_p0], field_offsets_p0[num_fields_p0 + 1];
+  field_offsets_p0[0] = 0;
+  for (PetscInt f = 0; f < num_fields_p0; f++) {
+    PetscCall(PetscSectionGetFieldComponents(section_p0, f, &num_comp_p0[f]));
+    field_offsets_p0[f + 1] = field_offsets_p0[f] + num_comp_p0[f];
+  }
+
+  PetscCall(DMPlexGetHeightStratum(dm, 0, &c_start, &c_end));
+  num_elem = c_end - c_start;
+  PetscCall(PetscMalloc1(num_elem * dim * PetscPowInt(P, dim), &restr_indices_u));
+  PetscCall(PetscMalloc1(num_elem * dim * PetscPowInt(P, dim), &restr_indices_u0));
+  PetscCall(PetscMalloc1(num_elem, &restr_indices_p));
+  PetscCall(PetscMalloc1(num_elem, &restr_indices_p0));
+  bool *orient_indices_u, *orient_indices_u0;  // to flip the dof
+  PetscCall(PetscMalloc1(num_elem * dim * PetscPowInt(P, dim), &orient_indices_u));
+  PetscCall(PetscMalloc1(num_elem * dim * PetscPowInt(P, dim), &orient_indices_u0));
+  for (p = 0, elem_offset = 0; p < num_elem; p++) {
+    PetscInt num_indices, *indices, faces_per_elem, dofs_per_face, num_indices_u0, *indices_u0, num_indices_p0, *indices_p0;
+    const PetscInt cell = c_start + p;  // plex point of the p-th cell; the height-0 stratum of dm need not start at 0
+    PetscCall(DMPlexGetClosureIndices(dm, section, section, cell, PETSC_TRUE, &num_indices, &indices, NULL, NULL));
+    PetscCall(DMPlexGetClosureIndices(dm_u0, section_u0, section_u0, cell, PETSC_TRUE, &num_indices_u0, &indices_u0, NULL, NULL));
+    PetscCall(DMPlexGetClosureIndices(dm_p0, section_p0, section_p0, cell, PETSC_TRUE, &num_indices_p0, &indices_p0, NULL, NULL));
+    restr_indices_p[p]  = indices[num_indices - 1];  // last closure index of the coupled section is taken as the pressure dof
+    restr_indices_p0[p] = indices_p0[0];
+    PetscCall(DMPlexGetConeOrientation(dm, cell, &ornt));
+    // Get number of faces per element
+    PetscCall(DMPlexGetConeSize(dm, cell, &faces_per_elem));
+    dofs_per_face = faces_per_elem - 2;  // 4 faces -> 2 dofs per face, 6 faces -> 4 dofs per face
+    for (PetscInt f = 0; f < faces_per_elem; f++) {
+      for (PetscInt i = 0; i < dofs_per_face; i++) {
+        PetscInt ii = dofs_per_face * f + i;
+        // Essential boundary conditions are encoded as -(loc+1), but we don't care so we decode.
+        PetscInt loc                 = Involute(indices[ii * num_comp[0]]);
+        restr_indices_u[elem_offset] = loc;
+        // Set orientation (same flag for the coupled and u0 restrictions): flip when the face's cone orientation is negative
+        orient_indices_u[elem_offset] = ornt[f] < 0;
+        PetscInt loc_u0               = Involute(indices_u0[ii * num_comp_u0[0]]);
+        restr_indices_u0[elem_offset] = loc_u0;
+        orient_indices_u0[elem_offset] = ornt[f] < 0;
+        elem_offset++;
+      }
+    }
+    PetscCall(DMPlexRestoreClosureIndices(dm, section, section, cell, PETSC_TRUE, &num_indices, &indices, NULL, NULL));
+    PetscCall(DMPlexRestoreClosureIndices(dm_u0, section_u0, section_u0, cell, PETSC_TRUE, &num_indices_u0, &indices_u0, NULL, NULL));
+    PetscCall(DMPlexRestoreClosureIndices(dm_p0, section_p0, section_p0, cell, PETSC_TRUE, &num_indices_p0, &indices_p0, NULL, NULL));
+  }
+  // if (elem_offset != num_elem*dim*PetscPowInt(P, dim))
+  //   SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB,
+  //           "ElemRestriction of size (%" PetscInt_FMT ", %" PetscInt_FMT" )
+  //           initialized %" PetscInt_FMT " nodes", num_elem,
+  //           dim*PetscPowInt(P, dim),elem_offset);
+
+  PetscCall(DMGetLocalVector(dm, &U_loc));
+  PetscCall(VecGetLocalSize(U_loc, &num_dof));
+  PetscCall(DMRestoreLocalVector(dm, &U_loc));
+  // dof per element in Hdiv is dim*P^dim, for linear element P=2
+  CeedElemRestrictionCreateOriented(ceed, num_elem, dim * PetscPowInt(P, dim), 1, 1, num_dof, CEED_MEM_HOST, CEED_COPY_VALUES, restr_indices_u,
+                                    orient_indices_u, elem_restr_u);
+  CeedElemRestrictionCreate(ceed, num_elem, 1, 1, 1, num_dof, CEED_MEM_HOST, CEED_COPY_VALUES, restr_indices_p, elem_restr_p);
+  PetscCall(DMGetLocalVector(dm_u0, &U_loc));
+  PetscCall(VecGetLocalSize(U_loc, &num_dof_u0));
+  PetscCall(DMRestoreLocalVector(dm_u0, &U_loc));
+  // dof per element in Hdiv is dim*P^dim, for linear element P=2
+  CeedElemRestrictionCreateOriented(ceed, num_elem, dim * PetscPowInt(P, dim), 1, 1, num_dof_u0, CEED_MEM_HOST, CEED_COPY_VALUES, restr_indices_u0,
+                                    orient_indices_u0, elem_restr_u0);
+  PetscCall(DMGetLocalVector(dm_p0, &U_loc));
+  PetscCall(VecGetLocalSize(U_loc, &num_dof_p0));
+  PetscCall(DMRestoreLocalVector(dm_p0, &U_loc));
+  CeedElemRestrictionCreate(ceed, num_elem, 1, 1, 1, num_dof_p0, CEED_MEM_HOST, CEED_COPY_VALUES, restr_indices_p0, elem_restr_p0);
+  PetscCall(PetscFree(restr_indices_p));
+  PetscCall(PetscFree(restr_indices_u));
+  PetscCall(PetscFree(orient_indices_u));
+  PetscCall(PetscFree(restr_indices_u0));
+  PetscCall(PetscFree(orient_indices_u0));
+  PetscCall(PetscFree(restr_indices_p0));
+  PetscFunctionReturn(0);
+}
diff --git a/examples/Hdiv-mixed/src/setup-libceed.c b/examples/Hdiv-mixed/src/setup-libceed.c
new file mode 100644
index 0000000000..bdb0f238d3
--- /dev/null
+++ b/examples/Hdiv-mixed/src/setup-libceed.c
@@ -0,0 +1,459 @@
+#include "../include/setup-libceed.h"
+
+#include <stdio.h>
+
+#include "../basis/Hdiv-hex.h"
+#include "../basis/Hdiv-quad.h"
+#include "../basis/L2-P0.h"
+#include "../include/petsc-macros.h"
+#include "../include/setup-boundary.h"
+#include "ceed/ceed.h"
+
+// -----------------------------------------------------------------------------
+// Destroy libCEED objects held by ceed_data (time-stepping, Jacobian and post-processing objects conditionally), then free ceed_data itself
+// -----------------------------------------------------------------------------
+PetscErrorCode CeedDataDestroy(CeedData ceed_data, ProblemData problem_data) {
+  PetscFunctionBeginUser;
+
+  // Vectors
+  CeedVectorDestroy(&ceed_data->x_ceed);
+  CeedVectorDestroy(&ceed_data->y_ceed);
+  CeedVectorDestroy(&ceed_data->x_t_ceed);
+  CeedVectorDestroy(&ceed_data->x_coord);
+  // Restrictions
+  CeedElemRestrictionDestroy(&ceed_data->elem_restr_x);
+  CeedElemRestrictionDestroy(&ceed_data->elem_restr_u);
+  CeedElemRestrictionDestroy(&ceed_data->elem_restr_U_i);  // U = [p,u]
+  CeedElemRestrictionDestroy(&ceed_data->elem_restr_p);
+  CeedElemRestrictionDestroy(&ceed_data->elem_restr_p_i);
+  CeedElemRestrictionDestroy(&ceed_data->elem_restr_u0);
+  CeedElemRestrictionDestroy(&ceed_data->elem_restr_p0);
+  // Bases
+  CeedBasisDestroy(&ceed_data->basis_x);
+  CeedBasisDestroy(&ceed_data->basis_u);
+  CeedBasisDestroy(&ceed_data->basis_p);
+  CeedBasisDestroy(&ceed_data->basis_u_face);
+  if (problem_data->has_ts) {
+    CeedVectorDestroy(&ceed_data->u0_ceed);
+    CeedVectorDestroy(&ceed_data->v0_ceed);
+    CeedVectorDestroy(&ceed_data->p0_ceed);
+    CeedVectorDestroy(&ceed_data->q0_ceed);
+    CeedVectorDestroy(&ceed_data->rhs_u0_ceed);
+    CeedVectorDestroy(&ceed_data->rhs_p0_ceed);
+    CeedQFunctionDestroy(&ceed_data->qf_rhs_u0);
+    CeedOperatorDestroy(&ceed_data->op_rhs_u0);
+    CeedQFunctionDestroy(&ceed_data->qf_ics_u);
+    CeedOperatorDestroy(&ceed_data->op_ics_u);
+    CeedQFunctionDestroy(&ceed_data->qf_rhs_p0);
+    CeedOperatorDestroy(&ceed_data->op_rhs_p0);
+    CeedQFunctionDestroy(&ceed_data->qf_ics_p);
+    CeedOperatorDestroy(&ceed_data->op_ics_p);
+  }
+  // QFunctions
+  CeedQFunctionDestroy(&ceed_data->qf_residual);
+  CeedQFunctionDestroy(&ceed_data->qf_error);
+  // Operators
+  CeedOperatorDestroy(&ceed_data->op_residual);
+  CeedOperatorDestroy(&ceed_data->op_error);
+  if (!problem_data->has_ts) {
+    // QFunctions
+    CeedQFunctionDestroy(&ceed_data->qf_jacobian);
+    // Operators
+    CeedOperatorDestroy(&ceed_data->op_jacobian);
+  }
+
+  // data for post-processing
+  if (problem_data->view_solution) {
+    CeedVectorDestroy(&ceed_data->up_ceed);
+    CeedVectorDestroy(&ceed_data->vp_ceed);
+    CeedVectorDestroy(&ceed_data->u_ceed);
+    CeedElemRestrictionDestroy(&ceed_data->elem_restr_u_H1);
+    CeedQFunctionDestroy(&ceed_data->qf_rhs_H1);
+    CeedOperatorDestroy(&ceed_data->op_rhs_H1);
+    CeedQFunctionDestroy(&ceed_data->qf_post_mass);
+    CeedOperatorDestroy(&ceed_data->op_post_mass);
+  }
+  PetscCall(PetscFree(ceed_data));
+
+  PetscFunctionReturn(0);
+}
+
+// -----------------------------------------------------------------------------
+// Set up libCEED on the fine grid for a given degree
+// -----------------------------------------------------------------------------
+PetscErrorCode SetupLibceed(DM dm, DM dm_u0, DM dm_p0, DM dm_H1, Ceed ceed, AppCtx app_ctx, ProblemData problem_data, CeedData ceed_data) {
+  CeedInt P = app_ctx->degree + 1;
+  // Number of quadratures in 1D, q_extra is set in cl-options.c
+  CeedInt            Q = P + 1 + app_ctx->q_extra;
+  CeedInt            dim, num_comp_x, num_comp_u, num_comp_p;
+  DM                 dm_coord;
+  Vec                coords;
+  PetscInt           c_start, c_end, num_elem;
+  const PetscScalar *coordArray;
+  CeedQFunction      qf_true, qf_residual, qf_jacobian, qf_error;
+  CeedOperator       op_true, op_residual, op_jacobian, op_error;
+
+  PetscFunctionBeginUser;
+  // ---------------------------------------------------------------------------
+  // libCEED bases:Hdiv basis_u and Lagrange basis_x
+  // ---------------------------------------------------------------------------
+  dim        = problem_data->dim;
+  num_comp_x = dim;
+  num_comp_u = 1;  // one vector dof
+  num_comp_p = 1;  // one scalar dof
+  // Number of quadratures per element
+  CeedInt num_qpts = PetscPowInt(Q, dim);
+  // Pressure and velocity dof per element
+  CeedInt    P_p = 1, P_u = dim * PetscPowInt(P, dim);
+  CeedScalar q_ref[dim * num_qpts], q_weights[num_qpts];
+  CeedScalar div[P_u * num_qpts], interp_u[dim * P_u * num_qpts], interp_p[P_p * num_qpts], *grad = NULL;
+  if (dim == 2) {
+    HdivBasisQuad(Q, q_ref, q_weights, interp_u, div, problem_data->quadrature_mode);
+    CeedBasisCreateHdiv(ceed, CEED_TOPOLOGY_QUAD, num_comp_u, P_u, num_qpts, interp_u, div, q_ref, q_weights, &ceed_data->basis_u);
+    L2BasisP0(dim, Q, q_ref, q_weights, interp_p, problem_data->quadrature_mode);
+    CeedBasisCreateH1(ceed, CEED_TOPOLOGY_QUAD, num_comp_p, 1, num_qpts, interp_p, grad, q_ref, q_weights, &ceed_data->basis_p);
+    HdivBasisQuad(Q, q_ref, q_weights, interp_u, div, CEED_GAUSS_LOBATTO);
+    CeedBasisCreateHdiv(ceed, CEED_TOPOLOGY_QUAD, num_comp_u, P_u, num_qpts, interp_u, div, q_ref, q_weights, &ceed_data->basis_u_face);
+  } else {
+    HdivBasisHex(Q, q_ref, q_weights, interp_u, div, problem_data->quadrature_mode);
+    CeedBasisCreateHdiv(ceed, CEED_TOPOLOGY_HEX, num_comp_u, P_u, num_qpts, interp_u, div, q_ref, q_weights, &ceed_data->basis_u);
+    L2BasisP0(dim, Q, q_ref, q_weights, interp_p, problem_data->quadrature_mode);
+    CeedBasisCreateH1(ceed, CEED_TOPOLOGY_HEX, num_comp_p, 1, num_qpts, interp_p, grad, q_ref, q_weights, &ceed_data->basis_p);
+    HdivBasisHex(Q, q_ref, q_weights, interp_u, div, CEED_GAUSS_LOBATTO);
+    CeedBasisCreateHdiv(ceed, CEED_TOPOLOGY_HEX, num_comp_u, P_u, num_qpts, interp_u, div, q_ref, q_weights, &ceed_data->basis_u_face);
+  }
+
+  CeedBasisCreateTensorH1Lagrange(ceed, dim, num_comp_x, 2, Q, problem_data->quadrature_mode, &ceed_data->basis_x);
+
+  // ---------------------------------------------------------------------------
+  // libCEED restrictions
+  // ---------------------------------------------------------------------------
+  PetscCall(DMGetCoordinateDM(dm, &dm_coord));
+  PetscCall(DMPlexSetClosurePermutationTensor(dm_coord, PETSC_DETERMINE, NULL));
+  CeedInt  height       = 0;  // 0 means no boundary conditions
+  DMLabel  domain_label = 0;
+  PetscInt value        = 0;
+  // -- Coordinate restriction
+  PetscCall(CreateRestrictionFromPlex(ceed, dm_coord, height, domain_label, value, &ceed_data->elem_restr_x));
+  // -- Solution restriction
+  PetscCall(CreateRestrictionFromPlexOriented(ceed, dm, dm_u0, dm_p0, P, &ceed_data->elem_restr_u, &ceed_data->elem_restr_p,
+                                              &ceed_data->elem_restr_u0, &ceed_data->elem_restr_p0));
+  // -- Geometric ceed_data restriction
+  PetscCall(DMPlexGetHeightStratum(dm, 0, &c_start, &c_end));
+  num_elem            = c_end - c_start;
+  ceed_data->num_elem = num_elem;
+  CeedElemRestrictionCreateStrided(ceed, num_elem, num_qpts, (dim + 1), (dim + 1) * num_elem * num_qpts, CEED_STRIDES_BACKEND,
+                                   &ceed_data->elem_restr_U_i);
+  CeedElemRestrictionCreateStrided(ceed, num_elem, num_qpts, 1, 1 * num_elem * num_qpts, CEED_STRIDES_BACKEND, &ceed_data->elem_restr_p_i);
+
+  // ---------------------------------------------------------------------------
+  // Element coordinates
+  // ---------------------------------------------------------------------------
+  PetscCall(DMGetCoordinatesLocal(dm, &coords));
+  PetscCall(VecGetArrayRead(coords, &coordArray));
+  CeedElemRestrictionCreateVector(ceed_data->elem_restr_x, &ceed_data->x_coord, NULL);
+  CeedVectorSetArray(ceed_data->x_coord, CEED_MEM_HOST, CEED_COPY_VALUES, (PetscScalar *)coordArray);
+  PetscCall(VecRestoreArrayRead(coords, &coordArray));
+
+  // ---------------------------------------------------------------------------
+  // Setup true solution for [p,u]
+  // ---------------------------------------------------------------------------
+  CeedVector true_vec, true_force;
+  CeedVectorCreate(ceed, num_elem * num_qpts * (dim + 1), &true_vec);
+  CeedVectorCreate(ceed, num_elem * num_qpts * 1, &true_force);
+  // Create the q-function that sets up the RHS and true solution
+  CeedQFunctionCreateInterior(ceed, 1, problem_data->true_solution, problem_data->true_solution_loc, &qf_true);
+  CeedQFunctionSetContext(qf_true, problem_data->true_qfunction_ctx);
+  CeedQFunctionContextDestroy(&problem_data->true_qfunction_ctx);
+  CeedQFunctionAddInput(qf_true, "x", num_comp_x, CEED_EVAL_INTERP);
+  CeedQFunctionAddOutput(qf_true, "true force", 1, CEED_EVAL_NONE);
+  CeedQFunctionAddOutput(qf_true, "true solution", dim + 1, CEED_EVAL_NONE);
+  // Create the operator that builds the RHS and true solution
+  CeedOperatorCreate(ceed, qf_true, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, &op_true);
+  if (problem_data->has_ts) {
+    double final_time = app_ctx->t_final;
+    CeedOperatorGetContextFieldLabel(op_true, "final_time", &app_ctx->ctx_residual_ut->final_time_label);
+    CeedOperatorSetContextDouble(op_true, app_ctx->ctx_residual_ut->final_time_label, &final_time);
+  }
+  CeedOperatorSetField(op_true, "x", ceed_data->elem_restr_x, ceed_data->basis_x, ceed_data->x_coord);
+  CeedOperatorSetField(op_true, "true force", ceed_data->elem_restr_p_i, CEED_BASIS_COLLOCATED, true_force);
+  CeedOperatorSetField(op_true, "true solution", ceed_data->elem_restr_U_i, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE);
+  // Setup true solution
+  CeedOperatorApply(op_true, ceed_data->x_coord, true_vec, CEED_REQUEST_IMMEDIATE);
+
+  // ---------------------------------------------------------------------------
+  // Setup initial conditions
+  // ---------------------------------------------------------------------------
+  if (problem_data->has_ts) {
+    // ---------------------------------------------------------------------------
+    // Setup RHS for u field
+    // ---------------------------------------------------------------------------
+    CeedQFunction qf_rhs_u0;
+    CeedOperator  op_rhs_u0;
+    // Create the q-function that sets up the RHS
+    CeedQFunctionCreateInterior(ceed, 1, problem_data->rhs_u0, problem_data->rhs_u0_loc, &qf_rhs_u0);
+    CeedQFunctionSetContext(qf_rhs_u0, problem_data->rhs_u0_qfunction_ctx);
+    CeedQFunctionContextDestroy(&problem_data->rhs_u0_qfunction_ctx);
+    CeedQFunctionAddInput(qf_rhs_u0, "weight", 1, CEED_EVAL_WEIGHT);
+    CeedQFunctionAddInput(qf_rhs_u0, "x", num_comp_x, CEED_EVAL_INTERP);
+    CeedQFunctionAddInput(qf_rhs_u0, "dx", dim * dim, CEED_EVAL_GRAD);
+    CeedQFunctionAddOutput(qf_rhs_u0, "rhs_u0", dim, CEED_EVAL_INTERP);
+    // Create the operator that builds the RHS
+    CeedOperatorCreate(ceed, qf_rhs_u0, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, &op_rhs_u0);
+    CeedOperatorSetField(op_rhs_u0, "weight", CEED_ELEMRESTRICTION_NONE, ceed_data->basis_x, CEED_VECTOR_NONE);
+    CeedOperatorSetField(op_rhs_u0, "x", ceed_data->elem_restr_x, ceed_data->basis_x, ceed_data->x_coord);
+    CeedOperatorSetField(op_rhs_u0, "dx", ceed_data->elem_restr_x, ceed_data->basis_x, ceed_data->x_coord);
+    CeedOperatorSetField(op_rhs_u0, "rhs_u0", ceed_data->elem_restr_u0, ceed_data->basis_u, CEED_VECTOR_ACTIVE);
+
+    // -- Save libCEED data to apply operator in setup-ts.c
+    ceed_data->qf_rhs_u0 = qf_rhs_u0;
+    ceed_data->op_rhs_u0 = op_rhs_u0;
+    // ---------------------------------------------------------------------------
+    // Setup qfunction for initial conditions u0
+    // ---------------------------------------------------------------------------
+    CeedQFunction qf_ics_u;
+    CeedOperator  op_ics_u;
+    CeedQFunctionCreateInterior(ceed, 1, problem_data->ics_u, problem_data->ics_u_loc, &qf_ics_u);
+    CeedQFunctionAddInput(qf_ics_u, "weight", 1, CEED_EVAL_WEIGHT);
+    CeedQFunctionAddInput(qf_ics_u, "dx", dim * dim, CEED_EVAL_GRAD);
+    CeedQFunctionAddInput(qf_ics_u, "u", dim, CEED_EVAL_INTERP);
+    CeedQFunctionAddOutput(qf_ics_u, "v", dim, CEED_EVAL_INTERP);
+    // Create the operator that builds the initial conditions
+    CeedOperatorCreate(ceed, qf_ics_u, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, &op_ics_u);
+    CeedOperatorSetField(op_ics_u, "weight", CEED_ELEMRESTRICTION_NONE, ceed_data->basis_x, CEED_VECTOR_NONE);
+    CeedOperatorSetField(op_ics_u, "dx", ceed_data->elem_restr_x, ceed_data->basis_x, ceed_data->x_coord);
+    CeedOperatorSetField(op_ics_u, "u", ceed_data->elem_restr_u0, ceed_data->basis_u, CEED_VECTOR_ACTIVE);
+    CeedOperatorSetField(op_ics_u, "v", ceed_data->elem_restr_u0, ceed_data->basis_u, CEED_VECTOR_ACTIVE);
+    // -- Save libCEED data to apply operator in setup-ts.c
+    ceed_data->qf_ics_u = qf_ics_u;
+    ceed_data->op_ics_u = op_ics_u;
+    // -- Operator action variables: we use them in setup-ts.c
+    CeedElemRestrictionCreateVector(ceed_data->elem_restr_u0, &ceed_data->u0_ceed, NULL);
+    CeedElemRestrictionCreateVector(ceed_data->elem_restr_u0, &ceed_data->v0_ceed, NULL);
+    // ---------------------------------------------------------------------------
+    // Setup RHS for p field
+    // ---------------------------------------------------------------------------
+    CeedQFunction qf_rhs_p0;
+    CeedOperator  op_rhs_p0;
+    // Create the q-function that sets up the RHS
+    CeedQFunctionCreateInterior(ceed, 1, problem_data->rhs_p0, problem_data->rhs_p0_loc, &qf_rhs_p0);
+    CeedQFunctionAddInput(qf_rhs_p0, "weight", 1, CEED_EVAL_WEIGHT);
+    CeedQFunctionAddInput(qf_rhs_p0, "x", num_comp_x, CEED_EVAL_INTERP);
+    CeedQFunctionAddInput(qf_rhs_p0, "dx", dim * dim, CEED_EVAL_GRAD);
+    CeedQFunctionAddOutput(qf_rhs_p0, "rhs_p0", 1, CEED_EVAL_INTERP);
+    // Create the operator that builds the RHS
+    CeedOperatorCreate(ceed, qf_rhs_p0, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, &op_rhs_p0);
+    CeedOperatorSetField(op_rhs_p0, "weight", CEED_ELEMRESTRICTION_NONE, ceed_data->basis_x, CEED_VECTOR_NONE);
+    CeedOperatorSetField(op_rhs_p0, "x", ceed_data->elem_restr_x, ceed_data->basis_x, ceed_data->x_coord);
+    CeedOperatorSetField(op_rhs_p0, "dx", ceed_data->elem_restr_x, ceed_data->basis_x, ceed_data->x_coord);
+    CeedOperatorSetField(op_rhs_p0, "rhs_p0", ceed_data->elem_restr_p0, ceed_data->basis_p, CEED_VECTOR_ACTIVE);
+
+    // -- Save libCEED data to apply operator in setup-ts.c
+    ceed_data->qf_rhs_p0 = qf_rhs_p0;
+    ceed_data->op_rhs_p0 = op_rhs_p0;
+    // ---------------------------------------------------------------------------
+    // Setup qfunction for initial conditions p0
+    // ---------------------------------------------------------------------------
+    CeedQFunction qf_ics_p;
+    CeedOperator  op_ics_p;
+    CeedQFunctionCreateInterior(ceed, 1, problem_data->ics_p, problem_data->ics_p_loc, &qf_ics_p);
+    CeedQFunctionAddInput(qf_ics_p, "weight", 1, CEED_EVAL_WEIGHT);
+    CeedQFunctionAddInput(qf_ics_p, "dx", dim * dim, CEED_EVAL_GRAD);
+    CeedQFunctionAddInput(qf_ics_p, "p", 1, CEED_EVAL_INTERP);
+    CeedQFunctionAddOutput(qf_ics_p, "q", 1, CEED_EVAL_INTERP);
+    // Create the operator that builds the initial conditions
+    CeedOperatorCreate(ceed, qf_ics_p, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, &op_ics_p);
+    CeedOperatorSetField(op_ics_p, "weight", CEED_ELEMRESTRICTION_NONE, ceed_data->basis_x, CEED_VECTOR_NONE);
+    CeedOperatorSetField(op_ics_p, "dx", ceed_data->elem_restr_x, ceed_data->basis_x, ceed_data->x_coord);
+    CeedOperatorSetField(op_ics_p, "p", ceed_data->elem_restr_p0, ceed_data->basis_p, CEED_VECTOR_ACTIVE);
+    CeedOperatorSetField(op_ics_p, "q", ceed_data->elem_restr_p0, ceed_data->basis_p, CEED_VECTOR_ACTIVE);
+    // -- Save libCEED data to apply operator in setup-ts.c
+    ceed_data->qf_ics_p = qf_ics_p;
+    ceed_data->op_ics_p = op_ics_p;
+    // -- Operator action variables: we use them in setup-ts.c
+    CeedElemRestrictionCreateVector(ceed_data->elem_restr_p0, &ceed_data->p0_ceed, NULL);
+    CeedElemRestrictionCreateVector(ceed_data->elem_restr_p0, &ceed_data->q0_ceed, NULL);
+  }
+
+  // ---------------------------------------------------------------------------
+  // Persistent libCEED vectors
+  // ---------------------------------------------------------------------------
+  // -- Operator action variables: we use them in setup-solvers.c/setup-ts.c
+  CeedElemRestrictionCreateVector(ceed_data->elem_restr_u, &ceed_data->x_ceed, NULL);
+  CeedElemRestrictionCreateVector(ceed_data->elem_restr_u, &ceed_data->y_ceed, NULL);
+  // -- Operator action variables: we use them in setup-ts.c
+  CeedElemRestrictionCreateVector(ceed_data->elem_restr_u, &ceed_data->x_t_ceed, NULL);
+  // Local residual evaluator
+  // ---------------------------------------------------------------------------
+  // Create the QFunction and Operator that computes the residual of the PDE.
+  // ---------------------------------------------------------------------------
+  // -- QFunction
+  CeedQFunctionCreateInterior(ceed, 1, problem_data->residual, problem_data->residual_loc, &qf_residual);
+  CeedQFunctionSetContext(qf_residual, problem_data->residual_qfunction_ctx);
+  CeedQFunctionContextDestroy(&problem_data->residual_qfunction_ctx);
+  CeedQFunctionAddInput(qf_residual, "weight", 1, CEED_EVAL_WEIGHT);
+  CeedQFunctionAddInput(qf_residual, "dx", dim * dim, CEED_EVAL_GRAD);
+  CeedQFunctionAddInput(qf_residual, "u", dim, CEED_EVAL_INTERP);
+  CeedQFunctionAddInput(qf_residual, "div_u", 1, CEED_EVAL_DIV);
+  CeedQFunctionAddInput(qf_residual, "p", 1, CEED_EVAL_INTERP);
+  CeedQFunctionAddInput(qf_residual, "true force", 1, CEED_EVAL_NONE);
+  CeedQFunctionAddInput(qf_residual, "x", num_comp_x, CEED_EVAL_INTERP);
+  if (problem_data->has_ts) {
+    CeedQFunctionAddInput(qf_residual, "p_t", 1, CEED_EVAL_INTERP);
+  }
+  CeedQFunctionAddOutput(qf_residual, "v", dim, CEED_EVAL_INTERP);
+  CeedQFunctionAddOutput(qf_residual, "div_v", 1, CEED_EVAL_DIV);
+  CeedQFunctionAddOutput(qf_residual, "q", 1, CEED_EVAL_INTERP);
+
+  // -- Operator
+  CeedOperatorCreate(ceed, qf_residual, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, &op_residual);
+  if (problem_data->has_ts) {
+    // double t = app_ctx->ctx_residual_ut->t;
+    CeedOperatorGetContextFieldLabel(op_residual, "time", &app_ctx->ctx_residual_ut->solution_time_label);
+    // CeedOperatorContextGetFieldLabel(op_residual, "time_step",
+    //                                  &app_ctx->ctx_residual_ut->timestep_label);
+    // CeedOperatorContextSetDouble(op_residual,
+    //                              app_ctx->ctx_residual_ut->solution_time_label, &t);
+  }
+  CeedOperatorSetField(op_residual, "weight", CEED_ELEMRESTRICTION_NONE, ceed_data->basis_x, CEED_VECTOR_NONE);
+  CeedOperatorSetField(op_residual, "dx", ceed_data->elem_restr_x, ceed_data->basis_x, ceed_data->x_coord);
+  CeedOperatorSetField(op_residual, "u", ceed_data->elem_restr_u, ceed_data->basis_u, CEED_VECTOR_ACTIVE);
+  CeedOperatorSetField(op_residual, "div_u", ceed_data->elem_restr_u, ceed_data->basis_u, CEED_VECTOR_ACTIVE);
+  CeedOperatorSetField(op_residual, "p", ceed_data->elem_restr_p, ceed_data->basis_p, CEED_VECTOR_ACTIVE);
+  CeedOperatorSetField(op_residual, "true force", ceed_data->elem_restr_p_i, CEED_BASIS_COLLOCATED, true_force);
+  CeedOperatorSetField(op_residual, "x", ceed_data->elem_restr_x, ceed_data->basis_x, ceed_data->x_coord);
+  if (problem_data->has_ts) {
+    CeedOperatorSetField(op_residual, "p_t", ceed_data->elem_restr_p, ceed_data->basis_p, ceed_data->x_t_ceed);
+  }
+  CeedOperatorSetField(op_residual, "v", ceed_data->elem_restr_u, ceed_data->basis_u, CEED_VECTOR_ACTIVE);
+  CeedOperatorSetField(op_residual, "div_v", ceed_data->elem_restr_u, ceed_data->basis_u, CEED_VECTOR_ACTIVE);
+  CeedOperatorSetField(op_residual, "q", ceed_data->elem_restr_p, ceed_data->basis_p, CEED_VECTOR_ACTIVE);
+  // -- Save libCEED data to apply operator in matops.c
+  ceed_data->qf_residual = qf_residual;
+  ceed_data->op_residual = op_residual;
+  if (!problem_data->has_ts) {
+    // ---------------------------------------------------------------------------
+    // Add Pressure boundary condition. See setup-boundary.c
+    // ---------------------------------------------------------------------------
+    // DMAddBoundariesPressure(ceed, ceed_data, app_ctx, problem_data, dm);
+
+    // Local jacobian evaluator
+    // ---------------------------------------------------------------------------
+    // Create the QFunction and Operator that computes the jacobian of the PDE.
+    // ---------------------------------------------------------------------------
+    // -- QFunction
+    CeedQFunctionCreateInterior(ceed, 1, problem_data->jacobian, problem_data->jacobian_loc, &qf_jacobian);
+    CeedQFunctionSetContext(qf_jacobian, problem_data->jacobian_qfunction_ctx);
+    CeedQFunctionContextDestroy(&problem_data->jacobian_qfunction_ctx);
+    CeedQFunctionAddInput(qf_jacobian, "weight", 1, CEED_EVAL_WEIGHT);
+    CeedQFunctionAddInput(qf_jacobian, "dx", dim * dim, CEED_EVAL_GRAD);
+    CeedQFunctionAddInput(qf_jacobian, "du", dim, CEED_EVAL_INTERP);
+    CeedQFunctionAddInput(qf_jacobian, "div_du", 1, CEED_EVAL_DIV);
+    CeedQFunctionAddInput(qf_jacobian, "dp", 1, CEED_EVAL_INTERP);
+    CeedQFunctionAddInput(qf_jacobian, "x", num_comp_x, CEED_EVAL_INTERP);
+    // CeedQFunctionAddInput(qf_jacobian, "u", dim, CEED_EVAL_INTERP);
+    // CeedQFunctionAddInput(qf_jacobian, "p", 1, CEED_EVAL_INTERP);
+    CeedQFunctionAddOutput(qf_jacobian, "dv", dim, CEED_EVAL_INTERP);
+    CeedQFunctionAddOutput(qf_jacobian, "div_dv", 1, CEED_EVAL_DIV);
+    CeedQFunctionAddOutput(qf_jacobian, "dq", 1, CEED_EVAL_INTERP);
+    // -- Operator
+    CeedOperatorCreate(ceed, qf_jacobian, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, &op_jacobian);
+    CeedOperatorSetField(op_jacobian, "weight", CEED_ELEMRESTRICTION_NONE, ceed_data->basis_x, CEED_VECTOR_NONE);
+    CeedOperatorSetField(op_jacobian, "dx", ceed_data->elem_restr_x, ceed_data->basis_x, ceed_data->x_coord);
+    CeedOperatorSetField(op_jacobian, "du", ceed_data->elem_restr_u, ceed_data->basis_u, CEED_VECTOR_ACTIVE);
+    CeedOperatorSetField(op_jacobian, "div_du", ceed_data->elem_restr_u, ceed_data->basis_u, CEED_VECTOR_ACTIVE);
+    CeedOperatorSetField(op_jacobian, "dp", ceed_data->elem_restr_p, ceed_data->basis_p, CEED_VECTOR_ACTIVE);
+    CeedOperatorSetField(op_jacobian, "x", ceed_data->elem_restr_x, ceed_data->basis_x, ceed_data->x_coord);
+    // CeedOperatorSetField(op_jacobian, "u", ceed_data->elem_restr_u,
+    //                      ceed_data->basis_u, CEED_VECTOR_ACTIVE);
+    // CeedOperatorSetField(op_jacobian, "p", ceed_data->elem_restr_p,
+    //                      ceed_data->basis_p, CEED_VECTOR_ACTIVE);
+    CeedOperatorSetField(op_jacobian, "dv", ceed_data->elem_restr_u, ceed_data->basis_u, CEED_VECTOR_ACTIVE);
+    CeedOperatorSetField(op_jacobian, "div_dv", ceed_data->elem_restr_u, ceed_data->basis_u, CEED_VECTOR_ACTIVE);
+    CeedOperatorSetField(op_jacobian, "dq", ceed_data->elem_restr_p, ceed_data->basis_p, CEED_VECTOR_ACTIVE);
+    // -- Save libCEED data to apply operator in matops.c
+    ceed_data->qf_jacobian = qf_jacobian;
+    ceed_data->op_jacobian = op_jacobian;
+  }
+  // ---------------------------------------------------------------------------
+  // Setup Error Qfunction
+  // ---------------------------------------------------------------------------
+  // Create the q-function that sets up the error
+  CeedQFunctionCreateInterior(ceed, 1, problem_data->error, problem_data->error_loc, &qf_error);
+  CeedQFunctionSetContext(qf_error, problem_data->error_qfunction_ctx);
+  CeedQFunctionContextDestroy(&problem_data->error_qfunction_ctx);
+  CeedQFunctionAddInput(qf_error, "weight", 1, CEED_EVAL_WEIGHT);
+  CeedQFunctionAddInput(qf_error, "dx", dim * dim, CEED_EVAL_GRAD);
+  CeedQFunctionAddInput(qf_error, "u", dim, CEED_EVAL_INTERP);
+  CeedQFunctionAddInput(qf_error, "p", 1, CEED_EVAL_INTERP);
+  CeedQFunctionAddInput(qf_error, "true solution", dim + 1, CEED_EVAL_NONE);
+  CeedQFunctionAddOutput(qf_error, "error", dim + 1, CEED_EVAL_NONE);
+  // Create the operator that builds the error
+  CeedOperatorCreate(ceed, qf_error, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, &op_error);
+  CeedOperatorSetField(op_error, "weight", CEED_ELEMRESTRICTION_NONE, ceed_data->basis_x, CEED_VECTOR_NONE);
+  CeedOperatorSetField(op_error, "dx", ceed_data->elem_restr_x, ceed_data->basis_x, ceed_data->x_coord);
+  CeedOperatorSetField(op_error, "u", ceed_data->elem_restr_u, ceed_data->basis_u, CEED_VECTOR_ACTIVE);
+  CeedOperatorSetField(op_error, "p", ceed_data->elem_restr_p, ceed_data->basis_p, CEED_VECTOR_ACTIVE);
+  CeedOperatorSetField(op_error, "true solution", ceed_data->elem_restr_U_i, CEED_BASIS_COLLOCATED, true_vec);
+  CeedOperatorSetField(op_error, "error", ceed_data->elem_restr_U_i, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE);
+  // -- Save libCEED data to apply operator in matops.c
+  ceed_data->qf_error = qf_error;
+  ceed_data->op_error = op_error;
+
+  if (app_ctx->view_solution) {
+    // -- Post processing
+    PetscCall(CreateRestrictionFromPlex(ceed, dm_H1, height, domain_label, value, &ceed_data->elem_restr_u_H1));
+    // ---------------------------------------------------------------------------
+    // Setup RHS for post processing
+    // ---------------------------------------------------------------------------
+    // -- Operator action variables: we use them in post-processing.c
+    CeedElemRestrictionCreateVector(ceed_data->elem_restr_u, &ceed_data->u_ceed, NULL);
+    CeedQFunction qf_rhs_H1;
+    CeedOperator  op_rhs_H1;
+    // Create the q-function that sets up the RHS
+    CeedQFunctionCreateInterior(ceed, 1, problem_data->post_rhs, problem_data->post_rhs_loc, &qf_rhs_H1);
+    CeedQFunctionAddInput(qf_rhs_H1, "weight", 1, CEED_EVAL_WEIGHT);
+    CeedQFunctionAddInput(qf_rhs_H1, "dx", dim * dim, CEED_EVAL_GRAD);
+    CeedQFunctionAddInput(qf_rhs_H1, "u_post", dim, CEED_EVAL_INTERP);
+    CeedQFunctionAddOutput(qf_rhs_H1, "rhs_post", dim, CEED_EVAL_INTERP);
+    // Create the operator that builds the RHS
+    CeedOperatorCreate(ceed, qf_rhs_H1, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, &op_rhs_H1);
+    CeedOperatorSetField(op_rhs_H1, "weight", CEED_ELEMRESTRICTION_NONE, ceed_data->basis_x, CEED_VECTOR_NONE);
+    CeedOperatorSetField(op_rhs_H1, "dx", ceed_data->elem_restr_x, ceed_data->basis_x, ceed_data->x_coord);
+    CeedOperatorSetField(op_rhs_H1, "u_post", ceed_data->elem_restr_u, ceed_data->basis_u, ceed_data->u_ceed);
+    CeedOperatorSetField(op_rhs_H1, "rhs_post", ceed_data->elem_restr_u_H1, ceed_data->basis_x, CEED_VECTOR_ACTIVE);
+    // -- Save libCEED data to apply operator in post-processing.c
+    ceed_data->qf_rhs_H1 = qf_rhs_H1;
+    ceed_data->op_rhs_H1 = op_rhs_H1;
+    // ---------------------------------------------------------------------------
+    // Setup qfunction for initial conditions u0
+    // ---------------------------------------------------------------------------
+    CeedQFunction qf_post_mass;
+    CeedOperator  op_post_mass;
+    CeedQFunctionCreateInterior(ceed, 1, problem_data->post_mass, problem_data->post_mass_loc, &qf_post_mass);
+    CeedQFunctionAddInput(qf_post_mass, "weight", 1, CEED_EVAL_WEIGHT);
+    CeedQFunctionAddInput(qf_post_mass, "dx", dim * dim, CEED_EVAL_GRAD);
+    CeedQFunctionAddInput(qf_post_mass, "u", dim, CEED_EVAL_INTERP);
+    CeedQFunctionAddOutput(qf_post_mass, "v", dim, CEED_EVAL_INTERP);
+    // Create the operator that builds the initial conditions
+    CeedOperatorCreate(ceed, qf_post_mass, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, &op_post_mass);
+    CeedOperatorSetField(op_post_mass, "weight", CEED_ELEMRESTRICTION_NONE, ceed_data->basis_x, CEED_VECTOR_NONE);
+    CeedOperatorSetField(op_post_mass, "dx", ceed_data->elem_restr_x, ceed_data->basis_x, ceed_data->x_coord);
+    CeedOperatorSetField(op_post_mass, "u", ceed_data->elem_restr_u_H1, ceed_data->basis_x, CEED_VECTOR_ACTIVE);
+    CeedOperatorSetField(op_post_mass, "v", ceed_data->elem_restr_u_H1, ceed_data->basis_x, CEED_VECTOR_ACTIVE);
+    // -- Save libCEED data to apply operator in post-processing.c
+    ceed_data->qf_post_mass = qf_post_mass;
+    ceed_data->op_post_mass = op_post_mass;
+    // -- Operator action variables: we use them in post-processing.c
+    CeedElemRestrictionCreateVector(ceed_data->elem_restr_u_H1, &ceed_data->up_ceed, NULL);
+    CeedElemRestrictionCreateVector(ceed_data->elem_restr_u_H1, &ceed_data->vp_ceed, NULL);
+  }
+  // -- Cleanup
+  CeedVectorDestroy(&true_vec);
+  CeedVectorDestroy(&true_force);
+  CeedQFunctionDestroy(&qf_true);
+  CeedOperatorDestroy(&op_true);
+
+  PetscFunctionReturn(0);
+};
+// -----------------------------------------------------------------------------
\ No newline at end of file
diff --git a/examples/Hdiv-mixed/src/setup-matops.c b/examples/Hdiv-mixed/src/setup-matops.c
new file mode 100644
index 0000000000..d90a24e525
--- /dev/null
+++ b/examples/Hdiv-mixed/src/setup-matops.c
@@ -0,0 +1,85 @@
+#include "../include/setup-matops.h"
+
+#include <stdio.h>
+
+#include "../include/setup-libceed.h"
+#include "ceed/ceed.h"
+
+// -----------------------------------------------------------------------------
+// Apply the local action of a libCEED operator and store result in PETSc vector
+// i.e. compute A X = Y (Y is zeroed, then ApplyAddLocalCeedOp() sums A X into it)
+// -----------------------------------------------------------------------------
+PetscErrorCode ApplyLocalCeedOp(Vec X, Vec Y, OperatorApplyContext op_apply_ctx) {
+  PetscFunctionBeginUser;
+
+  // Zero target vector, so the additive apply below yields A X rather than Y + A X
+  PetscCall(VecZeroEntries(Y));
+
+  // Sum into target vector (ApplyAddLocalCeedOp() finishes with an ADD_VALUES scatter)
+  PetscCall(ApplyAddLocalCeedOp(X, Y, op_apply_ctx));
+
+  PetscFunctionReturn(0);
+}
+
+PetscErrorCode ApplyAddLocalCeedOp(Vec X, Vec Y, OperatorApplyContext op_apply_ctx) {
+  // Adds the local operator action on X to the global vector Y; Y is not zeroed here
+  PetscMemType in_mem_type, out_mem_type;
+  PetscScalar *in_array, *out_array;
+
+  PetscFunctionBeginUser;
+
+  // Scatter the global input into the context's local input vector
+  PetscCall(DMGlobalToLocal(op_apply_ctx->dm, X, INSERT_VALUES, op_apply_ctx->X_loc));
+
+  // Lend the arrays of X_loc (read access) and Y_loc to the libCEED vectors
+  PetscCall(VecGetArrayReadAndMemType(op_apply_ctx->X_loc, (const PetscScalar **)&in_array, &in_mem_type));
+  PetscCall(VecGetArrayAndMemType(op_apply_ctx->Y_loc, &out_array, &out_mem_type));
+  CeedVectorSetArray(op_apply_ctx->x_ceed, MemTypeP2C(in_mem_type), CEED_USE_POINTER, in_array);
+  CeedVectorSetArray(op_apply_ctx->y_ceed, MemTypeP2C(out_mem_type), CEED_USE_POINTER, out_array);
+
+  // Run the libCEED operator on the borrowed arrays
+  CeedOperatorApply(op_apply_ctx->op_apply, op_apply_ctx->x_ceed, op_apply_ctx->y_ceed, CEED_REQUEST_IMMEDIATE);
+
+  // Take the arrays back from libCEED, then return them to the PETSc vectors
+  CeedVectorTakeArray(op_apply_ctx->x_ceed, MemTypeP2C(in_mem_type), NULL);
+  CeedVectorTakeArray(op_apply_ctx->y_ceed, MemTypeP2C(out_mem_type), NULL);
+  PetscCall(VecRestoreArrayReadAndMemType(op_apply_ctx->X_loc, (const PetscScalar **)&in_array));
+  PetscCall(VecRestoreArrayAndMemType(op_apply_ctx->Y_loc, &out_array));
+
+  // Accumulate the local result into the global output vector
+  PetscCall(DMLocalToGlobal(op_apply_ctx->dm, op_apply_ctx->Y_loc, ADD_VALUES, Y));
+  PetscFunctionReturn(0);
+}
+
+// -----------------------------------------------------------------------------
+// This function returns the computed diagonal of the operator
+// -----------------------------------------------------------------------------
+PetscErrorCode GetDiagonal(Mat A, Vec D) {
+  // A: shell matrix whose context is an OperatorApplyContext; D: global vector that receives the diagonal
+  OperatorApplyContext op_apply_ctx;
+  PetscScalar         *x;
+  PetscMemType         x_mem_type;
+
+  PetscFunctionBeginUser;
+  PetscCall(MatShellGetContext(A, &op_apply_ctx));
+
+  // -- Place PETSc vector in libCEED vector (X_loc serves as scratch storage here)
+  PetscCall(VecGetArrayAndMemType(op_apply_ctx->X_loc, &x, &x_mem_type));
+  CeedVectorSetArray(op_apply_ctx->x_ceed, MemTypeP2C(x_mem_type), CEED_USE_POINTER, x);
+
+  // -- Compute Diagonal: libCEED assembles it into x_ceed, which wraps the array of X_loc
+  CeedOperatorLinearAssembleDiagonal(op_apply_ctx->op_apply, op_apply_ctx->x_ceed, CEED_REQUEST_IMMEDIATE);
+
+  // -- Local-to-Global: return the array to PETSc and sum X_loc into a zeroed D
+  CeedVectorTakeArray(op_apply_ctx->x_ceed, MemTypeP2C(x_mem_type), NULL);
+  PetscCall(VecRestoreArrayAndMemType(op_apply_ctx->X_loc, &x));
+  PetscCall(VecZeroEntries(D));
+  PetscCall(DMLocalToGlobal(op_apply_ctx->dm, op_apply_ctx->X_loc, ADD_VALUES, D));
+
+  // Cleanup: leave the scratch vector zeroed
+  PetscCall(VecZeroEntries(op_apply_ctx->X_loc));
+
+  PetscFunctionReturn(0);
+}
+
+// -----------------------------------------------------------------------------
diff --git a/examples/Hdiv-mixed/src/setup-solvers.c b/examples/Hdiv-mixed/src/setup-solvers.c
new file mode 100644
index 0000000000..3c0d773489
--- /dev/null
+++ b/examples/Hdiv-mixed/src/setup-solvers.c
@@ -0,0 +1,242 @@
+#include "../include/setup-solvers.h"
+
+#include "../include/setup-libceed.h"
+#include "../include/setup-matops.h"
+#include "petscvec.h"
+
+// -----------------------------------------------------------------------------
+// Setup operator context data
+// -----------------------------------------------------------------------------
+PetscErrorCode SetupJacobianOperatorCtx(DM dm, Ceed ceed, CeedData ceed_data, OperatorApplyContext ctx_jacobian) {
+  VecType dm_vec_type;
+  PetscFunctionBeginUser;
+  // Fill the Jacobian apply context: DM, local work vectors, libCEED objects, vector type
+  PetscCall(DMGetVecType(dm, &dm_vec_type));
+  ctx_jacobian->dm = dm;
+  PetscCall(DMCreateLocalVector(dm, &ctx_jacobian->X_loc));
+  PetscCall(VecDuplicate(ctx_jacobian->X_loc, &ctx_jacobian->Y_loc));
+  ctx_jacobian->ceed     = ceed;
+  ctx_jacobian->op_apply = ceed_data->op_jacobian;
+  ctx_jacobian->vec_type = dm_vec_type;
+  ctx_jacobian->x_ceed   = ceed_data->x_ceed;
+  ctx_jacobian->y_ceed   = ceed_data->y_ceed;
+  PetscFunctionReturn(0);
+}
+
+PetscErrorCode SetupResidualOperatorCtx(DM dm, Ceed ceed, CeedData ceed_data, OperatorApplyContext ctx_residual) {
+  PetscFunctionBeginUser;
+  // PETSc side: the DM and a pair of local work vectors (input X_loc, output Y_loc)
+  ctx_residual->dm = dm;
+  PetscCall(DMCreateLocalVector(dm, &ctx_residual->X_loc));
+  PetscCall(VecDuplicate(ctx_residual->X_loc, &ctx_residual->Y_loc));
+  // libCEED side: residual operator plus the shared input/output CeedVectors
+  ctx_residual->op_apply = ceed_data->op_residual;
+  ctx_residual->ceed     = ceed;
+  ctx_residual->x_ceed   = ceed_data->x_ceed;
+  ctx_residual->y_ceed   = ceed_data->y_ceed;
+  PetscFunctionReturn(0);
+}
+
+PetscErrorCode SetupErrorOperatorCtx(DM dm, Ceed ceed, CeedData ceed_data, OperatorApplyContext ctx_error) {
+  PetscFunctionBeginUser;
+  // PETSc side: the DM and a pair of local work vectors (input X_loc, output Y_loc)
+  ctx_error->dm = dm;
+  PetscCall(DMCreateLocalVector(dm, &ctx_error->X_loc));
+  PetscCall(VecDuplicate(ctx_error->X_loc, &ctx_error->Y_loc));
+  // libCEED side: error operator plus the shared input/output CeedVectors
+  ctx_error->op_apply = ceed_data->op_error;
+  ctx_error->ceed     = ceed;
+  ctx_error->x_ceed   = ceed_data->x_ceed;
+  ctx_error->y_ceed   = ceed_data->y_ceed;
+  PetscFunctionReturn(0);
+}
+
+// -----------------------------------------------------------------------------
+// This function wraps the libCEED operator for a MatShell
+// -----------------------------------------------------------------------------
+PetscErrorCode ApplyMatOp(Mat A, Vec X, Vec Y) {
+  // Multiplication routine of the shell matrix: computes Y = A X through libCEED
+  OperatorApplyContext shell_ctx;
+
+  PetscFunctionBeginUser;
+
+  // The operator apply context is stored as the context of the shell matrix
+  PetscCall(MatShellGetContext(A, &shell_ctx));
+  // Zero Y, then add the local action of the libCEED operator on X
+  PetscCall(ApplyLocalCeedOp(X, Y, shell_ctx));
+  PetscFunctionReturn(0);
+}
+
+// -----------------------------------------------------------------------------
+// This function uses libCEED to compute the non-linear residual
+// -----------------------------------------------------------------------------
+PetscErrorCode SNESFormResidual(SNES snes, Vec X, Vec Y, void *ctx_residual) {
+  // Residual callback: writes the residual evaluated at X into Y; snes is not used
+  OperatorApplyContext residual_ctx = (OperatorApplyContext)ctx_residual;
+
+  PetscFunctionBeginUser;
+
+  // Use computed BCs (currently disabled)
+  // PetscCall( DMPlexInsertBoundaryValues(residual_ctx->dm, PETSC_TRUE,
+  //                                      residual_ctx->X_loc,
+  //                                      1.0, NULL, NULL, NULL) );
+
+  // Evaluate the libCEED operator held by the context on X
+  PetscCall(ApplyLocalCeedOp(X, Y, residual_ctx));
+  PetscFunctionReturn(0);
+}
+
+// -----------------------------------------------------------------------------
+// Jacobian setup
+// -----------------------------------------------------------------------------
+PetscErrorCode SNESFormJacobian(SNES snes, Vec U, Mat J, Mat J_pre, void *ctx_jacobian) {
+  // Only finalizes matrix assembly; snes, U and ctx_jacobian are not used here
+  PetscFunctionBeginUser;
+  // J_pre might be AIJ (e.g., when using coloring), so we need to assemble it
+  PetscCall(MatAssemblyBegin(J_pre, MAT_FINAL_ASSEMBLY));
+  PetscCall(MatAssemblyEnd(J_pre, MAT_FINAL_ASSEMBLY));
+  if (J == J_pre) {
+    PetscFunctionReturn(0);
+  }
+  PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
+  PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
+  PetscFunctionReturn(0);
+}
+
+// ---------------------------------------------------------------------------
+// Setup Solver
+// ---------------------------------------------------------------------------
+PetscErrorCode PDESolver(CeedData ceed_data, AppCtx app_ctx, SNES snes, KSP ksp, Vec *U) {
+  // Configures snes/ksp, then solves starting from a zero initial guess; the
+  // solution is left in *U. ceed_data is not used by this routine.
+  PetscInt       n_local, n_global;
+  Vec            residual;
+  Mat            jac_shell;
+  PC             precond;
+  SNESLineSearch ls;
+
+  PetscFunctionBeginUser;
+
+  // Global and local sizes of U give the dimensions of the shell matrix
+  PetscCall(VecGetSize(*U, &n_global));
+  PetscCall(VecGetLocalSize(*U, &n_local));
+  // Vector handed to SNES for the residual
+  PetscCall(VecDuplicate(*U, &residual));
+
+  // ---------------------------------------------------------------------------
+  // Setup SNES
+  // ---------------------------------------------------------------------------
+  PetscCall(SNESSetDM(snes, app_ctx->ctx_jacobian->dm));
+  // -- Shell matrix for the Jacobian: product and diagonal come from libCEED
+  PetscCall(MatCreateShell(app_ctx->comm, n_local, n_local, n_global, n_global, app_ctx->ctx_jacobian, &jac_shell));
+  PetscCall(MatShellSetOperation(jac_shell, MATOP_MULT, (void (*)(void))ApplyMatOp));
+  PetscCall(MatShellSetOperation(jac_shell, MATOP_GET_DIAGONAL, (void (*)(void))GetDiagonal));
+  PetscCall(MatShellSetVecType(jac_shell, app_ctx->ctx_jacobian->vec_type));
+
+  // -- Residual and Jacobian callbacks; the shell is passed as both matrices
+  PetscCall(SNESSetFunction(snes, residual, SNESFormResidual, app_ctx->ctx_residual));
+  PetscCall(SNESSetJacobian(snes, jac_shell, jac_shell, SNESFormJacobian, app_ctx->ctx_jacobian));
+
+  // -- KSP: GMRES monitored in the preconditioned norm
+  PetscCall(KSPSetType(ksp, KSPGMRES));
+  PetscCall(KSPSetNormType(ksp, KSP_NORM_PRECONDITIONED));
+  // -- PC: Jacobi using the matrix diagonal
+  PetscCall(KSPGetPC(ksp, &precond));
+  PetscCall(PCSetType(precond, PCJACOBI));
+  PetscCall(PCJacobiSetType(precond, PC_JACOBI_DIAGONAL));
+  // -- User options and viewers
+  PetscCall(KSPSetFromOptions(ksp));
+  PetscCall(KSPViewFromOptions(ksp, NULL, "-ksp_view"));
+  PetscCall(PCViewFromOptions(precond, NULL, "-pc_view"));
+
+  // Default to critical-point (CP) line search (related to Wolfe's curvature condition)
+  PetscCall(SNESGetLineSearch(snes, &ls));
+  PetscCall(SNESLineSearchSetType(ls, SNESLINESEARCHCP));
+  PetscCall(SNESSetFromOptions(snes));
+
+  // Solve, with U = 0 as the initial guess
+  PetscCall(VecSet(*U, 0.0));
+  PetscCall(SNESSolve(snes, NULL, *U));
+
+  // Release the local handles to the shell matrix and the residual vector
+  PetscCall(MatDestroy(&jac_shell));
+  PetscCall(VecDestroy(&residual));
+
+  PetscFunctionReturn(0);
+}
+
+// -----------------------------------------------------------------------------
+// This function calculates the L2 error in the final solution
+// -----------------------------------------------------------------------------
+PetscErrorCode ComputeL2Error(CeedData ceed_data, AppCtx app_ctx, Vec U, CeedScalar *l2_error_u, CeedScalar *l2_error_p) {
+  // Applies the error operator to U and returns, for u and p separately, the
+  // square root of the 1-norm (CEED_NORM_1) of the entries the operator produced.
+  PetscScalar *x;
+  PetscMemType mem_type;
+  CeedVector   collocated_error;
+
+  PetscFunctionBeginUser;
+
+  // DMGetDimension() writes a PetscInt, whose width may differ from CeedInt
+  PetscInt dim;
+  CeedInt  num_elem, num_qpts;
+  PetscCall(DMGetDimension(app_ctx->ctx_error->dm, &dim));
+  CeedBasisGetNumQuadraturePoints(ceed_data->basis_u, &num_qpts);
+  num_elem = ceed_data->num_elem;
+  CeedVectorCreate(app_ctx->ctx_error->ceed, num_elem * num_qpts * (dim + 1), &collocated_error);
+
+  // Global-to-local
+  PetscCall(DMGlobalToLocal(app_ctx->ctx_error->dm, U, INSERT_VALUES, app_ctx->ctx_error->X_loc));
+
+  // Setup CEED vector; x_ceed is only an operator input, so read access is enough
+  PetscCall(VecGetArrayReadAndMemType(app_ctx->ctx_error->X_loc, (const PetscScalar **)&x, &mem_type));
+  CeedVectorSetArray(app_ctx->ctx_error->x_ceed, MemTypeP2C(mem_type), CEED_USE_POINTER, x);
+
+  // Apply CEED operator
+  CeedOperatorApply(app_ctx->ctx_error->op_apply, app_ctx->ctx_error->x_ceed, collocated_error, CEED_REQUEST_IMMEDIATE);
+  // Restore PETSc vector (read-only restore matches the read-only get above)
+  CeedVectorTakeArray(app_ctx->ctx_error->x_ceed, MemTypeP2C(mem_type), NULL);
+  PetscCall(VecRestoreArrayReadAndMemType(app_ctx->ctx_error->X_loc, (const PetscScalar **)&x));
+  // Compute L2 error for each field
+  const CeedInt     cent_qpts = num_qpts / 2;
+  const CeedInt     length_p = num_elem, length_u = num_elem * num_qpts * dim;
+  CeedVector        collocated_error_u, collocated_error_p;
+  const CeedScalar *E_U;  // to store total error
+  // Heap buffers: their size grows with the mesh, so they do not belong on the stack
+  CeedScalar *e_u, *e_p;
+  PetscCall(PetscMalloc1(length_u, &e_u));
+  PetscCall(PetscMalloc1(length_p, &e_p));
+  CeedVectorCreate(app_ctx->ctx_error->ceed, length_p, &collocated_error_p);
+  CeedVectorCreate(app_ctx->ctx_error->ceed, length_u, &collocated_error_u);
+  // E_U is ordered as [p_0,u_0/.../p_n,u_n] for 0 to n elements
+  // For each element p_0 size is num_qpts, and u_0 is dim*num_qpts
+  CeedVectorGetArrayRead(collocated_error, CEED_MEM_HOST, &E_U);
+  for (CeedInt n = 0; n < num_elem; n++) {
+    const CeedInt elem_offset = n * num_qpts * (dim + 1);
+    // p: a single entry per element, read at the middle quadrature point
+    e_p[n] = E_U[cent_qpts + elem_offset];
+    // u: the dim * num_qpts entries stored after the element's p entries
+    for (CeedInt i = 0; i < dim * num_qpts; i++) {
+      e_u[i + n * num_qpts * dim] = E_U[num_qpts + i + elem_offset];
+    }
+  }
+  CeedVectorSetArray(collocated_error_p, CEED_MEM_HOST, CEED_USE_POINTER, e_p);
+  CeedVectorSetArray(collocated_error_u, CEED_MEM_HOST, CEED_USE_POINTER, e_u);
+  CeedVectorRestoreArrayRead(collocated_error, &E_U);
+
+  CeedScalar error_u, error_p;
+  CeedVectorNorm(collocated_error_u, CEED_NORM_1, &error_u);
+  CeedVectorNorm(collocated_error_p, CEED_NORM_1, &error_p);
+  *l2_error_u = sqrt(error_u);
+  *l2_error_p = sqrt(error_p);
+  // Cleanup: destroy the vectors before freeing the buffers they borrow
+  CeedVectorDestroy(&collocated_error);
+  CeedVectorDestroy(&collocated_error_u);
+  CeedVectorDestroy(&collocated_error_p);
+  PetscCall(PetscFree(e_u));
+  PetscCall(PetscFree(e_p));
+
+  PetscFunctionReturn(0);
+}
+
+// -----------------------------------------------------------------------------
diff --git a/examples/Hdiv-mixed/src/setup-ts.c b/examples/Hdiv-mixed/src/setup-ts.c
new file mode 100644
index 0000000000..47208750c7
--- /dev/null
+++ b/examples/Hdiv-mixed/src/setup-ts.c
@@ -0,0 +1,349 @@
+#include "../include/setup-ts.h"
+
+#include <stdio.h>
+
+#include "../include/post-processing.h"
+#include "../include/setup-libceed.h"
+#include "../include/setup-matops.h"
+#include "../include/setup-solvers.h"
+#include "ceed/ceed.h"
+#include "petscerror.h"
+#include "petscsystypes.h"
+
+// -----------------------------------------------------------------------------
+// Setup operator context data for initial condition, u field
+// -----------------------------------------------------------------------------
+PetscErrorCode SetupResidualOperatorCtx_U0(MPI_Comm comm, DM dm_u0, Ceed ceed, CeedData ceed_data, OperatorApplyContext ctx_initial_u0) {
+  PetscFunctionBeginUser;
+  // PETSc side: communicator, DM of the u0 field and its local work vectors
+  ctx_initial_u0->dm   = dm_u0;
+  ctx_initial_u0->comm = comm;
+  PetscCall(DMCreateLocalVector(dm_u0, &ctx_initial_u0->X_loc));
+  PetscCall(VecDuplicate(ctx_initial_u0->X_loc, &ctx_initial_u0->Y_loc));
+  // libCEED side: op_ics_u operator with its u0/v0 CeedVectors
+  ctx_initial_u0->op_apply = ceed_data->op_ics_u;
+  ctx_initial_u0->ceed     = ceed;
+  ctx_initial_u0->x_ceed   = ceed_data->u0_ceed;
+  ctx_initial_u0->y_ceed   = ceed_data->v0_ceed;
+  PetscFunctionReturn(0);
+}
+
+// -----------------------------------------------------------------------------
+// Setup operator context data for initial condition, p field
+// -----------------------------------------------------------------------------
+PetscErrorCode SetupResidualOperatorCtx_P0(MPI_Comm comm, DM dm_p0, Ceed ceed, CeedData ceed_data, OperatorApplyContext ctx_initial_p0) {
+  PetscFunctionBeginUser;
+  // PETSc side: communicator, DM of the p0 field and its local work vectors
+  ctx_initial_p0->dm   = dm_p0;
+  ctx_initial_p0->comm = comm;
+  PetscCall(DMCreateLocalVector(dm_p0, &ctx_initial_p0->X_loc));
+  PetscCall(VecDuplicate(ctx_initial_p0->X_loc, &ctx_initial_p0->Y_loc));
+  // libCEED side: op_ics_p operator with its p0/q0 CeedVectors
+  ctx_initial_p0->op_apply = ceed_data->op_ics_p;
+  ctx_initial_p0->ceed     = ceed;
+  ctx_initial_p0->x_ceed   = ceed_data->p0_ceed;
+  ctx_initial_p0->y_ceed   = ceed_data->q0_ceed;
+  PetscFunctionReturn(0);
+}
+
+// -----------------------------------------------------------------------------
+// Setup operator context data for the residual of the Richards problem
+// -----------------------------------------------------------------------------
+PetscErrorCode SetupResidualOperatorCtx_Ut(MPI_Comm comm, DM dm, Ceed ceed, CeedData ceed_data, OperatorApplyContext ctx_residual_ut) {
+  PetscFunctionBeginUser;
+  // PETSc side: communicator, DM and local work vectors (X_loc, Y_loc, X_t_loc)
+  ctx_residual_ut->dm   = dm;
+  ctx_residual_ut->comm = comm;
+  PetscCall(DMCreateLocalVector(dm, &ctx_residual_ut->X_loc));
+  PetscCall(VecDuplicate(ctx_residual_ut->X_loc, &ctx_residual_ut->Y_loc));
+  PetscCall(VecDuplicate(ctx_residual_ut->X_loc, &ctx_residual_ut->X_t_loc));
+  // libCEED side: residual operator with its x, x_t and y CeedVectors
+  ctx_residual_ut->op_apply = ceed_data->op_residual;
+  ctx_residual_ut->ceed     = ceed;
+  ctx_residual_ut->x_ceed   = ceed_data->x_ceed;
+  ctx_residual_ut->x_t_ceed = ceed_data->x_t_ceed;
+  ctx_residual_ut->y_ceed   = ceed_data->y_ceed;
+  PetscFunctionReturn(0);
+}
+
+// -----------------------------------------------------------------------------
+// Create global initial conditions vector
+// -----------------------------------------------------------------------------
+PetscErrorCode CreateInitialConditions(CeedData ceed_data, AppCtx app_ctx, Vec U) {
+  PetscFunctionBeginUser;
+  // ----------------------------------------------
+  // Create local rhs for u field
+  // ----------------------------------------------
+  Vec          rhs_u_loc;
+  PetscScalar *ru;
+  PetscMemType ru_mem_type;
+  VecType      vec_type;
+  PetscCall(DMGetVecType(app_ctx->ctx_initial_u0->dm, &vec_type));
+  PetscCall(DMCreateLocalVector(app_ctx->ctx_initial_u0->dm, &rhs_u_loc));
+  PetscCall(VecZeroEntries(rhs_u_loc));
+  PetscCall(VecGetArrayAndMemType(rhs_u_loc, &ru, &ru_mem_type));
+  CeedElemRestrictionCreateVector(ceed_data->elem_restr_u0, &ceed_data->rhs_u0_ceed, NULL);
+  CeedVectorSetArray(ceed_data->rhs_u0_ceed, MemTypeP2C(ru_mem_type), CEED_USE_POINTER, ru);
+
+  // Apply operator to create RHS for u field
+  CeedOperatorApply(ceed_data->op_rhs_u0, ceed_data->x_coord, ceed_data->rhs_u0_ceed, CEED_REQUEST_IMMEDIATE);
+
+  // ----------------------------------------------
+  // Create global rhs for u field
+  // ----------------------------------------------
+  Vec rhs_u0;
+  CeedVectorTakeArray(ceed_data->rhs_u0_ceed, MemTypeP2C(ru_mem_type), NULL);
+  PetscCall(VecRestoreArrayAndMemType(rhs_u_loc, &ru));
+  PetscCall(DMCreateGlobalVector(app_ctx->ctx_initial_u0->dm, &rhs_u0));
+  PetscCall(VecZeroEntries(rhs_u0));
+  PetscCall(DMLocalToGlobal(app_ctx->ctx_initial_u0->dm, rhs_u_loc, ADD_VALUES, rhs_u0));
+
+  // ----------------------------------------------
+  // Solve for U0, M*U0 = rhs_u0
+  // ----------------------------------------------
+  Vec U0;
+  PetscCall(DMCreateGlobalVector(app_ctx->ctx_initial_u0->dm, &U0));
+  PetscCall(VecZeroEntries(U0));
+  PetscInt U0_g_size, U0_l_size;
+  PetscCall(VecGetSize(U0, &U0_g_size));
+  // Local size for matShell
+  PetscCall(VecGetLocalSize(U0, &U0_l_size));
+
+  // Operator
+  Mat mat_ksp_u0;
+  // -- Form Action of residual on u
+  PetscCall(MatCreateShell(app_ctx->comm, U0_l_size, U0_l_size, U0_g_size, U0_g_size, app_ctx->ctx_initial_u0, &mat_ksp_u0));
+  PetscCall(MatShellSetOperation(mat_ksp_u0, MATOP_MULT, (void (*)(void))ApplyMatOp));
+  PetscCall(MatShellSetVecType(mat_ksp_u0, vec_type));
+
+  KSP ksp_u0;
+  PetscCall(KSPCreate(app_ctx->comm, &ksp_u0));
+  PetscCall(KSPSetOperators(ksp_u0, mat_ksp_u0, mat_ksp_u0));
+  PetscCall(KSPSetFromOptions(ksp_u0));
+  PetscCall(KSPSetUp(ksp_u0));
+  PetscCall(KSPSolve(ksp_u0, rhs_u0, U0));
+
+  // ----------------------------------------------
+  // Create local rhs for p field
+  // ----------------------------------------------
+  Vec          rhs_p_loc;
+  PetscScalar *rp;
+  PetscMemType rp_mem_type;
+  PetscCall(DMCreateLocalVector(app_ctx->ctx_initial_p0->dm, &rhs_p_loc));
+  PetscCall(VecZeroEntries(rhs_p_loc));
+  PetscCall(VecGetArrayAndMemType(rhs_p_loc, &rp, &rp_mem_type));
+  CeedElemRestrictionCreateVector(ceed_data->elem_restr_p0, &ceed_data->rhs_p0_ceed, NULL);
+  CeedVectorSetArray(ceed_data->rhs_p0_ceed, MemTypeP2C(rp_mem_type), CEED_USE_POINTER, rp);
+
+  // Apply operator to create RHS for p field
+  CeedOperatorApply(ceed_data->op_rhs_p0, ceed_data->x_coord, ceed_data->rhs_p0_ceed, CEED_REQUEST_IMMEDIATE);
+
+  // ----------------------------------------------
+  // Create global rhs for p field
+  // ----------------------------------------------
+  Vec rhs_p0;
+  CeedVectorTakeArray(ceed_data->rhs_p0_ceed, MemTypeP2C(rp_mem_type), NULL);
+  PetscCall(VecRestoreArrayAndMemType(rhs_p_loc, &rp));
+  PetscCall(DMCreateGlobalVector(app_ctx->ctx_initial_p0->dm, &rhs_p0));
+  PetscCall(VecZeroEntries(rhs_p0));
+  PetscCall(DMLocalToGlobal(app_ctx->ctx_initial_p0->dm, rhs_p_loc, ADD_VALUES, rhs_p0));
+
+  // ----------------------------------------------
+  // Solve for P0, M*P0 = rhs_p0
+  // ----------------------------------------------
+  Vec P0;
+  PetscCall(DMCreateGlobalVector(app_ctx->ctx_initial_p0->dm, &P0));
+  PetscCall(VecZeroEntries(P0));
+  PetscInt P0_g_size, P0_l_size;
+  PetscCall(VecGetSize(P0, &P0_g_size));
+  // Local size for matShell
+  PetscCall(VecGetLocalSize(P0, &P0_l_size));
+
+  // Operator
+  Mat mat_ksp_p0;
+  // -- Form Action of the operator on p
+  PetscCall(MatCreateShell(app_ctx->comm, P0_l_size, P0_l_size, P0_g_size, P0_g_size, app_ctx->ctx_initial_p0, &mat_ksp_p0));
+  PetscCall(MatShellSetOperation(mat_ksp_p0, MATOP_MULT, (void (*)(void))ApplyMatOp));
+  PetscCall(MatShellSetVecType(mat_ksp_p0, vec_type));
+
+  KSP ksp_p0;
+  PetscCall(KSPCreate(app_ctx->comm, &ksp_p0));
+  PetscCall(KSPSetOperators(ksp_p0, mat_ksp_p0, mat_ksp_p0));
+  PetscCall(KSPSetFromOptions(ksp_p0));
+  PetscCall(KSPSetUp(ksp_p0));
+  PetscCall(KSPSolve(ksp_p0, rhs_p0, P0));
+
+  // ----------------------------------------------
+  // Create final initial conditions U
+  // ----------------------------------------------
+  // Global-to-local for U0, P0
+  PetscCall(DMGlobalToLocal(app_ctx->ctx_initial_u0->dm, U0, INSERT_VALUES, app_ctx->ctx_initial_u0->X_loc));
+  PetscCall(DMGlobalToLocal(app_ctx->ctx_initial_p0->dm, P0, INSERT_VALUES, app_ctx->ctx_initial_p0->X_loc));
+  // Get array u0,p0
+  const PetscScalar *u0, *p0;
+  PetscCall(VecGetArrayRead(app_ctx->ctx_initial_u0->X_loc, &u0));
+  PetscCall(VecGetArrayRead(app_ctx->ctx_initial_p0->X_loc, &p0));
+
+  // Get array of local vector U = [p,u]
+  PetscScalar *u;
+  PetscInt     U_l_size;
+  PetscCall(VecGetLocalSize(U, &U_l_size));
+  PetscCall(VecZeroEntries(app_ctx->ctx_residual_ut->X_loc));
+  PetscCall(VecGetArray(app_ctx->ctx_residual_ut->X_loc, &u));
+  for (PetscInt i = 0; i < ceed_data->num_elem; i++) {
+    u[i] = p0[i];
+  }
+  for (PetscInt i = ceed_data->num_elem; i < U_l_size; i++) {
+    u[i] = u0[i - ceed_data->num_elem];
+  }
+  PetscCall(VecRestoreArray(app_ctx->ctx_residual_ut->X_loc, &u));
+  PetscCall(VecRestoreArrayRead(app_ctx->ctx_initial_p0->X_loc, &p0));
+  PetscCall(VecRestoreArrayRead(app_ctx->ctx_initial_u0->X_loc, &u0));
+  PetscCall(DMLocalToGlobal(app_ctx->ctx_residual_ut->dm, app_ctx->ctx_residual_ut->X_loc, ADD_VALUES, U));
+
+  // Clean up
+  PetscCall(VecDestroy(&rhs_u_loc));
+  PetscCall(VecDestroy(&rhs_u0));
+  PetscCall(VecDestroy(&U0));
+  PetscCall(VecDestroy(&rhs_p_loc));
+  PetscCall(VecDestroy(&rhs_p0));
+  PetscCall(VecDestroy(&P0));
+  PetscCall(MatDestroy(&mat_ksp_p0));
+  PetscCall(MatDestroy(&mat_ksp_u0));
+  PetscCall(KSPDestroy(&ksp_p0));
+  PetscCall(KSPDestroy(&ksp_u0));
+  PetscFunctionReturn(0);
+}
+
+PetscErrorCode TSFormIResidual(TS ts, PetscReal time, Vec X, Vec X_t, Vec Y, void *ctx_residual_ut) {
+  OperatorApplyContext ctx = (OperatorApplyContext)ctx_residual_ut;
+  const PetscScalar   *x, *x_t;
+  PetscScalar         *y;
+  PetscMemType         x_mem_type, x_t_mem_type, y_mem_type;
+  PetscFunctionBeginUser;
+  // TS implicit-residual callback (installed with TSSetIFunction): applies ctx->op_apply to X and assembles the result into Y; ts is unused
+  // Update time dependent data: only push the time into the operator context when it differs from the cached ctx->t
+  if (ctx->t != time) {
+    CeedOperatorSetContextDouble(ctx->op_apply, ctx->solution_time_label, &time);
+    ctx->t = time;
+  }
+  // PetscScalar dt;
+  // PetscCall( TSGetTimeStep(ts, &dt) );
+  // if (ctx->dt != dt) {
+  //   CeedOperatorContextSetDouble(ctx->op_apply,
+  //                                ctx->timestep_label, &dt);
+  //   ctx->dt = dt;
+  // }
+  // Global-to-local: scatter the global X and X_t into the context's local work vectors
+  PetscCall(DMGlobalToLocal(ctx->dm, X, INSERT_VALUES, ctx->X_loc));
+  PetscCall(DMGlobalToLocal(ctx->dm, X_t, INSERT_VALUES, ctx->X_t_loc));
+
+  // Place PETSc vectors in CEED vectors (the read-only arrays are cast to non-const, presumably to match CeedVectorSetArray's signature)
+  PetscCall(VecGetArrayReadAndMemType(ctx->X_loc, &x, &x_mem_type));
+  PetscCall(VecGetArrayReadAndMemType(ctx->X_t_loc, &x_t, &x_t_mem_type));
+  PetscCall(VecGetArrayAndMemType(ctx->Y_loc, &y, &y_mem_type));
+  CeedVectorSetArray(ctx->x_ceed, MemTypeP2C(x_mem_type), CEED_USE_POINTER, (PetscScalar *)x);
+  CeedVectorSetArray(ctx->x_t_ceed, MemTypeP2C(x_t_mem_type), CEED_USE_POINTER, (PetscScalar *)x_t);
+  CeedVectorSetArray(ctx->y_ceed, MemTypeP2C(y_mem_type), CEED_USE_POINTER, y);
+
+  // Apply CEED operator (x_t_ceed is not an argument here; presumably it is bound to an operator field during setup - TODO confirm)
+  CeedOperatorApply(ctx->op_apply, ctx->x_ceed, ctx->y_ceed, CEED_REQUEST_IMMEDIATE);
+
+  // Restore vectors: take the arrays back from the CEED vectors before handing them back to PETSc
+  CeedVectorTakeArray(ctx->x_ceed, MemTypeP2C(x_mem_type), NULL);
+  CeedVectorTakeArray(ctx->x_t_ceed, MemTypeP2C(x_t_mem_type), NULL);
+  CeedVectorTakeArray(ctx->y_ceed, MemTypeP2C(y_mem_type), NULL);
+  PetscCall(VecRestoreArrayReadAndMemType(ctx->X_loc, &x));
+  PetscCall(VecRestoreArrayReadAndMemType(ctx->X_t_loc, &x_t));
+  PetscCall(VecRestoreArrayAndMemType(ctx->Y_loc, &y));
+
+  // Local-to-Global: Y is zeroed first so that ADD_VALUES leaves only the contributions summed from Y_loc
+  PetscCall(VecZeroEntries(Y));
+  PetscCall(DMLocalToGlobal(ctx->dm, ctx->Y_loc, ADD_VALUES, Y));
+
+  PetscFunctionReturn(0);
+}
+
+PetscErrorCode WriteOutput(Vec U, PetscInt steps, PetscScalar time, AppCtx app_ctx) {
+  char        output_filename[PETSC_MAX_PATH_LEN];
+  PetscViewer viewer_p, viewer_u;
+  PetscMPIInt rank;
+  PetscFunctionBeginUser;
+  // Writes two VTK files for step `steps` into app_ctx->output_dir: U itself and the output of ProjectVelocity; `time` is unused
+  // Create output directory (only rank 0 of app_ctx->comm calls PetscMkdir)
+  MPI_Comm_rank(app_ctx->comm, &rank);
+  if (!rank) {
+    PetscCall(PetscMkdir(app_ctx->output_dir));
+  }
+
+  // Build file name <output_dir>/richard_pressure-<steps>.vtu (step zero-padded to 3 digits) and view U into it
+  PetscCall(PetscSNPrintf(output_filename, sizeof output_filename, "%s/richard_pressure-%03" PetscInt_FMT ".vtu", app_ctx->output_dir, steps));
+  PetscCall(PetscViewerVTKOpen(app_ctx->comm, output_filename, FILE_MODE_WRITE, &viewer_p));
+  PetscCall(VecView(U, viewer_p));
+  PetscCall(PetscViewerDestroy(&viewer_p));
+
+  // Project velocity to H1: U_H1 is allocated on the ctx_H1 DM and destroyed at the end of this function
+  Vec U_H1;  // velocity in H1 space for post-processing
+  PetscCall(DMCreateGlobalVector(app_ctx->ctx_H1->dm, &U_H1));
+  PetscCall(ProjectVelocity(app_ctx, U, &U_H1));
+  // Build file name <output_dir>/richard_velocity-<steps>.vtu (reusing the same buffer) and view U_H1 into it
+  PetscCall(PetscSNPrintf(output_filename, sizeof output_filename, "%s/richard_velocity-%03" PetscInt_FMT ".vtu", app_ctx->output_dir, steps));
+  PetscCall(PetscViewerVTKOpen(app_ctx->comm, output_filename, FILE_MODE_WRITE, &viewer_u));
+  PetscCall(VecView(U_H1, viewer_u));
+  PetscCall(PetscViewerDestroy(&viewer_u));
+  PetscCall(VecDestroy(&U_H1));
+  PetscFunctionReturn(0);
+}
+
+// User provided TS Monitor: calls WriteOutput on steps that are multiples of app_ctx->output_freq; ctx is cast to AppCtx, ts is unused
+PetscErrorCode TSMonitorRichard(TS ts, PetscInt steps, PetscReal time, Vec U, void *ctx) {
+  AppCtx app_ctx = (AppCtx)ctx;
+
+  PetscFunctionBeginUser;
+
+  // Print every 'output_freq' steps; a non-positive output_freq disables output, and testing it first keeps the modulo from dividing by zero
+  if (app_ctx->output_freq <= 0 || steps % app_ctx->output_freq != 0) PetscFunctionReturn(0);
+
+  PetscCall(WriteOutput(U, steps, time, app_ctx));
+
+  PetscFunctionReturn(0);
+}
+
+// TS: set up the caller-provided ts (BDF, IFunction = TSFormIResidual) and solve for *U; stores the solve time in app_ctx->t_final; ceed_data is unused
+PetscErrorCode TSSolveRichard(CeedData ceed_data, AppCtx app_ctx, TS ts, Vec *U) {
+  TSAdapt adapt;
+  PetscFunctionBeginUser;
+
+  PetscCall(TSSetDM(ts, app_ctx->ctx_residual_ut->dm));
+  PetscCall(TSSetType(ts, TSBDF));
+  PetscCall(TSSetIFunction(ts, NULL, TSFormIResidual, app_ctx->ctx_residual_ut));
+
+  PetscCall(TSSetMaxTime(ts, app_ctx->t_final));
+  PetscCall(TSSetExactFinalTime(ts, TS_EXACTFINALTIME_STEPOVER));
+  PetscCall(TSSetTimeStep(ts, 1.e-2));  // initial step; set before TSSetFromOptions so command-line options can override it
+  PetscCall(TSGetAdapt(ts, &adapt));
+  PetscCall(TSAdaptSetStepLimits(adapt, 1.e-12, 1.e2));
+  PetscCall(TSSetFromOptions(ts));
+  app_ctx->ctx_residual_ut->t = -1.0;  // cached time starts at -1 so TSFormIResidual's `ctx->t != time` check fires on the first call
+  // ceed_data->ctx_residual_ut->dt = -1.0;
+  if (app_ctx->view_solution) {
+    PetscCall(TSMonitorSet(ts, TSMonitorRichard, app_ctx, NULL));
+  }
+  // Solve: start from the TS's current time with the step counter reset to 0
+  PetscReal start_time;  // PetscReal, not PetscScalar: TSGetTime takes a PetscReal *
+  PetscCall(TSGetTime(ts, &start_time));
+
+  PetscCall(TSSetTime(ts, start_time));
+  PetscCall(TSSetStepNumber(ts, 0));
+
+  PetscCall(PetscBarrier((PetscObject)ts));
+  PetscCall(TSSolve(ts, *U));
+
+  PetscReal final_time;  // PetscReal, not PetscScalar: TSGetSolveTime takes a PetscReal *
+  PetscCall(TSGetSolveTime(ts, &final_time));
+  app_ctx->t_final = final_time;
+
+  PetscFunctionReturn(0);
+}
+
+// -----------------------------------------------------------------------------
diff --git a/tests/t330-basis.h b/tests/t330-basis.h
index fd7069cb61..260cfb6ee8 100644
--- a/tests/t330-basis.h
+++ b/tests/t330-basis.h
@@ -76,4 +76,4 @@ static void BuildHdivQuadrilateral(CeedInt q, CeedScalar *q_ref, CeedScalar *q_w
       }
     }
   }
-}
\ No newline at end of file
+}