diff --git a/include/clad/Differentiator/BuiltinDerivatives.h b/include/clad/Differentiator/BuiltinDerivatives.h
index 62296ab92..d309ababa 100644
--- a/include/clad/Differentiator/BuiltinDerivatives.h
+++ b/include/clad/Differentiator/BuiltinDerivatives.h
@@ -199,31 +199,31 @@ CUDA_HOST_DEVICE inline void __builtin_powf_pullback(float x, float exponent,
 // FIXME: Add the rest of the __builtin_ routines for log, sqrt, abs, etc.
 
 namespace std {
-template <typename T>
-CUDA_HOST_DEVICE ValueAndPushforward<T, T> abs_pushforward(T x, T d_x) {
+template <typename T, typename dT>
+CUDA_HOST_DEVICE ValueAndPushforward<T, dT> abs_pushforward(T x, dT d_x) {
   if (x >= 0)
     return {x, d_x};
   else
     return {-x, -d_x};
 }
 
-template <typename T>
-CUDA_HOST_DEVICE ValueAndPushforward<T, T> exp_pushforward(T x, T d_x) {
+template <typename T, typename dT>
+CUDA_HOST_DEVICE ValueAndPushforward<T, dT> exp_pushforward(T x, dT d_x) {
   return {::std::exp(x), ::std::exp(x) * d_x};
 }
 
-template <typename T>
-CUDA_HOST_DEVICE ValueAndPushforward<T, T> sin_pushforward(T x, T d_x) {
+template <typename T, typename dT>
+CUDA_HOST_DEVICE ValueAndPushforward<T, dT> sin_pushforward(T x, dT d_x) {
   return {::std::sin(x), ::std::cos(x) * d_x};
 }
 
-template <typename T>
-CUDA_HOST_DEVICE ValueAndPushforward<T, T> cos_pushforward(T x, T d_x) {
+template <typename T, typename dT>
+CUDA_HOST_DEVICE ValueAndPushforward<T, dT> cos_pushforward(T x, dT d_x) {
   return {::std::cos(x), (-1) * ::std::sin(x) * d_x};
 }
 
-template <typename T>
-CUDA_HOST_DEVICE ValueAndPushforward<T, T> sqrt_pushforward(T x, T d_x) {
+template <typename T, typename dT>
+CUDA_HOST_DEVICE ValueAndPushforward<T, dT> sqrt_pushforward(T x, dT d_x) {
   return {::std::sqrt(x), (((T)1) / (((T)2) * ::std::sqrt(x))) * d_x};
 }
 
@@ -232,9 +232,9 @@ CUDA_HOST_DEVICE ValueAndPushforward<T, T> floor_pushforward(T x, T /*d_x*/) {
   return {::std::floor(x), (T)0};
 }
 
-template <typename T>
-CUDA_HOST_DEVICE ValueAndPushforward<T, T> atan2_pushforward(T y, T x, T d_y,
-                                                             T d_x) {
+template <typename T, typename dT>
+CUDA_HOST_DEVICE ValueAndPushforward<T, dT> atan2_pushforward(T y, T x, dT d_y,
+                                                              dT d_x) {
   return {::std::atan2(y, x),
           -(y / ((x * x) + (y * y))) * d_x + x / ((x * x) + (y * y)) * d_y};
 }
@@ -246,8 +246,8 @@ CUDA_HOST_DEVICE void atan2_pullback(T y, T x, U d_z, T* d_y, T* d_x) {
   *d_x += -(y / ((x * x) + (y * y))) * d_z;
 }
 
-template <typename T>
-CUDA_HOST_DEVICE ValueAndPushforward<T, T> acos_pushforward(T x, T d_x) {
+template <typename T, typename dT>
+CUDA_HOST_DEVICE ValueAndPushforward<T, dT> acos_pushforward(T x, dT d_x) {
   return {::std::acos(x), ((-1) / (::std::sqrt(1 - x * x))) * d_x};
 }
 
@@ -263,17 +263,27 @@ ValueAndPushforward<float, float> sqrtf_pushforward(float x, float d_x) {
 
 #endif
 
-template <typename T1, typename T2>
-CUDA_HOST_DEVICE ValueAndPushforward<decltype(::std::pow(T1(), T2())),
-                                     decltype(::std::pow(T1(), T2()))>
-pow_pushforward(T1 x, T2 exponent, T1 d_x, T2 d_exponent) {
-  auto val = ::std::pow(x, exponent);
-  auto derivative = (exponent * ::std::pow(x, exponent - 1)) * d_x;
+template <typename T, typename dT> struct AdjOutType {
+  using type = T;
+};
+
+template <typename T, typename dT> struct AdjOutType<T, clad::array<dT>> {
+  using type = clad::array<T>;
+};
+
+template <typename T1, typename T2, typename dT1, typename dT2,
+          typename T_out = decltype(::std::pow(T1(), T2())),
+          typename dT_out = typename AdjOutType<T_out, dT1>::type>
+CUDA_HOST_DEVICE ValueAndPushforward<T_out, dT_out>
+pow_pushforward(T1 x, T2 exponent, dT1 d_x, dT2 d_exponent) {
+  T_out val = ::std::pow(x, exponent);
+  dT_out derivative = (exponent * ::std::pow(x, exponent - 1)) * d_x;
   // Only add directional derivative of base^exp w.r.t exp if the directional
-  // seed d_exponent is non-zero. This is required because if base is less than or
-  // equal to 0, then log(base) is undefined, and therefore if user only requested
-  // directional derivative of base^exp w.r.t base -- which is valid --, the result would
-  // be undefined because as per C++ valid number + NaN * 0 = NaN.
+  // seed d_exponent is non-zero. This is required because if base is less than
+  // or equal to 0, then log(base) is undefined, and therefore if user only
+  // requested directional derivative of base^exp w.r.t base -- which is valid
+  // --, the result would be undefined because as per C++ valid number + NaN * 0
+  // = NaN.
   if (d_exponent)
     derivative += (::std::pow(x, exponent) * ::std::log(x)) * d_exponent;
   return {val, derivative};
diff --git a/include/clad/Differentiator/Differentiator.h b/include/clad/Differentiator/Differentiator.h
index a4d450aff..1ae60d961 100644
--- a/include/clad/Differentiator/Differentiator.h
+++ b/include/clad/Differentiator/Differentiator.h
@@ -624,12 +624,13 @@ CUDA_HOST_DEVICE T push(tape<T>& to, ArgsT... val) {
             typename F, typename DerivedFnType = JacobianDerivedFnTraits_t<F>,
             typename = typename std::enable_if<
                 !std::is_class<remove_reference_and_pointer_t<F>>::value>::type>
-  constexpr CladFunction<
-      DerivedFnType, ExtractFunctorTraits_t<F>> __attribute__((annotate("J")))
+  constexpr CladFunction<DerivedFnType, ExtractFunctorTraits_t<F>,
+                         /*EnablePadding=*/true> __attribute__((annotate("J")))
   jacobian(F f, ArgSpec args = "",
            DerivedFnType derivedFn = static_cast<DerivedFnType>(nullptr),
            const char* code = "") {
-    return CladFunction<DerivedFnType, ExtractFunctorTraits_t<F>>(
+    return CladFunction<DerivedFnType, ExtractFunctorTraits_t<F>,
+                        /*EnablePadding=*/true>(
         derivedFn /* will be replaced by Jacobian*/, code);
   }
 
@@ -640,12 +641,13 @@ CUDA_HOST_DEVICE T push(tape<T>& to, ArgsT... val) {
             typename F, typename DerivedFnType = JacobianDerivedFnTraits_t<F>,
             typename = typename std::enable_if<
                 std::is_class<remove_reference_and_pointer_t<F>>::value>::type>
-  constexpr CladFunction<
-      DerivedFnType, ExtractFunctorTraits_t<F>> __attribute__((annotate("J")))
+  constexpr CladFunction<DerivedFnType, ExtractFunctorTraits_t<F>,
+                         /*EnablePadding=*/true> __attribute__((annotate("J")))
   jacobian(F&& f, ArgSpec args = "",
            DerivedFnType derivedFn = static_cast<DerivedFnType>(nullptr),
            const char* code = "") {
-    return CladFunction<DerivedFnType, ExtractFunctorTraits_t<F>>(
+    return CladFunction<DerivedFnType, ExtractFunctorTraits_t<F>,
+                        /*EnablePadding=*/true>(
         derivedFn /* will be replaced by Jacobian*/, code, f);
   }
 
diff --git a/include/clad/Differentiator/FunctionTraits.h b/include/clad/Differentiator/FunctionTraits.h
index bc568e51d..9f8cda94f 100644
--- a/include/clad/Differentiator/FunctionTraits.h
+++ b/include/clad/Differentiator/FunctionTraits.h
@@ -2,6 +2,7 @@
 #define FUNCTION_TRAITS
 
 #include "clad/Differentiator/ArrayRef.h"
+#include "clad/Differentiator/Matrix.h"
 
 #include <type_traits>
 
@@ -548,15 +549,25 @@ namespace clad {
     using type = NoFunction*;
   };
 
-  template <class... Args> struct SelectLast;
+  // OutputVecParamType is used to deduce the type of derivative arguments
+  // for vector forward mode.
+  template <class T, class R> struct OutputVecParamType {
+    using type = array_ref<typename std::remove_pointer<R>::type>;
+  };
 
-  template <class... Args>
-  using SelectLast_t = typename SelectLast<Args...>::type;
+  template <class T, class R>
+  using OutputVecParamType_t = typename OutputVecParamType<T, R>::type;
+
+  /// Specialization for vector forward mode type.
+  template <class F, class = void> struct ExtractDerivedFnTraitsVecForwMode {};
 
-  template <class T> struct SelectLast<T> { using type = T; };
+  template <class F>
+  using ExtractDerivedFnTraitsVecForwMode_t =
+      typename ExtractDerivedFnTraitsVecForwMode<F>::type;
 
-  template <class T, class... Args> struct SelectLast<T, Args...> {
-    using type = typename SelectLast<Args...>::type;
+  template <class ReturnType, class... Args>
+  struct ExtractDerivedFnTraitsVecForwMode<ReturnType (*)(Args...)> {
+    using type = void (*)(Args..., OutputVecParamType_t<Args, void>...);
   };
 
   template <class T, class = void> struct JacobianDerivedFnTraits {};
@@ -569,7 +580,7 @@ namespace clad {
   // JacobianDerivedFnTraits specializations for pure function pointer types
   template <class ReturnType, class... Args>
   struct JacobianDerivedFnTraits<ReturnType (*)(Args...)> {
-    using type = void (*)(Args..., SelectLast_t<Args...>);
+    using type = void (*)(Args..., OutputParamType_t<Args, void>...);
   };
 
   /// These macro expansions are used to cover all possible cases of
@@ -581,11 +592,12 @@ namespace clad {
   /// qualifier and reference respectively. The AddNOEX adds cases for noexcept
   /// qualifier only if it is supported and finally AddSPECS declares the
   /// function with all the cases
-#define JacobianDerivedFnTraits_AddSPECS(var, cv, vol, ref, noex)              \
-  template <typename R, typename C, typename... Args>                          \
-  struct JacobianDerivedFnTraits<R (C::*)(Args...) cv vol ref noex> {          \
-    using type = void (C::*)(Args..., SelectLast_t<Args...>) cv vol ref noex;  \
-  };
+#define JacobianDerivedFnTraits_AddSPECS(var, cv, vol, ref, noex)            \
+    template <typename R, typename C, typename... Args>                        \
+    struct JacobianDerivedFnTraits<R (C::*)(Args...) cv vol ref noex> {        \
+      using type = void (C::*)(                                                \
+          Args..., OutputParamType_t<Args, void>...) cv vol ref noex;          \
+    };
 
 #if __cpp_noexcept_function_type > 0
 #define JacobianDerivedFnTraits_AddNOEX(var, con, vol, ref)                    \
@@ -739,27 +751,6 @@ namespace clad {
   using ExtractDerivedFnTraitsForwMode_t =
       typename ExtractDerivedFnTraitsForwMode<F>::type;
 
-  // OutputVecParamType is used to deduce the type of derivative arguments
-  // for vector forward mode.
-  template <class T, class R> struct OutputVecParamType {
-    using type = array_ref<typename std::remove_pointer<R>::type>;
-  };
-
-  template <class T, class R>
-  using OutputVecParamType_t = typename OutputVecParamType<T, R>::type;
-
-  /// Specialization for vector forward mode type.
-  template <class F, class = void> struct ExtractDerivedFnTraitsVecForwMode {};
-
-  template <class F>
-  using ExtractDerivedFnTraitsVecForwMode_t =
-      typename ExtractDerivedFnTraitsVecForwMode<F>::type;
-
-  template <class ReturnType, class... Args>
-  struct ExtractDerivedFnTraitsVecForwMode<ReturnType (*)(Args...)> {
-    using type = void (*)(Args..., OutputVecParamType_t<Args, void>...);
-  };
-
   /// Specialization for free function pointer type
   template <class F>
   struct ExtractDerivedFnTraitsForwMode<
diff --git a/include/clad/Differentiator/ReverseModeVisitor.h b/include/clad/Differentiator/ReverseModeVisitor.h
index a73513f94..ba0cf7c14 100644
--- a/include/clad/Differentiator/ReverseModeVisitor.h
+++ b/include/clad/Differentiator/ReverseModeVisitor.h
@@ -489,10 +489,6 @@ namespace clad {
            "attempt to differentiate unsupported operator, ignored.",
            args);
     }
-    /// Builds an overload for the gradient function that has derived params for
-    /// all the arguments of the requested function and it calls the original
-    /// gradient function internally
-    clang::FunctionDecl* CreateGradientOverload();
 
     /// Returns the type that should be used to represent the derivative of a
     /// variable of type `yType` with respect to a parameter variable of type
diff --git a/include/clad/Differentiator/VectorForwardModeVisitor.h b/include/clad/Differentiator/VectorForwardModeVisitor.h
index 6890b622a..6955dc5ed 100644
--- a/include/clad/Differentiator/VectorForwardModeVisitor.h
+++ b/include/clad/Differentiator/VectorForwardModeVisitor.h
@@ -9,7 +9,7 @@ namespace clad {
 /// A visitor for processing the function code in vector forward mode.
 /// Used to compute derivatives by clad::vector_forward_differentiate.
 class VectorForwardModeVisitor : public BaseForwardModeVisitor {
-private:
+protected:
   llvm::SmallVector<const clang::ValueDecl*, 16> m_IndependentVars;
   /// Map used to keep track of parameter variables w.r.t which the
   /// the derivative is being computed. This is separate from the
diff --git a/include/clad/Differentiator/VisitorBase.h b/include/clad/Differentiator/VisitorBase.h
index 210f82112..2ab7e35bc 100644
--- a/include/clad/Differentiator/VisitorBase.h
+++ b/include/clad/Differentiator/VisitorBase.h
@@ -628,6 +628,12 @@ namespace clad {
     /// Returns type clad::ConstructorReverseForwTag<T>
     clang::QualType GetCladConstructorReverseForwTagOfType(clang::QualType T);
 
+    /// Builds an overload for the derivative function that has derived params
+    /// for all the arguments of the requested function and it calls the
+    /// original derivative function internally. Used in gradient and jacobian
+    /// modes.
+    clang::FunctionDecl* CreateDerivativeOverload();
+
   public:
     /// Rebuild a sequence of nested namespaces ending with DC.
     clang::NamespaceDecl* RebuildEnclosingNamespaces(clang::DeclContext* DC);
diff --git a/lib/Differentiator/BaseForwardModeVisitor.cpp b/lib/Differentiator/BaseForwardModeVisitor.cpp
index 8015b8fdb..4a9c81412 100644
--- a/lib/Differentiator/BaseForwardModeVisitor.cpp
+++ b/lib/Differentiator/BaseForwardModeVisitor.cpp
@@ -1228,7 +1228,16 @@ StmtDiff BaseForwardModeVisitor::VisitCallExpr(const CallExpr* CE) {
   callDiff = m_Builder.BuildCallToCustomDerivativeOrNumericalDiff(
       customPushforward, customDerivativeArgs, getCurrentScope(),
       const_cast<DeclContext*>(FD->getDeclContext()));
-
+  // Custom derivative templates can be written in a
+  // general way that works for both vectorized and non-vectorized
+  // modes. We have to also look for the pushforward with the regular name.
+  if (!callDiff && m_DiffReq.Mode != DiffMode::forward) {
+    customPushforward =
+        clad::utils::ComputeEffectiveFnName(FD) + "_pushforward";
+    callDiff = m_Builder.BuildCallToCustomDerivativeOrNumericalDiff(
+        customPushforward, customDerivativeArgs, getCurrentScope(),
+        const_cast<DeclContext*>(FD->getDeclContext()));
+  }
   if (!isLambda) {
     // Check if it is a recursive call.
     if (!callDiff && (FD == m_DiffReq.Function) &&
@@ -1446,7 +1455,9 @@ BaseForwardModeVisitor::VisitBinaryOperator(const BinaryOperator* BinOp) {
       derivedR = BuildParens(derivedR);
     opDiff = BuildOp(opCode, derivedL, derivedR);
   } else if (BinOp->isAssignmentOp()) {
-    if (Ldiff.getExpr_dx()->isModifiableLvalue(m_Context) != Expr::MLV_Valid) {
+    if ((Ldiff.getExpr_dx()->isModifiableLvalue(m_Context) !=
+         Expr::MLV_Valid) &&
+        !isCladArrayType(Ldiff.getExpr_dx()->getType())) {
       diag(DiagnosticsEngine::Warning, BinOp->getEndLoc(),
            "derivative of an assignment attempts to assign to unassignable "
            "expr, assignment ignored");
diff --git a/lib/Differentiator/CMakeLists.txt b/lib/Differentiator/CMakeLists.txt
index 7f928b2ac..177608449 100644
--- a/lib/Differentiator/CMakeLists.txt
+++ b/lib/Differentiator/CMakeLists.txt
@@ -31,6 +31,7 @@ llvm_add_library(cladDifferentiator
   DiffPlanner.cpp
   ErrorEstimator.cpp
   EstimationModel.cpp
+  JacobianModeVisitor.cpp
   HessianModeVisitor.cpp
   MultiplexExternalRMVSource.cpp
   PushForwardModeVisitor.cpp
diff --git a/lib/Differentiator/DerivativeBuilder.cpp b/lib/Differentiator/DerivativeBuilder.cpp
index 946a68719..465f35aee 100644
--- a/lib/Differentiator/DerivativeBuilder.cpp
+++ b/lib/Differentiator/DerivativeBuilder.cpp
@@ -6,6 +6,7 @@
 
 #include "clad/Differentiator/DerivativeBuilder.h"
 
+#include "JacobianModeVisitor.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/TemplateBase.h"
 #include "clang/Sema/Lookup.h"
@@ -432,8 +433,8 @@ static void registerDerivative(FunctionDecl* derivedFD, Sema& semaRef) {
       HessianModeVisitor H(*this, request);
       result = H.Derive();
     } else if (request.Mode == DiffMode::jacobian) {
-      ReverseModeVisitor R(*this, request);
-      result = R.Derive();
+      JacobianModeVisitor J(*this, request);
+      result = J.DeriveJacobian();
     } else if (request.Mode == DiffMode::error_estimation) {
       ReverseModeVisitor R(*this, request);
       InitErrorEstimation(m_ErrorEstHandler, m_EstModel, *this, request);
diff --git a/lib/Differentiator/DiffPlanner.cpp b/lib/Differentiator/DiffPlanner.cpp
index 8f4cb12f6..1f1fe761f 100644
--- a/lib/Differentiator/DiffPlanner.cpp
+++ b/lib/Differentiator/DiffPlanner.cpp
@@ -579,7 +579,7 @@ namespace clad {
       
       // If the function has no parameters, then we cannot differentiate it."
       // and if the DiffMode is Jacobian, we must have atleast 2 parameters.
-      if (params.empty() || (params.size()==1 && this->Mode == DiffMode::jacobian)) {
+      if (params.empty()) {
         utils::EmitDiag(semaRef, DiagnosticsEngine::Error,
                         CallContext->getEndLoc(),
                         "Attempted to differentiate a function without "
@@ -587,13 +587,6 @@ namespace clad {
         return;
       }
 
-      // If it is a Vector valued function, the last parameter is to store the
-      // output vector and hence is not a differentiable parameter, so we must
-      // pop it out
-      if (this->Mode == DiffMode::jacobian){
-        params.pop_back();
-      }
-
       // insert an empty index for each parameter.
       for (unsigned i=0; i<params.size(); ++i) {
         DiffInputVarInfo dVarInfo(params[i], IndexInterval());
diff --git a/lib/Differentiator/JacobianModeVisitor.cpp b/lib/Differentiator/JacobianModeVisitor.cpp
new file mode 100644
index 000000000..09f118478
--- /dev/null
+++ b/lib/Differentiator/JacobianModeVisitor.cpp
@@ -0,0 +1,284 @@
+#include "JacobianModeVisitor.h"
+
+#include "ConstantFolder.h"
+#include "clad/Differentiator/CladUtils.h"
+
+#include "llvm/Support/SaveAndRestore.h"
+
+using namespace clang;
+
+namespace clad {
+JacobianModeVisitor::JacobianModeVisitor(DerivativeBuilder& builder,
+                                         const DiffRequest& request)
+    : VectorPushForwardModeVisitor(builder, request) {}
+
+DerivativeAndOverload JacobianModeVisitor::DeriveJacobian() {
+  const FunctionDecl* FD = m_DiffReq.Function;
+  assert(m_DiffReq.Mode == DiffMode::jacobian);
+
+  DiffParams args{};
+  for (const DiffInputVarInfo& dParam : m_DiffReq.DVI)
+    args.push_back(dParam.param);
+
+  // Generate name for the derivative function.
+  std::string derivedFnName = m_DiffReq.BaseFunctionName + "_jac";
+  if (args.size() != FD->getNumParams()) {
+    for (const ValueDecl* arg : args) {
+      const auto* it = std::find(FD->param_begin(), FD->param_end(), arg);
+      auto idx = std::distance(FD->param_begin(), it);
+      derivedFnName += ('_' + std::to_string(idx));
+    }
+  }
+  IdentifierInfo* II = &m_Context.Idents.get(derivedFnName);
+  SourceLocation loc{m_DiffReq->getLocation()};
+  DeclarationNameInfo name(II, loc);
+
+  llvm::SmallVector<QualType, 8> paramTypes;
+
+  // Generate the function type for the derivative.
+  paramTypes.reserve(m_DiffReq->getNumParams() + args.size());
+  for (auto* PVD : m_DiffReq->parameters())
+    paramTypes.push_back(PVD->getType());
+  for (auto* PVD : m_DiffReq->parameters()) {
+    QualType paramTy = PVD->getType();
+    if (utils::isArrayOrPointerType(paramTy) || paramTy->isReferenceType())
+      paramTypes.push_back(getParamAdjointType(paramTy));
+  }
+  QualType vectorDiffFunctionType = m_Context.getFunctionType(
+      m_Context.VoidTy,
+      llvm::ArrayRef<QualType>(paramTypes.data(), paramTypes.size()),
+      // Cast to function pointer.
+      dyn_cast<FunctionProtoType>(m_DiffReq->getType())->getExtProtoInfo());
+
+  // Create the function declaration for the derivative.
+  // FIXME: We should not use const_cast to get the decl context here.
+  // NOLINTNEXTLINE(cppcoreguidelines-pro-type-const-cast)
+  auto* DC = const_cast<DeclContext*>(m_DiffReq->getDeclContext());
+  m_Sema.CurContext = DC;
+  DeclWithContext result = m_Builder.cloneFunction(
+      m_DiffReq.Function, *this, DC, loc, name, vectorDiffFunctionType);
+  FunctionDecl* vectorDiffFD = result.first;
+  m_Derivative = vectorDiffFD;
+
+  // Function declaration scope
+  llvm::SaveAndRestore<DeclContext*> SaveContext(m_Sema.CurContext);
+  llvm::SaveAndRestore<Scope*> SaveScope(getCurrentScope());
+  beginScope(Scope::FunctionPrototypeScope | Scope::FunctionDeclarationScope |
+             Scope::DeclScope);
+  m_Sema.PushFunctionScope();
+  m_Sema.PushDeclContext(getCurrentScope(), m_Derivative);
+
+  // Create the body of the derivative.
+  beginScope(Scope::FnScope | Scope::DeclScope);
+  m_DerivativeFnScope = getCurrentScope();
+  beginBlock();
+
+  // Count the number of non-array independent variables requested for
+  // differentiation.
+  size_t nonArrayIndVarCount = 0;
+
+  // Set the parameters for the derivative.
+  llvm::SmallVector<ParmVarDecl*, 16> params;
+  llvm::SmallVector<ParmVarDecl*, 16> derivedParams;
+  llvm::SmallVector<DeclStmt*, 8> adjointDecls;
+  for (const auto* PVD : FD->parameters()) {
+    IdentifierInfo* PVDII = PVD->getIdentifier();
+    auto* newPVD = CloneParmVarDecl(PVD, PVDII,
+                                    /*pushOnScopeChains=*/true,
+                                    /*cloneDefaultArg=*/false);
+    params.push_back(newPVD);
+
+    if (!BaseForwardModeVisitor::IsDifferentiableType(PVD->getType()))
+      continue;
+    auto derivedPVDName = "_d_vector_" + std::string(PVDII->getName());
+    IdentifierInfo* derivedPVDII = CreateUniqueIdentifier(derivedPVDName);
+    Expr* derivedExpr = nullptr;
+    if (utils::isArrayOrPointerType(PVD->getType())) {
+      ParmVarDecl* derivedPVD = utils::BuildParmVarDecl(
+          m_Sema, m_Derivative, derivedPVDII,
+          getParamAdjointType(PVD->getType()), PVD->getStorageClass());
+      derivedParams.push_back(derivedPVD);
+      derivedExpr =
+          BuildOp(UO_Deref, BuildDeclRef(derivedPVD), PVD->getBeginLoc());
+      Expr* getSize = BuildCallExprToMemFn(BuildDeclRef(derivedPVD),
+                                           /*MemberFunctionName=*/"rows", {});
+      if (!m_IndVarCountExpr)
+        m_IndVarCountExpr = getSize;
+      else
+        m_IndVarCountExpr =
+            BuildOp(BinaryOperatorKind::BO_Add, m_IndVarCountExpr, getSize);
+    } else if (PVD->getType()->isReferenceType()) {
+      ParmVarDecl* derivedPVD = utils::BuildParmVarDecl(
+          m_Sema, m_Derivative, derivedPVDII,
+          getParamAdjointType(PVD->getType()), PVD->getStorageClass());
+      derivedParams.push_back(derivedPVD);
+      derivedExpr =
+          BuildOp(UO_Deref, BuildDeclRef(derivedPVD), PVD->getBeginLoc());
+      nonArrayIndVarCount += 1;
+    } else {
+      VarDecl* derivedPVD = BuildVarDecl(
+          GetPushForwardDerivativeType(PVD->getType()), derivedPVDII);
+      adjointDecls.push_back(BuildDeclStmt(derivedPVD));
+      derivedExpr = BuildDeclRef(derivedPVD);
+      nonArrayIndVarCount += 1;
+    }
+    m_Variables[newPVD] = derivedExpr;
+  }
+
+  params.insert(params.end(), derivedParams.begin(), derivedParams.end());
+
+  // Process the expression for the number independent variables.
+  // This will be the sum of the sizes of all array parameters and the number
+  // of non-array parameters.
+  Expr* nonArrayIndVarCountExpr = ConstantFolder::synthesizeLiteral(
+      m_Context.UnsignedLongTy, m_Context, nonArrayIndVarCount);
+  if (!m_IndVarCountExpr) {
+    m_IndVarCountExpr = nonArrayIndVarCountExpr;
+  } else if (nonArrayIndVarCount != 0) {
+    m_IndVarCountExpr = BuildOp(BinaryOperatorKind::BO_Add, m_IndVarCountExpr,
+                                nonArrayIndVarCountExpr);
+  }
+
+  vectorDiffFD->setParams(
+      clad_compat::makeArrayRef(params.data(), params.size()));
+  vectorDiffFD->setBody(nullptr);
+
+  // Instantiate a variable indepVarCount to store the total number of
+  // independent variables requested.
+  // size_t indepVarCount = m_IndVarCountExpr;
+  auto* totalIndVars = BuildVarDecl(m_Context.UnsignedLongTy, "indepVarCount",
+                                    m_IndVarCountExpr);
+  addToCurrentBlock(BuildDeclStmt(totalIndVars));
+  m_IndVarCountExpr = BuildDeclRef(totalIndVars);
+
+  for (DeclStmt* decl : adjointDecls)
+    addToCurrentBlock(decl);
+
+  // Expression for maintaining the number of independent variables processed
+  // till now present as array elements. This will be sum of sizes of all such
+  // arrays.
+  Expr* arrayIndVarCountExpr = nullptr;
+
+  // Number of non-array independent variables processed till now.
+  nonArrayIndVarCount = 0;
+
+  // Current Index of independent variable in the param list of the function.
+  size_t independentVarIndex = 0;
+
+  size_t numParamsOriginalFn = m_DiffReq->getNumParams();
+  for (size_t i = 0; i < numParamsOriginalFn; ++i) {
+    bool is_array =
+        utils::isArrayOrPointerType(m_DiffReq->getParamDecl(i)->getType());
+    ParmVarDecl* param = params[i];
+    Expr* paramDiff = m_Variables[param];
+    QualType dParamType = clad::utils::GetValueType(param->getType());
+    // Desugaring the type is necessary to pass it to other templates
+    dParamType = dParamType.getDesugaredType(m_Context);
+    Expr* dVectorParam = nullptr;
+    if (m_DiffReq.DVI.size() > independentVarIndex &&
+        m_DiffReq.DVI[independentVarIndex].param ==
+            m_DiffReq->getParamDecl(i)) {
+      // Current offset for independent variable.
+      Expr* offsetExpr = arrayIndVarCountExpr;
+      Expr* nonArrayIndVarCountExpr = ConstantFolder::synthesizeLiteral(
+          m_Context.UnsignedLongTy, m_Context, nonArrayIndVarCount);
+      if (!offsetExpr)
+        offsetExpr = nonArrayIndVarCountExpr;
+      else if (nonArrayIndVarCount != 0)
+        offsetExpr = BuildOp(BinaryOperatorKind::BO_Add, offsetExpr,
+                             nonArrayIndVarCountExpr);
+
+      if (is_array) {
+        Expr* base = cast<UnaryOperator>(paramDiff)->getSubExpr();
+        // Get size of the array.
+        Expr* getSize = BuildCallExprToMemFn(Clone(base),
+                                             /*MemberFunctionName=*/"rows", {});
+        // Create an identity matrix for the parameter,
+        // with number of rows equal to the size of the array,
+        // and number of columns equal to the number of independent variables
+        llvm::SmallVector<Expr*, 3> args = {getSize, m_IndVarCountExpr,
+                                            offsetExpr};
+        dVectorParam = BuildIdentityMatrixExpr(dParamType, args, loc);
+
+        // Update the array independent expression.
+        if (!arrayIndVarCountExpr)
+          arrayIndVarCountExpr = getSize;
+        else
+          arrayIndVarCountExpr = BuildOp(BinaryOperatorKind::BO_Add,
+                                         arrayIndVarCountExpr, getSize);
+      } else {
+        // Create a one hot vector for the parameter.
+        llvm::SmallVector<Expr*, 2> args = {m_IndVarCountExpr, offsetExpr};
+        dVectorParam = BuildCallExprToCladFunction("one_hot_vector", args,
+                                                   {dParamType}, loc);
+        ++nonArrayIndVarCount;
+      }
+      ++independentVarIndex;
+    } else {
+      // We cannot initialize derived variable for pointer types because
+      // we do not know the correct size.
+      if (is_array)
+        continue;
+      // This parameter is not an independent variable.
+      // Initialize by all zeros.
+      dVectorParam = BuildCallExprToCladFunction(
+          "zero_vector", {m_IndVarCountExpr}, {dParamType}, loc);
+    }
+
+    // For each function arg to be differentiated, create a variable
+    // _d_vector_arg to store the vector of derivatives for that arg.
+    // for ex: double f(double x, double y, double z);
+    // and we want to differentiate w.r.t. x and z, then we will have
+    // -> clad::array<double> _d_vector_x = {1, 0};
+    // -> clad::array<double> _d_vector_y = {0, 0};
+    // -> clad::array<double> _d_vector_z = {0, 1};
+    if (utils::isArrayOrPointerType(param->getType()) ||
+        param->getType()->isReferenceType()) {
+      Expr* paramAssignment =
+          BuildOp(BO_Assign, Clone(paramDiff), dVectorParam);
+      addToCurrentBlock(paramAssignment);
+    } else {
+      auto* paramDecl = cast<VarDecl>(cast<DeclRefExpr>(paramDiff)->getDecl());
+      m_Sema.AddInitializerToDecl(paramDecl, dVectorParam, true);
+      paramDecl->setInitStyle(VarDecl::InitializationStyle::CInit);
+    }
+  }
+
+  // Traverse the function body and generate the derivative.
+  Stmt* BodyDiff = Visit(FD->getBody()).getStmt();
+  for (Stmt* S : cast<CompoundStmt>(BodyDiff)->body())
+    addToCurrentBlock(S);
+
+  Stmt* vectorDiffBody = endBlock();
+  m_Derivative->setBody(vectorDiffBody);
+  endScope(); // Function body scope
+  m_Sema.PopFunctionScopeInfo();
+  m_Sema.PopDeclContext();
+  endScope(); // Function decl scope
+  // Create the overload declaration for the derivative.
+  FunctionDecl* overloadFD = CreateDerivativeOverload();
+  return DerivativeAndOverload{vectorDiffFD, overloadFD};
+}
+
+QualType JacobianModeVisitor::getParamAdjointType(QualType T) {
+  QualType derivedTy = GetPushForwardDerivativeType(T);
+  derivedTy = m_Context.getPointerType(derivedTy.getNonReferenceType());
+  return derivedTy;
+}
+
+StmtDiff JacobianModeVisitor::VisitReturnStmt(const clang::ReturnStmt* RS) {
+  // If there is no return value, we must not attempt to differentiate
+  if (!RS->getRetValue())
+    return nullptr;
+
+  StmtDiff retValDiff = Visit(RS->getRetValue());
+  // This can instantiate as part of the move or copy initialization and
+  // needs a fake source location.
+  SourceLocation fakeLoc = utils::GetValidSLoc(m_Sema);
+  Stmt* returnStmt =
+      m_Sema
+          .ActOnReturnStmt(fakeLoc, retValDiff.getExpr_dx(), getCurrentScope())
+          .get();
+  return StmtDiff(returnStmt);
+}
+} // end namespace clad
diff --git a/lib/Differentiator/JacobianModeVisitor.h b/lib/Differentiator/JacobianModeVisitor.h
new file mode 100644
index 000000000..0a9aca311
--- /dev/null
+++ b/lib/Differentiator/JacobianModeVisitor.h
@@ -0,0 +1,20 @@
+#ifndef CLAD_DIFFERENTIATOR_JACOBIANMODEVISITOR_H
+#define CLAD_DIFFERENTIATOR_JACOBIANMODEVISITOR_H
+
+#include "clad/Differentiator/VectorPushForwardModeVisitor.h"
+
+namespace clad {
+class JacobianModeVisitor : public VectorPushForwardModeVisitor {
+
+public:
+  JacobianModeVisitor(DerivativeBuilder& builder, const DiffRequest& request);
+
+  DerivativeAndOverload DeriveJacobian();
+
+  clang::QualType getParamAdjointType(clang::QualType T);
+
+  StmtDiff VisitReturnStmt(const clang::ReturnStmt* RS) override;
+};
+} // end namespace clad
+
+#endif // CLAD_DIFFERENTIATOR_JACOBIANMODEVISITOR_H
diff --git a/lib/Differentiator/ReverseModeVisitor.cpp b/lib/Differentiator/ReverseModeVisitor.cpp
index 19b902ce1..b90dacb99 100644
--- a/lib/Differentiator/ReverseModeVisitor.cpp
+++ b/lib/Differentiator/ReverseModeVisitor.cpp
@@ -189,160 +189,10 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context,
     }
   }
 
-  FunctionDecl* ReverseModeVisitor::CreateGradientOverload() {
-    auto gradientParams = m_Derivative->parameters();
-    auto gradientNameInfo = m_Derivative->getNameInfo();
-    // Calculate the total number of parameters that would be required for
-    // automatic differentiation in the derived function if all args are
-    // requested.
-    // FIXME: Here we are assuming all function parameters are of differentiable
-    // type. Ideally, we should not make any such assumption.
-    std::size_t totalDerivedParamsSize = m_DiffReq->getNumParams() * 2;
-    std::size_t numOfDerivativeParams = m_DiffReq->getNumParams();
-
-    // Account for the this pointer.
-    if (isa<CXXMethodDecl>(m_DiffReq.Function) &&
-        !utils::IsStaticMethod(m_DiffReq.Function))
-      ++numOfDerivativeParams;
-    // All output parameters will be of type `void*`. These
-    // parameters will be casted to correct type before the call to the actual
-    // derived function.
-    // We require each output parameter to be of same type in the overloaded
-    // derived function due to limitations of generating the exact derived
-    // function type at the compile-time (without clad plugin help).
-    QualType outputParamType = m_Context.getPointerType(m_Context.VoidTy);
-
-    llvm::SmallVector<QualType, 16> paramTypes;
-
-    // Add types for representing original function parameters.
-    for (auto* PVD : m_DiffReq->parameters())
-      paramTypes.push_back(PVD->getType());
-    // Add types for representing parameter derivatives.
-    // FIXME: We are assuming all function parameters are differentiable. We
-    // should not make any such assumptions.
-    for (std::size_t i = 0; i < numOfDerivativeParams; ++i)
-      paramTypes.push_back(outputParamType);
-
-    auto gradFuncOverloadEPI =
-        dyn_cast<FunctionProtoType>(m_DiffReq->getType())->getExtProtoInfo();
-    QualType gradientFunctionOverloadType =
-        m_Context.getFunctionType(m_Context.VoidTy, paramTypes,
-                                  // Cast to function pointer.
-                                  gradFuncOverloadEPI);
-
-    // FIXME: We should not use const_cast to get the decl context here.
-    // NOLINTNEXTLINE(cppcoreguidelines-pro-type-const-cast)
-    auto* DC = const_cast<DeclContext*>(m_DiffReq->getDeclContext());
-    m_Sema.CurContext = DC;
-    DeclWithContext gradientOverloadFDWC =
-        m_Builder.cloneFunction(m_DiffReq.Function, *this, DC, noLoc,
-                                gradientNameInfo, gradientFunctionOverloadType);
-    FunctionDecl* gradientOverloadFD = gradientOverloadFDWC.first;
-
-    beginScope(Scope::FunctionPrototypeScope | Scope::FunctionDeclarationScope |
-               Scope::DeclScope);
-    m_Sema.PushFunctionScope();
-    m_Sema.PushDeclContext(getCurrentScope(), gradientOverloadFD);
-
-    llvm::SmallVector<ParmVarDecl*, 4> overloadParams;
-    llvm::SmallVector<Expr*, 4> callArgs;
-
-    overloadParams.reserve(totalDerivedParamsSize);
-    callArgs.reserve(gradientParams.size());
-
-    for (auto* PVD : m_DiffReq->parameters()) {
-      auto* VD = utils::BuildParmVarDecl(
-          m_Sema, gradientOverloadFD, PVD->getIdentifier(), PVD->getType(),
-          PVD->getStorageClass(), /*defArg=*/nullptr, PVD->getTypeSourceInfo());
-      overloadParams.push_back(VD);
-      callArgs.push_back(BuildDeclRef(VD));
-    }
-
-    for (std::size_t i = 0; i < numOfDerivativeParams; ++i) {
-      IdentifierInfo* II = nullptr;
-      StorageClass SC = StorageClass::SC_None;
-      std::size_t effectiveGradientIndex = m_DiffReq->getNumParams() + i;
-      // `effectiveGradientIndex < gradientParams.size()` implies that this
-      // parameter represents an actual derivative of one of the function
-      // original parameters.
-      if (effectiveGradientIndex < gradientParams.size()) {
-        auto* GVD = gradientParams[effectiveGradientIndex];
-        II = CreateUniqueIdentifier("_temp_" + GVD->getNameAsString());
-        SC = GVD->getStorageClass();
-      } else {
-        II = CreateUniqueIdentifier("_d_" + std::to_string(i));
-      }
-      auto* PVD = utils::BuildParmVarDecl(m_Sema, gradientOverloadFD, II,
-                                          outputParamType, SC);
-      overloadParams.push_back(PVD);
-    }
-
-    for (auto* PVD : overloadParams)
-      if (PVD->getIdentifier())
-        m_Sema.PushOnScopeChains(PVD, getCurrentScope(),
-                                 /*AddToContext=*/false);
-
-    gradientOverloadFD->setParams(overloadParams);
-    gradientOverloadFD->setBody(/*B=*/nullptr);
-
-    beginScope(Scope::FnScope | Scope::DeclScope);
-    m_DerivativeFnScope = getCurrentScope();
-    beginBlock();
-
-    // Build derivatives to be used in the call to the actual derived function.
-    // These are initialised by effectively casting the derivative parameters of
-    // overloaded derived function to the correct type.
-    for (std::size_t i = m_DiffReq->getNumParams(); i < gradientParams.size();
-         ++i) {
-      auto* overloadParam = overloadParams[i];
-      auto* gradientParam = gradientParams[i];
-      TypeSourceInfo* typeInfo =
-          m_Context.getTrivialTypeSourceInfo(gradientParam->getType());
-      SourceLocation fakeLoc = utils::GetValidSLoc(m_Sema);
-      auto* init = m_Sema
-                       .BuildCStyleCastExpr(fakeLoc, typeInfo, fakeLoc,
-                                            BuildDeclRef(overloadParam))
-                       .get();
-
-      auto* gradientVD = BuildGlobalVarDecl(gradientParam->getType(),
-                                            gradientParam->getName(), init);
-      callArgs.push_back(BuildDeclRef(gradientVD));
-      addToCurrentBlock(BuildDeclStmt(gradientVD));
-    }
-
-    // If the function is a global kernel, we need to transform it
-    // into a device function when calling it inside the overload function
-    // which is the final global kernel returned.
-    if (m_Derivative->hasAttr<clang::CUDAGlobalAttr>()) {
-      m_Derivative->dropAttr<clang::CUDAGlobalAttr>();
-      m_Derivative->addAttr(clang::CUDADeviceAttr::CreateImplicit(m_Context));
-    }
-
-    Expr* callExpr = BuildCallExprToFunction(m_Derivative, callArgs,
-                                             /*UseRefQualifiedThisObj=*/true);
-    addToCurrentBlock(callExpr);
-    Stmt* gradientOverloadBody = endBlock();
-
-    gradientOverloadFD->setBody(gradientOverloadBody);
-
-    endScope(); // Function body scope
-    m_Sema.PopFunctionScopeInfo();
-    m_Sema.PopDeclContext();
-    endScope(); // Function decl scope
-
-    return gradientOverloadFD;
-  }
-
   DerivativeAndOverload ReverseModeVisitor::Derive() {
     const FunctionDecl* FD = m_DiffReq.Function;
     if (m_ExternalSource)
       m_ExternalSource->ActOnStartOfDerive();
-
-    // FIXME: reverse mode plugins may have request mode other than
-    // `DiffMode::reverse`, but they still need the `DiffMode::reverse` mode
-    // specific behaviour, because they are "reverse" mode plugins.
-    // assert(m_DiffReq.Mode == DiffMode::reverse ||
-    //        m_DiffReq.Mode == DiffMode::jacobian && "Unexpected Mode.");
     if (m_DiffReq.Mode == DiffMode::error_estimation)
       // NOLINTNEXTLINE(cppcoreguidelines-pro-type-const-cast)
       const_cast<DiffRequest&>(m_DiffReq).Mode = DiffMode::reverse;
@@ -362,29 +212,12 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context,
 
     if (m_ExternalSource)
       m_ExternalSource->ActAfterParsingDiffArgs(m_DiffReq, args);
-    // Save the type of the output parameter(s) that is add by clad to the
-    // derived function
-    if (m_DiffReq.Mode == DiffMode::jacobian) {
-      unsigned lastArgN = m_DiffReq->getNumParams() - 1;
-      outputArrayStr = m_DiffReq->getParamDecl(lastArgN)->getNameAsString();
-    }
 
     auto derivativeBaseName = m_DiffReq.BaseFunctionName;
     std::string gradientName = derivativeBaseName + funcPostfix();
     // To be consistent with older tests, nothing is appended to 'f_grad' if
     // we differentiate w.r.t. all the parameters at once.
-    if (m_DiffReq.Mode == DiffMode::jacobian) {
-      // If Jacobian is asked, the last parameter is the result parameter
-      // and should be ignored
-      if (args.size() != FD->getNumParams()-1){
-        for (const auto* arg : args) {
-          const auto* const it =
-              std::find(FD->param_begin(), FD->param_end() - 1, arg);
-          auto idx = std::distance(FD->param_begin(), it);
-          gradientName += ('_' + std::to_string(idx));
-        }
-      }
-    } else if (args.size() != FD->getNumParams()) {
+    if (args.size() != FD->getNumParams()) {
       for (const auto* arg : args) {
         const auto* it = std::find(FD->param_begin(), FD->param_end(), arg);
         auto idx = std::distance(FD->param_begin(), it);
@@ -411,7 +244,7 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context,
     bool shouldCreateOverload = false;
     // FIXME: Gradient overload doesn't know how to handle additional parameters
     // added by the plugins yet.
-    if (m_DiffReq.Mode != DiffMode::jacobian && numExtraParam == 0)
+    if (numExtraParam == 0)
       shouldCreateOverload = true;
     if (!m_DiffReq.DeclarationOnly && !m_DiffReq.DerivedFDPrototypes.empty())
       // If the overload is already created, we don't need to create it again.
@@ -440,7 +273,7 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context,
       m_Derivative = customDerivative;
       FunctionDecl* gradientOverloadFD = nullptr;
       if (shouldCreateOverload)
-        gradientOverloadFD = CreateGradientOverload();
+        gradientOverloadFD = CreateDerivativeOverload();
       return DerivativeAndOverload{customDerivative, gradientOverloadFD};
     }
 
@@ -484,37 +317,6 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context,
     gradientFD->setBody(nullptr);
 
     if (!m_DiffReq.DeclarationOnly) {
-      if (m_DiffReq.Mode == DiffMode::jacobian) {
-        // Reference to the output parameter.
-        m_Result = BuildDeclRef(params.back());
-        numParams = args.size();
-
-        // Creates the ArraySubscriptExprs for the independent variables
-        size_t idx = 0;
-        for (const auto* arg : args) {
-          // FIXME: fix when adding array inputs, now we are just skipping all
-          // array/pointer inputs (not treating them as independent variables).
-          if (utils::isArrayOrPointerType(arg->getType())) {
-            if (arg->getName() == "p")
-              m_Variables[arg] = m_Result;
-            idx += 1;
-            continue;
-          }
-          auto size_type = m_Context.getSizeType();
-          unsigned size_type_bits = m_Context.getIntWidth(size_type);
-          // Create the idx literal.
-          auto* i = IntegerLiteral::Create(
-              m_Context, llvm::APInt(size_type_bits, idx), size_type, noLoc);
-          // Create the jacobianMatrix[idx] expression.
-          auto* result_at_i =
-              m_Sema.CreateBuiltinArraySubscriptExpr(m_Result, noLoc, i, noLoc)
-                  .get();
-          m_Variables[arg] = result_at_i;
-          idx += 1;
-          m_IndependentVars.push_back(arg);
-        }
-      }
-
       if (m_ExternalSource)
         m_ExternalSource->ActBeforeCreatingDerivedFnBodyScope();
 
@@ -550,8 +352,7 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context,
 
     FunctionDecl* gradientOverloadFD = nullptr;
     if (shouldCreateOverload) {
-      gradientOverloadFD =
-          CreateGradientOverload();
+      gradientOverloadFD = CreateDerivativeOverload();
     }
 
     return DerivativeAndOverload{result.first, gradientOverloadFD};
@@ -701,10 +502,6 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context,
       // derived variables are already created for independent variables.
       if (m_Variables.count(param))
         continue;
-      // in vector mode last non diff parameter is output parameter.
-      if (m_DiffReq.Mode == DiffMode::jacobian &&
-          i == m_DiffReq->getNumParams() - 1)
-        continue;
       auto VDDerivedType = getNonConstType(param->getType(), m_Context, m_Sema);
       // We cannot initialize derived variable for pointer types because
       // we do not know the correct size.
@@ -1610,48 +1407,31 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context,
       if (DRE->getDecl()->getType()->isReferenceType() &&
           VD->getType()->isPointerType())
         clonedDRE = BuildOp(UO_Deref, clonedDRE);
-      if (m_DiffReq.Mode == DiffMode::jacobian) {
-        if (m_VectorOutput.size() <= outputArrayCursor)
-          return StmtDiff(clonedDRE);
-
-        auto it = m_VectorOutput[outputArrayCursor].find(VD);
-        if (it == std::end(m_VectorOutput[outputArrayCursor]))
-          return StmtDiff(clonedDRE); // Not an independent variable, ignored.
-
-        // Create the (jacobianMatrix[idx] += dfdx) statement.
-        if (dfdx()) {
-          auto* add_assign = BuildOp(BO_AddAssign, it->second, dfdx());
-          // Add it to the body statements.
-          addToCurrentBlock(add_assign, direction::reverse);
-        }
-        return StmtDiff(clonedDRE, it->second, it->second);
-      } else {
-        // Check DeclRefExpr is a reference to an independent variable.
-        auto it = m_Variables.find(VD);
-        if (it == std::end(m_Variables)) {
-          // Is not an independent variable, ignored.
-          return StmtDiff(clonedDRE);
-        }
-        // Create the (_d_param[idx] += dfdx) statement.
-        if (dfdx()) {
-          // FIXME: not sure if this is generic.
-          // Don't update derivatives of record types.
-          if (!VD->getType()->isRecordType()) {
-            Expr* base = it->second;
-            if (auto* UO = dyn_cast<UnaryOperator>(it->second))
-              base = UO->getSubExpr()->IgnoreImpCasts();
-            if (shouldUseCudaAtomicOps(base)) {
-              Expr* atomicCall = BuildCallToCudaAtomicAdd(it->second, dfdx());
-              // Add it to the body statements.
-              addToCurrentBlock(atomicCall, direction::reverse);
-            } else {
-              auto* add_assign = BuildOp(BO_AddAssign, it->second, dfdx());
-              addToCurrentBlock(add_assign, direction::reverse);
-            }
+      // Check DeclRefExpr is a reference to an independent variable.
+      auto it = m_Variables.find(VD);
+      if (it == std::end(m_Variables)) {
+        // Is not an independent variable, ignored.
+        return StmtDiff(clonedDRE);
+      }
+      // Create the (_d_param[idx] += dfdx) statement.
+      if (dfdx()) {
+        // FIXME: not sure if this is generic.
+        // Don't update derivatives of record types.
+        if (!VD->getType()->isRecordType()) {
+          Expr* base = it->second;
+          if (auto* UO = dyn_cast<UnaryOperator>(it->second))
+            base = UO->getSubExpr()->IgnoreImpCasts();
+          if (shouldUseCudaAtomicOps(base)) {
+            Expr* atomicCall = BuildCallToCudaAtomicAdd(it->second, dfdx());
+            // Add it to the body statements.
+            addToCurrentBlock(atomicCall, direction::reverse);
+          } else {
+            auto* add_assign = BuildOp(BO_AddAssign, it->second, dfdx());
+            addToCurrentBlock(add_assign, direction::reverse);
           }
         }
-        return StmtDiff(clonedDRE, it->second, it->second);
       }
+      return StmtDiff(clonedDRE, it->second, it->second);
     }
 
     return StmtDiff(clonedDRE);
@@ -2720,66 +2500,6 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context,
         return BuildOp(opCode, LExpr, RExpr);
       }
 
-      // FIXME: Put this code into a separate subroutine and break out early
-      // using return if the diff mode is not jacobian and we are not dealing
-      // with the `outputArray`.
-      if (auto* ASE = dyn_cast<ArraySubscriptExpr>(L)) {
-        if (auto* DRE =
-                dyn_cast<DeclRefExpr>(ASE->getBase()->IgnoreImplicit())) {
-          auto type = QualType(DRE->getType()->getPointeeOrArrayElementType(),
-                               /*Quals=*/0);
-          std::string DRE_str = DRE->getDecl()->getNameAsString();
-
-          llvm::APSInt intIdx;
-          Expr::EvalResult res;
-          Expr::SideEffectsKind AllowSideEffects =
-              Expr::SideEffectsKind::SE_NoSideEffects;
-          auto isIdxValid =
-              ASE->getIdx()->EvaluateAsInt(res, m_Context, AllowSideEffects);
-
-          if (DRE_str == outputArrayStr && isIdxValid) {
-            intIdx = res.Val.getInt();
-            if (m_DiffReq.Mode == DiffMode::jacobian) {
-              outputArrayCursor = intIdx.getExtValue();
-
-              std::unordered_map<const clang::ValueDecl*, clang::Expr*>
-                  temp_m_Variables;
-              for (unsigned i = 0; i < numParams; i++) {
-                auto size_type = m_Context.getSizeType();
-                unsigned size_type_bits = m_Context.getIntWidth(size_type);
-                llvm::APInt idxValue(size_type_bits,
-                                     i + (outputArrayCursor * numParams));
-                auto* idx = IntegerLiteral::Create(m_Context, idxValue,
-                                                   size_type, noLoc);
-                // Create the jacobianMatrix[idx] expression.
-                auto* result_at_i = m_Sema
-                                        .CreateBuiltinArraySubscriptExpr(
-                                            m_Result, noLoc, idx, noLoc)
-                                        .get();
-                temp_m_Variables[m_IndependentVars[i]] = result_at_i;
-              }
-              if (m_VectorOutput.size() <= outputArrayCursor)
-                m_VectorOutput.resize(outputArrayCursor + 1);
-              m_VectorOutput[outputArrayCursor] = std::move(temp_m_Variables);
-            }
-
-            auto* dfdf = ConstantFolder::synthesizeLiteral(m_Context.IntTy,
-                                                           m_Context, 1);
-            ExprResult tmp = dfdf;
-            dfdf = m_Sema
-                       .ImpCastExprToType(tmp.get(), type,
-                                          m_Sema.PrepareScalarCast(tmp, type))
-                       .get();
-            auto ReturnResult = DifferentiateSingleExpr(R, dfdf);
-            StmtDiff ReturnDiff = ReturnResult.first;
-            Stmt* Reverse = ReturnDiff.getStmt_dx();
-            addToCurrentBlock(Reverse, direction::reverse);
-            for (Stmt* S : cast<CompoundStmt>(ReturnDiff.getStmt())->body())
-              addToCurrentBlock(S, direction::forward);
-          }
-        }
-      }
-
       // Visit LHS, but delay emission of its derivative statements, save them
       // in Lblock
       beginBlock(direction::reverse);
@@ -4730,11 +4450,6 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context,
         if (it != std::end(diffParams))
           paramTypes.push_back(ComputeParamType(PVD->getType()));
       }
-    } else if (m_DiffReq.Mode == DiffMode::jacobian) {
-      std::size_t lastArgIdx = m_DiffReq->getNumParams() - 1;
-      QualType derivativeParamType =
-          m_DiffReq->getParamDecl(lastArgIdx)->getType();
-      paramTypes.push_back(derivativeParamType);
     }
     return paramTypes;
   }
@@ -4756,8 +4471,7 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context,
 
     if (const auto* MD = dyn_cast<CXXMethodDecl>(m_DiffReq.Function)) {
       const CXXRecordDecl* RD = MD->getParent();
-      if (m_DiffReq.Mode != DiffMode::jacobian && MD->isInstance() &&
-          !RD->isLambda()) {
+      if (MD->isInstance() && !RD->isLambda()) {
         auto* thisDerivativePVD = utils::BuildParmVarDecl(
             m_Sema, m_Derivative, CreateUniqueIdentifier("_d_this"),
             derivativeFnType->getParamType(dParamTypesIdx));
@@ -4839,18 +4553,6 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context,
       ++dParamTypesIdx;
     }
 
-    if (m_DiffReq.Mode == DiffMode::jacobian) {
-      IdentifierInfo* II = CreateUniqueIdentifier("jacobianMatrix");
-      // FIXME: Why are we taking storageClass of `params.front()`?
-      auto* dPVD = utils::BuildParmVarDecl(
-          m_Sema, m_Derivative, II,
-          derivativeFnType->getParamType(dParamTypesIdx),
-          params.front()->getStorageClass());
-      paramDerivatives.push_back(dPVD);
-      if (dPVD->getIdentifier())
-        m_Sema.PushOnScopeChains(dPVD, getCurrentScope(),
-                                 /*AddToContext=*/false);
-    }
     params.insert(params.end(), paramDerivatives.begin(),
                   paramDerivatives.end());
     // FIXME: If we do not consider diffParams as an independent argument for
diff --git a/lib/Differentiator/VectorForwardModeVisitor.cpp b/lib/Differentiator/VectorForwardModeVisitor.cpp
index 1c9ff820d..8199957de 100644
--- a/lib/Differentiator/VectorForwardModeVisitor.cpp
+++ b/lib/Differentiator/VectorForwardModeVisitor.cpp
@@ -26,6 +26,8 @@ DiffMode VectorForwardModeVisitor::GetPushForwardMode() {
 
 QualType
 VectorForwardModeVisitor::GetPushForwardDerivativeType(QualType ParamType) {
+  if (ParamType == m_Context.VoidTy)
+    return ParamType;
   QualType valueType = utils::GetValueType(ParamType);
   QualType resType;
   if (utils::isArrayOrPointerType(ParamType)) {
diff --git a/lib/Differentiator/VisitorBase.cpp b/lib/Differentiator/VisitorBase.cpp
index 0e6453625..87c55b4cd 100644
--- a/lib/Differentiator/VisitorBase.cpp
+++ b/lib/Differentiator/VisitorBase.cpp
@@ -902,4 +902,149 @@ namespace clad {
   VisitorBase::GetCladConstructorReverseForwTagOfType(clang::QualType T) {
     return InstantiateTemplate(GetCladConstructorReverseForwTag(), {T});
   }
+
+  FunctionDecl* VisitorBase::CreateDerivativeOverload() {
+    auto diffParams = m_Derivative->parameters();
+    auto diffNameInfo = m_Derivative->getNameInfo();
+    // Calculate the total number of parameters that would be required for
+    // automatic differentiation in the derived function if all args are
+    // requested.
+    // FIXME: Here we are assuming all function parameters are of differentiable
+    // type. Ideally, we should not make any such assumption.
+    std::size_t totalDerivedParamsSize = m_DiffReq->getNumParams() * 2;
+    std::size_t numOfDerivativeParams = m_DiffReq->getNumParams();
+
+    // Account for the this pointer.
+    if (isa<CXXMethodDecl>(m_DiffReq.Function) &&
+        !utils::IsStaticMethod(m_DiffReq.Function) &&
+        (!m_DiffReq.Functor || m_DiffReq.Mode != DiffMode::jacobian))
+      ++numOfDerivativeParams;
+    // All output parameters will be of type `void*`. These
+    // parameters will be casted to correct type before the call to the actual
+    // derived function.
+    // We require each output parameter to be of same type in the overloaded
+    // derived function due to limitations of generating the exact derived
+    // function type at the compile-time (without clad plugin help).
+    QualType outputParamType = m_Context.getPointerType(m_Context.VoidTy);
+
+    llvm::SmallVector<QualType, 16> paramTypes;
+
+    // Add types for representing original function parameters.
+    for (auto* PVD : m_DiffReq->parameters())
+      paramTypes.push_back(PVD->getType());
+    // Add types for representing parameter derivatives.
+    // FIXME: We are assuming all function parameters are differentiable. We
+    // should not make any such assumptions.
+    for (std::size_t i = 0; i < numOfDerivativeParams; ++i)
+      paramTypes.push_back(outputParamType);
+
+    auto diffFuncOverloadEPI =
+        dyn_cast<FunctionProtoType>(m_DiffReq->getType())->getExtProtoInfo();
+    QualType diffFunctionOverloadType =
+        m_Context.getFunctionType(m_Context.VoidTy, paramTypes,
+                                  // Cast to function pointer.
+                                  diffFuncOverloadEPI);
+
+    // FIXME: We should not use const_cast to get the decl context here.
+    // NOLINTNEXTLINE(cppcoreguidelines-pro-type-const-cast)
+    auto* DC = const_cast<DeclContext*>(m_DiffReq->getDeclContext());
+    m_Sema.CurContext = DC;
+    DeclWithContext diffOverloadFDWC =
+        m_Builder.cloneFunction(m_DiffReq.Function, *this, DC, noLoc,
+                                diffNameInfo, diffFunctionOverloadType);
+    FunctionDecl* diffOverloadFD = diffOverloadFDWC.first;
+
+    beginScope(Scope::FunctionPrototypeScope | Scope::FunctionDeclarationScope |
+               Scope::DeclScope);
+    m_Sema.PushFunctionScope();
+    m_Sema.PushDeclContext(getCurrentScope(), diffOverloadFD);
+
+    llvm::SmallVector<ParmVarDecl*, 4> overloadParams;
+    llvm::SmallVector<Expr*, 4> callArgs;
+
+    overloadParams.reserve(totalDerivedParamsSize);
+    callArgs.reserve(diffParams.size());
+
+    for (auto* PVD : m_DiffReq->parameters()) {
+      auto* VD = utils::BuildParmVarDecl(
+          m_Sema, diffOverloadFD, PVD->getIdentifier(), PVD->getType(),
+          PVD->getStorageClass(), /*defArg=*/nullptr, PVD->getTypeSourceInfo());
+      overloadParams.push_back(VD);
+      callArgs.push_back(BuildDeclRef(VD));
+    }
+
+    for (std::size_t i = 0; i < numOfDerivativeParams; ++i) {
+      IdentifierInfo* II = nullptr;
+      StorageClass SC = StorageClass::SC_None;
+      std::size_t effectiveDiffIndex = m_DiffReq->getNumParams() + i;
+      // `effectiveDiffIndex < diffParams.size()` implies that this
+      // parameter represents an actual derivative of one of the function
+      // original parameters.
+      if (effectiveDiffIndex < diffParams.size()) {
+        auto* GVD = diffParams[effectiveDiffIndex];
+        II = CreateUniqueIdentifier("_temp_" + GVD->getNameAsString());
+        SC = GVD->getStorageClass();
+      } else {
+        II = CreateUniqueIdentifier("_d_" + std::to_string(i));
+      }
+      auto* PVD = utils::BuildParmVarDecl(m_Sema, diffOverloadFD, II,
+                                          outputParamType, SC);
+      overloadParams.push_back(PVD);
+    }
+
+    for (auto* PVD : overloadParams)
+      if (PVD->getIdentifier())
+        m_Sema.PushOnScopeChains(PVD, getCurrentScope(),
+                                 /*AddToContext=*/false);
+
+    diffOverloadFD->setParams(overloadParams);
+    diffOverloadFD->setBody(/*B=*/nullptr);
+
+    beginScope(Scope::FnScope | Scope::DeclScope);
+    m_DerivativeFnScope = getCurrentScope();
+    beginBlock();
+
+    // Build derivatives to be used in the call to the actual derived function.
+    // These are initialised by effectively casting the derivative parameters of
+    // overloaded derived function to the correct type.
+    for (std::size_t i = m_DiffReq->getNumParams(); i < diffParams.size();
+         ++i) {
+      auto* overloadParam = overloadParams[i];
+      auto* diffParam = diffParams[i];
+      TypeSourceInfo* typeInfo =
+          m_Context.getTrivialTypeSourceInfo(diffParam->getType());
+      SourceLocation fakeLoc = utils::GetValidSLoc(m_Sema);
+      auto* init = m_Sema
+                       .BuildCStyleCastExpr(fakeLoc, typeInfo, fakeLoc,
+                                            BuildDeclRef(overloadParam))
+                       .get();
+
+      auto* diffVD =
+          BuildGlobalVarDecl(diffParam->getType(), diffParam->getName(), init);
+      callArgs.push_back(BuildDeclRef(diffVD));
+      addToCurrentBlock(BuildDeclStmt(diffVD));
+    }
+
+    // If the function is a global kernel, we need to transform it
+    // into a device function when calling it inside the overload function
+    // which is the final global kernel returned.
+    if (m_Derivative->hasAttr<clang::CUDAGlobalAttr>()) {
+      m_Derivative->dropAttr<clang::CUDAGlobalAttr>();
+      m_Derivative->addAttr(clang::CUDADeviceAttr::CreateImplicit(m_Context));
+    }
+
+    Expr* callExpr = BuildCallExprToFunction(m_Derivative, callArgs,
+                                             /*useRefQualifiedThisObj=*/true);
+    addToCurrentBlock(callExpr);
+    Stmt* diffOverloadBody = endBlock();
+
+    diffOverloadFD->setBody(diffOverloadBody);
+
+    endScope(); // Function body scope
+    m_Sema.PopFunctionScopeInfo();
+    m_Sema.PopDeclContext();
+    endScope(); // Function decl scope
+
+    return diffOverloadFD;
+  }
 } // end namespace clad
diff --git a/test/FirstDerivative/BuiltinDerivatives.C b/test/FirstDerivative/BuiltinDerivatives.C
index 0ff3b6e62..95c658204 100644
--- a/test/FirstDerivative/BuiltinDerivatives.C
+++ b/test/FirstDerivative/BuiltinDerivatives.C
@@ -99,7 +99,7 @@ float f7(float x) {
 
 // CHECK: float f7_darg0(float x) {
 // CHECK-NEXT:     float _d_x = 1;
-// CHECK-NEXT:     {{(clad::)?}}ValueAndPushforward<decltype(::std::pow(float(), double())), decltype(::std::pow(float(), double()))> _t0 = clad::custom_derivatives{{(::std)?}}::pow_pushforward(x, 2., _d_x, 0.);
+// CHECK-NEXT:     {{(clad::)?}}ValueAndPushforward<double, double> _t0 = clad::custom_derivatives{{(::std)?}}::pow_pushforward(x, 2., _d_x, 0.);
 // CHECK-NEXT:     return _t0.pushforward;
 // CHECK-NEXT: }
 
@@ -120,7 +120,7 @@ double f8(float x) {
 
 // CHECK: double f8_darg0(float x) {
 // CHECK-NEXT:     float _d_x = 1;
-// CHECK-NEXT:     {{(clad::)?}}ValueAndPushforward<decltype(::std::pow(float(), int())), decltype(::std::pow(float(), int()))> _t0 = clad::custom_derivatives{{(::std)?}}::pow_pushforward(x, 2, _d_x, 0);
+// CHECK-NEXT:     {{(clad::)?}}ValueAndPushforward<double, double> _t0 = clad::custom_derivatives{{(::std)?}}::pow_pushforward(x, 2, _d_x, 0);
 // CHECK-NEXT:     return _t0.pushforward;
 // CHECK-NEXT: }
 
@@ -142,7 +142,7 @@ float f9(float x, float y) {
 // CHECK: float f9_darg0(float x, float y) {
 // CHECK-NEXT:     float _d_x = 1;
 // CHECK-NEXT:     float _d_y = 0;
-// CHECK-NEXT:     {{(clad::)?}}ValueAndPushforward<decltype(::std::pow(float(), float())), decltype(::std::pow(float(), float()))> _t0 = clad::custom_derivatives{{(::std)?}}::pow_pushforward(x, y, _d_x, _d_y);
+// CHECK-NEXT:     {{(clad::)?}}ValueAndPushforward<float, float> _t0 = clad::custom_derivatives{{(::std)?}}::pow_pushforward(x, y, _d_x, _d_y);
 // CHECK-NEXT:     return _t0.pushforward;
 // CHECK-NEXT: }
 
@@ -165,7 +165,7 @@ double f10(float x, int y) {
 // CHECK: double f10_darg0(float x, int y) {
 // CHECK-NEXT:     float _d_x = 1;
 // CHECK-NEXT:     int _d_y = 0;
-// CHECK-NEXT:     {{(clad::)?}}ValueAndPushforward<decltype(::std::pow(float(), int())), decltype(::std::pow(float(), int()))> _t0 = clad::custom_derivatives{{(::std)?}}::pow_pushforward(x, y, _d_x, _d_y);
+// CHECK-NEXT:     {{(clad::)?}}ValueAndPushforward<double, double> _t0 = clad::custom_derivatives{{(::std)?}}::pow_pushforward(x, y, _d_x, _d_y);
 // CHECK-NEXT:     return _t0.pushforward;
 // CHECK-NEXT: }
 
diff --git a/test/ForwardMode/UserDefinedTypes.C b/test/ForwardMode/UserDefinedTypes.C
index fc7d6c865..0acd9a2e1 100644
--- a/test/ForwardMode/UserDefinedTypes.C
+++ b/test/ForwardMode/UserDefinedTypes.C
@@ -1167,7 +1167,7 @@ int main() {
 // CHECK-NEXT:     {
 // CHECK-NEXT:         unsigned int _d_i = 0;
 // CHECK-NEXT:         for (unsigned int i = 0; i < 5U; ++i) {
-// CHECK-NEXT:             ValueAndPushforward<decltype(::std::pow(double(), double())), decltype(::std::pow(double(), double()))> _t0 = clad::custom_derivatives::pow_pushforward(a.data[i], b.data[i], _d_a.data[i], _d_b.data[i]);
+// CHECK-NEXT:             {{(clad::)?}}ValueAndPushforward<double, double> _t0 = clad::custom_derivatives::pow_pushforward(a.data[i], b.data[i], _d_a.data[i], _d_b.data[i]);
 // CHECK-NEXT:             _d_res.data[i] = _t0.pushforward;
 // CHECK-NEXT:             res.data[i] = _t0.value;
 // CHECK-NEXT:         }
diff --git a/test/Hessian/BuiltinDerivatives.C b/test/Hessian/BuiltinDerivatives.C
index 942175b4f..b4d12a240 100644
--- a/test/Hessian/BuiltinDerivatives.C
+++ b/test/Hessian/BuiltinDerivatives.C
@@ -266,17 +266,17 @@ int main() {
 
 // CHECK: float f4_darg0(float x) {
 // CHECK-NEXT:     float _d_x = 1;
-// CHECK-NEXT:     ValueAndPushforward<decltype(::std::pow(float(), float())), decltype(::std::pow(float(), float()))> _t0 = clad::custom_derivatives{{(::std)?}}::pow_pushforward(x, 4.F, _d_x, 0.F);
+// CHECK-NEXT:     {{(clad::)?}}ValueAndPushforward<float, float> _t0 = clad::custom_derivatives{{(::std)?}}::pow_pushforward(x, 4.F, _d_x, 0.F);
 // CHECK-NEXT:     return _t0.pushforward;
 // CHECK-NEXT: }
 
-// CHECK: void pow_pushforward_pullback(float x, float exponent, float d_x, float d_exponent, ValueAndPushforward<decltype(::std::pow(float(), float())), decltype(::std::pow(float(), float()))> _d_y, float *_d_x, float *_d_exponent, float *_d_d_x, float *_d_d_exponent);
+// CHECK: void pow_pushforward_pullback(float x, float exponent, float d_x, float d_exponent, ValueAndPushforward<float, float> _d_y, float *_d_x, float *_d_exponent, float *_d_d_x, float *_d_d_exponent);
 
 // CHECK: void f4_darg0_grad(float x, float *_d_x) {
 // CHECK-NEXT:     float _d__d_x = 0.F;
 // CHECK-NEXT:     float _d_x0 = 1;
-// CHECK-NEXT:     ValueAndPushforward<decltype(::std::pow(float(), float())), decltype(::std::pow(float(), float()))> _d__t0 = {};
-// CHECK-NEXT:     ValueAndPushforward<decltype(::std::pow(float(), float())), decltype(::std::pow(float(), float()))> _t00 = clad::custom_derivatives{{(::std)?}}::pow_pushforward(x, 4.F, _d_x0, 0.F);
+// CHECK-NEXT:     {{(clad::)?}}ValueAndPushforward<float, float> _d__t0 = {};
+// CHECK-NEXT:     {{(clad::)?}}ValueAndPushforward<float, float> _t00 = clad::custom_derivatives{{(::std)?}}::pow_pushforward(x, 4.F, _d_x0, 0.F);
 // CHECK-NEXT:     _d__t0.pushforward += 1;
 // CHECK-NEXT:     {
 // CHECK-NEXT:         {{.*}} _r0 = {};
@@ -293,15 +293,15 @@ int main() {
 
 // CHECK: float f5_darg0(float x) {
 // CHECK-NEXT:     float _d_x = 1;
-// CHECK-NEXT:     ValueAndPushforward<decltype(::std::pow(float(), float())), decltype(::std::pow(float(), float()))> _t0 = clad::custom_derivatives{{(::std)?}}::pow_pushforward(2.F, x, 0.F, _d_x);
+// CHECK-NEXT:     {{(clad::)?}}ValueAndPushforward<float, float> _t0 = clad::custom_derivatives{{(::std)?}}::pow_pushforward(2.F, x, 0.F, _d_x);
 // CHECK-NEXT:     return _t0.pushforward;
 // CHECK-NEXT: }
 
 // CHECK: void f5_darg0_grad(float x, float *_d_x) {
 // CHECK-NEXT:     float _d__d_x = 0.F;
 // CHECK-NEXT:     float _d_x0 = 1;
-// CHECK-NEXT:     ValueAndPushforward<decltype(::std::pow(float(), float())), decltype(::std::pow(float(), float()))> _d__t0 = {};
-// CHECK-NEXT:     ValueAndPushforward<decltype(::std::pow(float(), float())), decltype(::std::pow(float(), float()))> _t00 = clad::custom_derivatives{{(::std)?}}::pow_pushforward(2.F, x, 0.F, _d_x0);
+// CHECK-NEXT:     {{(clad::)?}}ValueAndPushforward<float, float> _d__t0 = {};
+// CHECK-NEXT:     {{(clad::)?}}ValueAndPushforward<float, float> _t00 = clad::custom_derivatives{{(::std)?}}::pow_pushforward(2.F, x, 0.F, _d_x0);
 // CHECK-NEXT:     _d__t0.pushforward += 1;
 // CHECK-NEXT:     {
 // CHECK-NEXT:         {{.*}} _r0 = {};
@@ -319,7 +319,7 @@ int main() {
 // CHECK: float f6_darg0(float x, float y) {
 // CHECK-NEXT:     float _d_x = 1;
 // CHECK-NEXT:     float _d_y = 0;
-// CHECK-NEXT:     ValueAndPushforward<decltype(::std::pow(float(), float())), decltype(::std::pow(float(), float()))> _t0 = clad::custom_derivatives{{(::std)?}}::pow_pushforward(x, y, _d_x, _d_y);
+// CHECK-NEXT:     {{(clad::)?}}ValueAndPushforward<float, float> _t0 = clad::custom_derivatives{{(::std)?}}::pow_pushforward(x, y, _d_x, _d_y);
 // CHECK-NEXT:     return _t0.pushforward;
 // CHECK-NEXT: }
 
@@ -328,8 +328,8 @@ int main() {
 // CHECK-NEXT:     float _d_x0 = 1;
 // CHECK-NEXT:     float _d__d_y = 0.F;
 // CHECK-NEXT:     float _d_y0 = 0;
-// CHECK-NEXT:     ValueAndPushforward<decltype(::std::pow(float(), float())), decltype(::std::pow(float(), float()))> _d__t0 = {};
-// CHECK-NEXT:     ValueAndPushforward<decltype(::std::pow(float(), float())), decltype(::std::pow(float(), float()))> _t00 = clad::custom_derivatives{{(::std)?}}::pow_pushforward(x, y, _d_x0, _d_y0);
+// CHECK-NEXT:     {{(clad::)?}}ValueAndPushforward<float, float> _d__t0 = {};
+// CHECK-NEXT:     {{(clad::)?}}ValueAndPushforward<float, float> _t00 = clad::custom_derivatives{{(::std)?}}::pow_pushforward(x, y, _d_x0, _d_y0);
 // CHECK-NEXT:     _d__t0.pushforward += 1;
 // CHECK-NEXT:     {
 // CHECK-NEXT:         {{.*}} _r0 = {};
@@ -349,7 +349,7 @@ int main() {
 // CHECK: float f6_darg1(float x, float y) {
 // CHECK-NEXT:     float _d_x = 0;
 // CHECK-NEXT:     float _d_y = 1;
-// CHECK-NEXT:     ValueAndPushforward<decltype(::std::pow(float(), float())), decltype(::std::pow(float(), float()))> _t0 = clad::custom_derivatives{{(::std)?}}::pow_pushforward(x, y, _d_x, _d_y);
+// CHECK-NEXT:     {{(clad::)?}}ValueAndPushforward<float, float> _t0 = clad::custom_derivatives{{(::std)?}}::pow_pushforward(x, y, _d_x, _d_y);
 // CHECK-NEXT:     return _t0.pushforward;
 // CHECK-NEXT: }
 
@@ -358,8 +358,8 @@ int main() {
 // CHECK-NEXT:     float _d_x0 = 0;
 // CHECK-NEXT:     float _d__d_y = 0.F;
 // CHECK-NEXT:     float _d_y0 = 1;
-// CHECK-NEXT:     ValueAndPushforward<decltype(::std::pow(float(), float())), decltype(::std::pow(float(), float()))> _d__t0 = {};
-// CHECK-NEXT:     ValueAndPushforward<decltype(::std::pow(float(), float())), decltype(::std::pow(float(), float()))> _t00 = clad::custom_derivatives{{(::std)?}}::pow_pushforward(x, y, _d_x0, _d_y0);
+// CHECK-NEXT:     {{(clad::)?}}ValueAndPushforward<float, float> _d__t0 = {};
+// CHECK-NEXT:     {{(clad::)?}}ValueAndPushforward<float, float> _t00 = clad::custom_derivatives{{(::std)?}}::pow_pushforward(x, y, _d_x0, _d_y0);
 // CHECK-NEXT:     _d__t0.pushforward += 1;
 // CHECK-NEXT:     {
 // CHECK-NEXT:         {{.*}} _r0 = {};
@@ -434,7 +434,7 @@ int main() {
 // CHECK-NEXT:     }
 // CHECK-NEXT: }
 
-// CHECK: void pow_pushforward_pullback(float x, float exponent, float d_x, float d_exponent, ValueAndPushforward<decltype(::std::pow(float(), float())), decltype(::std::pow(float(), float()))> _d_y, float *_d_x, float *_d_exponent, float *_d_d_x, float *_d_d_exponent) {
+// CHECK: void pow_pushforward_pullback(float x, float exponent, float d_x, float d_exponent, ValueAndPushforward<float, float> _d_y, float *_d_x, float *_d_exponent, float *_d_d_x, float *_d_d_exponent) {
 // CHECK-NEXT:     bool _cond0;
 // CHECK-NEXT:     float _t1;
 // CHECK-NEXT:     float _t2;
diff --git a/test/Jacobian/FunctionCalls.C b/test/Jacobian/FunctionCalls.C
index da7d5420c..f46fb1931 100644
--- a/test/Jacobian/FunctionCalls.C
+++ b/test/Jacobian/FunctionCalls.C
@@ -6,38 +6,52 @@
 #include <cmath>
 #include "clad/Differentiator/Differentiator.h"
 
-double outputs[4], results[4];
+double outputs[4];
+clad::matrix<double> results(2, 2);
 
 void fn1(double i, double j, double* output) {
   output[0] = std::pow(i, j);
   output[1] = std::pow(j, i);
 }
 
-// CHECK: void fn1_jac(double i, double j, double *output, double *jacobianMatrix) {
-// CHECK-NEXT:     double _t0 = output[0];
-// CHECK-NEXT:     output[0] = std::pow(i, j);
-// CHECK-NEXT:     double _t1 = output[1];
-// CHECK-NEXT:     output[1] = std::pow(j, i);
-// CHECK-NEXT:     {
-// CHECK-NEXT:         {
-// CHECK-NEXT:             double _r2 = 0.;
-// CHECK-NEXT:             double _r3 = 0.;
-// CHECK-NEXT:             clad::custom_derivatives::pow_pullback(j, i, 1, &_r2, &_r3);
-// CHECK-NEXT:             jacobianMatrix[{{3U|3UL|3ULL}}] += _r2;
-// CHECK-NEXT:             jacobianMatrix[{{2U|2UL|2ULL}}] += _r3;
-// CHECK-NEXT:         }
-// CHECK-NEXT:         output[1] = _t1;
-// CHECK-NEXT:     }
-// CHECK-NEXT:     {
-// CHECK-NEXT:         {
-// CHECK-NEXT:             double _r0 = 0.;
-// CHECK-NEXT:             double _r1 = 0.;
-// CHECK-NEXT:             clad::custom_derivatives::pow_pullback(i, j, 1, &_r0, &_r1);
-// CHECK-NEXT:             jacobianMatrix[{{0U|0UL|0ULL}}] += _r0;
-// CHECK-NEXT:             jacobianMatrix[{{1U|1UL|1ULL}}] += _r1;
-// CHECK-NEXT:         }
-// CHECK-NEXT:         output[0] = _t0;
-// CHECK-NEXT:     }
+// CHECK: void fn1_jac(double i, double j, double *output, clad::matrix<double> *_d_vector_output) {
+// CHECK-NEXT:     unsigned long indepVarCount = _d_vector_output->rows() + {{2U|2UL|2ULL}};
+// CHECK-NEXT:     clad::array<double> _d_vector_i = clad::one_hot_vector(indepVarCount, {{0U|0UL|0ULL}});
+// CHECK-NEXT:     clad::array<double> _d_vector_j = clad::one_hot_vector(indepVarCount, {{1U|1UL|1ULL}});
+// CHECK-NEXT:     *_d_vector_output = clad::identity_matrix(_d_vector_output->rows(), indepVarCount, {{2U|2UL|2ULL}});
+// CHECK-NEXT:     {{.*}} _t0 = clad::custom_derivatives::pow_pushforward(i, j, _d_vector_i, _d_vector_j);
+// CHECK-NEXT:     *_d_vector_output[0] = _t0.pushforward;
+// CHECK-NEXT:     output[0] = _t0.value;
+// CHECK-NEXT:     {{.*}} _t1 = clad::custom_derivatives::pow_pushforward(j, i, _d_vector_j, _d_vector_i);
+// CHECK-NEXT:     *_d_vector_output[1] = _t1.pushforward;
+// CHECK-NEXT:     output[1] = _t1.value;
+// CHECK-NEXT: }
+
+double add(double a, double b) { return a + b ;}
+
+// CHECK: clad::ValueAndPushforward<double, clad::array<double> > add_vector_pushforward(double a, double b, clad::array<double> _d_a, clad::array<double> _d_b);
+
+void fn2(double a, double b, double* res){ 
+    res[0] = a*10 + b*b*9;
+    res[0] = add(res[0], 10);
+    res[1] = a*a*9 + b*b*10;
+}
+
+// CHECK: void fn2_jac(double a, double b, double *res, clad::matrix<double> *_d_vector_res) {
+// CHECK-NEXT:     unsigned long indepVarCount = _d_vector_res->rows() + 2UL;
+// CHECK-NEXT:     clad::array<double> _d_vector_a = clad::one_hot_vector(indepVarCount, 0UL);
+// CHECK-NEXT:     clad::array<double> _d_vector_b = clad::one_hot_vector(indepVarCount, 1UL);
+// CHECK-NEXT:     *_d_vector_res = clad::identity_matrix(_d_vector_res->rows(), indepVarCount, 2UL);
+// CHECK-NEXT:     double _t0 = b * b;
+// CHECK-NEXT:     *_d_vector_res[0] = _d_vector_a * 10 + a * (clad::zero_vector(indepVarCount)) + (_d_vector_b * b + b * _d_vector_b) * 9 + _t0 * (clad::zero_vector(indepVarCount));
+// CHECK-NEXT:     res[0] = a * 10 + _t0 * 9;
+// CHECK-NEXT:     clad::ValueAndPushforward<double, clad::array<double> > _t1 = add_vector_pushforward(res[0], 10, *_d_vector_res[0], clad::zero_vector(indepVarCount));
+// CHECK-NEXT:     *_d_vector_res[0] = _t1.pushforward;
+// CHECK-NEXT:     res[0] = _t1.value;
+// CHECK-NEXT:     double _t2 = a * a;
+// CHECK-NEXT:     double _t3 = b * b;
+// CHECK-NEXT:     *_d_vector_res[1] = (_d_vector_a * a + a * _d_vector_a) * 9 + _t2 * (clad::zero_vector(indepVarCount)) + (_d_vector_b * b + b * _d_vector_b) * 10 + _t3 * (clad::zero_vector(indepVarCount));
+// CHECK-NEXT:     res[1] = _t2 * 9 + _t3 * 10;
 // CHECK-NEXT: }
 
 #define INIT(F) auto d_##F = clad::jacobian(F);
@@ -47,24 +61,29 @@ void fn1(double i, double j, double* output) {
 template <unsigned numOfOutputs, typename Fn, typename... Args>
 void test(Fn derivedFn, Args... args) {
   unsigned numOfParameters = sizeof...(args);
-  unsigned numOfResults = numOfOutputs * numOfParameters;
   for (unsigned i = 0; i < numOfOutputs; ++i)
     outputs[i] = 0;
-  for (unsigned i = 0; i < numOfResults; ++i)
-    results[i] = 0;
-  derivedFn.execute(args..., outputs, results);
+  derivedFn.execute(args..., outputs, &results);
   printf("{");
-  for (unsigned i = 0; i < numOfResults; ++i) {
-    printf("%.2f", results[i]);
-    if (i != numOfResults - 1)
-      printf(", ");
+  for (unsigned i = 0; i < numOfOutputs; ++i) {
+    for (unsigned j = 0; j < numOfParameters; ++j) {
+      printf("%.2f", results[i][j]);
+      if (i != numOfOutputs - 1 || j != numOfParameters - 1)
+        printf(", ");
+    }
   }
   printf("}\n");
 }
 
 int main() {
   INIT(fn1);
-  
   test<2>(DERIVED_FN(fn1), 3, 5); // CHECK-EXEC: {405.00, 266.96, 201.18, 75.00}
+
+  INIT(fn2);
+  test<2>(DERIVED_FN(fn2), 3, 5); // CHECK-EXEC: {10.00, 90.00, 54.00, 100.00}
 }
 
+// CHECK: clad::ValueAndPushforward<double, clad::array<double> > add_vector_pushforward(double a, double b, clad::array<double> _d_a, clad::array<double> _d_b) {
+// CHECK-NEXT:     unsigned long indepVarCount = _d_b.size();
+// CHECK-NEXT:     return {a + b, _d_a + _d_b};
+// CHECK-NEXT: }
diff --git a/test/Jacobian/Functors.C b/test/Jacobian/Functors.C
index cf04e1291..22902b3de 100644
--- a/test/Jacobian/Functors.C
+++ b/test/Jacobian/Functors.C
@@ -16,27 +16,21 @@ struct Experiment {
     x = val;
   }
 
-  // CHECK: void operator_call_jac(double i, double j, double *output, double *jacobianMatrix) {
-  // CHECK-NEXT:     double _t0 = output[0];
-  // CHECK-NEXT:     output[0] = this->x * i * i * j;
-  // CHECK-NEXT:     double _t1 = output[1];
-  // CHECK-NEXT:     output[1] = this->y * i * j * j;
-  // CHECK-NEXT:     {
-  // CHECK-NEXT:         {
-  // CHECK-NEXT:             jacobianMatrix[{{2U|2UL|2ULL}}] += this->y * 1 * j * j;
-  // CHECK-NEXT:             jacobianMatrix[{{3U|3UL|3ULL}}] += this->y * i * 1 * j;
-  // CHECK-NEXT:             jacobianMatrix[{{3U|3UL|3ULL}}] += this->y * i * j * 1;
-  // CHECK-NEXT:         }
-  // CHECK-NEXT:         output[1] = _t1;
-  // CHECK-NEXT:     }
-  // CHECK-NEXT:     {
-  // CHECK-NEXT:         {
-  // CHECK-NEXT:             jacobianMatrix[{{0U|0UL|0ULL}}] += this->x * 1 * j * i;
-  // CHECK-NEXT:             jacobianMatrix[{{0U|0UL|0ULL}}] += this->x * i * 1 * j;
-  // CHECK-NEXT:             jacobianMatrix[{{1U|1UL|1ULL}}] += this->x * i * i * 1;
-  // CHECK-NEXT:         }
-  // CHECK-NEXT:         output[0] = _t0;
-  // CHECK-NEXT:     }
+  // CHECK: void operator_call_jac(double i, double j, double *output, clad::matrix<double> *_d_vector_output) {
+  // CHECK-NEXT:     unsigned long indepVarCount = _d_vector_output->rows() + {{2U|2UL|2ULL}};
+  // CHECK-NEXT:     clad::array<double> _d_vector_i = clad::one_hot_vector(indepVarCount, {{0U|0UL|0ULL}});
+  // CHECK-NEXT:     clad::array<double> _d_vector_j = clad::one_hot_vector(indepVarCount, {{1U|1UL|1ULL}});
+  // CHECK-NEXT:     *_d_vector_output = clad::identity_matrix(_d_vector_output->rows(), indepVarCount, {{2U|2UL|2ULL}});
+  // CHECK-NEXT:     double &_t0 = this->x;
+  // CHECK-NEXT:     double _t1 = _t0 * i;
+  // CHECK-NEXT:     double _t2 = _t1 * i;
+  // CHECK-NEXT:     *_d_vector_output[0] = ((0 * i + _t0 * _d_vector_i) * i + _t1 * _d_vector_i) * j + _t2 * _d_vector_j;
+  // CHECK-NEXT:     output[0] = _t2 * j;
+  // CHECK-NEXT:     double &_t3 = this->y;
+  // CHECK-NEXT:     double _t4 = _t3 * i;
+  // CHECK-NEXT:     double _t5 = _t4 * j;
+  // CHECK-NEXT:     *_d_vector_output[1] = ((0 * i + _t3 * _d_vector_i) * j + _t4 * _d_vector_j) * j + _t5 * _d_vector_j;
+  // CHECK-NEXT:     output[1] = _t5 * j;
   // CHECK-NEXT: }
 
 };
@@ -52,27 +46,21 @@ struct ExperimentConst {
     x = val;
   }
 
-  // CHECK: void operator_call_jac(double i, double j, double *output, double *jacobianMatrix) const {
-  // CHECK-NEXT:     double _t0 = output[0];
-  // CHECK-NEXT:     output[0] = this->x * i * i * j;
-  // CHECK-NEXT:     double _t1 = output[1];
-  // CHECK-NEXT:     output[1] = this->y * i * j * j;
-  // CHECK-NEXT:     {
-  // CHECK-NEXT:         {
-  // CHECK-NEXT:             jacobianMatrix[{{2U|2UL|2ULL}}] += this->y * 1 * j * j;
-  // CHECK-NEXT:             jacobianMatrix[{{3U|3UL|3ULL}}] += this->y * i * 1 * j;
-  // CHECK-NEXT:             jacobianMatrix[{{3U|3UL|3ULL}}] += this->y * i * j * 1;
-  // CHECK-NEXT:         }
-  // CHECK-NEXT:         output[1] = _t1;
-  // CHECK-NEXT:     }
-  // CHECK-NEXT:     {
-  // CHECK-NEXT:         {
-  // CHECK-NEXT:             jacobianMatrix[{{0U|0UL|0ULL}}] += this->x * 1 * j * i;
-  // CHECK-NEXT:             jacobianMatrix[{{0U|0UL|0ULL}}] += this->x * i * 1 * j;
-  // CHECK-NEXT:             jacobianMatrix[{{1U|1UL|1ULL}}] += this->x * i * i * 1;
-  // CHECK-NEXT:         }
-  // CHECK-NEXT:         output[0] = _t0;
-  // CHECK-NEXT:     }
+  // CHECK: void operator_call_jac(double i, double j, double *output, clad::matrix<double> *_d_vector_output) const {
+  // CHECK-NEXT:     unsigned long indepVarCount = _d_vector_output->rows() + {{2U|2UL|2ULL}};
+  // CHECK-NEXT:     clad::array<double> _d_vector_i = clad::one_hot_vector(indepVarCount, {{0U|0UL|0ULL}});
+  // CHECK-NEXT:     clad::array<double> _d_vector_j = clad::one_hot_vector(indepVarCount, {{1U|1UL|1ULL}});
+  // CHECK-NEXT:     *_d_vector_output = clad::identity_matrix(_d_vector_output->rows(), indepVarCount, {{2U|2UL|2ULL}});
+  // CHECK-NEXT:     double &_t0 = this->x;
+  // CHECK-NEXT:     double _t1 = _t0 * i;
+  // CHECK-NEXT:     double _t2 = _t1 * i;
+  // CHECK-NEXT:     *_d_vector_output[0] = ((0 * i + _t0 * _d_vector_i) * i + _t1 * _d_vector_i) * j + _t2 * _d_vector_j;
+  // CHECK-NEXT:     output[0] = _t2 * j;
+  // CHECK-NEXT:     double &_t3 = this->y;
+  // CHECK-NEXT:     double _t4 = _t3 * i;
+  // CHECK-NEXT:     double _t5 = _t4 * j;
+  // CHECK-NEXT:     *_d_vector_output[1] = ((0 * i + _t3 * _d_vector_i) * j + _t4 * _d_vector_j) * j + _t5 * _d_vector_j;
+  // CHECK-NEXT:     output[1] = _t5 * j;
   // CHECK-NEXT: }
 
 };
@@ -88,29 +76,21 @@ struct ExperimentVolatile {
     x = val;
   }
 
-  // CHECK: void operator_call_jac(double i, double j, double *output, double *jacobianMatrix) volatile {
-  // CHECK-NEXT:     double _t0 = this->x * i;
-  // CHECK-NEXT:     double _t1 = output[0];
-  // CHECK-NEXT:     output[0] = this->x * i * i * j;
-  // CHECK-NEXT:     double _t2 = this->y * i;
-  // CHECK-NEXT:     double _t3 = output[1];
-  // CHECK-NEXT:     output[1] = this->y * i * j * j;
-  // CHECK-NEXT:     {
-  // CHECK-NEXT:         {
-  // CHECK-NEXT:             jacobianMatrix[{{2U|2UL|2ULL}}] += this->y * 1 * j * j;
-  // CHECK-NEXT:             jacobianMatrix[{{3U|3UL|3ULL}}] += _t2 * 1 * j;
-  // CHECK-NEXT:             jacobianMatrix[{{3U|3UL|3ULL}}] += _t2 * j * 1;
-  // CHECK-NEXT:         }
-  // CHECK-NEXT:         output[1] = _t3;
-  // CHECK-NEXT:     }
-  // CHECK-NEXT:     {
-  // CHECK-NEXT:         {
-  // CHECK-NEXT:             jacobianMatrix[{{0U|0UL|0ULL}}] += this->x * 1 * j * i;
-  // CHECK-NEXT:             jacobianMatrix[{{0U|0UL|0ULL}}] += _t0 * 1 * j;
-  // CHECK-NEXT:             jacobianMatrix[{{1U|1UL|1ULL}}] += _t0 * i * 1;
-  // CHECK-NEXT:         }
-  // CHECK-NEXT:         output[0] = _t1;
-  // CHECK-NEXT:     }
+  // CHECK: void operator_call_jac(double i, double j, double *output, clad::matrix<double> *_d_vector_output) volatile {
+  // CHECK-NEXT:     unsigned long indepVarCount = _d_vector_output->rows() + {{2U|2UL|2ULL}};
+  // CHECK-NEXT:     clad::array<double> _d_vector_i = clad::one_hot_vector(indepVarCount, {{0U|0UL|0ULL}});
+  // CHECK-NEXT:     clad::array<double> _d_vector_j = clad::one_hot_vector(indepVarCount, {{1U|1UL|1ULL}});
+  // CHECK-NEXT:     *_d_vector_output = clad::identity_matrix(_d_vector_output->rows(), indepVarCount, {{2U|2UL|2ULL}});
+  // CHECK-NEXT:     volatile double &_t0 = this->x;
+  // CHECK-NEXT:     double _t1 = _t0 * i;
+  // CHECK-NEXT:     double _t2 = _t1 * i;
+  // CHECK-NEXT:     *_d_vector_output[0] = ((0 * i + _t0 * _d_vector_i) * i + _t1 * _d_vector_i) * j + _t2 * _d_vector_j;
+  // CHECK-NEXT:     output[0] = _t2 * j;
+  // CHECK-NEXT:     volatile double &_t3 = this->y;
+  // CHECK-NEXT:     double _t4 = _t3 * i;
+  // CHECK-NEXT:     double _t5 = _t4 * j;
+  // CHECK-NEXT:     *_d_vector_output[1] = ((0 * i + _t3 * _d_vector_i) * j + _t4 * _d_vector_j) * j + _t5 * _d_vector_j;
+  // CHECK-NEXT:     output[1] = _t5 * j;
   // CHECK-NEXT: }
 
 };
@@ -126,29 +106,21 @@ struct ExperimentConstVolatile {
     x = val;
   }
 
-  // CHECK: void operator_call_jac(double i, double j, double *output, double *jacobianMatrix) const volatile {
-  // CHECK-NEXT:     double _t0 = this->x * i;
-  // CHECK-NEXT:     double _t1 = output[0];
-  // CHECK-NEXT:     output[0] = this->x * i * i * j;
-  // CHECK-NEXT:     double _t2 = this->y * i;
-  // CHECK-NEXT:     double _t3 = output[1];
-  // CHECK-NEXT:     output[1] = this->y * i * j * j;
-  // CHECK-NEXT:     {
-  // CHECK-NEXT:         {
-  // CHECK-NEXT:             jacobianMatrix[{{2U|2UL|2ULL}}] += this->y * 1 * j * j;
-  // CHECK-NEXT:             jacobianMatrix[{{3U|3UL|3ULL}}] += _t2 * 1 * j;
-  // CHECK-NEXT:             jacobianMatrix[{{3U|3UL|3ULL}}] += _t2 * j * 1;
-  // CHECK-NEXT:         }
-  // CHECK-NEXT:         output[1] = _t3;
-  // CHECK-NEXT:     }
-  // CHECK-NEXT:     {
-  // CHECK-NEXT:         {
-  // CHECK-NEXT:             jacobianMatrix[{{0U|0UL|0ULL}}] += this->x * 1 * j * i;
-  // CHECK-NEXT:             jacobianMatrix[{{0U|0UL|0ULL}}] += _t0 * 1 * j;
-  // CHECK-NEXT:             jacobianMatrix[{{1U|1UL|1ULL}}] += _t0 * i * 1;
-  // CHECK-NEXT:         }
-  // CHECK-NEXT:         output[0] = _t1;
-  // CHECK-NEXT:     }
+  // CHECK: void operator_call_jac(double i, double j, double *output, clad::matrix<double> *_d_vector_output) const volatile {
+  // CHECK-NEXT:     unsigned long indepVarCount = _d_vector_output->rows() + {{2U|2UL|2ULL}};
+  // CHECK-NEXT:     clad::array<double> _d_vector_i = clad::one_hot_vector(indepVarCount, {{0U|0UL|0ULL}});
+  // CHECK-NEXT:     clad::array<double> _d_vector_j = clad::one_hot_vector(indepVarCount, {{1U|1UL|1ULL}});
+  // CHECK-NEXT:     *_d_vector_output = clad::identity_matrix(_d_vector_output->rows(), indepVarCount, {{2U|2UL|2ULL}});
+  // CHECK-NEXT:     volatile double &_t0 = this->x;
+  // CHECK-NEXT:     double _t1 = _t0 * i;
+  // CHECK-NEXT:     double _t2 = _t1 * i;
+  // CHECK-NEXT:     *_d_vector_output[0] = ((0 * i + _t0 * _d_vector_i) * i + _t1 * _d_vector_i) * j + _t2 * _d_vector_j;
+  // CHECK-NEXT:     output[0] = _t2 * j;
+  // CHECK-NEXT:     volatile double &_t3 = this->y;
+  // CHECK-NEXT:     double _t4 = _t3 * i;
+  // CHECK-NEXT:     double _t5 = _t4 * j;
+  // CHECK-NEXT:     *_d_vector_output[1] = ((0 * i + _t3 * _d_vector_i) * j + _t4 * _d_vector_j) * j + _t5 * _d_vector_j;
+  // CHECK-NEXT:     output[1] = _t5 * j;
   // CHECK-NEXT: }
 
 };
@@ -166,27 +138,21 @@ namespace outer {
         x = val;
       }
       
-  // CHECK: void operator_call_jac(double i, double j, double *output, double *jacobianMatrix) {
-  // CHECK-NEXT:     double _t0 = output[0];
-  // CHECK-NEXT:     output[0] = this->x * i * i * j;
-  // CHECK-NEXT:     double _t1 = output[1];
-  // CHECK-NEXT:     output[1] = this->y * i * j * j;
-  // CHECK-NEXT:     {
-  // CHECK-NEXT:         {
-  // CHECK-NEXT:             jacobianMatrix[{{2U|2UL|2ULL}}] += this->y * 1 * j * j;
-  // CHECK-NEXT:             jacobianMatrix[{{3U|3UL|3ULL}}] += this->y * i * 1 * j;
-  // CHECK-NEXT:             jacobianMatrix[{{3U|3UL|3ULL}}] += this->y * i * j * 1;
-  // CHECK-NEXT:         }
-  // CHECK-NEXT:         output[1] = _t1;
-  // CHECK-NEXT:     }
-  // CHECK-NEXT:     {
-  // CHECK-NEXT:         {
-  // CHECK-NEXT:             jacobianMatrix[{{0U|0UL|0ULL}}] += this->x * 1 * j * i;
-  // CHECK-NEXT:             jacobianMatrix[{{0U|0UL|0ULL}}] += this->x * i * 1 * j;
-  // CHECK-NEXT:             jacobianMatrix[{{1U|1UL|1ULL}}] += this->x * i * i * 1;
-  // CHECK-NEXT:         }
-  // CHECK-NEXT:         output[0] = _t0;
-  // CHECK-NEXT:     }
+  // CHECK: void operator_call_jac(double i, double j, double *output, clad::matrix<double> *_d_vector_output) {
+  // CHECK-NEXT:     unsigned long indepVarCount = _d_vector_output->rows() + {{2U|2UL|2ULL}};
+  // CHECK-NEXT:     clad::array<double> _d_vector_i = clad::one_hot_vector(indepVarCount, {{0U|0UL|0ULL}});
+  // CHECK-NEXT:     clad::array<double> _d_vector_j = clad::one_hot_vector(indepVarCount, {{1U|1UL|1ULL}});
+  // CHECK-NEXT:     *_d_vector_output = clad::identity_matrix(_d_vector_output->rows(), indepVarCount, {{2U|2UL|2ULL}});
+  // CHECK-NEXT:     double &_t0 = this->x;
+  // CHECK-NEXT:     double _t1 = _t0 * i;
+  // CHECK-NEXT:     double _t2 = _t1 * i;
+  // CHECK-NEXT:     *_d_vector_output[0] = ((0 * i + _t0 * _d_vector_i) * i + _t1 * _d_vector_i) * j + _t2 * _d_vector_j;
+  // CHECK-NEXT:     output[0] = _t2 * j;
+  // CHECK-NEXT:     double &_t3 = this->y;
+  // CHECK-NEXT:     double _t4 = _t3 * i;
+  // CHECK-NEXT:     double _t5 = _t4 * j;
+  // CHECK-NEXT:     *_d_vector_output[1] = ((0 * i + _t3 * _d_vector_i) * j + _t4 * _d_vector_j) * j + _t5 * _d_vector_j;
+  // CHECK-NEXT:     output[1] = _t5 * j;
   // CHECK-NEXT: }
 
     };
@@ -196,27 +162,17 @@ namespace outer {
       output[1] = i*j*j;
     };
 
-  // CHECK: inline void operator_call_jac(double i, double j, double *output, double *jacobianMatrix) const {
-  // CHECK-NEXT:     double _t0 = output[0];
-  // CHECK-NEXT:     output[0] = i * i * j;
-  // CHECK-NEXT:     double _t1 = output[1];
-  // CHECK-NEXT:     output[1] = i * j * j;
-  // CHECK-NEXT:     {
-  // CHECK-NEXT:         {
-  // CHECK-NEXT:             jacobianMatrix[{{2U|2UL|2ULL}}] += 1 * j * j;
-  // CHECK-NEXT:             jacobianMatrix[{{3U|3UL|3ULL}}] += i * 1 * j;
-  // CHECK-NEXT:             jacobianMatrix[{{3U|3UL|3ULL}}] += i * j * 1;
-  // CHECK-NEXT:         }
-  // CHECK-NEXT:         output[1] = _t1;
-  // CHECK-NEXT:     }
-  // CHECK-NEXT:     {
-  // CHECK-NEXT:         {
-  // CHECK-NEXT:             jacobianMatrix[{{0U|0UL|0ULL}}] += 1 * j * i;
-  // CHECK-NEXT:             jacobianMatrix[{{0U|0UL|0ULL}}] += i * 1 * j;
-  // CHECK-NEXT:             jacobianMatrix[{{1U|1UL|1ULL}}] += i * i * 1;
-  // CHECK-NEXT:         }
-  // CHECK-NEXT:         output[0] = _t0;
-  // CHECK-NEXT:     }
+  // CHECK: inline void operator_call_jac(double i, double j, double *output, clad::matrix<double> *_d_vector_output) const {
+  // CHECK-NEXT:     unsigned long indepVarCount = _d_vector_output->rows() + {{2U|2UL|2ULL}};
+  // CHECK-NEXT:     clad::array<double> _d_vector_i = clad::one_hot_vector(indepVarCount, {{0U|0UL|0ULL}});
+  // CHECK-NEXT:     clad::array<double> _d_vector_j = clad::one_hot_vector(indepVarCount, {{1U|1UL|1ULL}});
+  // CHECK-NEXT:     *_d_vector_output = clad::identity_matrix(_d_vector_output->rows(), indepVarCount, {{2U|2UL|2ULL}});
+  // CHECK-NEXT:     double _t0 = i * i;
+  // CHECK-NEXT:     *_d_vector_output[0] = (_d_vector_i * i + i * _d_vector_i) * j + _t0 * _d_vector_j;
+  // CHECK-NEXT:     output[0] = _t0 * j;
+  // CHECK-NEXT:     double _t1 = i * j;
+  // CHECK-NEXT:     *_d_vector_output[1] = (_d_vector_i * j + i * _d_vector_j) * j + _t1 * _d_vector_j;
+  // CHECK-NEXT:     output[1] = _t1 * j;
   // CHECK-NEXT: }
 
   }
@@ -227,21 +183,20 @@ namespace outer {
   auto d_##E##Ref = clad::jacobian(E);
 
 #define TEST(E)                                                                \
-  result[0] = result[1] = result[2] = result[3] = 0;                           \
   output[0] = output[1] = 0;                                                   \
-  d_##E.execute(7, 9, output, result);                                         \
-  printf("{%.2f, %.2f, %.2f, %.2f}, ", result[0], result[1], result[2],        \
-         result[3]);                                                           \
-  result[0] = result[1] = result[2] = result[3] = 0;                           \
+  d_##E.execute(7, 9, output, &result);                                        \
+  printf("{%.2f, %.2f, %.2f, %.2f}, ", result[0][0], result[0][1],             \
+                                       result[1][0], result[1][1]);            \
   output[0] = output[1] = 0;                                                   \
-  d_##E##Ref.execute(7, 9, output, result);                                    \
-  printf("{%.2f, %.2f, %.2f, %.2f}\n", result[0], result[1], result[2],        \
-         result[3]);
+  d_##E##Ref.execute(7, 9, output, &result);                                   \
+  printf("{%.2f, %.2f, %.2f, %.2f}, ", result[0][0], result[0][1],             \
+                                       result[1][0], result[1][1]);
 
 double x = 3;
 double y = 5;
 int main() {
-  double output[2], result[4];
+  double output[2];
+  clad::matrix<double> result(2, 2);
   Experiment E(3, 5);
   auto E_Again = E;
   const ExperimentConst E_Const(3, 5);
@@ -254,27 +209,17 @@ int main() {
     output[1] = i*j*j;
   };
 
-  // CHECK: inline void operator_call_jac(double i, double j, double *output, double *jacobianMatrix) const {
-  // CHECK-NEXT:     double _t0 = output[0];
-  // CHECK-NEXT:     output[0] = i * i * j;
-  // CHECK-NEXT:     double _t1 = output[1];
-  // CHECK-NEXT:     output[1] = i * j * j;
-  // CHECK-NEXT:     {
-  // CHECK-NEXT:         {
-  // CHECK-NEXT:             jacobianMatrix[{{2U|2UL|2ULL}}] += 1 * j * j;
-  // CHECK-NEXT:             jacobianMatrix[{{3U|3UL|3ULL}}] += i * 1 * j;
-  // CHECK-NEXT:             jacobianMatrix[{{3U|3UL|3ULL}}] += i * j * 1;
-  // CHECK-NEXT:         }
-  // CHECK-NEXT:         output[1] = _t1;
-  // CHECK-NEXT:     }
-  // CHECK-NEXT:     {
-  // CHECK-NEXT:         {
-  // CHECK-NEXT:             jacobianMatrix[{{0U|0UL|0ULL}}] += 1 * j * i;
-  // CHECK-NEXT:             jacobianMatrix[{{0U|0UL|0ULL}}] += i * 1 * j;
-  // CHECK-NEXT:             jacobianMatrix[{{1U|1UL|1ULL}}] += i * i * 1;
-  // CHECK-NEXT:         }
-  // CHECK-NEXT:         output[0] = _t0;
-  // CHECK-NEXT:     }
+  // CHECK-NEXT: inline void operator_call_jac(double i, double j, double *output, clad::matrix<double> *_d_vector_output) const {
+  // CHECK-NEXT:     unsigned long indepVarCount = _d_vector_output->rows() + {{2U|2UL|2ULL}};
+  // CHECK-NEXT:     clad::array<double> _d_vector_i = clad::one_hot_vector(indepVarCount, {{0U|0UL|0ULL}});
+  // CHECK-NEXT:     clad::array<double> _d_vector_j = clad::one_hot_vector(indepVarCount, {{1U|1UL|1ULL}});
+  // CHECK-NEXT:     *_d_vector_output = clad::identity_matrix(_d_vector_output->rows(), indepVarCount, {{2U|2UL|2ULL}});
+  // CHECK-NEXT:     double _t0 = i * i;
+  // CHECK-NEXT:     *_d_vector_output[0] = (_d_vector_i * i + i * _d_vector_i) * j + _t0 * _d_vector_j;
+  // CHECK-NEXT:     output[0] = _t0 * j;
+  // CHECK-NEXT:     double _t1 = i * j;
+  // CHECK-NEXT:     *_d_vector_output[1] = (_d_vector_i * j + i * _d_vector_j) * j + _t1 * _d_vector_j;
+  // CHECK-NEXT:     output[1] = _t1 * j;
   // CHECK-NEXT: }
 
   auto lambdaWithCapture = [&](double i, double jj, double *output) {
@@ -282,27 +227,19 @@ int main() {
     output[1] = y*i*jj*jj;
   };
 
-// CHECK: inline void operator_call_jac(double i, double jj, double *output, double *jacobianMatrix) const {
-// CHECK-NEXT:     double _t0 = output[0];
-// CHECK-NEXT:     output[0] = x * i * i * jj;
-// CHECK-NEXT:     double _t1 = output[1];
-// CHECK-NEXT:     output[1] = y * i * jj * jj;
-// CHECK-NEXT:     {
-// CHECK-NEXT:         {
-// CHECK-NEXT:             jacobianMatrix[{{2U|2UL|2ULL}}] += y * 1 * jj * jj;
-// CHECK-NEXT:             jacobianMatrix[{{3U|3UL|3ULL}}] += y * i * 1 * jj;
-// CHECK-NEXT:             jacobianMatrix[{{3U|3UL|3ULL}}] += y * i * jj * 1;
-// CHECK-NEXT:         }
-// CHECK-NEXT:         output[1] = _t1;
-// CHECK-NEXT:     }
-// CHECK-NEXT:     {
-// CHECK-NEXT:         {
-// CHECK-NEXT:             jacobianMatrix[{{0U|0UL|0ULL}}] += x * 1 * jj * i;
-// CHECK-NEXT:             jacobianMatrix[{{0U|0UL|0ULL}}] += x * i * 1 * jj;
-// CHECK-NEXT:             jacobianMatrix[{{1U|1UL|1ULL}}] += x * i * i * 1;
-// CHECK-NEXT:         }
-// CHECK-NEXT:         output[0] = _t0;
-// CHECK-NEXT:     }
+// CHECK: inline void operator_call_jac(double i, double jj, double *output, clad::matrix<double> *_d_vector_output) const {
+// CHECK-NEXT:     unsigned long indepVarCount = _d_vector_output->rows() + {{2U|2UL|2ULL}};
+// CHECK-NEXT:     clad::array<double> _d_vector_i = clad::one_hot_vector(indepVarCount, {{0U|0UL|0ULL}});
+// CHECK-NEXT:     clad::array<double> _d_vector_jj = clad::one_hot_vector(indepVarCount, {{1U|1UL|1ULL}});
+// CHECK-NEXT:     *_d_vector_output = clad::identity_matrix(_d_vector_output->rows(), indepVarCount, {{2U|2UL|2ULL}});
+// CHECK-NEXT:     double _t0 = x * i;
+// CHECK-NEXT:     double _t1 = _t0 * i;
+// CHECK-NEXT:     *_d_vector_output[0] = ((0. * i + x * _d_vector_i) * i + _t0 * _d_vector_i) * jj + _t1 * _d_vector_jj;
+// CHECK-NEXT:     output[0] = _t1 * jj;
+// CHECK-NEXT:     double _t2 = y * i;
+// CHECK-NEXT:     double _t3 = _t2 * jj;
+// CHECK-NEXT:     *_d_vector_output[1] = ((0. * i + y * _d_vector_i) * jj + _t2 * _d_vector_jj) * jj + _t3 * _d_vector_jj;
+// CHECK-NEXT:     output[1] = _t3 * jj;
 // CHECK-NEXT: }
 
   auto lambdaNNS = outer::inner::lambdaNNS;
diff --git a/test/Jacobian/Jacobian.C b/test/Jacobian/Jacobian.C
index 4d9537022..e238be5f2 100644
--- a/test/Jacobian/Jacobian.C
+++ b/test/Jacobian/Jacobian.C
@@ -12,43 +12,22 @@ void f_1(double a, double b, double c, double output[]) {
   output[2] = c * c * 10 - a * a;
 }
 
-void f_1_jac(double a, double b, double c, double output[], double *_result);
-
-// CHECK: void f_1_jac(double a, double b, double c, double output[], double *jacobianMatrix) {
-// CHECK-NEXT:     double _t0 = output[0];
-// CHECK-NEXT:     output[0] = a * a * a;
-// CHECK-NEXT:     double _t1 = output[1];
-// CHECK-NEXT:     output[1] = a * a * a + b * b * b;
-// CHECK-NEXT:     double _t2 = output[2];
-// CHECK-NEXT:     output[2] = c * c * 10 - a * a;
-// CHECK-NEXT:     {
-// CHECK-NEXT:         {
-// CHECK-NEXT:             jacobianMatrix[{{8U|8UL|8ULL}}] += 1 * 10 * c;
-// CHECK-NEXT:             jacobianMatrix[{{8U|8UL|8ULL}}] += c * 1 * 10;
-// CHECK-NEXT:             jacobianMatrix[{{6U|6UL|6ULL}}] += -1 * a;
-// CHECK-NEXT:             jacobianMatrix[{{6U|6UL|6ULL}}] += a * -1;
-// CHECK-NEXT:         }
-// CHECK-NEXT:         output[2] = _t2;
-// CHECK-NEXT:     }
-// CHECK-NEXT:     {
-// CHECK-NEXT:         {
-// CHECK-NEXT:             jacobianMatrix[{{3U|3UL|3ULL}}] += 1 * a * a;
-// CHECK-NEXT:             jacobianMatrix[{{3U|3UL|3ULL}}] += a * 1 * a;
-// CHECK-NEXT:             jacobianMatrix[{{3U|3UL|3ULL}}] += a * a * 1;
-// CHECK-NEXT:             jacobianMatrix[{{4U|4UL|4ULL}}] += 1 * b * b;
-// CHECK-NEXT:             jacobianMatrix[{{4U|4UL|4ULL}}] += b * 1 * b;
-// CHECK-NEXT:             jacobianMatrix[{{4U|4UL|4ULL}}] += b * b * 1;
-// CHECK-NEXT:         }
-// CHECK-NEXT:         output[1] = _t1;
-// CHECK-NEXT:     }
-// CHECK-NEXT:     {
-// CHECK-NEXT:         {
-// CHECK-NEXT:             jacobianMatrix[{{0U|0UL|0ULL}}] += 1 * a * a;
-// CHECK-NEXT:             jacobianMatrix[{{0U|0UL|0ULL}}] += a * 1 * a;
-// CHECK-NEXT:             jacobianMatrix[{{0U|0UL|0ULL}}] += a * a * 1;
-// CHECK-NEXT:         }
-// CHECK-NEXT:         output[0] = _t0;
-// CHECK-NEXT:     }
+// CHECK: void f_1_jac(double a, double b, double c, double output[], clad::matrix<double> *_d_vector_output) {
+// CHECK-NEXT:     unsigned long indepVarCount = _d_vector_output->rows() + {{3U|3UL|3ULL}};
+// CHECK-NEXT:     clad::array<double> _d_vector_a = clad::one_hot_vector(indepVarCount, {{0U|0UL|0ULL}});
+// CHECK-NEXT:     clad::array<double> _d_vector_b = clad::one_hot_vector(indepVarCount, {{1U|1UL|1ULL}});
+// CHECK-NEXT:     clad::array<double> _d_vector_c = clad::one_hot_vector(indepVarCount, {{2U|2UL|2ULL}});
+// CHECK-NEXT:     *_d_vector_output = clad::identity_matrix(_d_vector_output->rows(), indepVarCount, {{3U|3UL|3ULL}});
+// CHECK-NEXT:     double _t0 = a * a;
+// CHECK-NEXT:     *_d_vector_output[0] = (_d_vector_a * a + a * _d_vector_a) * a + _t0 * _d_vector_a;
+// CHECK-NEXT:     output[0] = _t0 * a;
+// CHECK-NEXT:     double _t1 = a * a;
+// CHECK-NEXT:     double _t2 = b * b;
+// CHECK-NEXT:     *_d_vector_output[1] = (_d_vector_a * a + a * _d_vector_a) * a + _t1 * _d_vector_a + (_d_vector_b * b + b * _d_vector_b) * b + _t2 * _d_vector_b;
+// CHECK-NEXT:     output[1] = _t1 * a + _t2 * b;
+// CHECK-NEXT:     double _t3 = c * c;
+// CHECK-NEXT:     *_d_vector_output[2] = (_d_vector_c * c + c * _d_vector_c) * 10 + _t3 * (clad::zero_vector(indepVarCount)) - (_d_vector_a * a + a * _d_vector_a);
+// CHECK-NEXT:     output[2] = _t3 * 10 - a * a;
 // CHECK-NEXT: }
 
 void f_3(double x, double y, double z, double *_result) {
@@ -59,47 +38,29 @@ void f_3(double x, double y, double z, double *_result) {
   _result[2] = sin(z) * constant;
 }
 
-void f_3_jac(double x, double y, double z, double *_result, double *jacobianMatrix);
-
-// CHECK: void f_3_jac(double x, double y, double z, double *_result, double *jacobianMatrix) {
-// CHECK-NEXT:     double _d_constant = 0.;
+// CHECK: void f_3_jac(double x, double y, double z, double *_result, clad::matrix<double> *_d_vector__result) {
+// CHECK-NEXT:     unsigned long indepVarCount = _d_vector__result->rows() + {{3U|3UL|3ULL}};
+// CHECK-NEXT:     clad::array<double> _d_vector_x = clad::one_hot_vector(indepVarCount, {{0U|0UL|0ULL}});
+// CHECK-NEXT:     clad::array<double> _d_vector_y = clad::one_hot_vector(indepVarCount, {{1U|1UL|1ULL}});
+// CHECK-NEXT:     clad::array<double> _d_vector_z = clad::one_hot_vector(indepVarCount, {{2U|2UL|2ULL}});
+// CHECK-NEXT:     *_d_vector__result = clad::identity_matrix(_d_vector__result->rows(), indepVarCount, {{3U|3UL|3ULL}});
+// CHECK-NEXT:     clad::array<double> _d_vector_constant(clad::zero_vector(indepVarCount)); 
 // CHECK-NEXT:     double constant = 42;
-// CHECK-NEXT:     double _t0 = sin(x);
-// CHECK-NEXT:     double _t1 = _result[0];
-// CHECK-NEXT:     _result[0] = sin(x) * constant;
-// CHECK-NEXT:     double _t2 = sin(y);
-// CHECK-NEXT:     double _t3 = _result[1];
-// CHECK-NEXT:     _result[1] = sin(y) * constant;
-// CHECK-NEXT:     double _t4 = sin(z);
-// CHECK-NEXT:     double _t5 = _result[2];
-// CHECK-NEXT:     _result[2] = sin(z) * constant;
-// CHECK-NEXT:     {
-// CHECK-NEXT:         {
-// CHECK-NEXT:             double _r2 = 0.;
-// CHECK-NEXT:             _r2 += 1 * constant * clad::custom_derivatives::sin_pushforward(z, 1.).pushforward;
-// CHECK-NEXT:             jacobianMatrix[{{8U|8UL|8ULL}}] += _r2;
-// CHECK-NEXT:         }
-// CHECK-NEXT:         _result[2] = _t5;
-// CHECK-NEXT:     }
-// CHECK-NEXT:     {
-// CHECK-NEXT:         {
-// CHECK-NEXT:             double _r1 = 0.;
-// CHECK-NEXT:             _r1 += 1 * constant * clad::custom_derivatives::sin_pushforward(y, 1.).pushforward;
-// CHECK-NEXT:             jacobianMatrix[{{4U|4UL|4ULL}}] += _r1;
-// CHECK-NEXT:         }
-// CHECK-NEXT:         _result[1] = _t3;
-// CHECK-NEXT:     }
-// CHECK-NEXT:     {
-// CHECK-NEXT:         {
-// CHECK-NEXT:             double _r0 = 0.;
-// CHECK-NEXT:             _r0 += 1 * constant * clad::custom_derivatives::sin_pushforward(x, 1.).pushforward;
-// CHECK-NEXT:             jacobianMatrix[{{0U|0UL|0ULL}}] += _r0;
-// CHECK-NEXT:         }
-// CHECK-NEXT:         _result[0] = _t1;
-// CHECK-NEXT:     }
+// CHECK-NEXT:     {{.*}} _t0 = clad::custom_derivatives::sin_pushforward(x, _d_vector_x);
+// CHECK-NEXT:     double &_t1 = _t0.value;
+// CHECK-NEXT:     *_d_vector__result[0] = _t0.pushforward * constant + _t1 * _d_vector_constant;
+// CHECK-NEXT:     _result[0] = _t1 * constant;
+// CHECK-NEXT:     {{.*}} _t2 = clad::custom_derivatives::sin_pushforward(y, _d_vector_y);
+// CHECK-NEXT:     double &_t3 = _t2.value;
+// CHECK-NEXT:     *_d_vector__result[1] = _t2.pushforward * constant + _t3 * _d_vector_constant;
+// CHECK-NEXT:     _result[1] = _t3 * constant;
+// CHECK-NEXT:     {{.*}} _t4 = clad::custom_derivatives::sin_pushforward(z, _d_vector_z);
+// CHECK-NEXT:     double &_t5 = _t4.value;
+// CHECK-NEXT:     *_d_vector__result[2] = _t4.pushforward * constant + _t5 * _d_vector_constant;
+// CHECK-NEXT:     _result[2] = _t5 * constant;
 // CHECK-NEXT: }
 double multiply(double x, double y) { return x * y; }
-//CHECK: void multiply_pullback(double x, double y, double _d_y0, double *_d_x, double *_d_y);
+// CHECK: clad::ValueAndPushforward<double, clad::array<double> > multiply_vector_pushforward(double x, double y, clad::array<double> _d_x, clad::array<double> _d_y);
 
 void f_4(double x, double y, double z, double *_result) {
   double constant = 42;
@@ -109,84 +70,43 @@ void f_4(double x, double y, double z, double *_result) {
   _result[2] = multiply(z, x) * constant;
 }
 
-void f_4_jac(double x, double y, double z, double *_result, double *jacobianMatrix);
-// CHECK: void f_4_jac(double x, double y, double z, double *_result, double *jacobianMatrix) {
-// CHECK-NEXT:     double _d_constant = 0.;
+// CHECK: void f_4_jac(double x, double y, double z, double *_result, clad::matrix<double> *_d_vector__result) {
+// CHECK-NEXT:     unsigned long indepVarCount = _d_vector__result->rows() + {{3U|3UL|3ULL}};
+// CHECK-NEXT:     clad::array<double> _d_vector_x = clad::one_hot_vector(indepVarCount, {{0U|0UL|0ULL}});
+// CHECK-NEXT:     clad::array<double> _d_vector_y = clad::one_hot_vector(indepVarCount, {{1U|1UL|1ULL}});
+// CHECK-NEXT:     clad::array<double> _d_vector_z = clad::one_hot_vector(indepVarCount, {{2U|2UL|2ULL}});
+// CHECK-NEXT:     *_d_vector__result = clad::identity_matrix(_d_vector__result->rows(), indepVarCount, {{3U|3UL|3ULL}});
+// CHECK-NEXT:     clad::array<double> _d_vector_constant(clad::zero_vector(indepVarCount)); 
 // CHECK-NEXT:     double constant = 42;
-// CHECK-NEXT:     double _t0 = multiply(x, y);
-// CHECK-NEXT:     double _t1 = _result[0];
-// CHECK-NEXT:     _result[0] = multiply(x, y) * constant;
-// CHECK-NEXT:     double _t2 = multiply(y, z);
-// CHECK-NEXT:     double _t3 = _result[1];
-// CHECK-NEXT:     _result[1] = multiply(y, z) * constant;
-// CHECK-NEXT:     double _t4 = multiply(z, x);
-// CHECK-NEXT:     double _t5 = _result[2];
-// CHECK-NEXT:     _result[2] = multiply(z, x) * constant;
-// CHECK-NEXT:     {
-// CHECK-NEXT:         {
-// CHECK-NEXT:             double _r4 = 0.;
-// CHECK-NEXT:             double _r5 = 0.;
-// CHECK-NEXT:             multiply_pullback(z, x, 1 * constant, &_r4, &_r5);
-// CHECK-NEXT:             jacobianMatrix[{{8U|8UL|8ULL}}] += _r4;
-// CHECK-NEXT:             jacobianMatrix[{{6U|6UL|6ULL}}] += _r5;
-// CHECK-NEXT:         }
-// CHECK-NEXT:         _result[2] = _t5;
-// CHECK-NEXT:     }
-// CHECK-NEXT:     {
-// CHECK-NEXT:         {
-// CHECK-NEXT:             double _r2 = 0.;
-// CHECK-NEXT:             double _r3 = 0.;
-// CHECK-NEXT:             multiply_pullback(y, z, 1 * constant, &_r2, &_r3);
-// CHECK-NEXT:             jacobianMatrix[{{4U|4UL|4ULL}}] += _r2;
-// CHECK-NEXT:             jacobianMatrix[{{5U|5UL|5ULL}}] += _r3;
-// CHECK-NEXT:         }
-// CHECK-NEXT:         _result[1] = _t3;
-// CHECK-NEXT:     }
-// CHECK-NEXT:     {
-// CHECK-NEXT:         {
-// CHECK-NEXT:             double _r0 = 0.;
-// CHECK-NEXT:             double _r1 = 0.;
-// CHECK-NEXT:             multiply_pullback(x, y, 1 * constant, &_r0, &_r1);
-// CHECK-NEXT:             jacobianMatrix[{{0U|0UL|0ULL}}] += _r0;
-// CHECK-NEXT:             jacobianMatrix[{{1U|1UL|1ULL}}] += _r1;
-// CHECK-NEXT:         }
-// CHECK-NEXT:         _result[0] = _t1;
-// CHECK-NEXT:     }
+// CHECK-NEXT:     clad::ValueAndPushforward<double, clad::array<double> > _t0 = multiply_vector_pushforward(x, y, _d_vector_x, _d_vector_y);
+// CHECK-NEXT:     double &_t1 = _t0.value;
+// CHECK-NEXT:     *_d_vector__result[0] = _t0.pushforward * constant + _t1 * _d_vector_constant;
+// CHECK-NEXT:     _result[0] = _t1 * constant;
+// CHECK-NEXT:     clad::ValueAndPushforward<double, clad::array<double> > _t2 = multiply_vector_pushforward(y, z, _d_vector_y, _d_vector_z);
+// CHECK-NEXT:     double &_t3 = _t2.value;
+// CHECK-NEXT:     *_d_vector__result[1] = _t2.pushforward * constant + _t3 * _d_vector_constant;
+// CHECK-NEXT:     _result[1] = _t3 * constant;
+// CHECK-NEXT:     clad::ValueAndPushforward<double, clad::array<double> > _t4 = multiply_vector_pushforward(z, x, _d_vector_z, _d_vector_x);
+// CHECK-NEXT:     double &_t5 = _t4.value;
+// CHECK-NEXT:     *_d_vector__result[2] = _t4.pushforward * constant + _t5 * _d_vector_constant;
+// CHECK-NEXT:     _result[2] = _t5 * constant;
 // CHECK-NEXT: }
 
-void f_1_jac_0(double a, double b, double c, double output[], double *jacobianMatrix);
-// CHECK: void f_1_jac_0(double a, double b, double c, double output[], double *jacobianMatrix) {
-// CHECK-NEXT:     double _d_b = 0.;
-// CHECK-NEXT:     double _d_c = 0.;
-// CHECK-NEXT:     double _t0 = output[0];
-// CHECK-NEXT:     output[0] = a * a * a;
-// CHECK-NEXT:     double _t1 = output[1];
-// CHECK-NEXT:     output[1] = a * a * a + b * b * b;
-// CHECK-NEXT:     double _t2 = output[2];
-// CHECK-NEXT:     output[2] = c * c * 10 - a * a;
-// CHECK-NEXT:     {
-// CHECK-NEXT:         {
-// CHECK-NEXT:             jacobianMatrix[{{2U|2UL|2ULL}}] += -1 * a;
-// CHECK-NEXT:             jacobianMatrix[{{2U|2UL|2ULL}}] += a * -1;
-// CHECK-NEXT:         }
-// CHECK-NEXT:         output[2] = _t2;
-// CHECK-NEXT:     }
-// CHECK-NEXT:     {
-// CHECK-NEXT:         {
-// CHECK-NEXT:             jacobianMatrix[{{1U|1UL|1ULL}}] += 1 * a * a;
-// CHECK-NEXT:             jacobianMatrix[{{1U|1UL|1ULL}}] += a * 1 * a;
-// CHECK-NEXT:             jacobianMatrix[{{1U|1UL|1ULL}}] += a * a * 1;
-// CHECK-NEXT:         }
-// CHECK-NEXT:         output[1] = _t1;
-// CHECK-NEXT:     }
-// CHECK-NEXT:     {
-// CHECK-NEXT:         {
-// CHECK-NEXT:             jacobianMatrix[{{0U|0UL|0ULL}}] += 1 * a * a;
-// CHECK-NEXT:             jacobianMatrix[{{0U|0UL|0ULL}}] += a * 1 * a;
-// CHECK-NEXT:             jacobianMatrix[{{0U|0UL|0ULL}}] += a * a * 1;
-// CHECK-NEXT:         }
-// CHECK-NEXT:         output[0] = _t0;
-// CHECK-NEXT:     }
+// CHECK: void f_1_jac_0(double a, double b, double c, double output[], clad::matrix<double> *_d_vector_output) {
+// CHECK-NEXT:     unsigned long indepVarCount = _d_vector_output->rows() + {{3U|3UL|3ULL}};
+// CHECK-NEXT:     clad::array<double> _d_vector_a = clad::one_hot_vector(indepVarCount, {{0U|0UL|0ULL}});
+// CHECK-NEXT:     clad::array<double> _d_vector_b = clad::zero_vector(indepVarCount);
+// CHECK-NEXT:     clad::array<double> _d_vector_c = clad::zero_vector(indepVarCount);
+// CHECK-NEXT:     double _t0 = a * a;
+// CHECK-NEXT:     *_d_vector_output[0] = (_d_vector_a * a + a * _d_vector_a) * a + _t0 * _d_vector_a;
+// CHECK-NEXT:     output[0] = _t0 * a;
+// CHECK-NEXT:     double _t1 = a * a;
+// CHECK-NEXT:     double _t2 = b * b;
+// CHECK-NEXT:     *_d_vector_output[1] = (_d_vector_a * a + a * _d_vector_a) * a + _t1 * _d_vector_a + (_d_vector_b * b + b * _d_vector_b) * b + _t2 * _d_vector_b;
+// CHECK-NEXT:     output[1] = _t1 * a + _t2 * b;
+// CHECK-NEXT:     double _t3 = c * c;
+// CHECK-NEXT:     *_d_vector_output[2] = (_d_vector_c * c + c * _d_vector_c) * 10 + _t3 * (clad::zero_vector(indepVarCount)) - (_d_vector_a * a + a * _d_vector_a);
+// CHECK-NEXT:     output[2] = _t3 * 10 - a * a;
 // CHECK-NEXT: }
 
 void f_5(float a, double output[]){
@@ -194,66 +114,145 @@ void f_5(float a, double output[]){
   output[0]=a*a;  
 }
 
-// CHECK: void f_5_jac(float a, double output[], double *jacobianMatrix) {
-// CHECK-NEXT:     double _t0 = output[1];
+// CHECK: void f_5_jac(float a, double output[], clad::matrix<double> *_d_vector_output) {
+// CHECK-NEXT:     unsigned long indepVarCount = _d_vector_output->rows() + {{1U|1UL|1ULL}};
+// CHECK-NEXT:     clad::array<float> _d_vector_a = clad::one_hot_vector(indepVarCount, {{0U|0UL|0ULL}});
+// CHECK-NEXT:     *_d_vector_output = clad::identity_matrix(_d_vector_output->rows(), indepVarCount, {{1U|1UL|1ULL}});
+// CHECK-NEXT:     *_d_vector_output[1] = _d_vector_a;
 // CHECK-NEXT:     output[1] = a;
-// CHECK-NEXT:     double _t1 = output[0];
+// CHECK-NEXT:     *_d_vector_output[0] = _d_vector_a * a + a * _d_vector_a;
 // CHECK-NEXT:     output[0] = a * a;
-// CHECK-NEXT:     {
-// CHECK-NEXT:         {
-// CHECK-NEXT:             jacobianMatrix[{{0U|0UL|0ULL}}] += 1 * a;
-// CHECK-NEXT:             jacobianMatrix[{{0U|0UL|0ULL}}] += a * 1;
-// CHECK-NEXT:         }
-// CHECK-NEXT:         output[0] = _t1;
-// CHECK-NEXT:     }
-// CHECK-NEXT:     {
-// CHECK-NEXT:         jacobianMatrix[{{1U|1UL|1ULL}}] += 1;
-// CHECK-NEXT:         output[1] = _t0;
-// CHECK-NEXT:     }
 // CHECK-NEXT: }
 
+void f_6(double a[3], double b, double output[]) {
+    output[0] = a[0] * a[0] * a[0];
+    output[1] = a[0] * a[0] * a[0] + b * b * b;
+    output[2] = 2 * (a[0] + b);
+}
+
+// CHECK: void f_6_jac(double a[3], double b, double output[], clad::matrix<double> *_d_vector_a, clad::matrix<double> *_d_vector_output) {
+// CHECK-NEXT:     unsigned long indepVarCount = _d_vector_a->rows() + _d_vector_output->rows() + {{1U|1UL|1ULL}};
+// CHECK-NEXT:     clad::array<double> _d_vector_b = clad::one_hot_vector(indepVarCount, _d_vector_a->rows());
+// CHECK-NEXT:     *_d_vector_a = clad::identity_matrix(_d_vector_a->rows(), indepVarCount, {{0U|0UL|0ULL}});
+// CHECK-NEXT:     *_d_vector_output = clad::identity_matrix(_d_vector_output->rows(), indepVarCount, _d_vector_a->rows() + {{1U|1UL|1ULL}});
+// CHECK-NEXT:     double _t0 = a[0] * a[0];
+// CHECK-NEXT:     *_d_vector_output[0] = ((*_d_vector_a[0]) * a[0] + a[0] * (*_d_vector_a[0])) * a[0] + _t0 * (*_d_vector_a[0]);
+// CHECK-NEXT:     output[0] = _t0 * a[0];
+// CHECK-NEXT:     double _t1 = a[0] * a[0];
+// CHECK-NEXT:     double _t2 = b * b;
+// CHECK-NEXT:     *_d_vector_output[1] = ((*_d_vector_a[0]) * a[0] + a[0] * (*_d_vector_a[0])) * a[0] + _t1 * (*_d_vector_a[0]) + (_d_vector_b * b + b * _d_vector_b) * b + _t2 * _d_vector_b;
+// CHECK-NEXT:     output[1] = _t1 * a[0] + _t2 * b;
+// CHECK-NEXT:     double _t3 = (a[0] + b);
+// CHECK-NEXT:     *_d_vector_output[2] = (clad::zero_vector(indepVarCount)) * _t3 + 2 * (*_d_vector_a[0] + _d_vector_b);
+// CHECK-NEXT:     output[2] = 2 * _t3;
+// CHECK-NEXT: }
+
+void f_7(double a, double& b, double&c) {
+    b = 3 * a;
+    c = -5 * b;
+}
+
+// CHECK: void f_7_jac(double a, double &b, double &c, clad::array<double> *_d_vector_b, clad::array<double> *_d_vector_c) {
+// CHECK-NEXT:     unsigned long indepVarCount = {{3U|3UL|3ULL}};
+// CHECK-NEXT:     clad::array<double> _d_vector_a = clad::one_hot_vector(indepVarCount, {{0U|0UL|0ULL}});
+// CHECK-NEXT:     *_d_vector_b = clad::one_hot_vector(indepVarCount, {{1U|1UL|1ULL}});
+// CHECK-NEXT:     *_d_vector_c = clad::one_hot_vector(indepVarCount, {{2U|2UL|2ULL}});
+// CHECK-NEXT:     *_d_vector_b = (clad::zero_vector(indepVarCount)) * a + 3 * _d_vector_a;
+// CHECK-NEXT:     b = 3 * a;
+// CHECK-NEXT:     *_d_vector_c = - clad::zero_vector(indepVarCount) * b + -5 * *_d_vector_b;
+// CHECK-NEXT:     c = -5 * b;
+// CHECK-NEXT: }
 
-#define TEST(F, x, y, z) { \
-  result[0] = 0; result[1] = 0; result[2] = 0;\
-  result[3] = 0; result[4] = 0; result[5] = 0;\
-  result[6] = 0; result[7] = 0; result[8] = 0;\
+void f_8(double a, double b, double output[]) {
+  double a3 = a * a * a;
+  output[0] = a3;
+  output[1] = a3 + b * b * b;
+  output[2] = 2 * (a + b);
+}
+
+// CHECK: void f_8_jac(double a, double b, double output[], clad::matrix<double> *_d_vector_output) {
+// CHECK-NEXT:     unsigned long indepVarCount = _d_vector_output->rows() + 2UL;
+// CHECK-NEXT:     clad::array<double> _d_vector_a = clad::one_hot_vector(indepVarCount, 0UL);
+// CHECK-NEXT:     clad::array<double> _d_vector_b = clad::one_hot_vector(indepVarCount, 1UL);
+// CHECK-NEXT:     *_d_vector_output = clad::identity_matrix(_d_vector_output->rows(), indepVarCount, 2UL);
+// CHECK-NEXT:     double _t0 = a * a;
+// CHECK-NEXT:     clad::array<double> _d_vector_a3((_d_vector_a * a + a * _d_vector_a) * a + _t0 * _d_vector_a);
+// CHECK-NEXT:     double a3 = _t0 * a;
+// CHECK-NEXT:     *_d_vector_output[0] = _d_vector_a3;
+// CHECK-NEXT:     output[0] = a3;
+// CHECK-NEXT:     double _t1 = b * b;
+// CHECK-NEXT:     *_d_vector_output[1] = _d_vector_a3 + (_d_vector_b * b + b * _d_vector_b) * b + _t1 * _d_vector_b;
+// CHECK-NEXT:     output[1] = a3 + _t1 * b;
+// CHECK-NEXT:     double _t2 = (a + b);
+// CHECK-NEXT:     *_d_vector_output[2] = (clad::zero_vector(indepVarCount)) * _t2 + 2 * (_d_vector_a + _d_vector_b);
+// CHECK-NEXT:     output[2] = 2 * _t2;
+// CHECK-NEXT: }
+
+void f_9(float a, double output[]){
+  output[0]=a*a;  
+  output[1]=output[0]*3;
+}
+
+// CHECK: void f_9_jac(float a, double output[], clad::matrix<double> *_d_vector_output) {
+// CHECK-NEXT:     unsigned long indepVarCount = _d_vector_output->rows() + 1UL;
+// CHECK-NEXT:     clad::array<float> _d_vector_a = clad::one_hot_vector(indepVarCount, 0UL);
+// CHECK-NEXT:     *_d_vector_output = clad::identity_matrix(_d_vector_output->rows(), indepVarCount, 1UL);
+// CHECK-NEXT:     *_d_vector_output[0] = _d_vector_a * a + a * _d_vector_a;
+// CHECK-NEXT:     output[0] = a * a;
+// CHECK-NEXT:     *_d_vector_output[1] = (*_d_vector_output[0]) * 3 + output[0] * (clad::zero_vector(indepVarCount));
+// CHECK-NEXT:     output[1] = output[0] * 3;
+// CHECK-NEXT: }
+
+#define TEST(F, ...) { \
   outputarr[0] = 0; outputarr[1] = 1; outputarr[2] = 0;\
   auto j = clad::jacobian(F);\
-  j.execute(x, y, z, outputarr, result);\
+  j.execute(__VA_ARGS__, &result);\
   printf("Result is = {%.2f, %.2f, %.2f, %.2f, %.2f, %.2f, %.2f, %.2f, %.2f}\n",\
-  result[0], result[1], result[2],\
-  result[3], result[4], result[5],\
-  result[6], result[7], result[8]);\
-  F##_jac(x, y, z, outputarr, result);\
+  result[0][0], result[0][1], result[0][2],\
+  result[1][0], result[1][1], result[1][2],\
+  result[2][0], result[2][1], result[2][2]);\
 }
 
 #define TEST_F_1_SINGLE_PARAM(x, y, z) { \
-  result[0] = 0; result[1] = 0; result[2] = 0;\
   outputarr[0] = 0; outputarr[1] = 1; outputarr[2] = 0;\
   auto j = clad::jacobian(f_1,"a");\
-  j.execute(x, y, z, outputarr, result);\
+  j.execute(x, y, z, outputarr, &result);\
   printf("Result is = {%.2f, %.2f, %.2f}\n",\
-  result[0], result[1], result[2]);\
+  result[0][0], result[1][0], result[2][0]);\
 }
 
 
 int main() {
-  double result[10];
+  clad::matrix<double> result (3, 3);
   double outputarr[9];
-  TEST(f_1, 1, 2, 3); // CHECK-EXEC: Result is = {3.00, 0.00, 0.00, 3.00, 12.00, 0.00, -2.00, 0.00, 60.00}
-  TEST(f_3, 1, 2, 3); // CHECK-EXEC: Result is = {22.69, 0.00, 0.00, 0.00, -17.48, 0.00, 0.00, 0.00, -41.58}
-  TEST(f_4, 1, 2, 3); // CHECK-EXEC: Result is = {84.00, 42.00, 0.00, 0.00, 126.00, 84.00, 126.00, 0.00, 42.00}
+  TEST(f_1, 1, 2, 3, outputarr); // CHECK-EXEC: Result is = {3.00, 0.00, 0.00, 3.00, 12.00, 0.00, -2.00, 0.00, 60.00}
+  TEST(f_3, 1, 2, 3, outputarr); // CHECK-EXEC: Result is = {22.69, 0.00, 0.00, 0.00, -17.48, 0.00, 0.00, 0.00, -41.58}
+  TEST(f_4, 1, 2, 3, outputarr); // CHECK-EXEC: Result is = {84.00, 42.00, 0.00, 0.00, 126.00, 84.00, 126.00, 0.00, 42.00}
   TEST_F_1_SINGLE_PARAM(1, 2, 3); // CHECK-EXEC: Result is = {3.00, 3.00, -2.00}
 
   auto df5 = clad::jacobian(f_5);
   result[0] = 0; result[1] = 0;
-  df5.execute(3, outputarr, result);
-  printf("Result is = {%.2f, %.2f}", result[0], result[1]); // CHECK-EXEC: Result is = {6.00, 1.00}
+  df5.execute(3, outputarr, &result);
+  printf("Result is = {%.2f, %.2f}\n", result[0][0], result[1][0]); // CHECK-EXEC: Result is = {6.00, 1.00}
+  
+  double a[] = {3, 1};
+  clad::matrix<double> da (2, 5);
+  TEST(f_6, a, 2, outputarr, &da); // CHECK-EXEC: Result is = {27.00, 0.00, 0.00, 27.00, 0.00, 12.00, 2.00, 0.00, 2.00}
+
+  auto df7 = clad::jacobian(f_7);
+  clad::array<double> db, dc;
+  double a7=4, b=5, c=6;
+  df7.execute(a7, b, c, &db, &dc);
+  printf("Result is = {%.2f, %.2f}\n", db[0], dc[0]); // CHECK-EXEC: Result is = {3.00, -15.00}
+
+  TEST(f_8, 4, -3, outputarr); // CHECK-EXEC: Result is = {48.00, 0.00, 0.00, 48.00, 27.00, 0.00, 2.00, 2.00, 0.00} 
+
+  auto df9 = clad::jacobian(f_9);
+  df9.execute(3, outputarr, &result);
+  printf("Result is = {%.2f, %.2f}\n", result[0][0], result[1][0]); // CHECK-EXEC: Result is = {6.00, 18.00}
 }
 
-//CHECK: void multiply_pullback(double x, double y, double _d_y0, double *_d_x, double *_d_y) {
-//CHECK-NEXT:    {
-//CHECK-NEXT:        *_d_x += _d_y0 * y;
-//CHECK-NEXT:        *_d_y += x * _d_y0;
-//CHECK-NEXT:    }
-//CHECK-NEXT:}
+// CHECK: clad::ValueAndPushforward<double, clad::array<double> > multiply_vector_pushforward(double x, double y, clad::array<double> _d_x, clad::array<double> _d_y) {
+// CHECK-NEXT:     unsigned long indepVarCount = _d_y.size();
+// CHECK-NEXT:     return {x * y, _d_x * y + x * _d_y};
+// CHECK-NEXT: }
diff --git a/test/Jacobian/Pointers.C b/test/Jacobian/Pointers.C
index 01bdcecc4..1a90b61e7 100644
--- a/test/Jacobian/Pointers.C
+++ b/test/Jacobian/Pointers.C
@@ -10,31 +10,27 @@ void nonMemFn(double i, double j, double* out) {
   out[1] = j;
 }
 
-// CHECK: void nonMemFn_jac(double i, double j, double *out, double *jacobianMatrix) {
-// CHECK-NEXT:     double _t0 = out[0];
+// CHECK: void nonMemFn_jac(double i, double j, double *out, clad::matrix<double> *_d_vector_out) {
+// CHECK-NEXT:     unsigned long indepVarCount = _d_vector_out->rows() + {{2U|2UL|2ULL}};
+// CHECK-NEXT:     clad::array<double> _d_vector_i = clad::one_hot_vector(indepVarCount, {{0U|0UL|0ULL}});
+// CHECK-NEXT:     clad::array<double> _d_vector_j = clad::one_hot_vector(indepVarCount, {{1U|1UL|1ULL}});
+// CHECK-NEXT:     *_d_vector_out = clad::identity_matrix(_d_vector_out->rows(), indepVarCount, {{2U|2UL|2ULL}});
+// CHECK-NEXT:     *_d_vector_out[0] = _d_vector_i;
 // CHECK-NEXT:     out[0] = i;
-// CHECK-NEXT:     double _t1 = out[1];
+// CHECK-NEXT:     *_d_vector_out[1] = _d_vector_j;
 // CHECK-NEXT:     out[1] = j;
-// CHECK-NEXT:     {
-// CHECK-NEXT:         jacobianMatrix[{{3U|3UL|3ULL}}] += 1;
-// CHECK-NEXT:         out[1] = _t1;
-// CHECK-NEXT:     }
-// CHECK-NEXT:     {
-// CHECK-NEXT:         jacobianMatrix[{{0U|0UL|0ULL}}] += 1;
-// CHECK-NEXT:         out[0] = _t0;
-// CHECK-NEXT:     }
 // CHECK-NEXT: }
 
 #define NON_MEM_FN_TEST(var)\
-res[0]=res[1]=res[2]=res[3]=0;\
-var.execute(5, 7, out, res);\
-printf("{%.2f %.2f %.2f %.2f}\n", res[0], res[1], res[2], res[3]);
+var.execute(5, 7, out, &res);\
+printf("{%.2f %.2f %.2f %.2f}\n", res[0][0], res[0][1],\
+                                  res[1][0], res[1][1]);
 
 int main() {
   auto nonMemFnPtr = &nonMemFn;
   auto nonMemFnPtrToPtr = &nonMemFnPtr;
 
-  double res[4];
+  clad::matrix<double> res(2, 2);
   double out[2];
   auto d_nonMemFn = clad::jacobian(nonMemFn);
   auto d_nonMemFnPar = clad::jacobian((nonMemFn));
diff --git a/test/Jacobian/TemplateFunctors.C b/test/Jacobian/TemplateFunctors.C
index 9762d355e..62c0cb897 100644
--- a/test/Jacobian/TemplateFunctors.C
+++ b/test/Jacobian/TemplateFunctors.C
@@ -15,25 +15,24 @@ template <typename T> struct Experiment {
   void setX(T val) { x = val; }
 };
 
-// CHECK: void operator_call_jac(double i, double j, double *output, double *jacobianMatrix) {
-// CHECK-NEXT:     double _t0 = output[0];
-// CHECK-NEXT:     output[0] = this->x * this->y * i * j;
-// CHECK-NEXT:     double _t1 = output[1];
-// CHECK-NEXT:     output[1] = 2 * this->x * this->y * i * j;
-// CHECK-NEXT:     {
-// CHECK-NEXT:         {
-// CHECK-NEXT:             jacobianMatrix[{{2U|2UL|2ULL}}] += 2 * this->x * this->y * 1 * j;
-// CHECK-NEXT:             jacobianMatrix[{{3U|3UL|3ULL}}] += 2 * this->x * this->y * i * 1;
-// CHECK-NEXT:         }
-// CHECK-NEXT:         output[1] = _t1;
-// CHECK-NEXT:     }
-// CHECK-NEXT:     {
-// CHECK-NEXT:         {
-// CHECK-NEXT:             jacobianMatrix[{{0U|0UL|0ULL}}] += this->x * this->y * 1 * j;
-// CHECK-NEXT:             jacobianMatrix[{{1U|1UL|1ULL}}] += this->x * this->y * i * 1;
-// CHECK-NEXT:         }
-// CHECK-NEXT:         output[0] = _t0;
-// CHECK-NEXT:     }
+// CHECK: void operator_call_jac(double i, double j, double *output, clad::matrix<double> *_d_vector_output) {
+// CHECK-NEXT:     unsigned long indepVarCount = _d_vector_output->rows() + {{2U|2UL|2ULL}};
+// CHECK-NEXT:     clad::array<double> _d_vector_i = clad::one_hot_vector(indepVarCount, {{0U|0UL|0ULL}});
+// CHECK-NEXT:     clad::array<double> _d_vector_j = clad::one_hot_vector(indepVarCount, {{1U|1UL|1ULL}});
+// CHECK-NEXT:     *_d_vector_output = clad::identity_matrix(_d_vector_output->rows(), indepVarCount, {{2U|2UL|2ULL}});
+// CHECK-NEXT:     double &_t0 = this->x;
+// CHECK-NEXT:     double &_t1 = this->y;
+// CHECK-NEXT:     double _t2 = _t0 * _t1;
+// CHECK-NEXT:     double _t3 = _t2 * i;
+// CHECK-NEXT:     *_d_vector_output[0] = ((0 * _t1 + _t0 * 0) * i + _t2 * _d_vector_i) * j + _t3 * _d_vector_j;
+// CHECK-NEXT:     output[0] = _t3 * j;
+// CHECK-NEXT:     double &_t4 = this->x;
+// CHECK-NEXT:     double _t5 = 2 * _t4;
+// CHECK-NEXT:     double &_t6 = this->y;
+// CHECK-NEXT:     double _t7 = _t5 * _t6;
+// CHECK-NEXT:     double _t8 = _t7 * i;
+// CHECK-NEXT:     *_d_vector_output[1] = ((((clad::zero_vector(indepVarCount)) * _t4 + 2 * 0) * _t6 + _t5 * 0) * i + _t7 * _d_vector_i) * j + _t8 * _d_vector_j;
+// CHECK-NEXT:     output[1] = _t8 * j;
 // CHECK-NEXT: }
 
 template <> struct Experiment<long double> {
@@ -46,27 +45,26 @@ template <> struct Experiment<long double> {
   void setX(long double val) { x = val; }
 };
 
-// CHECK: void operator_call_jac(long double i, long double j, long double *output, long double *jacobianMatrix) {
-// CHECK-NEXT:     long double _t0 = output[0];
-// CHECK-NEXT:     output[0] = this->x * this->y * i * i * j;
-// CHECK-NEXT:     long double _t1 = output[1];
-// CHECK-NEXT:     output[1] = 2 * this->x * this->y * i * i * j;
-// CHECK-NEXT:     {
-// CHECK-NEXT:         {
-// CHECK-NEXT:             jacobianMatrix[{{2U|2UL|2ULL}}] += 2 * this->x * this->y * 1 * j * i;
-// CHECK-NEXT:             jacobianMatrix[{{2U|2UL|2ULL}}] += 2 * this->x * this->y * i * 1 * j;
-// CHECK-NEXT:             jacobianMatrix[{{3U|3UL|3ULL}}] += 2 * this->x * this->y * i * i * 1;
-// CHECK-NEXT:         }
-// CHECK-NEXT:         output[1] = _t1;
-// CHECK-NEXT:     }
-// CHECK-NEXT:     {
-// CHECK-NEXT:         {
-// CHECK-NEXT:             jacobianMatrix[{{0U|0UL|0ULL}}] += this->x * this->y * 1 * j * i;
-// CHECK-NEXT:             jacobianMatrix[{{0U|0UL|0ULL}}] += this->x * this->y * i * 1 * j;
-// CHECK-NEXT:             jacobianMatrix[{{1U|1UL|1ULL}}] += this->x * this->y * i * i * 1;
-// CHECK-NEXT:         }
-// CHECK-NEXT:         output[0] = _t0;
-// CHECK-NEXT:     }
+// CHECK: void operator_call_jac(long double i, long double j, long double *output, clad::matrix<long double> *_d_vector_output) {
+// CHECK-NEXT:     unsigned long indepVarCount = _d_vector_output->rows() + {{2U|2UL|2ULL}};
+// CHECK-NEXT:     clad::array<long double> _d_vector_i = clad::one_hot_vector(indepVarCount, {{0U|0UL|0ULL}});
+// CHECK-NEXT:     clad::array<long double> _d_vector_j = clad::one_hot_vector(indepVarCount, {{1U|1UL|1ULL}});
+// CHECK-NEXT:     *_d_vector_output = clad::identity_matrix(_d_vector_output->rows(), indepVarCount, {{2U|2UL|2ULL}});
+// CHECK-NEXT:     long double &_t0 = this->x;
+// CHECK-NEXT:     long double &_t1 = this->y;
+// CHECK-NEXT:     long double _t2 = _t0 * _t1;
+// CHECK-NEXT:     long double _t3 = _t2 * i;
+// CHECK-NEXT:     long double _t4 = _t3 * i;
+// CHECK-NEXT:     *_d_vector_output[0] = (((0 * _t1 + _t0 * 0) * i + _t2 * _d_vector_i) * i + _t3 * _d_vector_i) * j + _t4 * _d_vector_j;
+// CHECK-NEXT:     output[0] = _t4 * j;
+// CHECK-NEXT:     long double &_t5 = this->x;
+// CHECK-NEXT:     long double _t6 = 2 * _t5;
+// CHECK-NEXT:     long double &_t7 = this->y;
+// CHECK-NEXT:     long double _t8 = _t6 * _t7;
+// CHECK-NEXT:     long double _t9 = _t8 * i;
+// CHECK-NEXT:     long double _t10 = _t9 * i;
+// CHECK-NEXT:     *_d_vector_output[1] = (((((clad::zero_vector(indepVarCount)) * _t5 + 2 * 0) * _t7 + _t6 * 0) * i + _t8 * _d_vector_i) * i + _t9 * _d_vector_i) * j + _t10 * _d_vector_j;
+// CHECK-NEXT:     output[1] = _t10 * j;
 // CHECK-NEXT: }
 
 #define INIT(E)                                                                \
@@ -74,32 +72,30 @@ template <> struct Experiment<long double> {
   auto d_##E##Ref = clad::jacobian(E);
 
 #define TEST_DOUBLE(E, ...)                                                    \
-  result[0] = result[1] = result[2] = result[3] = 0;                           \
   output[0] = output[1] = 0;                                                   \
-  d_##E.execute(__VA_ARGS__, output, result);                                  \
-  printf("{%.2f, %.2f, %.2f, %.2f} ", result[0], result[1], result[2],        \
-         result[3]);                                                           \
-  result[0] = result[1] = result[2] = result[3] = 0;                           \
+  d_##E.execute(__VA_ARGS__, output, &result);                                 \
+  printf("{%.2f, %.2f, %.2f, %.2f} ", result[0][0], result[0][1],              \
+                                      result[1][0], result[1][1]);             \
   output[0] = output[1] = 0;                                                   \
-  d_##E##Ref.execute(__VA_ARGS__, output, result);                             \
-  printf("{%.2f, %.2f, %.2f, %.2f}\n", result[0], result[1], result[2],        \
-         result[3]);
+  d_##E##Ref.execute(__VA_ARGS__, output, &result);                            \
+  printf("{%.2f, %.2f, %.2f, %.2f} ", result[0][0], result[0][1],              \
+                                      result[1][0], result[1][1]);             
 
 #define TEST_LONG_DOUBLE(E, ...)                                               \
-  result_ld[0] = result_ld[1] = result_ld[2] = result_ld[3] = 0;               \
   output_ld[0] = output_ld[1] = 0;                                             \
-  d_##E.execute(__VA_ARGS__, output_ld, result_ld);                            \
-  printf("{%.2Lf, %.2Lf, %.2Lf, %.2Lf} ", result_ld[0], result_ld[1],         \
-         result_ld[2], result_ld[3]);                                          \
-  result_ld[0] = result_ld[1] = result_ld[2] = result_ld[3] = 0;               \
+  d_##E.execute(__VA_ARGS__, output_ld, &result_ld);                           \
+  printf("{%.2Lf, %.2Lf, %.2Lf, %.2Lf} ", result_ld[0][0], result_ld[0][1],    \
+                                      result_ld[1][0], result_ld[1][1]);       \
   output_ld[0] = output_ld[1] = 0;                                             \
-  d_##E##Ref.execute(__VA_ARGS__, output_ld, result_ld);                       \
-  printf("{%.2Lf, %.2Lf, %.2Lf, %.2Lf}\n", result_ld[0], result_ld[1],         \
-         result_ld[2], result_ld[3]);
+  d_##E##Ref.execute(__VA_ARGS__, output_ld, &result_ld);                      \
+  printf("{%.2Lf, %.2Lf, %.2Lf, %.2Lf} ", result_ld[0][0], result_ld[0][1],    \
+                                      result_ld[1][0], result_ld[1][1]);             
 
 int main() {
-  double result[4], output[2];
-  long double result_ld[4], output_ld[2];
+  double output[2];
+  clad::matrix<double> result(2, 2);
+  long double output_ld[2];
+  clad::matrix<long double> result_ld(2, 2);
   Experiment<double> E(3, 5);
   Experiment<long double> E_ld(3, 5);
 
diff --git a/test/Jacobian/constexprTest.C b/test/Jacobian/constexprTest.C
index 1d34cd196..f8d6ef42e 100644
--- a/test/Jacobian/constexprTest.C
+++ b/test/Jacobian/constexprTest.C
@@ -8,9 +8,8 @@
 #include "../TestUtils.h"
 
   double result[3] = {0};
-  double jacobianou[6] = {0};
   double result1[3] = {0};
-  double jacobianou1[9] = {0};
+  clad::matrix<double> jacobian(3, 2);
 
 constexpr void fn_mul(double i, double j, double *res) {
    res[0] = i*i;
@@ -18,34 +17,17 @@ constexpr void fn_mul(double i, double j, double *res) {
    res[2] = i*j;
 }
 
-// CHECK: void fn_mul_jac(double i, double j, double *res, double *jacobianMatrix) {
-// CHECK-NEXT:     double _t0 = res[0];
+// CHECK: constexpr void fn_mul_jac(double i, double j, double *res, clad::matrix<double> *_d_vector_res) {
+// CHECK-NEXT:     unsigned long indepVarCount = _d_vector_res->rows() + {{2U|2UL|2ULL}};
+// CHECK-NEXT:     clad::array<double> _d_vector_i = clad::one_hot_vector(indepVarCount, {{0U|0UL|0ULL}});
+// CHECK-NEXT:     clad::array<double> _d_vector_j = clad::one_hot_vector(indepVarCount, {{1U|1UL|1ULL}});
+// CHECK-NEXT:     *_d_vector_res = clad::identity_matrix(_d_vector_res->rows(), indepVarCount, {{2U|2UL|2ULL}});
+// CHECK-NEXT:     *_d_vector_res[0] = _d_vector_i * i + i * _d_vector_i;
 // CHECK-NEXT:     res[0] = i * i;
-// CHECK-NEXT:     double _t1 = res[1];
+// CHECK-NEXT:     *_d_vector_res[1] = _d_vector_j * j + j * _d_vector_j;
 // CHECK-NEXT:     res[1] = j * j;
-// CHECK-NEXT:     double _t2 = res[2];
+// CHECK-NEXT:     *_d_vector_res[2] = _d_vector_i * j + i * _d_vector_j;
 // CHECK-NEXT:     res[2] = i * j;
-// CHECK-NEXT:     {
-// CHECK-NEXT:         {
-// CHECK-NEXT:             jacobianMatrix[{{4U|4UL|4ULL}}] += 1 * j;
-// CHECK-NEXT:             jacobianMatrix[{{5U|5UL|5ULL}}] += i * 1;
-// CHECK-NEXT:         }
-// CHECK-NEXT:         res[2] = _t2;
-// CHECK-NEXT:     }
-// CHECK-NEXT:     {
-// CHECK-NEXT:         {
-// CHECK-NEXT:             jacobianMatrix[{{3U|3UL|3ULL}}] += 1 * j;
-// CHECK-NEXT:             jacobianMatrix[{{3U|3UL|3ULL}}] += j * 1;
-// CHECK-NEXT:         }
-// CHECK-NEXT:         res[1] = _t1;
-// CHECK-NEXT:     }
-// CHECK-NEXT:     {
-// CHECK-NEXT:         {
-// CHECK-NEXT:             jacobianMatrix[{{0U|0UL|0ULL}}] += 1 * i;
-// CHECK-NEXT:             jacobianMatrix[{{0U|0UL|0ULL}}] += i * 1;
-// CHECK-NEXT:         }
-// CHECK-NEXT:         res[0] = _t0;
-// CHECK-NEXT:     }
 // CHECK-NEXT: }
 
 
@@ -55,41 +37,22 @@ constexpr void f_1(double x, double y, double z, double output[]) {
   output[2] = z * x * 10 - y * z;
 }
 
-// CHECK: constexpr void f_1_jac(double x, double y, double z, double output[], double *jacobianMatrix) {
-// CHECK-NEXT:     double _t0 = output[0];
-// CHECK-NEXT:     output[0] = x * x * x; 
-// CHECK-NEXT:     double _t1 = output[1];
-// CHECK-NEXT:     output[1] = x * y * x + y * x * x; 
-// CHECK-NEXT:     double _t2 = output[2];
-// CHECK-NEXT:     output[2] = z * x * 10 - y * z; 
-// CHECK-NEXT:     {
-// CHECK-NEXT:         {
-// CHECK-NEXT:             jacobianMatrix[{{8U|8UL|8ULL}}] += 1 * 10 * x; 
-// CHECK-NEXT:             jacobianMatrix[{{6U|6UL|6ULL}}] += z * 1 * 10; 
-// CHECK-NEXT:             jacobianMatrix[{{7U|7UL|7ULL}}] += -1 * z;     
-// CHECK-NEXT:             jacobianMatrix[{{8U|8UL|8ULL}}] += y * -1;     
-// CHECK-NEXT:         }
-// CHECK-NEXT:         output[2] = _t2; 
-// CHECK-NEXT:     }
-// CHECK-NEXT:     {
-// CHECK-NEXT:         {
-// CHECK-NEXT:             jacobianMatrix[{{3U|3UL|3ULL}}] += 1 * x * y; 
-// CHECK-NEXT:             jacobianMatrix[{{4U|4UL|4ULL}}] += x * 1 * x;
-// CHECK-NEXT:             jacobianMatrix[{{3U|3UL|3ULL}}] += x * y * 1;
-// CHECK-NEXT:             jacobianMatrix[{{4U|4UL|4ULL}}] += 1 * x * x;
-// CHECK-NEXT:             jacobianMatrix[{{3U|3UL|3ULL}}] += y * 1 * x; 
-// CHECK-NEXT:             jacobianMatrix[{{3U|3UL|3ULL}}] += y * x * 1;
-// CHECK-NEXT:         }
-// CHECK-NEXT:         output[1] = _t1; 
-// CHECK-NEXT:     }
-// CHECK-NEXT:     {
-// CHECK-NEXT:         {
-// CHECK-NEXT:             jacobianMatrix[{{0U|0UL|0ULL}}] += 1 * x * x; 
-// CHECK-NEXT:             jacobianMatrix[{{0U|0UL|0ULL}}] += x * 1 * x; 
-// CHECK-NEXT:             jacobianMatrix[{{0U|0UL|0ULL}}] += x * x * 1; 
-// CHECK-NEXT:         }
-// CHECK-NEXT:         output[0] = _t0;
-// CHECK-NEXT:     }
+// CHECK: constexpr void f_1_jac(double x, double y, double z, double output[], clad::matrix<double> *_d_vector_output) {
+// CHECK-NEXT:     unsigned long indepVarCount = _d_vector_output->rows() + {{3U|3UL|3ULL}};
+// CHECK-NEXT:     clad::array<double> _d_vector_x = clad::one_hot_vector(indepVarCount, {{0U|0UL|0ULL}});
+// CHECK-NEXT:     clad::array<double> _d_vector_y = clad::one_hot_vector(indepVarCount, {{1U|1UL|1ULL}});
+// CHECK-NEXT:     clad::array<double> _d_vector_z = clad::one_hot_vector(indepVarCount, {{2U|2UL|2ULL}});
+// CHECK-NEXT:     *_d_vector_output = clad::identity_matrix(_d_vector_output->rows(), indepVarCount, {{3U|3UL|3ULL}});
+// CHECK-NEXT:     double _t0 = x * x;
+// CHECK-NEXT:     *_d_vector_output[0] = (_d_vector_x * x + x * _d_vector_x) * x + _t0 * _d_vector_x;
+// CHECK-NEXT:     output[0] = _t0 * x;
+// CHECK-NEXT:     double _t1 = x * y;
+// CHECK-NEXT:     double _t2 = y * x;
+// CHECK-NEXT:     *_d_vector_output[1] = (_d_vector_x * y + x * _d_vector_y) * x + _t1 * _d_vector_x + (_d_vector_y * x + y * _d_vector_x) * x + _t2 * _d_vector_x;
+// CHECK-NEXT:     output[1] = _t1 * x + _t2 * x;
+// CHECK-NEXT:     double _t3 = z * x;
+// CHECK-NEXT:     *_d_vector_output[2] = (_d_vector_z * x + z * _d_vector_x) * 10 + _t3 * (clad::zero_vector(indepVarCount)) - (_d_vector_y * z + y * _d_vector_z); 
+// CHECK-NEXT:     output[2] = _t3 * 10 - y * z;
 // CHECK-NEXT: }
 
 int main() {
@@ -97,7 +60,7 @@ int main() {
     INIT_JACOBIAN(fn_mul);
     INIT_JACOBIAN(f_1);
 
-    TEST_JACOBIAN(fn_mul, 2, 6, 3, 1, result, jacobianou); // CHECK-EXEC: {6.00, 0.00, 0.00, 2.00, 1.00, 3.00}
-    TEST_JACOBIAN(f_1, 3, 9, 4, 5, 6, result1, jacobianou1); // CHECK-EXEC: {48.00, 0.00, 0.00, 80.00, 32.00, 0.00, 60.00, -6.00, 35.00}
+    TEST_JACOBIAN(fn_mul, 2, 6, 3, 1, result, &jacobian); // CHECK-EXEC: {6.00, 0.00, 0.00, 2.00, 1.00, 3.00}
+    TEST_JACOBIAN(f_1, 3, 9, 4, 5, 6, result1, &jacobian); // CHECK-EXEC: {48.00, 0.00, 0.00, 80.00, 32.00, 0.00, 60.00, -6.00, 35.00}
 }
 
diff --git a/test/Jacobian/testUtility.C b/test/Jacobian/testUtility.C
index 7b38eb7ec..ae1e65b08 100644
--- a/test/Jacobian/testUtility.C
+++ b/test/Jacobian/testUtility.C
@@ -8,9 +8,8 @@
 #include "../TestUtils.h"
 
   double output[3] = {0};
-  double jacobian[6] = {0};
   double output1[3] = {0};
-  double jacobian1[9] = {0};
+  clad::matrix<double> jacobian(3, 3);
 
 void fn_mul(double i, double j, double *res) {
    res[0] = i*i;
@@ -18,34 +17,17 @@ void fn_mul(double i, double j, double *res) {
    res[2] = i*j;
 }
 
-// CHECK: void fn_mul_jac(double i, double j, double *res, double *jacobianMatrix) {
-// CHECK-NEXT:     double _t0 = res[0];
+// CHECK: void fn_mul_jac(double i, double j, double *res, clad::matrix<double> *_d_vector_res) {
+// CHECK-NEXT:     unsigned long indepVarCount = _d_vector_res->rows() + {{2U|2UL|2ULL}};
+// CHECK-NEXT:     clad::array<double> _d_vector_i = clad::one_hot_vector(indepVarCount, {{0U|0UL|0ULL}});
+// CHECK-NEXT:     clad::array<double> _d_vector_j = clad::one_hot_vector(indepVarCount, {{1U|1UL|1ULL}});
+// CHECK-NEXT:     *_d_vector_res = clad::identity_matrix(_d_vector_res->rows(), indepVarCount, {{2U|2UL|2ULL}});
+// CHECK-NEXT:     *_d_vector_res[0] = _d_vector_i * i + i * _d_vector_i;
 // CHECK-NEXT:     res[0] = i * i;
-// CHECK-NEXT:     double _t1 = res[1];
+// CHECK-NEXT:     *_d_vector_res[1] = _d_vector_j * j + j * _d_vector_j;
 // CHECK-NEXT:     res[1] = j * j;
-// CHECK-NEXT:     double _t2 = res[2];
+// CHECK-NEXT:     *_d_vector_res[2] = _d_vector_i * j + i * _d_vector_j;
 // CHECK-NEXT:     res[2] = i * j;
-// CHECK-NEXT:     {
-// CHECK-NEXT:         {
-// CHECK-NEXT:             jacobianMatrix[{{4U|4UL|4ULL}}] += 1 * j;
-// CHECK-NEXT:             jacobianMatrix[{{5U|5UL|5ULL}}] += i * 1;
-// CHECK-NEXT:         }
-// CHECK-NEXT:         res[2] = _t2;
-// CHECK-NEXT:     }
-// CHECK-NEXT:     {
-// CHECK-NEXT:         {
-// CHECK-NEXT:             jacobianMatrix[{{3U|3UL|3ULL}}] += 1 * j;
-// CHECK-NEXT:             jacobianMatrix[{{3U|3UL|3ULL}}] += j * 1;
-// CHECK-NEXT:         }
-// CHECK-NEXT:         res[1] = _t1;
-// CHECK-NEXT:     }
-// CHECK-NEXT:     {
-// CHECK-NEXT:         {
-// CHECK-NEXT:             jacobianMatrix[{{0U|0UL|0ULL}}] += 1 * i;
-// CHECK-NEXT:             jacobianMatrix[{{0U|0UL|0ULL}}] += i * 1;
-// CHECK-NEXT:         }
-// CHECK-NEXT:         res[0] = _t0;
-// CHECK-NEXT:     }
 // CHECK-NEXT: }
 
 
@@ -56,41 +38,22 @@ void f_1(double x, double y, double z, double output[]) {
   output[2] = z * x * 10 - y * z;
 }
 
-// CHECK: void f_1_jac(double x, double y, double z, double output[], double *jacobianMatrix) {
-// CHECK-NEXT:     double _t0 = output[0];
-// CHECK-NEXT:     output[0] = x * x * x;
-// CHECK-NEXT:     double _t1 = output[1];
-// CHECK-NEXT:     output[1] = x * y * x + y * x * x;
-// CHECK-NEXT:     double _t2 = output[2];
-// CHECK-NEXT:     output[2] = z * x * 10 - y * z;
-// CHECK-NEXT:     {
-// CHECK-NEXT:         {
-// CHECK-NEXT:             jacobianMatrix[{{8U|8UL|8ULL}}] += 1 * 10 * x;
-// CHECK-NEXT:             jacobianMatrix[{{6U|6UL|6ULL}}] += z * 1 * 10;
-// CHECK-NEXT:             jacobianMatrix[{{7U|7UL|7ULL}}] += -1 * z;
-// CHECK-NEXT:             jacobianMatrix[{{8U|8UL|8ULL}}] += y * -1;
-// CHECK-NEXT:         }
-// CHECK-NEXT:         output[2] = _t2;
-// CHECK-NEXT:     }
-// CHECK-NEXT:     {
-// CHECK-NEXT:         {
-// CHECK-NEXT:             jacobianMatrix[{{3U|3UL|3ULL}}] += 1 * x * y;
-// CHECK-NEXT:             jacobianMatrix[{{4U|4UL|4ULL}}] += x * 1 * x;
-// CHECK-NEXT:             jacobianMatrix[{{3U|3UL|3ULL}}] += x * y * 1;
-// CHECK-NEXT:             jacobianMatrix[{{4U|4UL|4ULL}}] += 1 * x * x;
-// CHECK-NEXT:             jacobianMatrix[{{3U|3UL|3OLL}}] += y * 1 * x;
-// CHECK-NEXT:             jacobianMatrix[{{3U|3UL|3OLL}}] += y * x * 1;
-// CHECK-NEXT:         }
-// CHECK-NEXT:         output[1] = _t1;
-// CHECK-NEXT:     }
-// CHECK-NEXT:     {
-// CHECK-NEXT:         {
-// CHECK-NEXT:             jacobianMatrix[{{0U|0UL|0ULL}}] += 1 * x * x;
-// CHECK-NEXT:             jacobianMatrix[{{0U|0UL|0ULL}}] += x * 1 * x;
-// CHECK-NEXT:             jacobianMatrix[{{0U|0UL|0ULL}}] += x * x * 1;
-// CHECK-NEXT:         }
-// CHECK-NEXT:         output[0] = _t0;
-// CHECK-NEXT:     }
+// CHECK: void f_1_jac(double x, double y, double z, double output[], clad::matrix<double> *_d_vector_output) {
+// CHECK-NEXT:     unsigned long indepVarCount = _d_vector_output->rows() + {{3U|3UL|3ULL}};
+// CHECK-NEXT:     clad::array<double> _d_vector_x = clad::one_hot_vector(indepVarCount, {{0U|0UL|0ULL}});
+// CHECK-NEXT:     clad::array<double> _d_vector_y = clad::one_hot_vector(indepVarCount, {{1U|1UL|1ULL}});
+// CHECK-NEXT:     clad::array<double> _d_vector_z = clad::one_hot_vector(indepVarCount, {{2U|2UL|2ULL}});
+// CHECK-NEXT:     *_d_vector_output = clad::identity_matrix(_d_vector_output->rows(), indepVarCount, {{3U|3UL|3ULL}});
+// CHECK-NEXT:     double _t0 = x * x;
+// CHECK-NEXT:     *_d_vector_output[0] = (_d_vector_x * x + x * _d_vector_x) * x + _t0 * _d_vector_x;
+// CHECK-NEXT:     output[0] = _t0 * x;
+// CHECK-NEXT:     double _t1 = x * y;
+// CHECK-NEXT:     double _t2 = y * x;
+// CHECK-NEXT:     *_d_vector_output[1] = (_d_vector_x * y + x * _d_vector_y) * x + _t1 * _d_vector_x + (_d_vector_y * x + y * _d_vector_x) * x + _t2 * _d_vector_x;
+// CHECK-NEXT:     output[1] = _t1 * x + _t2 * x;
+// CHECK-NEXT:     double _t3 = z * x;
+// CHECK-NEXT:     *_d_vector_output[2] = (_d_vector_z * x + z * _d_vector_x) * 10 + _t3 * (clad::zero_vector(indepVarCount)) - (_d_vector_y * z + y * _d_vector_z); 
+// CHECK-NEXT:     output[2] = _t3 * 10 - y * z;
 // CHECK-NEXT: }
 
 
@@ -98,7 +61,7 @@ int main(){
     INIT_JACOBIAN(fn_mul);
     INIT_JACOBIAN(f_1);
 
-    TEST_JACOBIAN(fn_mul, 2, 6, 3, 1, output, jacobian); // CHECK-EXEC: {6.00, 0.00, 0.00, 2.00, 1.00, 3.00}
-    TEST_JACOBIAN(f_1, 3, 9, 4, 5, 6, output1, jacobian1); // CHECK-EXEC: {48.00, 0.00, 0.00, 80.00, 32.00, 0.00, 60.00, -6.00, 35.00}
+    TEST_JACOBIAN(fn_mul, 2, 6, 3, 1, output, &jacobian); // CHECK-EXEC: {6.00, 0.00, 0.00, 2.00, 1.00, 3.00}
+    TEST_JACOBIAN(f_1, 3, 9, 4, 5, 6, output1, &jacobian); // CHECK-EXEC: {48.00, 0.00, 0.00, 80.00, 32.00, 0.00, 60.00, -6.00, 35.00}
 
 }
diff --git a/test/NthDerivative/CustomDerivatives.C b/test/NthDerivative/CustomDerivatives.C
index e5bcbecbb..827320a0d 100644
--- a/test/NthDerivative/CustomDerivatives.C
+++ b/test/NthDerivative/CustomDerivatives.C
@@ -55,15 +55,15 @@ float test_trig(float x, float y, int a, int b) {
 // CHECK-NEXT:    clad::ValueAndPushforward<ValueAndPushforward<float, float>, ValueAndPushforward<float, float> > _t0 = clad::custom_derivatives::std::sin_pushforward_pushforward(x * y, _d_x0 * y + x * _d_y0, _d_x * y + x * _d_y, _d__d_x * y + _d_x0 * _d_y + _d_x * _d_y0 + x * _d__d_y);
 // CHECK-NEXT:    ValueAndPushforward<float, float> _d__t0 = _t0.pushforward;
 // CHECK-NEXT:    ValueAndPushforward<float, float> _t00 = _t0.value;
-// CHECK-NEXT:    clad::ValueAndPushforward<ValueAndPushforward<decltype(::std::pow(float(), int())), decltype(::std::pow(float(), int()))>, ValueAndPushforward<decltype(::std::pow(float(), int())), decltype(::std::pow(float(), int()))> > _t1 = pow_pushforward_pushforward(_t00.value, a, _t00.pushforward, _d_a0, _d__t0.value, _d_a, _d__t0.pushforward, _d__d_a);
-// CHECK-NEXT:    ValueAndPushforward<decltype(::std::pow(float(), int())), decltype(::std::pow(float(), int()))> _d__t1 = _t1.pushforward;
-// CHECK-NEXT:    ValueAndPushforward<decltype(::std::pow(float(), int())), decltype(::std::pow(float(), int()))> _t10 = _t1.value;
+// CHECK-NEXT:    clad::ValueAndPushforward<ValueAndPushforward<double, double>, ValueAndPushforward<double, double> > _t1 = pow_pushforward_pushforward(_t00.value, a, _t00.pushforward, _d_a0, _d__t0.value, _d_a, _d__t0.pushforward, _d__d_a);
+// CHECK-NEXT:    ValueAndPushforward<double, double> _d__t1 = _t1.pushforward;
+// CHECK-NEXT:    ValueAndPushforward<double, double> _t10 = _t1.value;
 // CHECK-NEXT:    clad::ValueAndPushforward<ValueAndPushforward<float, float>, ValueAndPushforward<float, float> > _t2 = clad::custom_derivatives::std::cos_pushforward_pushforward(x * y, _d_x0 * y + x * _d_y0, _d_x * y + x * _d_y, _d__d_x * y + _d_x0 * _d_y + _d_x * _d_y0 + x * _d__d_y);
 // CHECK-NEXT:    ValueAndPushforward<float, float> _d__t2 = _t2.pushforward;
 // CHECK-NEXT:    ValueAndPushforward<float, float> _t20 = _t2.value;
-// CHECK-NEXT:    clad::ValueAndPushforward<ValueAndPushforward<decltype(::std::pow(float(), int())), decltype(::std::pow(float(), int()))>, ValueAndPushforward<decltype(::std::pow(float(), int())), decltype(::std::pow(float(), int()))> > _t3 = clad::custom_derivatives::std::pow_pushforward_pushforward(_t20.value, b, _t20.pushforward, _d_b0, _d__t2.value, _d_b, _d__t2.pushforward, _d__d_b);
-// CHECK-NEXT:    ValueAndPushforward<decltype(::std::pow(float(), int())), decltype(::std::pow(float(), int()))> _d__t3 = _t3.pushforward;
-// CHECK-NEXT:    ValueAndPushforward<decltype(::std::pow(float(), int())), decltype(::std::pow(float(), int()))> _t30 = _t3.value;
+// CHECK-NEXT:    clad::ValueAndPushforward<ValueAndPushforward<double, double>, ValueAndPushforward<double, double> > _t3 = clad::custom_derivatives::std::pow_pushforward_pushforward(_t20.value, b, _t20.pushforward, _d_b0, _d__t2.value, _d_b, _d__t2.pushforward, _d__d_b);
+// CHECK-NEXT:    ValueAndPushforward<double, double> _d__t3 = _t3.pushforward;
+// CHECK-NEXT:    ValueAndPushforward<double, double> _t30 = _t3.value;
 // CHECK-NEXT:    double &_d__t4 = _d__t1.value;
 // CHECK-NEXT:    double &_t40 = _t10.value;
 // CHECK-NEXT:    double &_d__t5 = _d__t3.value;
@@ -92,15 +92,15 @@ float test_trig(float x, float y, int a, int b) {
 // CHECK-NEXT:    clad::ValueAndPushforward<ValueAndPushforward<float, float>, ValueAndPushforward<float, float> > _t0 = clad::custom_derivatives::std::sin_pushforward_pushforward(x * y, _d_x0 * y + x * _d_y0, _d_x * y + x * _d_y, _d__d_x * y + _d_x0 * _d_y + _d_x * _d_y0 + x * _d__d_y);
 // CHECK-NEXT:    ValueAndPushforward<float, float> _d__t0 = _t0.pushforward;
 // CHECK-NEXT:    ValueAndPushforward<float, float> _t00 = _t0.value;
-// CHECK-NEXT:    clad::ValueAndPushforward<ValueAndPushforward<decltype(::std::pow(float(), int())), decltype(::std::pow(float(), int()))>, ValueAndPushforward<decltype(::std::pow(float(), int())), decltype(::std::pow(float(), int()))> > _t1 = clad::custom_derivatives::std::pow_pushforward_pushforward(_t00.value, a, _t00.pushforward, _d_a0, _d__t0.value, _d_a, _d__t0.pushforward, _d__d_a);
-// CHECK-NEXT:    ValueAndPushforward<decltype(::std::pow(float(), int())), decltype(::std::pow(float(), int()))> _d__t1 = _t1.pushforward;
-// CHECK-NEXT:    ValueAndPushforward<decltype(::std::pow(float(), int())), decltype(::std::pow(float(), int()))> _t10 = _t1.value;
+// CHECK-NEXT:    clad::ValueAndPushforward<ValueAndPushforward<double, double>, ValueAndPushforward<double, double> > _t1 = clad::custom_derivatives::std::pow_pushforward_pushforward(_t00.value, a, _t00.pushforward, _d_a0, _d__t0.value, _d_a, _d__t0.pushforward, _d__d_a);
+// CHECK-NEXT:    ValueAndPushforward<double, double> _d__t1 = _t1.pushforward;
+// CHECK-NEXT:    ValueAndPushforward<double, double> _t10 = _t1.value;
 // CHECK-NEXT:    clad::ValueAndPushforward<ValueAndPushforward<float, float>, ValueAndPushforward<float, float> > _t2 = clad::custom_derivatives::std::cos_pushforward_pushforward(x * y, _d_x0 * y + x * _d_y0, _d_x * y + x * _d_y, _d__d_x * y + _d_x0 * _d_y + _d_x * _d_y0 + x * _d__d_y);
 // CHECK-NEXT:    ValueAndPushforward<float, float> _d__t2 = _t2.pushforward;
 // CHECK-NEXT:    ValueAndPushforward<float, float> _t20 = _t2.value;
-// CHECK-NEXT:    clad::ValueAndPushforward<ValueAndPushforward<decltype(::std::pow(float(), int())), decltype(::std::pow(float(), int()))>, ValueAndPushforward<decltype(::std::pow(float(), int())), decltype(::std::pow(float(), int()))> > _t3 = clad::custom_derivatives::std::pow_pushforward_pushforward(_t20.value, b, _t20.pushforward, _d_b0, _d__t2.value, _d_b, _d__t2.pushforward, _d__d_b);
-// CHECK-NEXT:    ValueAndPushforward<decltype(::std::pow(float(), int())), decltype(::std::pow(float(), int()))> _d__t3 = _t3.pushforward;
-// CHECK-NEXT:    ValueAndPushforward<decltype(::std::pow(float(), int())), decltype(::std::pow(float(), int()))> _t30 = _t3.value;
+// CHECK-NEXT:    clad::ValueAndPushforward<ValueAndPushforward<double, double>, ValueAndPushforward<double, double> > _t3 = clad::custom_derivatives::std::pow_pushforward_pushforward(_t20.value, b, _t20.pushforward, _d_b0, _d__t2.value, _d_b, _d__t2.pushforward, _d__d_b);
+// CHECK-NEXT:    ValueAndPushforward<double, double> _d__t3 = _t3.pushforward;
+// CHECK-NEXT:    ValueAndPushforward<double, double> _t30 = _t3.value;
 // CHECK-NEXT:    double &_d__t4 = _d__t1.value;
 // CHECK-NEXT:    double &_t40 = _t10.value;
 // CHECK-NEXT:    double &_d__t5 = _d__t3.value;
diff --git a/test/TestUtils.h b/test/TestUtils.h
index 6d76c59f6..13d78df2c 100644
--- a/test/TestUtils.h
+++ b/test/TestUtils.h
@@ -116,16 +116,25 @@ void run_gradient_impl(CF cf, index_pack<S...> s, Args&&... args) {
   display(std::get<S>(t)...);
 }
 
-template <class CF, std::size_t... S, class... Args>
-void run_jacobian_impl(CF cf, std::size_t size, index_pack<S...> s,
-                       Args&&... args) {
+template <class CF, class... Args>
+void run_jacobian_impl(CF cf, std::size_t size, Args&&... args) {
   std::tuple<Args...> t = {args...};
-  reset(std::get<S>(t)...);
   cf.execute(args...);
   typedef
       typename std::remove_reference<decltype(*std::get<sizeof...(args) - 1>(
           t))>::type arrElemType;
-  displayarr<arrElemType>(std::get<sizeof...(args) - 1>(t), size);
+  auto& res = *std::get<sizeof...(args) - 1>(t);
+  unsigned numOfParameters = sizeof...(args) - 2;
+  unsigned numOfOutputs = res.rows();
+  printf("{");
+  for (unsigned i = 0; i < numOfOutputs; ++i) {
+    for (unsigned j = 0; j < numOfParameters; ++j) {
+      printf("%.2f", res[i][j]);
+      if (i != numOfOutputs - 1 || j != numOfParameters - 1)
+        printf(", ");
+    }
+  }
+  printf("}\n");
 }
 
 template <class CF, std::size_t... S, class... Args>
@@ -150,11 +159,7 @@ void run_gradient(CF cf, Args&&... args) {
 template <std::size_t NumOfDerivativeArgs, std::size_t size, class CF,
           class... Args>
 void run_jacobian(CF cf, Args&&... args) {
-  using DerivativeArgRange =
-      typename GenerateRange<sizeof...(Args) - NumOfDerivativeArgs,
-                             sizeof...(Args) - 1>::type;
-  run_jacobian_impl(cf, size, DerivativeArgRange(),
-                    std::forward<Args>(args)...);
+  run_jacobian_impl(cf, size, std::forward<Args>(args)...);
 }
 
 template <std::size_t NumOfDerivativeArgs, class CF, class... Args>