diff --git a/src/madness/chem/lowrankfunction.h b/src/madness/chem/lowrankfunction.h
index 5707f0d7688..d08007afa6e 100644
--- a/src/madness/chem/lowrankfunction.h
+++ b/src/madness/chem/lowrankfunction.h
@@ -54,6 +54,7 @@ namespace madness {
     protected:
         double volume_element=0.1;
         double radius=3;
+        bool do_print=false;
     };
 
     template<std::size_t NDIM>
@@ -67,7 +68,7 @@ namespace madness {
         std::vector<Vector<double,NDIM>> get_grid() const {
             std::vector<Vector<double, NDIM>> grid;
             long npoint_within_volume=volume()/volume_element;
-            print("npoint_within_volume",npoint_within_volume);
+            if (do_print) print("npoint_within_volume",npoint_within_volume);
 
             auto cell = FunctionDefaults<NDIM>::get_cell();
             auto is_in_cell = [&cell](const Vector<double, NDIM>& r) {
@@ -92,10 +93,12 @@ namespace madness {
                 grid.push_back(tmp);
                 if (rank==npoint_within_volume) break;
             }
-            print("grid points in volume ",rank);
-            print("total grid points     ",grid.size());
-            print("ratio                 ",rank/double(grid.size()));
-            print("volume element        ",volume()/rank);
+            if (do_print) {
+                print("grid points in volume ",rank);
+                print("total grid points     ",grid.size());
+                print("ratio                 ",rank/double(grid.size()));
+                print("volume element        ",volume()/rank);
+            }
             return grid;
         }
 
@@ -117,58 +120,6 @@ namespace madness {
             return result;
         }
 
-//        double computed_volume_element() const {
-//            double volume = 4.0 / 3.0 * constants::pi * std::pow(radius, 3.0);
-//            print("volume element in random grid", volume / (0.67 * rank));
-//        }
-
-    };
-
-    template<std::size_t NDIM>
-    class sphericalgrid : public gridbase {
-    public:
-        double volume_element=0.1;
-        double radius=3;
-        bool hard_shell=true;
-
-        sphericalgrid(const double volume_element, const double radius, bool hard_shell)
-                :volume_element(volume_element), radius(radius), hard_shell(hard_shell) {
-        };
-
-        std::vector<Vector<double,NDIM>> get_coordinates() const {
-            // 1D grid
-            double volume_element_1d=std::pow(volume_element,1./NDIM);
-            long ngrid=std::ceil(radius/volume_element_1d);
-            double stepsize=radius/ngrid;
-            double scale=1.0;
-            if (not hard_shell) scale=std::pow(2.0,1.0/(ngrid+1));
-            print("scale",scale);
-
-            std::vector<double> coord1d;
-            print("volume element, stepsize, ngrid" ,volume_element, std::pow(stepsize,NDIM),stepsize,ngrid);
-            for (int i=0; i<ngrid+1; ++i) {
-                if (not hard_shell) stepsize*=scale;
-                double c=i*stepsize;
-                coord1d.push_back(c);
-                if (i!=0) coord1d.push_back(-c);
-                print("coord",c);
-            }
-            print("coord1d",coord1d.size());
-            std::vector<Vector<double,NDIM>> result;
-            for (int i=0; i<coord1d.size(); ++i) {
-                for (int j=0; j<coord1d.size(); ++j) {
-                    for (int k=0; k<coord1d.size(); ++k) {
-                        Vector<double,NDIM> c{coord1d[i],coord1d[j],coord1d[k]};
-                        double cutoff = hard_shell ? radius : 2.0*radius;
-                        if (c.normf()<cutoff) result.push_back(c);
-                    }
-                }
-            }
-            print("result.size()",result.size());
-//        for (auto& r: result) print(r);
-            return  result;
-
-        }
     };
 
     template<std::size_t NDIM>
@@ -280,6 +231,13 @@ struct particle {
         return ss.str();
     }
 
+
+    /// type conversion to std::array
+    std::array<int,PDIM> get_array() const {
+        return dims;
+    }
+
+
     /// assuming two particles only
     bool is_first() const {return dims[0]==0;}
     /// assuming two particles only
@@ -422,9 +380,10 @@ struct LRFunctorPure : public LRFunctorBase<T,NDIM> {
 
     std::vector<Function<T,LDIM>> inner(const std::vector<Function<T,LDIM>>& rhs,
                                         const particle<LDIM> p1, const particle<LDIM> p2) const {
-        std::vector<Function<T,LDIM>> result;
-        for (const auto& r : rhs) result.push_back(madness::inner(f,r,p1.get_tuple(),p2.get_tuple()));
-        return result;
+        return madness::innerXX<LDIM>(f,rhs,p1.get_array(),p2.get_array());
+//        std::vector<Function<T,LDIM>> result;
+//        for (const auto& r : rhs) result.push_back(madness::inner(f,r,p1.get_tuple(),p2.get_tuple()));
+//        return result;
     }
 
     T operator()(const Vector<double,NDIM>& r) const {
@@ -449,7 +408,7 @@ struct LRFunctorPure : public LRFunctorBase<T,NDIM> {
         World& world;
         double rank_revealing_tol=1.e-8;     // rrcd tol
         std::string orthomethod="canonical";
-        bool do_print=true;
+        bool do_print=false;
         std::vector<Function<T,LDIM>> g,h;
         const particle<LDIM> p1=particle<LDIM>::particle1();
         const particle<LDIM> p2=particle<LDIM>::particle2();
@@ -693,6 +652,7 @@ struct LRFunctorPure : public LRFunctorBase<T,NDIM> {
 
             // \int f(1,2)^2 d1d2
             double term1 = lrfunctor1.norm2();
+            term1=term1*term1;
             t.tag("computing term1");
 
             // \int f(1,2) pre(1) post(2) \sum_i g(1) h(2) d1d2
@@ -783,6 +743,7 @@ struct LRFunctorPure : public LRFunctorBase<T,NDIM> {
     template<typename T, std::size_t NDIM, std::size_t PDIM>
     LowRankFunction<T,NDIM> inner(const LowRankFunction<T,NDIM>& f1, const Function<T,NDIM>& f2,
                                   const particle<PDIM> p1, const particle<PDIM> p2) {
+        static_assert(TensorTypeData<T>::iscomplex==false, "complex inner in LowRankFunction not implemented");
         World& world=f1.world;
         static_assert(2*PDIM==NDIM);
         // int f(1,2) k(2,3) d2 = \sum \int g_i(1) h_i(2) k(2,3) d2
@@ -790,14 +751,12 @@ struct LRFunctorPure : public LRFunctorBase<T,NDIM> {
         LowRankFunction<T, NDIM> result(world);
         if (p1.is_last()) { // integrate over 2: result(1,3) = lrf(1,2) f(2,3)
             result.g = f1.g;
-            decltype(f1.h) h;
-            for (int i=0; i<f1.rank(); ++i) h.push_back(inner(f1.h[i],f2,(particle<PDIM>::particle1().get_tuple()),p2.get_tuple()));
-            result.h=copy(h);
+            change_tree_state(f1.h,reconstructed);
+            result.h=innerXX<PDIM>(f2,f1.h,p2.get_array(),particle<PDIM>::particle1().get_array());
         } else if (p1.is_first()) { // integrate over 1: result(2,3) = lrf(1,2) f(1,3)
             result.g = f1.h;        // correct! second variable of f1 becomes first variable of result
-            decltype(f1.g) h;
-            for (int i=0; i<f1.rank(); ++i) h.push_back(inner(f1.g[i],f2,particle<PDIM>::particle1().get_tuple(),p2.get_tuple()));
-            result.h=copy(h);
+            change_tree_state(f1.g,reconstructed);
+            result.h=innerXX<PDIM>(f2,f1.g,p2.get_array(),particle<PDIM>::particle1().get_array());
         }
         return result;
     }
diff --git a/src/madness/chem/test_low_rank_function.cc b/src/madness/chem/test_low_rank_function.cc
index 1ebbd506383..2233fdfb514 100644
--- a/src/madness/chem/test_low_rank_function.cc
+++ b/src/madness/chem/test_low_rank_function.cc
@@ -309,7 +309,7 @@ template<std::size_t LDIM>
 int test_full_rank_functor(World& world, LowRankFunctionParameters& parameters) {
 
     test_output t1("test_full_rank_functor");
-//    t1.set_cout_to_terminal();
+    t1.set_cout_to_terminal();
     print_header2("entering test_full_rank_functor");
     constexpr int NDIM=2*LDIM;
     FunctionDefaults<LDIM>::set_thresh(1.e-6);
@@ -470,12 +470,12 @@ int test_inner(World& world, LowRankFunctionParameters parameters) {
             .functor([](const Vector<double,LDIM>& r){return exp(-4.0*inner(r,r));});
 
     LRFunctorF12<double,NDIM> functor1;
-//    functor1.f12.reset(GaussOperatorPtr<LDIM>(world,1.0));
-    functor1.f12.reset(SlaterOperatorPtr_ND<LDIM>(world,1.0,1.e-4,thresh));
+    functor1.f12.reset(GaussOperatorPtr<LDIM>(world,1.0));
+//    functor1.f12.reset(SlaterOperatorPtr_ND<LDIM>(world,1.0,1.e-4,thresh));
     functor1.a=phi;
     LRFunctorF12<double,NDIM> functor2;
-//    functor2.f12.reset(GaussOperatorPtr<LDIM>(world,2.0));
-    functor2.f12.reset(SlaterOperatorPtr_ND<LDIM>(world,2.0,1.e-4,thresh));
+    functor2.f12.reset(GaussOperatorPtr<LDIM>(world,2.0));
+//    functor2.f12.reset(SlaterOperatorPtr_ND<LDIM>(world,2.0,1.e-4,thresh));
     functor2.a=phi;
 
     auto p1=particle<LDIM>::particle1();
@@ -491,7 +491,7 @@ int test_inner(World& world, LowRankFunctionParameters parameters) {
     // f2(x,y) = exp(-a*x^2) * exp(-g (x-y)^2)
     // with a=4, g=2
     // int f1(x,y),f2(x,z) dx = inner(f1,f2,0,0) : norm^2 = Pi^2/(2 Sqrt[2] Sqrt[a gamma] Sqrt[1 + 2 a + gamma]) = 0.37197471167788324677
-    // int f1(x,y),f2(z,x) dx = inner(f1,f2,0,1) : norm^2 = 0.32972034117743393239
+    // int f1(x,y),f2(z,x) dx = inner(f1,f2,0,1) : norm^2 = Pi^2/(2 Sqrt[a (1 + a + gamma) (a + 2 gamma)]) = 0.32972034117743393239
     // int f1(y,x),f2(x,z) dx = inner(f1,f2,1,0) : norm^2 = 0.26921553123369812300
     // int f1(y,x),f2(z,x) dx = inner(f1,f2,1,1) : norm^2 = 0.35613867236025352322
 
@@ -509,7 +509,8 @@ int test_inner(World& world, LowRankFunctionParameters parameters) {
 
                 // full/full
                 auto lhs1=inner(fullrank1,fullrank2,p11.get_tuple(),p22.get_tuple());
-                double l1=lhs1.norm2();
+                const double l1=lhs1.norm2();
+                print("l1",l1,l1*l1,ref);
                 t1.checkpoint(fabs(l1*l1-ref)<thresh,"inner(full,full,"+p11.str()+","+p22.str()+")");
                 double asymmetric_ref=inner(fullrank1,lhs1);
                 double asymmetric1=inner(fullrank1,lhs1);
@@ -544,8 +545,8 @@ int test_inner(World& world, LowRankFunctionParameters parameters) {
                 counter++;
             }
         }
-
     }
+//    return t1.end();
 
     // inner f(1,2) g(2)
     // this is surprisingly inaccurate, but algorithm is correct, the error can be systematically decreased
@@ -599,7 +600,7 @@ int test_construction_optimization(World& world, LowRankFunctionParameters param
     parameters.set_user_defined_value("volume_element",0.05);
     constexpr std::size_t NDIM=2*LDIM;
     test_output t1("LowRankFunction::construction/optimization in dimension "+std::to_string(NDIM));
-    t1.set_cout_to_terminal();
+//    t1.set_cout_to_terminal();
     OperatorInfo info(1.0,1.e-6,FunctionDefaults<LDIM>::get_thresh(),OT_SLATER);
     auto slater=std::shared_ptr<SeparatedConvolution<double,LDIM> >(new SeparatedConvolution<double,LDIM>(world,info));
     Function<double,LDIM> one=FunctionFactory<double,LDIM>(world).functor([](const Vector<double,LDIM>& r){return exp(-0.2*inner(r,r));});
@@ -677,11 +678,12 @@ int main(int argc, char **argv) {
 //        isuccess+=test_grids<1>(world,parameters);
 //        isuccess+=test_grids<2>(world,parameters);
 //        isuccess+=test_grids<3>(world,parameters);
-//        isuccess+=test_full_rank_functor<1>(world, parameters);
-//        isuccess+=test_construction_optimization<1>(world,parameters);
-//        isuccess+=test_construction_optimization<2>(world,parameters);
+        isuccess+=test_full_rank_functor<1>(world, parameters);
+        isuccess+=test_construction_optimization<1>(world,parameters);
+        isuccess+=test_construction_optimization<2>(world,parameters);
         isuccess+=test_arithmetic<1>(world,parameters);
         isuccess+=test_arithmetic<2>(world,parameters);
+
         isuccess+=test_inner<1>(world,parameters);
         isuccess+=test_inner<2>(world,parameters);
 
diff --git a/src/madness/mra/mra.h b/src/madness/mra/mra.h
index ebdea3baf4d..5ca1967d125 100644
--- a/src/madness/mra/mra.h
+++ b/src/madness/mra/mra.h
@@ -903,7 +903,9 @@ namespace madness {
             } else {
                 MADNESS_EXCEPTION("unknown/unsupported final tree state",1);
             }
-            if (must_fence and world().rank()==0) print("could not respect fence in change_tree_state");
+            if (must_fence and world().rank()==0) {
+                print("could not respect fence in change_tree_state");
+            }
             if (fence && VERIFY_TREE) verify_tree(); // Must be after in case nonstandard
             return *this;
         }
@@ -2509,8 +2511,8 @@ namespace madness {
     /// @param[in]  task    0: everything, 1; prepare only (fence), 2: work only (no fence), 3: finalize only (fence)
     template<std::size_t NDIM, typename T, std::size_t LDIM, typename R, std::size_t KDIM,
             std::size_t CDIM = (KDIM + LDIM - NDIM) / 2>
-    Function<TENSOR_RESULT_TYPE(T, R), NDIM>
-    innerXX(const Function<T, LDIM>& f, const Function<R, KDIM>& g, const std::array<int, CDIM> v1,
+    std::vector<Function<TENSOR_RESULT_TYPE(T, R), NDIM>>
+    innerXX(const Function<T, LDIM>& f, const std::vector<Function<R, KDIM>>& vg, const std::array<int, CDIM> v1,
            const std::array<int, CDIM> v2, int task=0) {
         bool prepare = ((task==0) or (task==1));
         bool work = ((task==0) or (task==2));
@@ -2527,8 +2529,8 @@ namespace madness {
         MADNESS_CHECK((v2[0]==0) or (v2[CDIM-1]==KDIM-1));
 
         MADNESS_CHECK(f.is_initialized());
-        MADNESS_CHECK(g.is_initialized());
-        MADNESS_CHECK(f.world().id() == g.world().id());
+        MADNESS_CHECK(vg[0].is_initialized());
+        MADNESS_CHECK(f.world().id() == vg[0].world().id());
         // this needs to be run in a single world, so that all coefficients are local.
         // Use macrotasks if run on multiple processes.
         World& world=f.world();
@@ -2536,32 +2538,33 @@ namespace madness {
 
         if (prepare) {
             f.change_tree_state(nonstandard);
-            g.change_tree_state(nonstandard);
+            change_tree_state(vg,nonstandard);
             world.gop.fence();
             f.get_impl()->compute_snorm_and_dnorm(false);
-            g.get_impl()->compute_snorm_and_dnorm(false);
+            for (auto& g : vg) g.get_impl()->compute_snorm_and_dnorm(false);
             world.gop.fence();
         }
 
         typedef TENSOR_RESULT_TYPE(T, R) resultT;
-        Function<resultT,NDIM> result;
+        std::vector<Function<resultT,NDIM>> result(vg.size());
         if (work) {
             world.gop.set_forbid_fence(true);
-            result=FunctionFactory<resultT,NDIM>(world)
-                            .k(f.k()).thresh(f.thresh()).empty().nofence();
-            result.get_impl()->partial_inner(*f.get_impl(),*g.get_impl(),v1,v2);
-            result.get_impl()->set_tree_state(nonstandard_after_apply);
+            for (int i=0; i<vg.size(); ++i)  {
+                result[i]=FunctionFactory<resultT,NDIM>(world)
+                        .k(f.k()).thresh(f.thresh()).empty().nofence();
+                result[i].get_impl()->partial_inner(*f.get_impl(),*(vg[i]).get_impl(),v1,v2);
+                result[i].get_impl()->set_tree_state(nonstandard_after_apply);
+            }
             world.gop.set_forbid_fence(false);
         }
 
         if (finish) {
 
             world.gop.fence();
-            result.get_impl()->reconstruct(true);
-            result.reconstruct();
-            FunctionImpl<T,LDIM>& f_nc=const_cast<FunctionImpl<T,LDIM>&>(*f.get_impl());
-            FunctionImpl<R,KDIM>& g_nc=const_cast<FunctionImpl<R,KDIM>&>(*g.get_impl());
+//            result.get_impl()->reconstruct(true);
 
+            change_tree_state(result,reconstructed);
+//            result.reconstruct();
             // restore initial state of g and h
             auto erase_list = [] (const auto& funcimpl) {
                 typedef typename std::decay_t<decltype(funcimpl)>::keyT keyTT;
@@ -2574,10 +2577,14 @@ namespace madness {
                 return to_be_erased;
             };
 
+            FunctionImpl<T,LDIM>& f_nc=const_cast<FunctionImpl<T,LDIM>&>(*f.get_impl());
             for (auto& key : erase_list(f_nc)) f_nc.get_coeffs().erase(key);
-            for (auto& key : erase_list(g_nc)) g_nc.get_coeffs().erase(key);
+            for (auto& g : vg) {
+                FunctionImpl<R,KDIM>& g_nc=const_cast<FunctionImpl<R,KDIM>&>(*g.get_impl());
+                for (auto& key : erase_list(g_nc)) g_nc.get_coeffs().erase(key);
+            }
             world.gop.fence();
-            g_nc.reconstruct(false);
+            change_tree_state(vg,reconstructed);
             f_nc.reconstruct(false);
             world.gop.fence();
 
@@ -2586,6 +2593,20 @@ namespace madness {
         return result;
     }
 
+
+    /// Computes the partial scalar/inner product between two functions, returns a low-dim function
+
+    /// syntax similar to the inner product in tensor.h
+    /// e.g result=inner<3>(f,g),{0},{1}) : r(x,y) = int f(x1,x) g(y,x1) dx1
+    /// @param[in]  task    0: everything, 1; prepare only (fence), 2: work only (no fence), 3: finalize only (fence)
+    template<std::size_t NDIM, typename T, std::size_t LDIM, typename R, std::size_t KDIM,
+            std::size_t CDIM = (KDIM + LDIM - NDIM) / 2>
+    Function<TENSOR_RESULT_TYPE(T, R), NDIM>
+    innerXX(const Function<T, LDIM>& f, const Function<R, KDIM>& g, const std::array<int, CDIM> v1,
+            const std::array<int, CDIM> v2, int task=0) {
+        return innerXX<NDIM,T,LDIM,R,KDIM>(f,std::vector<Function<R,KDIM>>({g}),v1,v2,task)[0];
+    }
+
     /// Computes the partial scalar/inner product between two functions, returns a low-dim function
 
     /// syntax similar to the inner product in tensor.h
diff --git a/src/madness/mra/vmra.h b/src/madness/mra/vmra.h
index 4ca5cd50bd8..3b7757e38e7 100644
--- a/src/madness/mra/vmra.h
+++ b/src/madness/mra/vmra.h
@@ -288,6 +288,19 @@ namespace madness {
         if (not dummy.is_initialized()) return v;
         World& world=dummy.world();
 
+        // a couple of special cases
+        if (finalstate==nonstandard) {
+            bool must_fence=false;
+            for (auto& f : v) {
+                if (f.get_impl()->get_tree_state()== reconstructed) continue;
+                if (f.get_impl()->get_tree_state()!=nonstandard) {
+                    must_fence=true;
+                    f.change_tree_state(reconstructed,false);
+                }
+            }
+            if (must_fence) world.gop.fence();
+//            if (world.rank()==0 and must_fence) print("could not respect fence in change_tree_state(vector)");
+        }
 //        if (not fence) world.gop.set_forbid_fence(true);    // make sure fence is respected
         for (unsigned int i=0; i<v.size(); ++i) v[i].change_tree_state(finalstate,fence);
 //        if (not fence) world.gop.set_forbid_fence(false);
@@ -1481,7 +1494,7 @@ namespace madness {
         bool print_timings=(NDIM==6) and (world.rank()==0) and op.print_timings;
 
         double wall0=wall_time();
-        reconstruct(world, f);
+//        reconstruct(world, f);
         make_nonstandard(world, ncf);
         double wall1=wall_time();
         if (print_timings) printf("timer: %20.20s %8.2fs\n", "make_nonstandard", wall1-wall0);
@@ -1494,7 +1507,12 @@ namespace madness {
         world.gop.fence();
 
         // restores promise of logical constness
-        if (not op.destructive()) standard(world, ncf, false);
+        if (op.destructive()) {
+            for (auto& ff : ncf) ff.clear(false);
+            world.gop.fence();
+        } else {
+            reconstruct(world,f);
+        }
 
         // svd-tensor requires some cleanup after apply
         if (result[0].get_impl()->get_tensor_type()==TT_2D) {