diff --git a/src/madness/lcao/bspline.cc b/src/madness/lcao/bspline.cc index 7bf29e45d63..76841c83e1f 100644 --- a/src/madness/lcao/bspline.cc +++ b/src/madness/lcao/bspline.cc @@ -23,55 +23,123 @@ template struct scalar_type> {typedef T type;}; using namespace madness; // <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< -// Locate the interval containing x in the padded knot vector t (increasing order) using linear search. -// Works for general knots on the interval including the endpoints. Since it using the padded knot vector -// it returns an index in the inclusive range order-1..Npknots-order-1. template -size_t xinterval(T x, const Tensor& t, size_t order) { - size_t Npknots = t.size(); - if (x < t[0] || x > t[Npknots-1]) { - print(x, t[0], t[Npknots-1]); - throw "Xinterval: x not in t"; +class KnotsGeneral { + const Tensor _knots; // unique knot vector in ascending order + const T xlo, xhi; // interval + const size_t nknots; // number of knots + +public: + KnotsGeneral(const Tensor& knots) + : _knots(knots) + , xlo(_knots[0]) + , xhi(_knots[knots.size()-1]) + , nknots(_knots.size()) + { + for (size_t i=1; i= t[i] && x < t[i+1]) return i; + + size_t size() const {return nknots;} + + // Return (shallow copy) the knots + const Tensor knots() const { + return _knots; } - return Npknots - order - 1; -} -// Locate the interval containing x in the padded *uniform* knot vector t (increasing order) -// Note that the knot interval is inclusive of both sides, whereas the general function above is exclusive of the right side. -template -size_t xinterval_uniform(T x, const Tensor& t, size_t order) { - size_t Npknots = t.size(); - if (x < t[order-1] || x > t[Npknots-order]) throw "Xinterval: x not in t"; - T h = ((t[Npknots-order] - t[order-1])/(Npknots - 2* order + 1)) * (1 + std::numeric_limits::epsilon()); - size_t i = size_t((x - t[0])/h ) + order - 1; - return i; -} + // Locate the leftmost index of the unique knot interval + // containing x in the vector of unique knots. If x exactly equals + // a knot return the knot index (even if it is the rightmost + // knot). + // + // This general version is dumb and slow ... linear search!!!!! Bisection would be a better choice. + size_t interval(T x) const { +#if !defined(NDEBUG) + if (x < xlo || x > xhi) throw "interval: x not in range"; +#endif + for (size_t i=0; i= _knots[i] && x < _knots[i+1]) return i; + } + return nknots-1; + } +}; -// Uniform points template -Tensor uniform_knots(size_t nknots, T xlo, T xhi) { - Tensor pts(nknots); - T h = (xhi - xlo)/(nknots - 1); - for (size_t i=0; i::epsilon()))) + {} + + Tensor knots() const { + Tensor pts(nknots); + T h = (xhi - xlo)/(nknots - 1); + for (size_t i=0; i xhi) throw "Xinterval: x not in t"; +#endif + return size_t((x - xlo)*rheps); + } + + size_t size() const {return nknots;} +}; -// Chebyshev points modifyed to include the endpoints +// Chebyshev points *modified to include the endpoints* template -Tensor cheby_knots(size_t nknots, T xlo, T xhi) { - Tensor pts(nknots); - T pi = std::atan(T(1))*4; - for (size_t i=0; i knots() const { + Tensor pts(nknots); + static const T pi = std::atan(T(1))*4; // since T might be dd or qd + for (size_t i=0; i xhi) throw "Xinterval: x not in t"; +#endif + static const T tworpi = 2.0/std::atan(T(1))*4; + T y = (x - xlo)*rL; + T z = std::acos(y)*tworpi; + return size_t(z*(nknots - 1)); + } + + size_t size() const {return nknots;} +}; + // Oversample the knots by inserting geometric mean of neighboring knots. For knots on a geometric/exponential grid this is // what you want, and for a uniform grid we have sqrt(a*(a+h))= a+h/2+O(h^2) where a is a knot and h is the knot spacing. -// So for a dense uniform grid it will also be close to the geometric mean for a>>h. +// So for a dense uniform grid it will be close to the geometric mean for a>>h. template Tensor oversample_knots(const Tensor& knots) { long nknots = knots.size(); @@ -95,10 +163,10 @@ Tensor oversample_knots(const Tensor& knots) { // 3. Ditto for oversampling // 4. Since smoothed derivatives are much more accurate (compared to the "exact" derivative) on the interior of the interval but much less acccurate at the endpoints, we should perhaps increase the density of oversampling for the smoothing near the endpoints. Or perhaps constrain the intermediate fits to respect the exact derivative at the endpoints. -// For now just bundle the knots, oversampling, and the basis functions together in a class, and use the slow generic xinterval() function. -template -class BsplineBasis { -public: +// knots is templated primarily so we can inline the interval method for vectorization +template +class BsplineBasis : protected knotsT { +public: // <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< when we remove const this will have to go too! // Minimum data required to describe the basis const size_t order; // the order of the basis const size_t p; // the degree of the basis = order-1 @@ -106,7 +174,7 @@ class BsplineBasis { const size_t npknots;// the number of padded knots const size_t nbasis;// the number of basis functions const Tensor knots; // the unpadded knots - const Tensor t; // the padded knots + Tensor t; // the padded knots // Pad/repeat the knots at the ends to complete the basis static Tensor pad_knots(size_t order, const Tensor& knots) { @@ -117,23 +185,24 @@ class BsplineBasis { for (size_t i=0; i& knots) - : order(order) + BsplineBasis(size_t order, const knotsT& knots) + : knotsT(knots) + , order(order) , p(order - 1) , nknots(knots.size()) , npknots(nknots + 2*order - 2) , nbasis(nknots + order - 2) - , knots(copy(knots)) - , t(pad_knots(order, knots)) + , knots(knots.knots()) { if (order < 2) throw "BsplineBasis: order must be >= 2"; if (order > 20) throw "BsplineBasis: order must be <= 20 (sanity check)"; - if (knots.size() < 2*order - 1) throw "BsplineBasis: knots must have at least 2*order - 1 elements"; - if (knots[0] > knots[knots.size()-1]) throw "BsplineBasis: knots must be in increasing order"; + if (nknots < 2*order - 1) throw "BsplineBasis: knots must have at least 2*order - 1 elements"; + for (size_t i=1; iknots[i] < this->knots[i-1]) throw "BsplineBasis: knots not in ascending order or not unique"; + } + t = pad_knots(order,this->knots); if (t.size() != npknots) throw "BsplineBasis: internal error with padded knots"; if (npknots-order != nbasis) throw "BsplineBasis: internal error with number of basis functions"; } @@ -141,11 +210,16 @@ class BsplineBasis { // Returns the total number of basis functions corresponding to the length of a coefficient vector size_t size() const { return nbasis; } + // Returns a const ref to the knots object + const knotsT& knots_object() const {return *(knotsT*)this;} + + // in next few routines can k=interval(x)+p is usually used with k-p except where k-r. + // Stably evaluate a spline expansion at a point, i.e., compute sum(i) c[i] b[i](x) template R deBoor(T x, const Tensor& c) const { MADNESS_ASSERT(c.dims()[0] == nbasis); - size_t k = xinterval(x, t, order); + size_t k = knotsT::interval(x) + p; Tensor d = copy(c(Slice(k-p, k))); for (size_t r=0; rr; j--) { @@ -165,7 +239,7 @@ class BsplineBasis { std::vector k(nx); Tensor d(p+1,nx); for (size_t i=0; i d(p+1,nx), s(2*p,nx); // zeroing not necessary for (size_t i=0; i bspline(T x) const { - size_t k = xinterval(x, t, order); + size_t k = knotsT::interval(x)+p; Tensor bm1(nbasis); Tensor b(nbasis); bm1[k] = T(1.0); @@ -308,7 +382,7 @@ class BsplineBasis { // make_deriv_matrix := proc(t, order, Xsample) local Npknots, Nbasis, t1, A, M, dMat; Npknots := numelems(t); Nbasis := Npknots - order; t1 := Vector(Npknots + 2); t1[2 .. Npknots + 1] := t; t1[1] := t[1]; t1[Npknots + 2] := t[Npknots]; A := tabulate_basis(t, order, Xsample); M := make_lsq_matrix(t1, order + 1, Xsample); dMat := make_deriv_exact_matrix(t1, order + 1); return (dMat . M) . A; end proc; Tensor make_deriv_matrix(const Tensor& Xsample) const { size_t nsample = Xsample.dims()[0]; - BsplineBasis B1 = BsplineBasis(order + 1, this->knots); + BsplineBasis B1 = BsplineBasis(order + 1, knots_object()); Tensor A = tabulate_basis(Xsample); Tensor M = B1.make_lsq_matrix(Xsample); Tensor dMat = B1.make_deriv_exact_matrix(); @@ -318,7 +392,7 @@ class BsplineBasis { // same riff as make_deriv2_matrixX ... need more understanding Tensor make_deriv_matrixX(const Tensor& Xsample) const { size_t nsample = Xsample.dims()[0]; - BsplineBasis B1 = BsplineBasis(order - 1, this->knots); + BsplineBasis B1 = BsplineBasis(order - 1, knots_object()); Tensor A = B1.tabulate_basis(Xsample); Tensor M = make_lsq_matrix(Xsample); Tensor dMat = make_deriv_exact_matrix(); @@ -329,8 +403,8 @@ class BsplineBasis { // Gives us 2 order higher accuracy and produces a result in the same order basis as the input. Tensor make_deriv2_matrix(const Tensor& Xsample) const { size_t nsample = Xsample.dims()[0]; - BsplineBasis B1 = BsplineBasis(order + 1, this->knots); - BsplineBasis B2 = BsplineBasis(order + 2, this->knots); + BsplineBasis B1 = BsplineBasis(order + 1, knots_object()); + BsplineBasis B2 = BsplineBasis(order + 2, knots_object()); Tensor A = tabulate_basis(Xsample); Tensor M2 = B2.make_lsq_matrix(Xsample); Tensor dMat2 = B2.make_deriv_exact_matrix(); @@ -343,8 +417,8 @@ class BsplineBasis { // Need to understand why this sometimes seems better than make_deriv2_matrix which intuitively seems better. Tensor make_deriv2_matrixX(const Tensor& Xsample) const { size_t nsample = Xsample.dims()[0]; - BsplineBasis B1 = BsplineBasis(order - 1, this->knots); - BsplineBasis B2 = BsplineBasis(order - 2, this->knots); + BsplineBasis B1 = BsplineBasis(order - 1, knots_object()); + BsplineBasis B2 = BsplineBasis(order - 2, knots_object()); Tensor A = B2.tabulate_basis(Xsample); Tensor M = make_lsq_matrix(Xsample); Tensor dMat2 = B1.make_deriv_exact_matrix(); @@ -358,18 +432,13 @@ class BsplineBasis { size_t order = 4; T xlo = -constants::pi; T xhi = 2*constants::pi; - T xtest = 1.77; - auto knots = uniform_knots(nknots, xlo, xhi); print("uniform", knots); - auto cheby = cheby_knots(nknots, xlo, xhi); print("cheby", cheby); - - //knots = cheby; - - auto pad = BsplineBasis::pad_knots(order, knots); print("pad", pad); - auto xsam = oversample_knots(knots); print("xsam", xsam); + T xtest = 0.5*(xhi+xlo); + auto knots = knotsT(xlo, xhi, nknots); print("uniform", knots.knots()); + auto xsam = oversample_knots(knots.knots()); print("xsam", xsam); long nbasis = nknots + order - 2; size_t nsam = xsam.dims()[0]; - BsplineBasis B(order, knots); + BsplineBasis B(order, knots); for (long i=0; i B1(order - 1, knots); + BsplineBasis B1(order - 1, knots); auto d = B.deriv_exact(c); print(std::cos(1.5), B1.deBoor(1.5, d)); //print(-2*std::exp(-2*1.5), B1.deBoor(1.5, d)); @@ -424,7 +493,7 @@ class BsplineBasis { print(derr1); // Compute second derivative in the two lower order basis by applying deriv_exact twice - BsplineBasis B2(order - 2, knots); + BsplineBasis B2(order - 2, knots); { auto dMat1 = B1.make_deriv_exact_matrix(); auto d2 = inner(dMat1,inner(dMat,c)); @@ -449,7 +518,7 @@ class BsplineBasis { print("Err in fit-1 first derivative", derr3.normf()); } { - Gnuplot g("set style data l; set xrange [-2.5:5.5]"); + Gnuplot g("set style data l"); // ; set xrange [-2.5:5.5] g.plot(xsam, derr1, derr2, derr3); } // Compute second derivative in the same order basis @@ -486,7 +555,8 @@ template class BsplineFunction { public: typedef typename TensorTypeData::scalar_type scalar_type; - typedef BsplineBasis basisT; + typedef KnotsChebyshev knotsT; + typedef BsplineBasis basisT; typedef std::shared_ptr basis_ptrT; typedef Tensor tensorT; typedef BsplineFunction bfunctionT; @@ -671,7 +741,7 @@ class BsplineFunction { int main() { //BsplineBasis::test(); - BsplineBasis::test(); - BsplineFunction::test(); + BsplineBasis>::test(); + //BsplineFunction::test(); return 0; }