diff --git a/src/madness/bspline/mlmnlm.pdf b/src/madness/bspline/mlmnlm.pdf new file mode 100644 index 00000000000..04daad63d3c Binary files /dev/null and b/src/madness/bspline/mlmnlm.pdf differ diff --git a/src/madness/bspline/solidharmonics.cc b/src/madness/bspline/solidharmonics.cc new file mode 100644 index 00000000000..8b3a8946c1e --- /dev/null +++ b/src/madness/bspline/solidharmonics.cc @@ -0,0 +1,702 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// Solid harmonics following JCP 104, p8003 (1996) + +template +struct constants { + static T pi; + + static void init() { + pi = from_str("3.141592653589793238462643383279502884197169399375105820974944592307816"); + } +}; +template <> double constants::pi = 0; +template <> dd_real constants::pi = 0; +template <> qd_real constants::pi = 0; + + +// sum(2*l+1,l=0..L) = (L+1)^2 +// 20! is largest factorial representable in 64-bit size_t +// 170! is largest factorial representable in double without overflowing the exponent + +class LebedevQuadrature { + size_t order; + size_t npts; + std::vector thetas; + std::vector phis; + std::vector sinthetas; + std::vector costhetas; + std::vector sinphis; + std::vector cosphis; + std::vector weights; + +public: + + // From https://people.sc.fsu.edu/~jburkardt/datasets/sphere_lebedev_rule/sphere_lebedev_rule.html + // Adjust input values by + // (1) convert to radian + // (2) switch theta and phi (physics convention) + // (3) quadrature range phi=0..2pi (instead of -pi..pi) + // (4) incorporate factor of 4*pi in the weights + LebedevQuadrature(int maxl) { + for (auto n : {3,5,7,9,11,13,15,17,19,21,23,25,27,29,31,35,41,47,53,59,65,71,77,83,89,95,101,113,119,125,131}) { + if (n >= maxl) { + order = n; + char filename[16]; + sprintf(filename,"leb_%03d.txt",n); + std::ifstream f(filename); + + f >> npts; + //std::cout << filename << " " << npts << std::endl; + + thetas.resize(npts); + phis.resize(npts); + sinthetas.resize(npts); + costhetas.resize(npts); + sinphis.resize(npts); + cosphis.resize(npts); + weights.resize(npts); + + double wsum = 0.0; + + double pi = constants::pi; + double deg_to_rad = pi/180.0; + for (size_t i=0; i> phis[i] >> thetas[i] >> weights[i]; + phis[i] += 180.0; + phis[i] = phis[i]*deg_to_rad; + thetas[i] = thetas[i]*deg_to_rad; + weights[i] *= 4*pi; + + sinthetas[i] = std::sin(thetas[i]); + costhetas[i] = std::cos(thetas[i]); + sinphis[i] = std::sin(phis[i]); + cosphis[i] = std::cos(phis[i]); + + //std::cout << i << " " << thetas[i] << " " << phis[i] << " " << weights[i] << std::endl; + wsum += weights[i]; + } + //std::cout << "wsum " << (wsum-4*pi)/(4*pi) << std::endl; + return; + } + } + throw "maximum supported angular momentum is 131"; + } + + template + auto cartesian_integral(functionT f, double r=1.0) const { + auto value = f(r,0.0,0.0); // just to deduce the return type using auto + value = 0; + for (size_t i=0; i + auto spherical_integral(functionT f, double r=1.0) const { + auto value = f(0.0,0.0,0.0); // just for type deduction + value = 0; + for (size_t i=0; i,std::vector,std::vector> + cartesian_pts() const { + std::vector x(npts); + std::vector y(npts); + std::vector z(npts); + for (size_t i=0; i wts() const { + return weights; + } +}; + +template +T factorial(size_t a) { + T value = T(1); + while (a>0) {value*=T(double(a--));} + return value; +} + +template +T Alm(unsigned l, int m) { + T top = constants::pi * factorial(l+m) * factorial(l-m); + T bot = 2.*l+1; + return std::sqrt(top/bot); +} + +template +class NlmContainer { + unsigned LMAX; + std::vector N; + + // The following are layed out contiguously + // N+[0,0] + // N-[1,1] N+[1,0] N+[1,1] + // N-[2,2] N-[2,1] N+[2,0] N+[2,1] N+[2,2] + // etc. + // (N-[l,m] for m=l,l-1,..,1) and then (N+[l,m] for m=0..l) + + // Returns offset to data for l so that N(loff(l)+m) yields the desired value for -l<=m<=l + size_t loff(unsigned l) const { + return l*(l+1); + } +public: + NlmContainer(unsigned lmax) + : LMAX(lmax) + , N((lmax+1)*(lmax+1),T(0)) // must initialize everything so that unused upper elements are valid for ops like addition + {} + + unsigned lmax() const {return LMAX;} + + // Returns reference to N[l,m] with m>=0 being N+ and m<0 being N- + T& value(unsigned l, int m) { + return N[loff(l)+m]; + } + + // Returns value N[l,m] with m>=0 being N+ and m<0 being N- + T value(unsigned l, int m) const { + return N[loff(l)+m]; + } + + // Fill with scalar + NlmContainer& operator=(T s) { + for (size_t i=0; i +std::ostream& operator<<(std::ostream& s, const NlmContainer& Nlm) { + char buf[256]; + for (int l=0; l<=int(Nlm.lmax()); l++) { + for (int m=-l; m<=l; m++) { + sprintf(buf,"%12.4e",to_double(Nlm.value(l,m))); + s << buf; + } + std::cout << std::endl; + } + return s; +} + + +// Compute N+ and N- for given (l,|m|) which is not efficient if you want many values +template +std::pair Nlm(unsigned l, unsigned m, const T& x, const T& y, const T& z) { + T rsq = x*x + y*y + z*z; + + // First recur m up from 0 to m to make N+|m||m| and N-|m||m| + T Np_mm=1, Nm_mm=0; + for (unsigned i=1; i<=m; i++) { // i plays role of m + T fac = T(-1)/T(2.*i); + T Np = fac*(x*Np_mm - y*Nm_mm); + T Nm = fac*(y*Np_mm + x*Nm_mm); + Np_mm = Np; + Nm_mm = Nm; + } + //std::cout << "after mm " << Np_mm << " " << Nm_mm << std::endl; + + // Now recur l up from m to l to make N+l|m| and N-l|m| + // Note that Nlm is treated as zero if l +NlmContainer Nlm(unsigned lmax, T x, T y, T z) { + T rsq = x*x + y*y + z*z; + + NlmContainer N(lmax); + + // Initialize m=0 for which N- are missing + N.value(0,0) = T(1); + T Np_l1m=T(1), Np_l2m=T(0); // l-1,m and l-2,m + for (unsigned l=1; l<=lmax; l++) { + T fac = T(1)/T(int(l*l)); + T Np = fac*(T(2*l-1.0)*z*Np_l1m - rsq*Np_l2m); + Np_l2m = Np_l1m; + Np_l1m = Np; + N.value(l,0) = Np; + } + + // Recur m up to make N+|m||m| and N-|m||m| + for (unsigned m=1; m<=lmax; m++) { + T Np1 = N.value(m-1,m-1); + T Nm1 = (m<=1) ? T(0) : N.value(m-1,1-m); + T fac = T(-1)/T(2.*m); + N.value(m, m) = fac*(x*Np1 - y*Nm1); + N.value(m,-m) = fac*(y*Np1 + x*Nm1); + + // Recur l up from m to lmax to make N+l|m| and N-l|m| + // Note that Nlm is treated as zero if l +T Nlm_normalization(unsigned l, int m) { + T value = constants::pi/(Alm(l,m)*T(2.*l+1)); + static const T sqrthalf = std::sqrt(T(0.5)); + if (m) return sqrthalf/value; + else return T(0.5)/value; +} + +template +NlmContainer Nlm_normalization(unsigned lmax) { + NlmContainer Nnorm(lmax); + for (unsigned l=0; l<=lmax; l++) { + for (unsigned m=0; m<=l; m++) { + T norm = Nlm_normalization(l, m); + Nnorm.value(l,m) = norm; + if (m > 0) Nnorm.value(l,-m) = norm; + } + } + return Nnorm; +} + +template +void test_Nlm() { + { + // Check the container internal storage logic + NlmContainer::test(); + } + + { + // Check the values computed in three different ways + size_t lmax = 8; + T x = 0.125; + T y = 0.325; + T z = -0.75; + + NlmContainer Nfull = Nlm(lmax, x, y, z); + NlmContainer Nnorm = Nlm_normalization(lmax); + NlmContainer N(lmax); + + NlmContainer Nref = Nlm(lmax, R(x), R(y), R(z)); + NlmContainer NnormR = Nlm_normalization(lmax); + + for (size_t l=0; l<=lmax; l++) { + for (size_t m=0; m<=l; m++) { + { + auto [Np, Nm] = Nlm(l, m, x, y, z); + N.value(l,m) = Np; + if (m > 0) N.value(l,-m) = Nm; + } + } + } + + // Verify the normalized values + N *= Nnorm; + Nfull *= Nnorm; + Nref *= NnormR; + + NlmContainer err1 = Nfull-N; + NlmContainer err2(lmax); /// = Nfull-Nref; don't have type conversion on containers yet + + T e1=0, e2=0; + T maxreferr=0, maxrefrelerr=0; + for (int l=0; l<=int(lmax); l++) { + for (int m=-l; m<=l; m++) { + err2.value(l,m) = convert(R(Nfull.value(l,m))-Nref.value(l,m)); + T d1 = err1.value(l,m), d2 = err2.value(l,m); + maxreferr = std::max(std::abs(d2),maxreferr); + maxrefrelerr = std::max(std::abs(d2)/std::abs(Nfull.value(l,m)),maxrefrelerr); + e1 += d1*d1; + e2 += d2*d2; + } + } + e1 = std::sqrt(e1); + e2 = std::sqrt(e2); + if (e1 != 0) throw "e1 failed"; + if (e2 > 6e-16) throw "e2 failed"; + std::cout << "xxxx " << e1 << " " << e2 << std::endl; + std::cout << "yyyy " << maxreferr << " " << maxrefrelerr << std::endl; + std::cout << N << std::endl; + std::cout << Nref << std::endl; + std::cout << Nfull << std::endl; + std::cout << err1 << std::endl; + std::cout << err2 << std::endl; + } + + // Verify orthonormality + { + size_t lmax = 8; + NlmContainer Nnorm = Nlm_normalization(lmax); + GaussLegendreSphere q(2*lmax); + + T maxerr = 0; + for (int l=0; l<=int(lmax); l++) { + for (int m=-l; m<=l; m++) { + auto f = [&](T x, T y, T z) { + NlmContainer N = Nlm(lmax, x, y, z); + T v = N.value(l,m); + v *= Nnorm.value(l,m); + N *= Nnorm; + return N*v; + }; + auto N = q.cartesian_integral(f, T(1.0)); + //std::cout << l << " " << m << std::endl << N << std::endl; + for (int ll=0; ll<=int(lmax); ll++) { + for (int mm=-ll; mm<=ll; mm++) { + T v = N.value(ll,mm); + T err = std::abs(v); + if (ll==l && mm==m) err -= 1; + maxerr =std::max(maxerr,err); + if (err > T(2e-12)) { // <<<<<<<< change to numeric_limit for data type + std::cout << "bad norm (" << l << "," << m << ") (" << ll << "," << mm << ") " << v << " " << err << std::endl; + std::cout << N << std::endl; + throw "bad norm"; + } + } + } + } + } + std::cout << "maxerr in orthonormality test " << maxerr << std::endl; + } +} + +template +auto threeJ() { + int lmax1 = 16; + int lmax2 = lmax1; // assumed equal below + int lmax3 = lmax1 + lmax2; // Product order is sum of both + + std::vector> qs; // Use lowest order rule possible + for (int l=0; l<=2*lmax3; l++) qs.emplace_back(GaussLegendreSphere(l)); + + //std::vector qs; // Use lowest order rule possible + //for (int l=0; l<=2*lmax3; l++) qs.emplace_back(LebedevQuadrature(l)); + + std::cout << std::setprecision(6) << std::scientific; + + // Sparse storage for the 3j coeffs + int lx4 = (lmax1+1)*(lmax1+1)*(lmax1+1)*(lmax1+1); + std::vector> index(lx4); // first=index into j3, second=count + std::vector> j3; // l3, m3, value + j3.reserve((lmax1*lx4)/2); // seems to be a v close upper estimate of the size + + auto lmx = [=](int l1, int m1, int l2, int m2) { // offset into the index vector + return (l1*(l1+1)+m1)*(lmax1+1)*(lmax1+1) + l2*(l2+1)+m2; + }; + + + // N(l1,m1)*N(l2,m2) = sum(l3,m3) j(l1,m1,l2,m2,l3,m3) N(l3,m3) + // int(N(l1,m1)*N(l2,m2)*N(l3,m3)) = j(l1,m1,l2,m2,l3,m3) int(N(l3,m3)**2) by orthogonality + // --> j(l1,m1,l2,m2,l3,m3) = int(N(l1,m1)*N(l2,m2)*N(l3,m3)) * Nnorm(l3,m3)**2 + + // But the function norms can be massive, so for screening small + // coeffs imagine using and projecting with the normalized + // functions + // + // N(l1,m1) N(l2,m2) Norm(l1,m1) Norm(l2,m2) = sum(l3,m3) j(l1,m1,l2,m2,l3,m3) N(l3,m3) Norm(l1,m1) Norm(l2,m2) Norm(l3,m3) + // int(N(l1,m1)*N(l2,m2)*N(l3,m3)) Norm(l1,m1) Norm(l2,m2) Norm(l3,m3) = j(l1,m1,l2,m2,l3,m3) Norm(l1,m1) Norm(l2,m2) / Norm(l3,m3) + // so the normalized 3j coeffs are + // Jnormalized(l1,m1,l2,m2,l3,m3) = j(l1,m1,l2,m2,l3,m3) Norm(l1,m1) Norm(l2,m2) / Norm(l3,m3) + // If this is small relative to unity, we can discard values. + + for (int l1=0; l1<=lmax1; l1++) { + for (int m1=-l1; m1<=l1; m1++) { + for (int l2=0; l2<=l1; l2++) { // <<<<<<<<<<<<<< restrict range to save space + for (int m2=-l2; m2<=l2; m2++) { + int lmax4 = l1+l2; + NlmContainer Nnorm = Nlm_normalization(lmax4); + auto f = [&](T x, T y, T z) { + NlmContainer v3 = Nlm(lmax4, x, y, z); + T v1 = v3.value(l1,m1); + T v2 = v3.value(l2,m2); + v3 *= Nnorm; + v3 *= Nnorm; + return v3*(v2*v1); + }; + + auto C = qs[2*lmax4].cartesian_integral(f,1.0); // use lowest order possible + + size_t off=j3.size(); + for (int l3=std::abs(l1-l2); l3<=l1+l2; l3++) { // enforce known restrictions + for (int m3=-l3; m3<=l3; m3++) { + // For screening small values need to include normalization of l1 and l2 + // But need to partially undo that for l3 due to projection onto unnormalized functions + T j3value = C.value(l3,m3); + T test = j3value*Nnorm.value(l1,m1)*Nnorm.value(l2,m2)/Nnorm.value(l3,m3); + if (std::abs(test) > 1e-8) { + //std::cout << std::setw(3) << l1 << " " << std::setw(3) << m1 << " " << std::setw(3) << l2 << " " << std::setw(3) << m2 << " --> " << std::setw(3) << l3 << " " << std::setw(3) << m3 << " " << std::setw(15) << j3value << " " << test << std::endl; + j3.push_back({l3,m3,j3value}); + } + } + } + index[lmx(l1,m1,l2,m2)] = {off,int(j3.size()-off)}; + index[lmx(l2,m2,l1,m1)] = {off,int(j3.size()-off)}; + } + } + } + } + std::cout << "j3 size " << j3.size() << " " << (lmax1*lx4)/2 << std::endl; + + return std::make_tuple(lmax1,index,j3); +} + +template +void test_j3(int lmax1, const std::vector>& index, const std::vector>& j3) { + // Test reconstructing Nlm from the coupling coefficients + std::mt19937 gen(5551212); + std::uniform_int_distribution rand(-32768,32768); + + int lmax2 = lmax1; + int lmax3 = lmax1+lmax2; + auto lmx = [=](int l1, int m1, int l2, int m2) { // offset into the index vector + return (l1*(l1+1)+m1)*(lmax1+1)*(lmax1+1) + l2*(l2+1)+m2; + }; + NlmContainer Nnorm = Nlm_normalization(lmax3); + + T maxerr = 0.0, maxrelerr = 0, maxerrnorm = 0; + for (int sample=0; sample<1000; sample++) { + T x = rand(gen); + T y = rand(gen); + T z = rand(gen); + T r = std::sqrt(x*x + y*y + z*z); + x /= r; // Put test points on the unit sphere + y /= r; + z /= r; + r = 1; + + NlmContainer N = Nlm(lmax3, x, y, z); + + T thresh = 500*std::numeric_limits::epsilon(); + + for (int l1=0; l1<=lmax1; l1++) { + for (int m1=-l1; m1<=l1; m1++) { + for (int l2=0; l2<=lmax2; l2++) { + for (int m2=-l2; m2<=l2; m2++) { + auto [off,n] = index[lmx(l1,m1,l2,m2)]; + T Ntest = 0.0; + for (int i=0; i thresh) { // + std::cout << std::setw(3) << l1 << " " << std::setw(3) << m1 << " " << std::setw(3) << l2 << " " << std::setw(3) << m2 << " " << std::setw(15) << err << std::endl; + } + maxerr = std::max(err,maxerr); + maxerrnorm = std::max(err*Nnorm.value(l1,m1)*Nnorm.value(l2,m2),maxerrnorm); + T prodnormed = prod*Nnorm.value(l1,m1)*Nnorm.value(l2,m2); + if (std::abs(prodnormed) > thresh) + maxrelerr = std::max(err/prod,maxrelerr); + } + } + } + } + } + std::cout << " maxerr in J3 reconstruction on unit sphere " << maxerr << std::endl; + std::cout << " maxrelerr in J3 reconstruction on unit sphere " << maxrelerr << std::endl; + std::cout << "maxnormerr in J3 reconstruction on unit sphere " << maxerrnorm << std::endl; + + /* + lmax = 8 + Using GL + Testing j3 with double + j3 size 26113 26244 + maxerr in J3 reconstruction on unit sphere 3.053113e-16 + maxrelerr in J3 reconstruction on unit sphere 4.518745e-03 + maxnormerr in J3 reconstruction on unit sphere 1.123624e-14 + + Testing j3 with double-double + j3 size 26113 26244 + maxerr in J3 reconstruction on unit sphere 1.386670e-31 + maxrelerr in J3 reconstruction on unit sphere 2.778597e-12 + maxnormerr in J3 reconstruction on unit sphere 6.280683e-30 + + Testing j3 with double-double coeffs but in double + maxerr in J3 reconstruction on unit sphere 2.220446e-16 + maxrelerr in J3 reconstruction on unit sphere 4.738321e-04 + maxnormerr in J3 reconstruction on unit sphere 3.322992e-15 + + Using Lebedev (which we only have in double) + Testing j3 with double + j3 size 26113 26244 + maxerr in J3 reconstruction on unit sphere 3.663736e-15 + maxrelerr in J3 reconstruction on unit sphere 3.880582e-02 + maxnormerr in J3 reconstruction on unit sphere 1.249279e-13 + + lmax = 16 + Using GL + Testing j3 with double + j3 size 603487 668168 + maxerr in J3 reconstruction on unit sphere 3.053113e-16 + maxrelerr in J3 reconstruction on unit sphere 2.373996e+04 + maxnormerr in J3 reconstruction on unit sphere 1.380099e-08 + + Testing j3 with double-double + j3 size 603487 668168 + maxerr in J3 reconstruction on unit sphere 1.386670e-31 + maxrelerr in J3 reconstruction on unit sphere 1.417492e+20 + maxnormerr in J3 reconstruction on unit sphere 1.380100e-08 + + Testing j3 with double-double coeffs but in double + maxerr in J3 reconstruction on unit sphere 2.220446e-16 + maxrelerr in J3 reconstruction on unit sphere 2.373996e+04 + maxnormerr in J3 reconstruction on unit sphere 1.380100e-08 + + Using Lebedev (which we only have in double) + maxerr in J3 reconstruction on unit sphere 3.663736e-15 + maxrelerr in J3 reconstruction on unit sphere 2.373997e+04 + maxnormerr in J3 reconstruction on unit sphere 1.380101e-08 + */ +} + + + +int main() { + constants::init(); + constants::init(); + constants::init(); + test_Nlm(); + test_Nlm(); + test_Nlm(); + + { + std::cout << "Testing j3 with double\n"; + auto [lmax1, index, j3] = threeJ(); + test_j3(lmax1, index, j3); + } + + { + std::cout << "Testing j3 with double-double\n"; + auto [lmax1, index, j3] = threeJ(); + test_j3(lmax1, index, j3); + + std::cout << "Testing j3 with double-double coeffs but in double\n"; + std::vector> j3d; + j3d.reserve(j3.size()); + for (auto [l,m,v] : j3) {j3d.push_back({l,m,to_double(v)});} + test_j3(lmax1, index, j3d); + } + + //std::ifstream f("gauleg.220bit"); + + return 0; +}