diff --git a/src/examples/CMakeLists.txt b/src/examples/CMakeLists.txt index 4715fee334d..38b0b55aa00 100644 --- a/src/examples/CMakeLists.txt +++ b/src/examples/CMakeLists.txt @@ -1,7 +1,7 @@ # src/examples set(EXAMPLE_SOURCES - madinfo h2dft hedft hello hatom_energy h2 he tdse_example heat heat2 csqrt + madinfo h2dft hedft hello hatom_energy h2 he tdse_example heat heat2 csqrt hatom_sf_dirac sdf_shape_tester test_gmres tdse1d vnucso nonlinschro sininteg functionio dataloadbal hatom_1d binaryop dielectric hehf 3dharmonic testsolver testspectralprop dielectric_external_field tiny h2dynamic newsolver testcomplexfunctionsolver diff --git a/src/examples/hatom_sf_dirac.cc b/src/examples/hatom_sf_dirac.cc new file mode 100644 index 00000000000..41e284ed181 --- /dev/null +++ b/src/examples/hatom_sf_dirac.cc @@ -0,0 +1,356 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace madness; + +static const double Z = 80.0; // nuclear charge +static const double L = 40.0/Z; // L=40/Z [-L,L] box size so exp(-Zr)=1e-16 padded a bit since we are masking +static const long k = 8; // wavelet order +static const double thresh = 1e-6; // precision +static const double c = 137.035999679; // speed of light +static const int Vtruncmode = (Z<=10 && thresh>1e-10) ? 1 : 0; // Point nucleus potential woes +static const double eprec = 0.0; //std::min(1e-4,thresh*Z*Z); // Note this is for NR wavefun, and for now we just want relative not absolute error +static const double rcut = 1e-4; // typical size of heavy nucleus //smoothing_parameter(Z, eprec); +static const double v = std::sqrt(1.0 - Z*Z/(c*c)) - 1.0; + +inline double mask1(double x) { + x = (x * x * (3. - 2. * x)); + return x; +} + +static double mask3(const coord_3d& ruser) { + coord_3d rsim; + user_to_sim(ruser, rsim); + double x = rsim[0], y = rsim[1], z = rsim[2]; + double lo = 0.0625, hi = 1.0 - lo, result = 1.0; + double rlo = 1.0 / lo; + + if (x < lo) + result *= mask1(mask1(x * rlo)); + else if (x > hi) + result *= mask1(mask1((1.0 - x) * rlo)); + if (y < lo) + result *= mask1(mask1(y * rlo)); + else if (y > hi) + result *= mask1(mask1((1.0 - y) * rlo)); + if (z < lo) + result *= mask1(mask1(z * rlo)); + else if (z > hi) + result *= mask1(mask1((1.0 - z) * rlo)); + + return result; +} + + +struct LBCost { + double leaf_value; + double parent_value; + LBCost(double leaf_value=1.0, double parent_value=1.0) + : leaf_value(leaf_value) + , parent_value(parent_value) + {} + + double operator()(const Key<3>& key, const FunctionNode& node) const { + if (node.is_leaf()) return leaf_value; + else return parent_value; + } +}; + +// This class is used to store information for the non-linear solver +struct F { + real_function_3d psi, phi; + + F(const real_function_3d& psi, const real_function_3d& phi) + : psi(psi), phi(phi) + {} + + F operator-(const F& b) const { + return F(psi-b.psi, phi-b.phi); + } + + F operator+=(const F& b) { // Operator+= necessarphi + psi += b.psi; phi += b.phi; + return *this; + } + + F operator*(double a) const { // Scale bphi a constant necessarphi + return F(psi*a,phi*a); + } +}; + +double inner(const F& a, const F& b) { + return inner(a.psi,b.psi) + inner(a.phi,b.phi); +} + +struct allocator { + World& world; + + allocator(World& world) : world(world) {} + + F operator()() { + return F(real_function_3d(world),real_function_3d(world)); + } +}; + +double exact_energy(double Z) { + if (Z<=10) { + double a2 = Z*Z/(c*c); + double a4 = a2*a2; + double a6 = a4*a2; + double a8 = a4*a4; + return Z*Z*(-1.0/2.0 - 1.0/8.0*a2 - 1/16*a4 - 5.0/128.0*a6 - 7.0/256.0*a8); + } + double Za = Z/c; + double s = Za / sqrt(1.0 - Za*Za); + return c*c/std::sqrt(1.0 + s*s) - c*c; +} + +static double psi_guess(const coord_3d& r) { + double R = std::sqrt(r[0]*r[0] + r[1]*r[1] + r[2]*r[2]); + const double expnt = std::sqrt(-2*exact_energy(Z)*(1+exact_energy(Z)/(2*c*c))); + return std::pow(R+rcut,v)*exp(-expnt*R)/sqrt(constants::pi/(expnt*expnt*expnt)); // note +rcut in singular part to smooth on nuclear scale + //const double expnt = Z*0.5; + //return exp(-expnt*R)/sqrt(constants::pi/(expnt*expnt*expnt)); // note +rcut in singular part to smooth on nuclear scale +} + +static double phi_guess(const coord_3d& r) { + double R = std::sqrt(r[0]*r[0] + r[1]*r[1] + r[2]*r[2]); + const double expnt = std::sqrt(-2*exact_energy(Z)*(1+exact_energy(Z)/(2*c*c))); + return exp(-expnt*R)/sqrt(constants::pi/(expnt*expnt*expnt)); + // const double expnt = Z; + // return exp(-expnt*R)/sqrt(constants::pi/(expnt*expnt*expnt)); // note +rcut in singular part to smooth on nuclear scale +} + +static double V(const coord_3d& r) { + const double x=r[0], y=r[1], z=r[2]; + //return -Z/(sqrt(x*x+y*y+z*z)); + return (-Z/rcut)*smoothed_potential(sqrt(x*x+y*y+z*z)/rcut); +} + +class Vderiv : public FunctionFunctorInterface { + const int axis; +public: + Vderiv(int axis) : axis(axis) {} + + double operator()(const coord_3d& r) const { + double R = std::sqrt(r[0]*r[0] + r[1]*r[1] + r[2]*r[2]); + double rc = 1.0/rcut; + return - Z * (r[axis] / R) * dsmoothed_potential(R * rc) * (rc * rc); + } +}; + +// Applies pV.p/4m^2c^2 phi = sum_q (-dV/dq dphi/dq - V d^2phi/dq^2) +real_function_3d pVp(World& world, const real_function_3d& V, const real_function_3d gradV[3], const real_function_3d& phi) { + real_function_3d Wphi = real_function_3d(world); + for (int axis=0; axis<3; axis++) { + { + real_derivative_3d D = free_space_derivative(world, axis); + D.set_bspline1(); + Wphi -= D(phi)*gradV[axis]; + } + { + real_derivative_3d D = free_space_derivative(world, axis); + D.set_bspline2(); + Wphi -= D(phi)*V; + } + } + Wphi *= 1.0/(4.0*c*c); + return Wphi; +} + +// Applies pV.p/4m^2c^2 +real_function_3d pVp(World& world, const real_function_3d& V, const real_function_3d& phi) { + real_function_3d Wphi = real_function_3d(world); + for (int axis=0; axis<3; axis++) { + real_derivative_3d D = free_space_derivative(world, axis); + D.set_bspline1(); // try bspline smoother derivative + Wphi -= D(D(phi)*V); + } + Wphi *= 1.0/(4.0*c*c); + return Wphi; +} + +// E = + +std::tuple compute_energy(World& world, + real_function_3d& V, + real_function_3d& psi, + real_function_3d& phi) +{ + double overlap = inner(psi,psi); + double potential_energy = inner(psi,V*psi) / overlap; + double kinetic_energy = 0.0; + for (int axis=0; axis<3; axis++) { + real_derivative_3d D = free_space_derivative(world, axis); // more accurate to use ABGV derivative for KE + real_function_3d dpsi = D(psi); + real_function_3d dphi = D(phi); + kinetic_energy += 0.5*inner(dpsi,dphi); + } + kinetic_energy /= overlap; + double total_energy = kinetic_energy + potential_energy; + return {total_energy, kinetic_energy, potential_energy, overlap}; +} + +// (T-E) phi = -V psi + E(psi-phi) +// and with s=1+E/2mc^2 +// T (psi-s*phi) = - pVp phi/4mc^2 +// BAD!! (T-E) (psi-s*phi) = - pVp phi - E(psi-s*phi) +template +std::tuple iterate(World& world, + const real_function_3d& V, + const real_function_3d gradV[3], + const real_function_3d& mask, + const real_function_3d& psi, + const real_function_3d& phi, + const double energy, + const int iter, + solverT& solver) +{ + real_convolution_3d coulop = CoulombOperator(world, rcut*0.1, thresh); + real_convolution_3d op = BSHOperator3D(world, sqrt(-2*energy), rcut*0.1, thresh); + real_function_3d rhs = -2*(psi*V - energy*(psi-phi)); + rhs.truncate(); + real_function_3d phi_new = apply(op,rhs); + + double s = 1.0 + energy/(2*c*c); + //rhs = -2*(pVp(world,V,gradV,phi) + energy*(psi-s*phi)); + rhs = -(2/(4*constants::pi))*pVp(world,V,phi); + rhs.truncate(); + real_function_3d psimphi_new = apply(coulop,rhs); + real_function_3d psi_new = s*phi_new + psimphi_new; + + psi_new.truncate(); + phi_new.truncate(); + + psi_new = psi_new * mask; + phi_new = phi_new * mask; + + double rnormL = (psi_new-psi).norm2(); + double rnormP = (phi_new-phi).norm2(); + if (world.rank() == 0) print(rnormL, rnormP); + double rnorm = (psi_new-psi).norm2() + (phi_new-phi).norm2(); + + // Comment out next 5 lines to not use the solver (i.e., to just do fixed-point iteration) + if (iter>1) { + F f = solver.update(F(psi,phi), F(psi_new-psi, phi_new-phi)); + psi_new = f.psi; + phi_new = f.phi; + psi_new.truncate(); + phi_new.truncate(); + } + + double stepnorm = (psi_new-psi).norm2() + (phi_new-phi).norm2(); + double maxstep = 1e-1; + if (stepnorm > maxstep) { + if (world.rank() == 0) print("step restriction", stepnorm); + double step = maxstep/stepnorm; + psi_new = psi + step*(psi_new-psi); + phi_new = phi + step*(phi_new-phi); + } + + double norm = inner(psi_new,psi_new); + psi_new *= 1.0/sqrt(norm); + phi_new *= 1.0/sqrt(norm); + + return {psi_new, phi_new, rnorm}; +} + +void run(World& world) { + FunctionDefaults<3>::set_k(k); + FunctionDefaults<3>::set_thresh(thresh); + FunctionDefaults<3>::set_truncate_mode(1); + FunctionDefaults<3>::set_cubic_cell(-L,L); + + if (world.rank() == 0) { + print(" Z:", Z); + print(" L:", L); + print(" k:", k); + print(" v:", v); + print(" c:", c); + print(" rcut:", rcut); + print(" eprec:", eprec); + print(" thresh:", thresh); + print(" Vtmode:", Vtruncmode); + print(" exact DC:", exact_energy(Z)); + print(""); + } + + { + real_function_3d Vnuc = real_factory_3d(world).f(V).truncate_mode(Vtruncmode); + LoadBalanceDeux<3> lb(world); + lb.add_tree(Vnuc, LBCost(1.0,8.0)); + FunctionDefaults<3>::redistribute(world, lb.load_balance(2.0,false)); + } + real_function_3d Vnuc = real_factory_3d(world).f(V).truncate_mode(Vtruncmode); + + real_function_3d gradV[3]; + for (int axis=0; axis<3; axis++) { + std::shared_ptr> p(new Vderiv(axis)); + gradV[axis] = real_factory_3d(world).functor(p).truncate_mode(0); + } + + + coord_3d lo = {0.0,0.0,-L}, hi = {0.0,0.0,L}; + const int npt = 1001; + + real_function_3d mask = real_factory_3d(world).f(mask3); + plot_line("mask.dat", npt, lo, hi, mask); + + real_function_3d psi = real_factory_3d(world).f(psi_guess); + real_function_3d phi = real_factory_3d(world).f(phi_guess); + psi = psi*mask; + phi = phi*mask; + { + double norm; + norm = psi.norm2(); psi.scale(1.0/norm); + norm = phi.norm2(); phi.scale(1.0/norm); + } + + XNonlinearSolver solver = XNonlinearSolver(allocator(world)); + solver.set_maxsub(10); + + double energy = -0.5*Z*Z; // NR energy guess + for (int iter=0; iter<1000; iter++) { + char fname[256]; + sprintf(fname,"psi-phi-%3.3d.dat", iter); + plot_line(fname, npt, lo, hi, psi, phi); + + auto [psi_new, phi_new, rnorm] = iterate(world, Vnuc, gradV, mask, psi, phi, energy, iter, solver); + psi = psi_new; + phi = phi_new; + + auto psisize = psi.size(); + auto phisize = phi.size(); + auto phinorm = phi.norm2(); + auto [total_energy, kinetic_energy, potential_energy, overlap] = compute_energy(world, Vnuc, psi, phi); + + if (world.rank() == 0) { + print("\niteration ", iter, " rnorm ", rnorm, " psi size ", psisize, " phi size ", phisize, " phi norm ", phinorm); + print(" Kinetic energy ", kinetic_energy); + print(" Nuclear attraction energy ", potential_energy); + print(" Total energy ", total_energy, total_energy - energy); + } + + energy = total_energy; + + if (iter>10 and rnorm<10.0*thresh) break; + } +} + +int main(int argc, char** argv) { + World& world = initialize(argc, argv); + startup(world,argc,argv); + std::cout.precision(10); + + run(world); // Put all functions in separate scope to force destruction before finalize + + world.gop.fence(); + + finalize(); + return 0; +} diff --git a/src/madness/mra/derivative.h b/src/madness/mra/derivative.h index bf00cf959f8..37bdc616ef9 100644 --- a/src/madness/mra/derivative.h +++ b/src/madness/mra/derivative.h @@ -370,7 +370,12 @@ namespace madness { return; } } - tensorT gcoeffs = df->parent_to_child(found_argT.get().second, found_argT.get().first,key).full_tensor_copy(); +#ifdef HAVE_PARSEC + std::cerr << "FATAL ERROR: PaRSEC does not support recursive task execution but Derivative::do_diff2b requires this. Use a different backend" << std::endl; + abort(); +#endif + const auto& found_argT_value = found_argT.get(); // do not recursively execute tasks to avoid making PaRSEC sad + tensorT gcoeffs = df->parent_to_child(found_argT_value.second, found_argT_value.first,key).full_tensor_copy(); //if (this->bc.get_bc().dim(0) == 1) { if (NDIM == 1) { diff --git a/src/madness/world/parsec.cc b/src/madness/world/parsec.cc index 0267edbc15b..be8eddecbc6 100644 --- a/src/madness/world/parsec.cc +++ b/src/madness/world/parsec.cc @@ -6,6 +6,8 @@ #include "thread.h" #include +#include + // Here we initialize with the right child class namespace madness { #ifndef MADNESS_ASSERTIONS_DISABLE @@ -66,7 +68,7 @@ namespace madness { } static char *madness_parsec_key_print(char *buffer, size_t buffer_size, parsec_key_t k, void *user_data) { - snprintf(buffer, buffer_size, "MADNESS_TASK(%" PRIx64 ")", (intptr_t)(k)); + snprintf(buffer, buffer_size, "MADNESS_TASK(%" PRIxPTR ")", k); return buffer; }