From 334b592b20abf435403908cdb7b7c49693dd627e Mon Sep 17 00:00:00 2001 From: Joshua Townsend Date: Thu, 25 Apr 2019 14:34:12 -0600 Subject: [PATCH 1/2] Initial commit for Kokkos-free einspline_spo reference implementation. check_spo compiles and runs - but fails with a large error. Probably due to spo initialization order mis-match between ref and kokkos. See the differences between the set() methods for the Kokkos and ref implementations. --- src/CMakeLists.txt | 2 + src/Drivers/check_spo.cpp | 45 +-- src/Drivers/miniqmc.cpp | 5 +- src/Numerics/Einspline/bspline_ref.h | 33 ++ .../Einspline/multi_bspline_structs_ref.h | 91 +++++ src/Numerics/Spline2/MultiBsplineRef.hpp | 48 ++- .../Spline2/bspline_allocator_ref.cpp | 78 +++++ .../Spline2/bspline_allocator_ref.hpp | 209 +++++++++++ src/Numerics/Spline2/bspline_traits_ref.hpp | 79 +++++ .../Spline2/einspline_allocator_ref.cpp | 325 ++++++++++++++++++ .../Spline2/einspline_allocator_ref.h | 35 ++ src/QMCWaveFunctions/SPOSet_builder.cpp | 11 +- src/QMCWaveFunctions/einspline_spo.hpp | 3 +- src/QMCWaveFunctions/einspline_spo_ref.hpp | 130 +++---- 14 files changed, 958 insertions(+), 136 deletions(-) create mode 100644 src/Numerics/Einspline/bspline_ref.h create mode 100644 src/Numerics/Einspline/multi_bspline_structs_ref.h create mode 100644 src/Numerics/Spline2/bspline_allocator_ref.cpp create mode 100644 src/Numerics/Spline2/bspline_allocator_ref.hpp create mode 100644 src/Numerics/Spline2/bspline_traits_ref.hpp create mode 100644 src/Numerics/Spline2/einspline_allocator_ref.cpp create mode 100644 src/Numerics/Spline2/einspline_allocator_ref.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index a87f6bae6..67c555447 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -137,7 +137,9 @@ ENDIF() Utilities/XMLWriter.cpp Utilities/tinyxml/tinyxml2.cpp Utilities/qmcpack_version.cpp + Numerics/Spline2/einspline_allocator_ref.cpp Numerics/Spline2/einspline_allocator.cpp + Numerics/Spline2/bspline_allocator_ref.cpp Numerics/Spline2/bspline_allocator.cpp Numerics/Spline2/MultiBsplineData.cpp ${GITREV_TMP} diff --git a/src/Drivers/check_spo.cpp b/src/Drivers/check_spo.cpp index cf72fbabd..6c7387c64 100644 --- a/src/Drivers/check_spo.cpp +++ b/src/Drivers/check_spo.cpp @@ -57,7 +57,6 @@ int main(int argc, char** argv) Kokkos::initialize(argc, argv); { //Begin kokkos block. - // clang-format off typedef QMCTraits::RealType RealType; typedef ParticleSet::ParticlePos_t ParticlePos_t; @@ -188,8 +187,10 @@ int main(int argc, char** argv) app_summary() << "\nSPO coefficients size = " << SPO_coeff_size << " bytes (" << SPO_coeff_size_MB << " MB)" << endl; + std::cout << "Setting up the Kokkos implementation..."; spo_main.set(nx, ny, nz, norb); spo_main.Lattice.set(lattice_b); + std::cout << "Done." << std::endl; /* auto& spline = spo_main.spline; @@ -219,8 +220,12 @@ int main(int argc, char** argv) cout << " psi[0] = " << psiMirror(0) << ", psi[4] = " << psiMirror(4) << endl; */ + // @DEBUG: problem is in here + std::cout << "Setting up the reference implementation..."; spo_ref_main.set(nx, ny, nz, norb, nTiles); spo_ref_main.Lattice.set(lattice_b); + std::cout << "Done." << std::endl; + /* cout << " looking at the reference version:" << endl; cout << " coef(2,2,0,0) = " << spo_ref_main.einsplines(0).coefs_view(2,2,0,0) << endl; @@ -300,30 +305,30 @@ int main(int argc, char** argv) spo_ref.evaluate_vgh(pos); // accumulate error - auto psiView = Kokkos::create_mirror_view(spo_main.psi); - auto gradView = Kokkos::create_mirror_view(spo_main.grad); - auto hessView = Kokkos::create_mirror_view(spo_main.hess); - Kokkos::deep_copy(psiView, spo_main.psi); - Kokkos::deep_copy(gradView, spo_main.grad); - Kokkos::deep_copy(hessView, spo_main.hess); + auto psi_host = Kokkos::create_mirror_view(spo_main.psi); + auto grad_host = Kokkos::create_mirror_view(spo_main.grad); + auto hess_host = Kokkos::create_mirror_view(spo_main.hess); + Kokkos::deep_copy(psi_host, spo_main.psi); + Kokkos::deep_copy(grad_host, spo_main.grad); + Kokkos::deep_copy(hess_host, spo_main.hess); for (int ib = 0; ib < spo_ref.nBlocks; ib++) for (int n = 0; n < spo_ref.nSplinesPerBlock; n++) { int spNum = ib*spo_ref.nSplinesPerBlock+n; // value - evalVGH_v_err += std::fabs(psiView(spNum) - spo_ref.psi[ib][n]); + evalVGH_v_err += std::fabs(psi_host(spNum) - spo_ref.psi[ib][n]); // grad - evalVGH_g_err += std::fabs(gradView(spNum, 0) - spo_ref.grad[ib](n, 0)); - evalVGH_g_err += std::fabs(gradView(spNum, 1) - spo_ref.grad[ib](n, 1)); - evalVGH_g_err += std::fabs(gradView(spNum, 2) - spo_ref.grad[ib](n, 2)); + evalVGH_g_err += std::fabs(grad_host(spNum, 0) - spo_ref.grad[ib].data(0)[n]); + evalVGH_g_err += std::fabs(grad_host(spNum, 1) - spo_ref.grad[ib].data(1)[n]); + evalVGH_g_err += std::fabs(grad_host(spNum, 2) - spo_ref.grad[ib].data(2)[n]); // hess - evalVGH_h_err += std::fabs(hessView(spNum, 0) - spo_ref.hess[ib](n, 0)); - evalVGH_h_err += std::fabs(hessView(spNum, 1) - spo_ref.hess[ib](n, 1)); - evalVGH_h_err += std::fabs(hessView(spNum, 2) - spo_ref.hess[ib](n, 2)); - evalVGH_h_err += std::fabs(hessView(spNum, 3) - spo_ref.hess[ib](n, 3)); - evalVGH_h_err += std::fabs(hessView(spNum, 4) - spo_ref.hess[ib](n, 4)); - evalVGH_h_err += std::fabs(hessView(spNum, 5) - spo_ref.hess[ib](n, 5)); + evalVGH_h_err += std::fabs(hess_host(spNum, 0) - spo_ref.hess[ib].data(0)[n]); + evalVGH_h_err += std::fabs(hess_host(spNum, 1) - spo_ref.hess[ib].data(1)[n]); + evalVGH_h_err += std::fabs(hess_host(spNum, 2) - spo_ref.hess[ib].data(3)[n]); + evalVGH_h_err += std::fabs(hess_host(spNum, 3) - spo_ref.hess[ib].data(4)[n]); + evalVGH_h_err += std::fabs(hess_host(spNum, 4) - spo_ref.hess[ib].data(5)[n]); + evalVGH_h_err += std::fabs(hess_host(spNum, 5) - spo_ref.hess[ib].data(6)[n]); } if (ur[iel] > accept) { @@ -350,12 +355,12 @@ int main(int argc, char** argv) //spo.evaluate_v(pos); spo_ref.evaluate_v(pos); // accumulate error - auto psiView = Kokkos::create_mirror_view(spo_main.psi); - Kokkos::deep_copy(psiView, spo_main.psi); + auto psi_host = Kokkos::create_mirror_view(spo_main.psi); + Kokkos::deep_copy(psi_host, spo_main.psi); for (int ib = 0; ib < spo_ref.nBlocks; ib++) for (int n = 0; n < spo_ref.nSplinesPerBlock; n++) - evalV_v_err += std::fabs(psiView(ib*spo_ref.nSplinesPerBlock+n) - spo_ref.psi[ib][n]); + evalV_v_err += std::fabs(psi_host(ib*spo_ref.nSplinesPerBlock+n) - spo_ref.psi[ib][n]); } } // els } // ions diff --git a/src/Drivers/miniqmc.cpp b/src/Drivers/miniqmc.cpp index 62ce53855..7483248ca 100644 --- a/src/Drivers/miniqmc.cpp +++ b/src/Drivers/miniqmc.cpp @@ -403,13 +403,16 @@ int main(int argc, char** argv) const int teamID = partition_id; Timers[Timer_Init]->start(); // create and initialize movers + std::cerr << "Making a mover...\n"; Mover thiswalker(myPrimes[teamID], ions); // create a spo view in each Mover + std::cerr << "Making the spo for this mover...\n"; thiswalker.spo = build_SPOSet_view(useRef, spo_main, team_size, teamID); // create wavefunction per mover + std::cerr << "Making the wave function for this mover...\n"; build_WaveFunction(useRef, thiswalker.wavefunction, ions, thiswalker.els, thiswalker.rng, enableJ3); - + std::cerr << "Finished making stuff for this mover...\n"; // initial computing thiswalker.els.update(); thiswalker.wavefunction.evaluateLog(thiswalker.els); diff --git a/src/Numerics/Einspline/bspline_ref.h b/src/Numerics/Einspline/bspline_ref.h new file mode 100644 index 000000000..071c29012 --- /dev/null +++ b/src/Numerics/Einspline/bspline_ref.h @@ -0,0 +1,33 @@ +///////////////////////////////////////////////////////////////////////////// +// einspline: a library for creating and evaluating B-splines // +// Copyright (C) 2007 Kenneth P. Esler, Jr. // +// // +// This program is free software; you can redistribute it and/or modify // +// it under the terms of the GNU General Public License as published by // +// the Free Software Foundation; either version 2 of the License, or // +// (at your option) any later version. // +// // +// This program is distributed in the hope that it will be useful, // +// but WITHOUT ANY WARRANTY; without even the implied warranty of // +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // +// GNU General Public License for more details. // +// // +// You should have received a copy of the GNU General Public License // +// along with this program; if not, write to the Free Software // +// Foundation, Inc., 51 Franklin Street, Fifth Floor, // +// Boston, MA 02110-1301 USA // +///////////////////////////////////////////////////////////////////////////// + +#ifndef BSPLINE_H +#define BSPLINE_H + +#include "bspline_base.h" +//////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////// +//// Bspline structure definitions //// +//////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////// +#include "bspline_structs.h" +#include "multi_bspline_structs_ref.h" + +#endif diff --git a/src/Numerics/Einspline/multi_bspline_structs_ref.h b/src/Numerics/Einspline/multi_bspline_structs_ref.h new file mode 100644 index 000000000..aeeb11e63 --- /dev/null +++ b/src/Numerics/Einspline/multi_bspline_structs_ref.h @@ -0,0 +1,91 @@ +///////////////////////////////////////////////////////////////////////////// +// einspline: a library for creating and evaluating B-splines // +// Copyright (C) 2007 Kenneth P. Esler, Jr. // +// // +// This program is free software; you can redistribute it and/or modify // +// it under the terms of the GNU General Public License as published by // +// the Free Software Foundation; either version 2 of the License, or // +// (at your option) any later version. // +// // +// This program is distributed in the hope that it will be useful, // +// but WITHOUT ANY WARRANTY; without even the implied warranty of // +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // +// GNU General Public License for more details. // +// // +// You should have received a copy of the GNU General Public License // +// along with this program; if not, write to the Free Software // +// Foundation, Inc., 51 Franklin Street, Fifth Floor, // +// Boston, MA 02110-1301 USA // +///////////////////////////////////////////////////////////////////////////// + +#ifndef MULTI_BSPLINE_STRUCTS_STD_H +#define MULTI_BSPLINE_STRUCTS_STD_H + +#include +#include + +/////////////////////////// +// Single precision real // +/////////////////////////// +struct multi_UBspline_3d_s +{ + spline_code spcode; + type_code tcode; + float* restrict coefs; + intptr_t x_stride, y_stride, z_stride; + Ugrid x_grid, y_grid, z_grid; + BCtype_s xBC, yBC, zBC; + int num_splines; + size_t coefs_size; +}; + +/////////////////////////// +// Double precision real // +/////////////////////////// +struct multi_UBspline_3d_d +{ + spline_code spcode; + type_code tcode; + double* restrict coefs; + intptr_t x_stride, y_stride, z_stride; + Ugrid x_grid, y_grid, z_grid; + BCtype_d xBC, yBC, zBC; + int num_splines; + size_t coefs_size; +}; + +////////////////////////////// +// Single precision complex // +////////////////////////////// +struct multi_UBspline_3d_c +{ + spline_code spcode; + type_code tcode; + complex_float* restrict coefs; + intptr_t x_stride, y_stride, z_stride; + Ugrid x_grid, y_grid, z_grid; + BCtype_c xBC, yBC, zBC; + int num_splines; + size_t coefs_size; + // temporary storage for laplacian components + complex_float* restrict lapl3; +}; + +////////////////////////////// +// Double precision complex // +////////////////////////////// +struct multi_UBspline_3d_z +{ + spline_code spcode; + type_code tcode; + complex_double* restrict coefs; + intptr_t x_stride, y_stride, z_stride; + Ugrid x_grid, y_grid, z_grid; + BCtype_z xBC, yBC, zBC; + int num_splines; + size_t coefs_size; + // temporary storage for laplacian components + complex_double* restrict lapl3; +}; + +#endif diff --git a/src/Numerics/Spline2/MultiBsplineRef.hpp b/src/Numerics/Spline2/MultiBsplineRef.hpp index cafdb833d..66e759b6b 100644 --- a/src/Numerics/Spline2/MultiBsplineRef.hpp +++ b/src/Numerics/Spline2/MultiBsplineRef.hpp @@ -36,7 +36,7 @@ struct MultiBsplineRef using spliner_type = typename bspline_traits::SplineType; MultiBsplineRef() {} - MultiBsplineRef(const MultiBsplineRef& in) = default; + MultiBsplineRef(const MultiBsplineRef& in) = delete; MultiBsplineRef& operator=(const MultiBsplineRef& in) = delete; /** compute values vals[0,num_splines) @@ -45,12 +45,8 @@ struct MultiBsplineRef * evaluate_vgh(r,psi,grad,hess,ip). */ - void evaluate_v(const spliner_type* restrict spline_m, - T x, - T y, - T z, - T* restrict vals, - size_t num_splines) const; + void evaluate_v( + const spliner_type* restrict spline_m, T x, T y, T z, T* restrict vals, size_t num_splines) const; void evaluate_vgl(const spliner_type* restrict spline_m, T x, @@ -72,12 +68,8 @@ struct MultiBsplineRef }; template -inline void MultiBsplineRef::evaluate_v(const spliner_type* restrict spline_m, - T x, - T y, - T z, - T* restrict vals, - size_t num_splines) const +inline void MultiBsplineRef::evaluate_v( + const spliner_type* restrict spline_m, T x, T y, T z, T* restrict vals, size_t num_splines) const { x -= spline_m->x_grid.start; y -= spline_m->y_grid.start; @@ -183,12 +175,12 @@ inline void MultiBsplineRef::evaluate_vgl(const spliner_type* restrict spline T sum0 = c[0] * coefsv + c[1] * coefsvzs + c[2] * coefsv2zs + c[3] * coefsv3zs; T sum1 = dc[0] * coefsv + dc[1] * coefsvzs + dc[2] * coefsv2zs + dc[3] * coefsv3zs; T sum2 = d2c[0] * coefsv + d2c[1] * coefsvzs + d2c[2] * coefsv2zs + d2c[3] * coefsv3zs; - gx[n] += pre10 * sum0; - gy[n] += pre01 * sum0; - gz[n] += pre00 * sum1; - lx[n] += pre20 * sum0; - ly[n] += pre02 * sum0; - lz[n] += pre00 * sum2; + gx[n] += pre10 * sum0; + gy[n] += pre01 * sum0; + gz[n] += pre00 * sum1; + lx[n] += pre20 * sum0; + ly[n] += pre02 * sum0; + lz[n] += pre00 * sum2; vals[n] += pre00 * sum0; } } @@ -290,15 +282,15 @@ inline void MultiBsplineRef::evaluate_vgh(const spliner_type* restrict spline T sum1 = dc[0] * coefsv + dc[1] * coefsvzs + dc[2] * coefsv2zs + dc[3] * coefsv3zs; T sum2 = d2c[0] * coefsv + d2c[1] * coefsvzs + d2c[2] * coefsv2zs + d2c[3] * coefsv3zs; - hxx[n] += pre20 * sum0; - hxy[n] += pre11 * sum0; - hxz[n] += pre10 * sum1; - hyy[n] += pre02 * sum0; - hyz[n] += pre01 * sum1; - hzz[n] += pre00 * sum2; - gx[n] += pre10 * sum0; - gy[n] += pre01 * sum0; - gz[n] += pre00 * sum1; + hxx[n] += pre20 * sum0; + hxy[n] += pre11 * sum0; + hxz[n] += pre10 * sum1; + hyy[n] += pre02 * sum0; + hyz[n] += pre01 * sum1; + hzz[n] += pre00 * sum2; + gx[n] += pre10 * sum0; + gy[n] += pre01 * sum0; + gz[n] += pre00 * sum1; vals[n] += pre00 * sum0; } } diff --git a/src/Numerics/Spline2/bspline_allocator_ref.cpp b/src/Numerics/Spline2/bspline_allocator_ref.cpp new file mode 100644 index 000000000..b2225b453 --- /dev/null +++ b/src/Numerics/Spline2/bspline_allocator_ref.cpp @@ -0,0 +1,78 @@ +//////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source +// License. See LICENSE file in top directory for details. +// +// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. +// +// File developed by: +// +// File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp. +//////////////////////////////////////////////////////////////////////////////// +// -*- C++ -*- +/** @file bspline_allocator.cpp + * @brief Implementation of einspline::Allocator member functions + * + * Allocator::Policy is not defined precisely yet but is intended to select + * the specialized allocator. + */ +#include "Numerics/Spline2/bspline_allocator_ref.hpp" +#include "Numerics/Spline2/einspline_allocator_ref.h" + + +multi_UBspline_3d_s* einspline_create_multi_UBspline_3d_s(Ugrid x_grid, + Ugrid y_grid, + Ugrid z_grid, + BCtype_s xBC, + BCtype_s yBC, + BCtype_s zBC, + int num_splines); + +UBspline_3d_s* einspline_create_UBspline_3d_s( + Ugrid x_grid, Ugrid y_grid, Ugrid z_grid, BCtype_s xBC, BCtype_s yBC, BCtype_s zBC); + +multi_UBspline_3d_d* einspline_create_multi_UBspline_3d_d(Ugrid x_grid, + Ugrid y_grid, + Ugrid z_grid, + BCtype_d xBC, + BCtype_d yBC, + BCtype_d zBC, + int num_splines); + +UBspline_3d_d* einspline_create_UBspline_3d_d( + Ugrid x_grid, Ugrid y_grid, Ugrid z_grid, BCtype_d xBC, BCtype_d yBC, BCtype_d zBC); + +namespace qmcplusplus +{ +namespace einspline +{ +Allocator::Allocator() : Policy(0) {} + +Allocator::~Allocator() {} + +multi_UBspline_3d_s* Allocator::allocateMultiBspline( + Ugrid x_grid, Ugrid y_grid, Ugrid z_grid, BCtype_s xBC, BCtype_s yBC, BCtype_s zBC, int num_splines) +{ + return einspline_create_multi_UBspline_3d_s(x_grid, y_grid, z_grid, xBC, yBC, zBC, num_splines); +} + +multi_UBspline_3d_d* Allocator::allocateMultiBspline( + Ugrid x_grid, Ugrid y_grid, Ugrid z_grid, BCtype_d xBC, BCtype_d yBC, BCtype_d zBC, int num_splines) +{ + return einspline_create_multi_UBspline_3d_d(x_grid, y_grid, z_grid, xBC, yBC, zBC, num_splines); +} + +UBspline_3d_d* Allocator::allocateUBspline( + Ugrid x_grid, Ugrid y_grid, Ugrid z_grid, BCtype_d xBC, BCtype_d yBC, BCtype_d zBC) +{ + return einspline_create_UBspline_3d_d(x_grid, y_grid, z_grid, xBC, yBC, zBC); +} + +UBspline_3d_s* Allocator::allocateUBspline( + Ugrid x_grid, Ugrid y_grid, Ugrid z_grid, BCtype_s xBC, BCtype_s yBC, BCtype_s zBC) +{ + return einspline_create_UBspline_3d_s(x_grid, y_grid, z_grid, xBC, yBC, zBC); +} + + +} // namespace einspline +} // namespace qmcplusplus diff --git a/src/Numerics/Spline2/bspline_allocator_ref.hpp b/src/Numerics/Spline2/bspline_allocator_ref.hpp new file mode 100644 index 000000000..ec4760dd0 --- /dev/null +++ b/src/Numerics/Spline2/bspline_allocator_ref.hpp @@ -0,0 +1,209 @@ +//////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source +// License. See LICENSE file in top directory for details. +// +// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. +// +// File developed by: +// +// File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp. +//////////////////////////////////////////////////////////////////////////////// +// -*- C++ -*- +/** @file bspline_allocator.hpp + * @brief Allocator and management classes + */ +#ifndef QMCPLUSPLUS_EINSPLINE_BSPLINE_ALLOCATOR_H +#define QMCPLUSPLUS_EINSPLINE_BSPLINE_ALLOCATOR_H + +#include +#include +#include "Numerics/Spline2/einspline_allocator_ref.h" +#include + +namespace qmcplusplus +{ +namespace einspline +{ +class Allocator +{ + /// Setting the allocation policy: default is using aligned allocator + int Policy; + +public: + /// constructor + Allocator(); +#if (__cplusplus >= 201103L) + /// disable copy constructor + Allocator(const Allocator&) = delete; + /// disable assignement + Allocator& operator=(const Allocator&) = delete; +#endif + /// destructor + ~Allocator(); + + template + void destroy(SplineType* spline) + { + einspline_free(spline->coefs); + free(spline); + } + + /// allocate a single multi-bspline + multi_UBspline_3d_s* allocateMultiBspline(Ugrid x_grid, + Ugrid y_grid, + Ugrid z_grid, + BCtype_s xBC, + BCtype_s yBC, + BCtype_s zBC, + int num_splines); + + /// allocate a double multi-bspline + multi_UBspline_3d_d* allocateMultiBspline(Ugrid x_grid, + Ugrid y_grid, + Ugrid z_grid, + BCtype_d xBC, + BCtype_d yBC, + BCtype_d zBC, + int num_splines); + + /// allocate a single bspline + UBspline_3d_s* + allocateUBspline(Ugrid x_grid, Ugrid y_grid, Ugrid z_grid, BCtype_s xBC, BCtype_s yBC, BCtype_s zBC); + + /// allocate a UBspline_3d_d + UBspline_3d_d* + allocateUBspline(Ugrid x_grid, Ugrid y_grid, Ugrid z_grid, BCtype_d xBC, BCtype_d yBC, BCtype_d zBC); + + /** allocate a multi_UBspline_3d_(s,d) + * @tparam T datatype + * @tparam ValT 3D container for start and end + * @tparam IntT 3D container for ng + */ + template + typename bspline_traits::SplineType* + createMultiBspline(T dummy, ValT& start, ValT& end, IntT& ng, bc_code bc, int num_splines); + + /** allocate a UBspline_3d_(s,d) + * @tparam T datatype + * @tparam ValT 3D container for start and end + * @tparam IntT 3D container for ng + */ + template + typename bspline_traits::SingleSplineType* + createUBspline(ValT& start, ValT& end, IntT& ng, bc_code bc); + + /** Set coefficients for a single orbital (band) + * @param i index of the orbital + * @param coeff array of coefficients + * @param spline target MultibsplineType + */ + template + void setCoefficientsForOneOrbital(int i, + Array& coeff, + typename bspline_traits::SplineType* spline); + + /** copy a UBSpline_3d_X to multi_UBspline_3d_X at i-th band + * @param single UBspline_3d_X + * @param multi target multi_UBspline_3d_X + * @param i the band index to copy to + * @param offset starting offset for AoSoA + * @param N shape of AoSoA + */ + template + void copy(UBT* single, MBT* multi, int i, const int* offset, const int* N); +}; + +template +void Allocator::setCoefficientsForOneOrbital(int i, + Array& coeff, + typename bspline_traits::SplineType* spline) +{ + //#pragma omp parallel for collapse(3) + for (int ix = 0; ix < spline->x_grid.num + 3; ix++) + { + for (int iy = 0; iy < spline->y_grid.num + 3; iy++) + { + for (int iz = 0; iz < spline->z_grid.num + 3; iz++) + { + intptr_t xs = spline->x_stride; // These might be unset + intptr_t ys = spline->y_stride; + intptr_t zs = spline->z_stride; + spline->coefs[ix * xs + iy * ys + iz * zs + i] = coeff(ix, iy, iz); + } + } + } +} + + // @DEBUG JPT: ref impl calls this one +template +typename bspline_traits::SplineType* +Allocator::createMultiBspline(T dummy, ValT& start, ValT& end, IntT& ng, bc_code bc, int num_splines) +{ + Ugrid x_grid, y_grid, z_grid; + typename bspline_traits::BCType xBC, yBC, zBC; + x_grid.start = start[0]; + x_grid.end = end[0]; + x_grid.num = ng[0]; + y_grid.start = start[1]; + y_grid.end = end[1]; + y_grid.num = ng[1]; + z_grid.start = start[2]; + z_grid.end = end[2]; + z_grid.num = ng[2]; + xBC.lCode = xBC.rCode = bc; + yBC.lCode = yBC.rCode = bc; + zBC.lCode = zBC.rCode = bc; + return allocateMultiBspline(x_grid, y_grid, z_grid, xBC, yBC, zBC, num_splines); +} + +template +typename bspline_traits::SingleSplineType* +Allocator::createUBspline(ValT& start, ValT& end, IntT& ng, bc_code bc) +{ + Ugrid x_grid, y_grid, z_grid; + typename bspline_traits::BCType xBC, yBC, zBC; + x_grid.start = start[0]; + x_grid.end = end[0]; + x_grid.num = ng[0]; + y_grid.start = start[1]; + y_grid.end = end[1]; + y_grid.num = ng[1]; + z_grid.start = start[2]; + z_grid.end = end[2]; + z_grid.num = ng[2]; + xBC.lCode = xBC.rCode = bc; + yBC.lCode = yBC.rCode = bc; + zBC.lCode = zBC.rCode = bc; + return allocateUBspline(x_grid, y_grid, z_grid, xBC, yBC, zBC); +} + +template +void Allocator::copy(UBT* single, MBT* multi, int i, const int* offset, const int* N) +{ + typedef typename bspline_type::value_type out_type; + typedef typename bspline_type::value_type in_type; + intptr_t x_stride_in = single->x_stride; + intptr_t y_stride_in = single->y_stride; + intptr_t x_stride_out = multi->x_stride; + intptr_t y_stride_out = multi->y_stride; + intptr_t z_stride_out = multi->z_stride; + intptr_t offset0 = static_cast(offset[0]); + intptr_t offset1 = static_cast(offset[1]); + intptr_t offset2 = static_cast(offset[2]); + const intptr_t istart = static_cast(i); + const intptr_t n0 = N[0], n1 = N[1], n2 = N[2]; + for (intptr_t ix = 0; ix < n0; ++ix) + for (intptr_t iy = 0; iy < n1; ++iy) + { + out_type* restrict out = multi->coefs + ix * x_stride_out + iy * y_stride_out + istart; + const in_type* restrict in = + single->coefs + (ix + offset0) * x_stride_in + (iy + offset1) * y_stride_in + offset2; + for (intptr_t iz = 0; iz < n2; ++iz) + { + out[iz * z_stride_out] = static_cast(in[iz]); + } + } +} +} // namespace einspline +} // namespace qmcplusplus +#endif diff --git a/src/Numerics/Spline2/bspline_traits_ref.hpp b/src/Numerics/Spline2/bspline_traits_ref.hpp new file mode 100644 index 000000000..4ff3e782c --- /dev/null +++ b/src/Numerics/Spline2/bspline_traits_ref.hpp @@ -0,0 +1,79 @@ +//////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source +// License. See LICENSE file in top directory for details. +// +// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. +// +// File developed by: +// +// File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp. +//////////////////////////////////////////////////////////////////////////////// +// -*- C++ -*- +/** @file bspline_traits.hpp + * + * extend spline/bspline_traints by introducing + * bspline_traits with only speciliazation with 3D + */ +#ifndef QMCPLUSPLUS_BSPLINE_SPLINE2_TRAITS_H +#define QMCPLUSPLUS_BSPLINE_SPLINE2_TRAITS_H + +#include + +namespace qmcplusplus +{ +/** trait class to map (datatype,D) to Einspline engine type */ +template +struct bspline_traits +{}; + +template<> +struct bspline_traits +{ + typedef multi_UBspline_3d_s SplineType; + typedef UBspline_3d_s SingleSplineType; + typedef BCtype_s BCType; + typedef float real_type; + typedef float value_type; +}; + +template<> +struct bspline_traits +{ + typedef multi_UBspline_3d_d SplineType; + typedef UBspline_3d_d SingleSplineType; + typedef BCtype_d BCType; + typedef double real_type; + typedef double value_type; +}; + +/** helper class to determine the value_type of einspline objects + */ +template +struct bspline_type +{}; + +template<> +struct bspline_type +{ + typedef float value_type; +}; + +template<> +struct bspline_type +{ + typedef double value_type; +}; + +template<> +struct bspline_type +{ + typedef float value_type; +}; + +template<> +struct bspline_type +{ + typedef double value_type; +}; +} // namespace qmcplusplus +#endif diff --git a/src/Numerics/Spline2/einspline_allocator_ref.cpp b/src/Numerics/Spline2/einspline_allocator_ref.cpp new file mode 100644 index 000000000..6768b7b99 --- /dev/null +++ b/src/Numerics/Spline2/einspline_allocator_ref.cpp @@ -0,0 +1,325 @@ +///////////////////////////////////////////////////////////////////////////// +// einspline: a library for creating and evaluating B-splines // +// Copyright (C) 2007 Kenneth P. Esler, Jr. // +// // +// This program is free software; you can redistribute it and/or modify // +// it under the terms of the GNU General Public License as published by // +// the Free Software Foundation; either version 2 of the License, or // +// (at your option) any later version. // +// // +// This program is distributed in the hope that it will be useful, // +// but WITHOUT ANY WARRANTY; without even the implied warranty of // +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // +// GNU General Public License for more details. // +// // +// You should have received a copy of the GNU General Public License // +// along with this program; if not, write to the Free Software // +// Foundation, Inc., 51 Franklin Street, Fifth Floor, // +// Boston, MA 02110-1301 USA // +///////////////////////////////////////////////////////////////////////////// +/** @file einspline_allocator.c + * + * Gather only 3d_d/3d_s allocation routines. + * Original implementations are einspline/ *_create.c + */ +#include +#include +#include +#include +#include "config.h" +#include "Numerics/Einspline/bspline_ref.h" +#include "Numerics/Spline2/einspline_allocator_ref.h" + +#if defined(HAVE_POSIX_MEMALIGN) + +int posix_memalign(void** memptr, size_t alignment, size_t size); + +void* einspline_alloc(size_t size, size_t alignment) +{ + void* ptr; + int ret = posix_memalign(&ptr, alignment, size); + return ptr; +} + +void einspline_free(void* ptr) { free(ptr); } + +#else + +void* einspline_alloc(size_t size, size_t alignment) +{ + size += (alignment - 1) + sizeof(void*); + void* ptr = malloc(size); + if (ptr == NULL) + return NULL; + else + { + void* shifted = (char*)ptr + sizeof(void*); // make room to save original pointer + size_t offset = alignment - (size_t)shifted % (size_t)alignment; + void* aligned = (char*)shifted + offset; + *((void**)aligned - 1) = ptr; + return aligned; + } +} + +void einspline_free(void* aligned) +{ + void* ptr = *((void**)aligned - 1); + free(ptr); +} +#endif + +multi_UBspline_3d_s* einspline_create_multi_UBspline_3d_s( + Ugrid x_grid, Ugrid y_grid, Ugrid z_grid, BCtype_s xBC, BCtype_s yBC, BCtype_s zBC, int num_splines) +{ + // Create new spline + multi_UBspline_3d_s* restrict spline = (multi_UBspline_3d_s*)malloc(sizeof(multi_UBspline_3d_s)); + if (!spline) + { + fprintf(stderr, "Out of memory allocating spline in create_multi_UBspline_3d_s.\n"); + abort(); + } + spline->spcode = MULTI_U3D; + spline->tcode = SINGLE_REAL; + spline->xBC = xBC; + spline->yBC = yBC; + spline->zBC = zBC; + spline->num_splines = num_splines; + // Setup internal variables + int Mx = x_grid.num; + int My = y_grid.num; + int Mz = z_grid.num; + int Nx, Ny, Nz; + + if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC) + Nx = Mx + 3; + else + Nx = Mx + 2; + x_grid.delta = (x_grid.end - x_grid.start) / (double)(Nx - 3); + x_grid.delta_inv = 1.0 / x_grid.delta; + spline->x_grid = x_grid; + + if (yBC.lCode == PERIODIC || yBC.lCode == ANTIPERIODIC) + Ny = My + 3; + else + Ny = My + 2; + y_grid.delta = (y_grid.end - y_grid.start) / (double)(Ny - 3); + y_grid.delta_inv = 1.0 / y_grid.delta; + spline->y_grid = y_grid; + + if (zBC.lCode == PERIODIC || zBC.lCode == ANTIPERIODIC) + Nz = Mz + 3; + else + Nz = Mz + 2; + z_grid.delta = (z_grid.end - z_grid.start) / (double)(Nz - 3); + z_grid.delta_inv = 1.0 / z_grid.delta; + spline->z_grid = z_grid; + + const int ND = QMC_CLINE / sizeof(float); + const size_t N = ((num_splines + ND - 1) / ND) * ND; + // int N= (num_splines%ND) ? (num_splines+ND-num_splines%ND) : num_splines; + spline->x_stride = (size_t)Ny * (size_t)Nz * (size_t)N; + spline->y_stride = (size_t)Nz * N; + spline->z_stride = N; + + spline->coefs_size = (size_t)Nx * spline->x_stride; + spline->coefs = (float*)einspline_alloc(sizeof(float) * spline->coefs_size, QMC_CLINE); + // printf("Einepline allocator %d %d %d %zd (%d) %zu + // %d\n",Nx,Ny,Nz,N,num_splines,spline->coefs_size,QMC_CLINE); + + if (!spline->coefs) + { + fprintf(stderr, + "Out of memory allocating spline coefficients in " + "create_multi_UBspline_3d_s.\n"); + abort(); + } + +#if 0 + //test first-touch later + const size_t xs = spline->x_stride; + const size_t ys = spline->y_stride; + const size_t zs = spline->z_stride; + + const float czero=0; +#pragma omp parallel for collapse(3) + for(size_t i=0; icoefs + i*xs + j*ys + k*zs; + for(size_t s=0; sspcode = MULTI_U3D; + spline->tcode = DOUBLE_REAL; + spline->xBC = xBC; + spline->yBC = yBC; + spline->zBC = zBC; + spline->num_splines = num_splines; + + // Setup internal variables + int Mx = x_grid.num; + int My = y_grid.num; + int Mz = z_grid.num; + int Nx, Ny, Nz; + + if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC) + Nx = Mx + 3; + else + Nx = Mx + 2; + x_grid.delta = (x_grid.end - x_grid.start) / (double)(Nx - 3); + x_grid.delta_inv = 1.0 / x_grid.delta; + spline->x_grid = x_grid; + + if (yBC.lCode == PERIODIC || yBC.lCode == ANTIPERIODIC) + Ny = My + 3; + else + Ny = My + 2; + y_grid.delta = (y_grid.end - y_grid.start) / (double)(Ny - 3); + y_grid.delta_inv = 1.0 / y_grid.delta; + spline->y_grid = y_grid; + + if (zBC.lCode == PERIODIC || zBC.lCode == ANTIPERIODIC) + Nz = Mz + 3; + else + Nz = Mz + 2; + z_grid.delta = (z_grid.end - z_grid.start) / (double)(Nz - 3); + z_grid.delta_inv = 1.0 / z_grid.delta; + spline->z_grid = z_grid; + + const int ND = QMC_CLINE / sizeof(double); + int N = (num_splines % ND) ? (num_splines + ND - num_splines % ND) : num_splines; + + spline->x_stride = (size_t)Ny * (size_t)Nz * (size_t)N; + spline->y_stride = Nz * N; + spline->z_stride = N; + + spline->coefs_size = (size_t)Nx * spline->x_stride; + spline->coefs = (double*)einspline_alloc(sizeof(double) * spline->coefs_size, QMC_CLINE); + + if (!spline->coefs) + { + fprintf(stderr, + "Out of memory allocating spline coefficients in " + "create_multi_UBspline_3d_d.\n"); + abort(); + } + + return spline; +} + +UBspline_3d_d* einspline_create_UBspline_3d_d( + Ugrid x_grid, Ugrid y_grid, Ugrid z_grid, BCtype_d xBC, BCtype_d yBC, BCtype_d zBC) +{ + // Create new spline + UBspline_3d_d* restrict spline = (UBspline_3d_d*)malloc(sizeof(UBspline_3d_d)); + spline->spcode = U3D; + spline->tcode = DOUBLE_REAL; + spline->xBC = xBC; + spline->yBC = yBC; + spline->zBC = zBC; + + // Setup internal variables + int Mx = x_grid.num; + int My = y_grid.num; + int Mz = z_grid.num; + int Nx, Ny, Nz; + + if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC) + Nx = Mx + 3; + else + Nx = Mx + 2; + x_grid.delta = (x_grid.end - x_grid.start) / (double)(Nx - 3); + x_grid.delta_inv = 1.0 / x_grid.delta; + spline->x_grid = x_grid; + + if (yBC.lCode == PERIODIC || yBC.lCode == ANTIPERIODIC) + Ny = My + 3; + else + Ny = My + 2; + y_grid.delta = (y_grid.end - y_grid.start) / (double)(Ny - 3); + y_grid.delta_inv = 1.0 / y_grid.delta; + spline->y_grid = y_grid; + + if (zBC.lCode == PERIODIC || zBC.lCode == ANTIPERIODIC) + Nz = Mz + 3; + else + Nz = Mz + 2; + z_grid.delta = (z_grid.end - z_grid.start) / (double)(Nz - 3); + z_grid.delta_inv = 1.0 / z_grid.delta; + spline->z_grid = z_grid; + + spline->x_stride = Ny * Nz; + spline->y_stride = Nz; + + spline->coefs_size = (size_t)Nx * (size_t)Ny * (size_t)Nz; + + spline->coefs = (double*)einspline_alloc(sizeof(double) * spline->coefs_size, QMC_CLINE); + + return spline; +} + +UBspline_3d_s* einspline_create_UBspline_3d_s( + Ugrid x_grid, Ugrid y_grid, Ugrid z_grid, BCtype_s xBC, BCtype_s yBC, BCtype_s zBC) +{ + // Create new spline + UBspline_3d_s* spline = (UBspline_3d_s*)malloc(sizeof(UBspline_3d_s)); + spline->spcode = U3D; + spline->tcode = SINGLE_REAL; + spline->xBC = xBC; + spline->yBC = yBC; + spline->zBC = zBC; + // Setup internal variables + int Mx = x_grid.num; + int My = y_grid.num; + int Mz = z_grid.num; + int Nx, Ny, Nz; + + if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC) + Nx = Mx + 3; + else + Nx = Mx + 2; + x_grid.delta = (x_grid.end - x_grid.start) / (double)(Nx - 3); + x_grid.delta_inv = 1.0 / x_grid.delta; + spline->x_grid = x_grid; + + if (yBC.lCode == PERIODIC || yBC.lCode == ANTIPERIODIC) + Ny = My + 3; + else + Ny = My + 2; + y_grid.delta = (y_grid.end - y_grid.start) / (double)(Ny - 3); + y_grid.delta_inv = 1.0 / y_grid.delta; + spline->y_grid = y_grid; + + if (zBC.lCode == PERIODIC || zBC.lCode == ANTIPERIODIC) + Nz = Mz + 3; + else + Nz = Mz + 2; + z_grid.delta = (z_grid.end - z_grid.start) / (double)(Nz - 3); + z_grid.delta_inv = 1.0 / z_grid.delta; + spline->z_grid = z_grid; + + spline->x_stride = Ny * Nz; + spline->y_stride = Nz; + + spline->coefs_size = (size_t)Nx * (size_t)Ny * (size_t)Nz; + spline->coefs = (float*)einspline_alloc(sizeof(float) * spline->coefs_size, QMC_CLINE); + + return spline; +} diff --git a/src/Numerics/Spline2/einspline_allocator_ref.h b/src/Numerics/Spline2/einspline_allocator_ref.h new file mode 100644 index 000000000..45c0922fa --- /dev/null +++ b/src/Numerics/Spline2/einspline_allocator_ref.h @@ -0,0 +1,35 @@ +///////////////////////////////////////////////////////////////////////////// +// einspline: a library for creating and evaluating B-splines // +// Copyright (C) 2007 Kenneth P. Esler, Jr. // +// // +// This program is free software; you can redistribute it and/or modify // +// it under the terms of the GNU General Public License as published by // +// the Free Software Foundation; either version 2 of the License, or // +// (at your option) any later version. // +// // +// This program is distributed in the hope that it will be useful, // +// but WITHOUT ANY WARRANTY; without even the implied warranty of // +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // +// GNU General Public License for more details. // +// // +// You should have received a copy of the GNU General Public License // +// along with this program; if not, write to the Free Software // +// Foundation, Inc., 51 Franklin Street, Fifth Floor, // +// Boston, MA 02110-1301 USA // +///////////////////////////////////////////////////////////////////////////// +/** @file einspline_allocator.h + * + * Rename aligned_alloc/aligned_free as einspline_alloc/einspline_free to + * avoid naming conflicts with the standards + */ + +#ifndef EINSPLINE_ALIGNED_ALLOC_H +#define EINSPLINE_ALIGNED_ALLOC_H + + +void* einspline_alloc(size_t size, size_t alignment); + +void einspline_free(void* ptr); + + +#endif diff --git a/src/QMCWaveFunctions/SPOSet_builder.cpp b/src/QMCWaveFunctions/SPOSet_builder.cpp index a53b01c28..6961b3daa 100644 --- a/src/QMCWaveFunctions/SPOSet_builder.cpp +++ b/src/QMCWaveFunctions/SPOSet_builder.cpp @@ -34,13 +34,14 @@ SPOSet* build_SPOSet(bool useRef, } else { - auto* spo_main = new einspline_spo; + auto* spo_main = new einspline_spo; spo_main->set(nx, ny, nz, num_splines, nblocks); spo_main->Lattice.set(lattice_b); return dynamic_cast(spo_main); } } + //@DEBUG: Problem is here. The original call to the einspline_ SPOSet* build_SPOSet_view(bool useRef, const SPOSet* SPOSet_main, int team_size, int member_id) { if (useRef) @@ -52,9 +53,11 @@ SPOSet* build_SPOSet_view(bool useRef, const SPOSet* SPOSet_main, int team_size, return dynamic_cast(spo_view); } else - { - auto* temp_ptr = dynamic_cast*>(SPOSet_main); - auto* spo_view = new einspline_spo(*temp_ptr, team_size, member_id); + { + auto* temp_ptr = dynamic_cast*>(SPOSet_main); + // Original - why are team_size, member_id passed? + //auto* spo_view = new einspline_spo(*temp_ptr, team_size, member_id); + auto* spo_view = new einspline_spo(*temp_ptr); return dynamic_cast(spo_view); } } diff --git a/src/QMCWaveFunctions/einspline_spo.hpp b/src/QMCWaveFunctions/einspline_spo.hpp index 616cea3e4..00ad49df0 100644 --- a/src/QMCWaveFunctions/einspline_spo.hpp +++ b/src/QMCWaveFunctions/einspline_spo.hpp @@ -59,7 +59,8 @@ struct einspline_spo : public SPOSet timer = TimerManager.createTimer("Single-Particle Orbitals", timer_level_fine); } /// disable copy constructor - einspline_spo(const einspline_spo& in) = default; + einspline_spo(const einspline_spo& in) = delete; + /// disable copy operator einspline_spo& operator=(const einspline_spo& in) = delete; diff --git a/src/QMCWaveFunctions/einspline_spo_ref.hpp b/src/QMCWaveFunctions/einspline_spo_ref.hpp index a19ab3f23..74ae0b957 100644 --- a/src/QMCWaveFunctions/einspline_spo_ref.hpp +++ b/src/QMCWaveFunctions/einspline_spo_ref.hpp @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include #include #include "Numerics/OhmmsPETE/OhmmsArray.h" @@ -38,9 +38,9 @@ struct einspline_spo_ref : public SPOSet { /// define the einsplie data object type using spline_type = typename bspline_traits::SplineType; - using vContainer_type = Kokkos::View; - using gContainer_type = Kokkos::View; - using hContainer_type = Kokkos::View; + using vContainer_type = aligned_vector; + using gContainer_type = VectorSoAContainer; + using hContainer_type = VectorSoAContainer; using lattice_type = CrystalLattice; /// number of blocks @@ -61,10 +61,11 @@ struct einspline_spo_ref : public SPOSet /// compute engine MultiBsplineRef compute_engine; - Kokkos::View einsplines; - Kokkos::View psi; - Kokkos::View grad; - Kokkos::View hess; + aligned_vector einsplines; + aligned_vector psi; + aligned_vector grad; + aligned_vector hess; + /// Timer NewTimer* timer; @@ -95,10 +96,9 @@ struct einspline_spo_ref : public SPOSet firstBlock = nBlocks * member_id; lastBlock = std::min(in.nBlocks, nBlocks * (member_id + 1)); nBlocks = lastBlock - firstBlock; - // einsplines.resize(nBlocks); - einsplines = Kokkos::View("einsplines", nBlocks); + einsplines.resize(nBlocks); for (int i = 0, t = firstBlock; i < nBlocks; ++i, ++t) - einsplines(i) = in.einsplines(t); + einsplines[i] = in.einsplines[t]; resize(); timer = TimerManager.createTimer("Single-Particle Orbitals Ref", timer_level_fine); } @@ -106,41 +106,22 @@ struct einspline_spo_ref : public SPOSet /// destructors ~einspline_spo_ref() { - //Note the change in garbage collection here. The reason for doing this is that by - //changing einsplines to a view, it's more natural to work by reference than by raw pointer. - // To maintain current interface, redoing the input types of allocate and destroy to call by references - // would need to be propagated all the way down. - // However, since we've converted the large chunks of memory to views, garbage collection is - // handled automatically. Thus, setting the spline_type objects to empty views lets Kokkos handle the Garbage collection. - if (Owner) - einsplines = Kokkos::View(); - - // for (int i = 0; i < nBlocks; ++i) - // myAllocator.destroy(einsplines(i)); + for (int i = 0; i < nBlocks; ++i) + myAllocator.destroy(einsplines[i]); } /// resize the containers void resize() { - // psi.resize(nBlocks); - // grad.resize(nBlocks); - // hess.resize(nBlocks); - - psi = Kokkos::View("Psi", nBlocks); - grad = Kokkos::View("Grad", nBlocks); - hess = Kokkos::View("Hess", nBlocks); - + psi.resize(nBlocks); + grad.resize(nBlocks); + hess.resize(nBlocks); for (int i = 0; i < nBlocks; ++i) { - //psi[i].resize(nSplinesPerBlock); - //grad[i].resize(nSplinesPerBlock); - //hess[i].resize(nSplinesPerBlock); - - //Using the "view-of-views" placement-new construct. - new (&psi(i)) vContainer_type("psi_i", nSplinesPerBlock); - new (&grad(i)) gContainer_type("grad_i", nSplinesPerBlock); - new (&hess(i)) hContainer_type("hess_i", nSplinesPerBlock); + psi[i].resize(nSplinesPerBlock); + grad[i].resize(nSplinesPerBlock); + hess[i].resize(nSplinesPerBlock); } } @@ -152,41 +133,26 @@ struct einspline_spo_ref : public SPOSet nSplinesPerBlock = num_splines / nblocks; firstBlock = 0; lastBlock = nBlocks; - if (!einsplines.extent(0)) + if (einsplines.empty()) { Owner = true; TinyVector ng(nx, ny, nz); PosType start(0); PosType end(1); - - // einsplines.resize(nBlocks); - einsplines = Kokkos::View("einsplines", nBlocks); - + einsplines.resize(nBlocks); RandomGenerator myrandom(11); - //Array coef_data(nx+3, ny+3, nz+3); - Kokkos::View coef_data("coef_data", nx + 3, ny + 3, nz + 3); - int totalNumCoefs = coef_data.extent(0)*coef_data.extent(1)*coef_data.extent(2); - std::vector mydata(totalNumCoefs); - + Array coef_data(nx + 3, ny + 3, nz + 3); for (int i = 0; i < nBlocks; ++i) { - einsplines(i) = - *myAllocator.createMultiBspline(T(0), start, end, ng, PERIODIC, nSplinesPerBlock); + einsplines[i] = + myAllocator.createMultiBspline(T(0), start, end, ng, PERIODIC, nSplinesPerBlock); if (init_random) { for (int j = 0; j < nSplinesPerBlock; ++j) { - myrandom.generate_uniform(&mydata[0], totalNumCoefs); - int idx = 0; - for (int ix = 0; ix < coef_data.extent(0); ix++) { - for (int iy = 0; iy < coef_data.extent(1); iy++) { - for (int iz = 0; iz < coef_data.extent(2); iz++) { - coef_data(ix,iy,iz) = mydata[idx]; - idx++; - } - } - } - myAllocator.setCoefficientsForOneOrbital(j, coef_data, &einsplines(i)); + // Generate different coefficients for each orbital + myrandom.generate_uniform(coef_data.data(), coef_data.size()); + myAllocator.setCoefficientsForOneOrbital(j, coef_data, einsplines[i]); } } } @@ -201,16 +167,16 @@ struct einspline_spo_ref : public SPOSet auto u = Lattice.toUnit_floor(p); for (int i = 0; i < nBlocks; ++i) - compute_engine.evaluate_v(&einsplines(i), u[0], u[1], u[2], psi(i).data(), nSplinesPerBlock); + compute_engine.evaluate_v(einsplines[i], u[0], u[1], u[2], psi[i].data(), nSplinesPerBlock); } /** evaluate psi */ inline void evaluate_v_pfor(const PosType& p) { auto u = Lattice.toUnit_floor(p); -#pragma omp for nowait + #pragma omp for nowait for (int i = 0; i < nBlocks; ++i) - compute_engine.evaluate_v(&einsplines(i), u[0], u[1], u[2], psi(i).data(), nSplinesPerBlock); + compute_engine.evaluate_v(einsplines[i], u[0], u[1], u[2], psi[i].data(), nSplinesPerBlock); } /** evaluate psi, grad and lap */ @@ -218,13 +184,13 @@ struct einspline_spo_ref : public SPOSet { auto u = Lattice.toUnit_floor(p); for (int i = 0; i < nBlocks; ++i) - compute_engine.evaluate_vgl(&einsplines(i), + compute_engine.evaluate_vgl(einsplines[i], u[0], u[1], u[2], - psi(i).data(), - grad(i).data(), - hess(i).data(), + psi[i].data(), + grad[i].data(), + hess[i].data(), nSplinesPerBlock); } @@ -232,15 +198,15 @@ struct einspline_spo_ref : public SPOSet inline void evaluate_vgl_pfor(const PosType& p) { auto u = Lattice.toUnit_floor(p); -#pragma omp for nowait + #pragma omp for nowait for (int i = 0; i < nBlocks; ++i) - compute_engine.evaluate_vgl(&einsplines(i), + compute_engine.evaluate_vgl(einsplines[i], u[0], u[1], u[2], - psi(i).data(), - grad(i).data(), - hess(i).data(), + psi[i].data(), + grad[i].data(), + hess[i].data(), nSplinesPerBlock); } @@ -251,13 +217,13 @@ struct einspline_spo_ref : public SPOSet auto u = Lattice.toUnit_floor(p); for (int i = 0; i < nBlocks; ++i) - compute_engine.evaluate_vgh(&einsplines(i), + compute_engine.evaluate_vgh(einsplines[i], u[0], u[1], u[2], - psi(i).data(), - grad(i).data(), - hess(i).data(), + psi[i].data(), + grad[i].data(), + hess[i].data(), nSplinesPerBlock); } @@ -265,15 +231,15 @@ struct einspline_spo_ref : public SPOSet inline void evaluate_vgh_pfor(const PosType& p) { auto u = Lattice.toUnit_floor(p); -#pragma omp for nowait + #pragma omp for nowait for (int i = 0; i < nBlocks; ++i) - compute_engine.evaluate_vgh(&einsplines(i), + compute_engine.evaluate_vgh(einsplines[i], u[0], u[1], u[2], - psi(i).data(), - grad(i).data(), - hess(i).data(), + psi[i].data(), + grad[i].data(), + hess[i].data(), nSplinesPerBlock); } From 0b4b0a729b894d5fd9daede90296775c9093c4d1 Mon Sep 17 00:00:00 2001 From: Joshua Townsend Date: Thu, 25 Apr 2019 17:16:52 -0600 Subject: [PATCH 2/2] check_spo now passes on both CPU and GPU! Turns out I'm a dummy and had a fencepost error in the calculation of the error between the reference and kokkos implementations. Changing a "6" to a "5" fixed the problem, and now check_spo passes on both power9, and V100 ! --- src/Drivers/check_spo.cpp | 21 +++++++++-------- .../Spline2/bspline_allocator_ref.hpp | 23 +++++++++++-------- 2 files changed, 25 insertions(+), 19 deletions(-) diff --git a/src/Drivers/check_spo.cpp b/src/Drivers/check_spo.cpp index 6c7387c64..0ce853793 100644 --- a/src/Drivers/check_spo.cpp +++ b/src/Drivers/check_spo.cpp @@ -220,17 +220,20 @@ int main(int argc, char** argv) cout << " psi[0] = " << psiMirror(0) << ", psi[4] = " << psiMirror(4) << endl; */ - // @DEBUG: problem is in here + // REF std::cout << "Setting up the reference implementation..."; spo_ref_main.set(nx, ny, nz, norb, nTiles); spo_ref_main.Lattice.set(lattice_b); std::cout << "Done." << std::endl; - /* + /* cout << " looking at the reference version:" << endl; - cout << " coef(2,2,0,0) = " << spo_ref_main.einsplines(0).coefs_view(2,2,0,0) << endl; - cout << " coef(2,0,2,0) = " << spo_ref_main.einsplines(0).coefs_view(2,0,2,0) << endl; - cout << " coef(0,2,2,0) = " << spo_ref_main.einsplines(0).coefs_view(0,2,2,0) << endl; + intptr_t xs = spo_ref_main.einsplines(0).x_stride; + intptr_t ys = spo_ref_main.einsplines(0).y_stride; + intptr_t zs = spo_ref_main.einsplines(0).z_stride; + cout << " coef(2,2,0,0) = " << spo_ref_main.einsplines(0).coefs[xs*2+ys*2+0*zs] << endl; + cout << " coef(2,0,2,0) = " << spo_ref_main.einsplines(0).coefs[xs*2+ys*0+zs*2] << endl; + cout << " coef(0,2,2,0) = " << spo_ref_main.einsplines(0).coefs[xs*0+ys*2+zs*2] << endl; spo_ref_main.evaluate_v(p); cout << " doing evaluate v with position: (" << p[0] << ", " << p[1] << ", " << p[2] << ")" << endl; cout << " psi[0] = " << spo_ref_main.psi(0).operator()(0) << ", psi[4] = " << spo_ref_main.psi(0).operator()(4) << endl; @@ -325,10 +328,10 @@ int main(int argc, char** argv) // hess evalVGH_h_err += std::fabs(hess_host(spNum, 0) - spo_ref.hess[ib].data(0)[n]); evalVGH_h_err += std::fabs(hess_host(spNum, 1) - spo_ref.hess[ib].data(1)[n]); - evalVGH_h_err += std::fabs(hess_host(spNum, 2) - spo_ref.hess[ib].data(3)[n]); - evalVGH_h_err += std::fabs(hess_host(spNum, 3) - spo_ref.hess[ib].data(4)[n]); - evalVGH_h_err += std::fabs(hess_host(spNum, 4) - spo_ref.hess[ib].data(5)[n]); - evalVGH_h_err += std::fabs(hess_host(spNum, 5) - spo_ref.hess[ib].data(6)[n]); + evalVGH_h_err += std::fabs(hess_host(spNum, 2) - spo_ref.hess[ib].data(2)[n]); + evalVGH_h_err += std::fabs(hess_host(spNum, 3) - spo_ref.hess[ib].data(3)[n]); + evalVGH_h_err += std::fabs(hess_host(spNum, 4) - spo_ref.hess[ib].data(4)[n]); + evalVGH_h_err += std::fabs(hess_host(spNum, 5) - spo_ref.hess[ib].data(5)[n]); } if (ur[iel] > accept) { diff --git a/src/Numerics/Spline2/bspline_allocator_ref.hpp b/src/Numerics/Spline2/bspline_allocator_ref.hpp index ec4760dd0..a48d463eb 100644 --- a/src/Numerics/Spline2/bspline_allocator_ref.hpp +++ b/src/Numerics/Spline2/bspline_allocator_ref.hpp @@ -113,6 +113,8 @@ class Allocator void copy(UBT* single, MBT* multi, int i, const int* offset, const int* N); }; + + // @DEBUG: JPT ref calls this one template void Allocator::setCoefficientsForOneOrbital(int i, Array& coeff, @@ -120,18 +122,19 @@ void Allocator::setCoefficientsForOneOrbital(int i, { //#pragma omp parallel for collapse(3) for (int ix = 0; ix < spline->x_grid.num + 3; ix++) - { - for (int iy = 0; iy < spline->y_grid.num + 3; iy++) { - for (int iz = 0; iz < spline->z_grid.num + 3; iz++) - { - intptr_t xs = spline->x_stride; // These might be unset - intptr_t ys = spline->y_stride; - intptr_t zs = spline->z_stride; - spline->coefs[ix * xs + iy * ys + iz * zs + i] = coeff(ix, iy, iz); - } + for (int iy = 0; iy < spline->y_grid.num + 3; iy++) + { + for (int iz = 0; iz < spline->z_grid.num + 3; iz++) + { + intptr_t xs = spline->x_stride; + intptr_t ys = spline->y_stride; + intptr_t zs = spline->z_stride; + int idx = ix * xs + iy * ys + iz * zs + i; + spline->coefs[idx] = coeff(ix, iy, iz); + } + } } - } } // @DEBUG JPT: ref impl calls this one