diff --git a/src/synergia/bunch/bunch.h b/src/synergia/bunch/bunch.h index f28a341d8..9886550b3 100644 --- a/src/synergia/bunch/bunch.h +++ b/src/synergia/bunch/bunch.h @@ -658,7 +658,7 @@ class bunch_t { // checkpoint partiles void - save_checkpoint_particles(Hdf5_file& file, int idx) const + save_checkpoint_particles(Hdf5_file& file, int idx) { get_bunch_particles(PG::regular).save_checkpoint_particles(file, idx); get_bunch_particles(PG::spectator).save_checkpoint_particles(file, idx); diff --git a/src/synergia/bunch/bunch_particles.cc b/src/synergia/bunch/bunch_particles.cc index cb4bb25fd..56d94d225 100644 --- a/src/synergia/bunch/bunch_particles.cc +++ b/src/synergia/bunch/bunch_particles.cc @@ -718,8 +718,7 @@ bunch_particles_t::print_particle(size_t idx, Logger& logger) const template <> void -bunch_particles_t::save_checkpoint_particles(Hdf5_file& file, - int idx) const +bunch_particles_t::save_checkpoint_particles(Hdf5_file& file, int idx) { checkout_particles(); diff --git a/src/synergia/bunch/bunch_particles.h b/src/synergia/bunch/bunch_particles.h index b111df11c..33a019374 100644 --- a/src/synergia/bunch/bunch_particles.h +++ b/src/synergia/bunch/bunch_particles.h @@ -16,6 +16,9 @@ #include #endif +// To keep track of the memory location for the particles +enum class MemoryLocation { Host, Device }; + enum class ParticleGroup { regular = 0, spectator = 1 }; using Particles = Kokkos::View get_local_particle_count_in_range(int num_part, int offset) const { @@ -303,17 +316,19 @@ class bunch_particles_t { // copy particles/masks between host and device memories void - checkin_particles() const + checkin_particles() { Kokkos::deep_copy(parts, hparts); Kokkos::deep_copy(masks, hmasks); + memory_location = MemoryLocation::Device; } void - checkout_particles() const + checkout_particles() { Kokkos::deep_copy(hparts, parts); Kokkos::deep_copy(hmasks, masks); + memory_location = MemoryLocation::Host; } // change capacity (can only increase) @@ -395,7 +410,7 @@ 
class bunch_particles_t { Commxx const& comm) const; // checkpoint save/load - void save_checkpoint_particles(Hdf5_file& file, int idx) const; + void save_checkpoint_particles(Hdf5_file& file, int idx); void load_checkpoint_particles(Hdf5_file& file, int idx); // assign ids cooperatively @@ -427,6 +442,7 @@ class bunch_particles_t { save(AR& ar) const { ar(CEREAL_NVP(label)); + ar(CEREAL_NVP(memory_location)); ar(CEREAL_NVP(n_valid)); ar(CEREAL_NVP(n_active)); ar(CEREAL_NVP(n_reserved)); @@ -442,6 +458,7 @@ class bunch_particles_t { load(AR& ar) { ar(CEREAL_NVP(label)); + ar(CEREAL_NVP(memory_location)); ar(CEREAL_NVP(n_valid)); ar(CEREAL_NVP(n_active)); ar(CEREAL_NVP(n_reserved)); @@ -580,6 +597,7 @@ inline bunch_particles_t::bunch_particles_t(ParticleGroup pg, Commxx const& comm) : group(pg) , label(pg == PG::regular ? "particles" : "spectators") + , memory_location(MemoryLocation::Host) , n_valid(0) , n_active(0) , n_reserved(0) diff --git a/src/synergia/bunch/core_diagnostics.cc b/src/synergia/bunch/core_diagnostics.cc index 81d6bb7a4..b4a1eee24 100644 --- a/src/synergia/bunch/core_diagnostics.cc +++ b/src/synergia/bunch/core_diagnostics.cc @@ -1,10 +1,6 @@ #include -#include #include -#include "synergia/foundation/physical_constants.h" -#include "synergia/utils/logger.h" - #include "core_diagnostics.h" namespace core_diagnostics_impl { @@ -210,8 +206,13 @@ Core_diagnostics::calculate_mean(Bunch const& bunch) karray1d mean("mean", 6); - auto particles = bunch.get_local_particles(); - auto masks = bunch.get_local_particle_masks(); + auto bparts = bunch.get_bunch_particles(); + if (bparts.get_memory_location() == MemoryLocation::Host) { + throw std::runtime_error("Bunch particles are active on Host memory space!"); + } + + auto particles = bparts.parts; + auto masks = bparts.masks; const int npart = bunch.size(); particle_reducer pr(particles, masks); @@ -235,8 +236,12 @@ Core_diagnostics::calculate_z_mean(Bunch const& bunch) double mean = 0; - auto particles = 
bunch.get_local_particles(); - auto masks = bunch.get_local_particle_masks(); + auto bparts = bunch.get_bunch_particles(); + if (bparts.get_memory_location() == MemoryLocation::Host) { + throw std::runtime_error("Bunch particles are active on Host memory space!"); + } + auto particles = bparts.parts; + auto masks = bparts.masks; const int npart = bunch.size(); particle_reducer pr(particles, masks); @@ -259,8 +264,12 @@ Core_diagnostics::calculate_abs_mean(Bunch const& bunch) karray1d abs_mean("abs_mean", 6); - auto particles = bunch.get_local_particles(); - auto masks = bunch.get_local_particle_masks(); + auto bparts = bunch.get_bunch_particles(); + if (bparts.get_memory_location() == MemoryLocation::Host) { + throw std::runtime_error("Bunch particles are active on Host memory space!"); + } + auto particles = bparts.parts; + auto masks = bparts.masks; const int npart = bunch.size(); particle_reducer pr(particles, masks); @@ -288,8 +297,12 @@ Core_diagnostics::calculate_std(Bunch const& bunch, karray1d const& mean) karray1d std("std", 6); - auto particles = bunch.get_local_particles(); - auto masks = bunch.get_local_particle_masks(); + auto bparts = bunch.get_bunch_particles(); + if (bparts.get_memory_location() == MemoryLocation::Host) { + throw std::runtime_error("Bunch particles are active on Host memory space!"); + } + auto particles = bparts.parts; + auto masks = bparts.masks; const int npart = bunch.size(); particle_reducer pr(particles, masks, mean); @@ -312,8 +325,12 @@ Core_diagnostics::calculate_sum2(Bunch const& bunch, karray1d const& mean) karray2d_row sum2("sum2", 6, 6); - auto particles = bunch.get_local_particles(); - auto masks = bunch.get_local_particle_masks(); + auto bparts = bunch.get_bunch_particles(); + if (bparts.get_memory_location() == MemoryLocation::Host) { + throw std::runtime_error("Bunch particles are active on Host memory space!"); + } + auto particles = bparts.parts; + auto masks = bparts.masks; auto npart = bunch.size(); particle_reducer pr(particles, 
masks, mean); @@ -354,8 +371,12 @@ Core_diagnostics::calculate_min(Bunch const& bunch) min(1) = 1e100; min(2) = 1e100; - auto particles = bunch.get_local_particles(); - auto masks = bunch.get_local_particle_masks(); + auto bparts = bunch.get_bunch_particles(); + if (bparts.get_memory_location() == MemoryLocation::Host) { + throw std::runtime_error("Bunch particles are active on Host memory space!"); + } + auto particles = bparts.parts; + auto masks = bparts.masks; const int npart = bunch.size(); particle_reducer pr(particles, masks); @@ -379,8 +400,12 @@ Core_diagnostics::calculate_max(Bunch const& bunch) max(1) = -1e100; max(2) = -1e100; - auto particles = bunch.get_local_particles(); - auto masks = bunch.get_local_particle_masks(); + auto bparts = bunch.get_bunch_particles(); + if (bparts.get_memory_location() == MemoryLocation::Host) { + throw std::runtime_error("Bunch particles are active on Host memory space!"); + } + auto particles = bparts.parts; + auto masks = bparts.masks; const int npart = bunch.size(); particle_reducer pr(particles, masks); @@ -399,8 +424,12 @@ Core_diagnostics::calculate_spatial_mean_stddev(Bunch const& bunch) using core_diagnostics_impl::particle_reducer; using core_diagnostics_impl::spatial_mean_stddev_tag; - auto particles = bunch.get_local_particles(); - auto masks = bunch.get_local_particle_masks(); + auto bparts = bunch.get_bunch_particles(); + if (bparts.get_memory_location() == MemoryLocation::Host) { + throw std::runtime_error("Bunch particles are active on Host memory space!"); + } + auto particles = bparts.parts; + auto masks = bparts.masks; const int npart = bunch.size(); const auto total_bunch_particles = bunch.get_total_num(); diff --git a/src/synergia/bunch/tests/test_bunch.cc b/src/synergia/bunch/tests/test_bunch.cc index df1492170..3c5e11747 100644 --- a/src/synergia/bunch/tests/test_bunch.cc +++ b/src/synergia/bunch/tests/test_bunch.cc @@ -41,17 +41,16 @@ TEST_CASE("Bunch", "[Bunch]") CHECK(p2(0, 6) == 123); CHECK(p2(1, 6) == 124); 
CHECK(p2(4, 6) == 127); - - CHECK(bunch.get_real_num() == Approx(1e13) ); - bunch.set_real_num(1.2e13); - CHECK(bunch.get_real_num() == Approx(1.2e13) ); + CHECK(bunch.get_real_num() == Approx(1e13)); + bunch.set_real_num(1.2e13); + CHECK(bunch.get_real_num() == Approx(1.2e13)); } #if defined SYNERGIA_HAVE_OPENPMD void -check_particle_values(BunchParticles const& bp1, BunchParticles const& bp2) +check_particle_values(BunchParticles& bp1, BunchParticles& bp2) { bp1.checkout_particles(); bp2.checkout_particles(); diff --git a/src/synergia/bunch/tests/test_bunch_particles.cc b/src/synergia/bunch/tests/test_bunch_particles.cc index 738dc1a3a..9d742e492 100644 --- a/src/synergia/bunch/tests/test_bunch_particles.cc +++ b/src/synergia/bunch/tests/test_bunch_particles.cc @@ -25,7 +25,7 @@ init_particle_values(BunchParticles& bp) } void -check_particle_values(BunchParticles const& bp) +check_particle_values(BunchParticles& bp) { bp.checkout_particles(); diff --git a/src/synergia/simulation/bunch_simulator.cc b/src/synergia/simulation/bunch_simulator.cc index 16fed5f17..884c6e0ff 100644 --- a/src/synergia/simulation/bunch_simulator.cc +++ b/src/synergia/simulation/bunch_simulator.cc @@ -502,7 +502,7 @@ namespace { } void -Bunch_simulator::save_checkpoint_particles(std::string const& fname) const +Bunch_simulator::save_checkpoint_particles(std::string const& fname) { Hdf5_file file(fname, Hdf5_file::Flag::truncate, *comm); auto bunches = get_bunch_ptrs(trains); diff --git a/src/synergia/simulation/bunch_simulator.h b/src/synergia/simulation/bunch_simulator.h index 6dc4ee6ad..80b669b23 100644 --- a/src/synergia/simulation/bunch_simulator.h +++ b/src/synergia/simulation/bunch_simulator.h @@ -430,7 +430,7 @@ class Bunch_simulator { const_karray1d limits); // serialization helper - void save_checkpoint_particles(std::string const& fname) const; + void save_checkpoint_particles(std::string const& fname); void load_checkpoint_particles(std::string const& fname); std::string diff 
--git a/src/synergia/simulation/propagator.cc b/src/synergia/simulation/propagator.cc index 47c12c4ee..f19af0f88 100644 --- a/src/synergia/simulation/propagator.cc +++ b/src/synergia/simulation/propagator.cc @@ -46,6 +46,21 @@ Propagator::do_step(Bunch_simulator& simulator, // lattice elements has been updated lattice.update(); + // ensure that particles are on the device in case any of the + // following might have transferred them onto the host + // custom diagnostics routines, turn_end_action, etc + Kokkos::Profiling::pushRegion("memory-location check before step-apply"); + for (auto& train : simulator.get_trains()) { + for (auto& bunch : train.get_bunches()) { + auto bparts = bunch.get_bunch_particles(); + if (bparts.get_memory_location() == MemoryLocation::Host) { + throw std::runtime_error( + "Bunch particles are active on Host memory space!"); + } + } + } + Kokkos::Profiling::popRegion(); + // propagate through the step step.apply(simulator, logger); diff --git a/src/synergia/simulation/step.cc b/src/synergia/simulation/step.cc index 5f47339f0..249449d26 100644 --- a/src/synergia/simulation/step.cc +++ b/src/synergia/simulation/step.cc @@ -4,27 +4,30 @@ #include "synergia/foundation/physical_constants.h" namespace { - void - apply_longitudinal_boundary(Bunch& bunch) - { - // Bunch longitudinal boundary condition - auto lb = bunch.get_longitudinal_boundary(); - - switch (lb.first) { - case LongitudinalBoundary::periodic: - apply_longitudinal_periodicity(bunch, lb.second); - break; - - case LongitudinalBoundary::aperture: apply_zcut(bunch, lb.second); break; - - case LongitudinalBoundary::bucket_barrier: - apply_longitudinal_bucket_barrier(bunch, lb.second); - break; - - case LongitudinalBoundary::open: - default: break; + void + apply_longitudinal_boundary(Bunch& bunch) + { + // Bunch longitudinal boundary condition + auto lb = bunch.get_longitudinal_boundary(); + + switch (lb.first) { + case LongitudinalBoundary::periodic: + apply_longitudinal_periodicity(bunch, 
lb.second); + break; + + case LongitudinalBoundary::aperture: + apply_zcut(bunch, lb.second); + break; + + case LongitudinalBoundary::bucket_barrier: + apply_longitudinal_bucket_barrier(bunch, lb.second); + break; + + case LongitudinalBoundary::open: + default: + break; + } } - } } Step::Step(double length) : operators(), step_betas(), length(length) {} @@ -32,43 +35,45 @@ Step::Step(double length) : operators(), step_betas(), length(length) {} void Step::create_operations(Lattice const& lattice) { - for (auto& op : operators) { op->create_operations(lattice); } + for (auto& op : operators) { + op->create_operations(lattice); + } } void Step::apply(Bunch_simulator& simulator, Logger& logger) const { - if (simulator[0].get_bunch_array_size() == 0) { - throw std::runtime_error( - "Step::apply() unable to proceed. no bunch in the simulator"); - } + if (simulator[0].get_bunch_array_size() == 0) { + throw std::runtime_error( + "Step::apply() unable to proceed. no bunch in the simulator"); + } - // time [s] in accelerator frame - double ref_beta = simulator[0][0].get_reference_particle().get_beta(); - double time = length / (ref_beta * pconstants::c); + // time [s] in accelerator frame + double ref_beta = simulator[0][0].get_reference_particle().get_beta(); + double time = length / (ref_beta * pconstants::c); - for (auto const& op : operators) { - double t0 = MPI_Wtime(); + for (auto const& op : operators) { + double t0 = MPI_Wtime(); - logger(LoggerV::INFO_OPR) << "\n Operator start:\n"; + logger(LoggerV::INFO_OPR) << "\n Operator start:\n"; - // operator apply - op->apply(simulator, time, logger); + // operator apply + op->apply(simulator, time, logger); - double t1 = MPI_Wtime(); + double t1 = MPI_Wtime(); - logger(LoggerV::INFO_OPR) - << " Operator finish: operator: name = " << op->get_name() - << ", type = " << op->get_type() << ", time = " << std::fixed - << std::setprecision(3) << t1 - t0 << "s" - << "\n"; + logger(LoggerV::INFO_OPR) + << " Operator finish: 
operator: name = " << op->get_name() + << ", type = " << op->get_type() << ", time = " << std::fixed + << std::setprecision(3) << t1 - t0 << "s" + << "\n"; - // per operator diagnostics action - simulator.diag_action_operator(*op); + // per operator diagnostics action + simulator.diag_action_operator(*op); - // longitudinal conditions - for (auto& train : simulator.get_trains()) - for (auto& bunch : train.get_bunches()) - apply_longitudinal_boundary(bunch); - } + // longitudinal conditions + for (auto& train : simulator.get_trains()) + for (auto& bunch : train.get_bunches()) + apply_longitudinal_boundary(bunch); + } }