Skip to content

Commit

Permalink
PackInfo code generation (#4994)
Browse files Browse the repository at this point in the history
Fixes #4921

Description of changes:
- reduce volume of the LB communication during the streaming step
- avoid superfluous LB ghost communication outside the streaming step
- use AVX streaming kernels
  • Loading branch information
kodiakhq[bot] authored Sep 27, 2024
2 parents 9527087 + 960c2ad commit 03033c4
Show file tree
Hide file tree
Showing 35 changed files with 3,906 additions and 48 deletions.
3 changes: 2 additions & 1 deletion maintainer/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -150,4 +150,5 @@ add_custom_target(
COMMAND ${CMAKE_CTEST_COMMAND} --timeout ${ESPRESSO_TEST_TIMEOUT}
${ESPRESSO_CTEST_ARGS} --output-on-failure)

add_dependencies(benchmark benchmark_python benchmarks_data)
add_dependencies(benchmark_python pypresso benchmarks_data)
add_dependencies(benchmark benchmark_python)
2 changes: 1 addition & 1 deletion maintainer/benchmarks/benchmarks.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ def get_timings(system, n_steps, n_iterations, verbose=True):
energy = system.analysis.energy()["total"]
verlet = system.cell_system.get_state()["verlet_reuse"]
print(
f"step {i}, time: {1000 * t:.1f} ms, verlet: {verlet:.2f}, energy: {energy:.2e}")
f"step {i}, time: {1000 * t:.2f} ms, verlet: {verlet:.2f}, energy: {energy:.2e}")
return np.array(timings)


Expand Down
2 changes: 1 addition & 1 deletion maintainer/benchmarks/lb.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@

# average time
avg, ci = benchmarks.get_average_time(timings)
print(f"average: {1000 * avg:.1f} +/- {1000 * ci:.1f} ms (95% C.I.)")
print(f"average: {1000 * avg:.2f} +/- {1000 * ci:.2f} ms (95% C.I.)")

# write report
benchmarks.write_report(args.output, n_proc, timings, measurement_steps)
39 changes: 32 additions & 7 deletions maintainer/walberla_kernels/generate_lb_kernels.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,8 @@ def paramlist(parameters, keys):
stencil = lbmpy.stencils.LBStencil(lbmpy.enums.Stencil.D3Q19)
fields = pystencils_espresso.generate_fields(config, stencil)
force_field = fields["force"]
lbm_opt = lbmpy.LBMOptimisation(symbolic_field=fields["pdfs"])
streaming_pattern = "push"

# LB Method definition
method = lbmpy.creationfunctions.create_mrt_orthogonal(
Expand Down Expand Up @@ -133,12 +135,11 @@ def paramlist(parameters, keys):
force_model=lbmpy.ForceModel.GUO,
force=force_field.center_vector,
kernel_type="collide_only")
lbm_opt = lbmpy.LBMOptimisation(symbolic_field=fields["pdfs"])
le_collision_rule_unthermalized = lbmpy.create_lb_update_rule(
le_update_rule_unthermalized = lbmpy.create_lb_update_rule(
lbm_config=le_config,
lbm_optimisation=lbm_opt)
le_collision_rule_unthermalized = lees_edwards.add_lees_edwards_to_collision(
config, le_collision_rule_unthermalized,
config, le_update_rule_unthermalized,
fields["pdfs"], stencil, 1) # shear_dir_normal y
for params, target_suffix in paramlist(parameters, ("GPU", "CPU", "AVX")):
pystencils_espresso.generate_collision_sweep(
Expand All @@ -153,8 +154,8 @@ def paramlist(parameters, keys):
ps.TypedSymbol(f"block_offset_{i}", np.uint32)
for i in range(3))

# generate thermalized LB
collision_rule_thermalized = lbmpy.creationfunctions.create_lb_collision_rule(
# generate thermalized LB collision rule
lb_collision_rule_thermalized = lbmpy.creationfunctions.create_lb_collision_rule(
method,
zero_centered=False,
fluctuating={
Expand All @@ -170,7 +171,7 @@ def paramlist(parameters, keys):
pystencils_espresso.generate_collision_sweep(
ctx,
method,
collision_rule_thermalized,
lb_collision_rule_thermalized,
stem,
params,
block_offset=block_offsets,
Expand All @@ -192,6 +193,30 @@ def paramlist(parameters, keys):
ctx, config, method, templates
)

# generate PackInfo
assignments = pystencils_espresso.generate_pack_info_pdfs_field_assignments(
fields, streaming_pattern="pull")
spec = pystencils_espresso.generate_pack_info_vector_field_specifications(
config, stencil, force_field.layout)
for params, target_suffix in paramlist(parameters, ["CPU"]):
pystencils_walberla.generate_pack_info_from_kernel(
ctx, f"PackInfoPdf{precision_prefix}{target_suffix}", assignments,
kind="pull", **params)
pystencils_walberla.generate_pack_info(
ctx, f"PackInfoVec{precision_prefix}{target_suffix}", spec, **params)
if target_suffix == "CUDA":
continue
token = "\n //TODO: optimize by generating kernel for this case\n"
for field_suffix in ["Pdf", "Vec"]:
class_name = f"PackInfo{field_suffix}{precision_prefix}{target_suffix}" # nopep8
with open(f"{class_name}.h", "r+") as f:
content = f.read()
assert token in content
content = content.replace(token, "\n")
f.seek(0)
f.truncate()
f.write(content)

# boundary conditions
ubb_dynamic = lbmpy_espresso.UBB(
lambda *args: None, dim=3, data_type=config.data_type.default_factory())
Expand All @@ -202,7 +227,7 @@ def paramlist(parameters, keys):
lbmpy_walberla.generate_boundary(
ctx, f"Dynamic_UBB_{precision_suffix}{target_suffix}", ubb_dynamic,
method, additional_data_handler=ubb_data_handler,
streaming_pattern="push", target=target)
streaming_pattern=streaming_pattern, target=target)

with open(f"Dynamic_UBB_{precision_suffix}{target_suffix}.h", "r+") as f:
content = f.read()
Expand Down
57 changes: 55 additions & 2 deletions maintainer/walberla_kernels/pystencils_espresso.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,9 +164,8 @@ def __init__(self, dim, time_step=ps.typing.TypedSymbol(
data_type_np = {'double': 'float64', 'float': 'float32'}


def generate_fields(config, stencil):
def generate_fields(config, stencil, field_layout='fzyx'):
dtype = data_type_np[config.data_type.default_factory().c_name]
field_layout = 'fzyx'
q = len(stencil)
dim = len(stencil[0])

Expand Down Expand Up @@ -208,6 +207,60 @@ def generate_fields(config, stencil):
return fields


def generate_pack_info_pdfs_field_assignments(fields, streaming_pattern):
"""
Visualize the stencil directions with::
import lbmpy
import matplotlib.pyplot as plt
stencil = lbmpy.LBStencil(lbmpy.Stencil.D3Q19)
stencil.plot(data=[i for i in range(19)])
plt.show()
"""
stencil = lbmpy.enums.Stencil.D3Q19
lbm_config = lbmpy.LBMConfig(stencil=stencil,
method=lbmpy.Method.CUMULANT,
compressible=True,
zero_centered=False,
weighted=True,
streaming_pattern=streaming_pattern,
relaxation_rate=sp.Symbol("omega_shear"),
)
lbm_opt = lbmpy.LBMOptimisation(
symbolic_field=fields["pdfs" if streaming_pattern ==
"pull" else "pdfs_tmp"],
symbolic_temporary_field=fields["pdfs" if streaming_pattern ==
"push" else "pdfs_tmp"],
field_layout=fields['pdfs'].layout)
lbm_update_rule = lbmpy.create_lb_update_rule(
lbm_config=lbm_config,
lbm_optimisation=lbm_opt)
return lbm_update_rule.all_assignments


def generate_pack_info_vector_field_specifications(config, stencil, layout):
import collections
import itertools
field = ps.Field.create_generic(
"field",
3,
data_type_np[config.data_type.default_factory().c_name],
index_dimensions=1,
layout=layout,
index_shape=(3,)
)
q = len(stencil)
coord = itertools.product(*[(-1, 0, 1)] * 3)
if q == 19:
dirs = tuple((i, j, k) for i, j, k in coord if i**2 + j**2 + k**2 != 3)
else:
dirs = tuple((i, j, k) for i, j, k in coord)
spec = collections.defaultdict(set)
spec[dirs] = {field[0, 0, 0](i) for i in range(3)}
return spec


def generate_config(ctx, params):
return pystencils_walberla.utility.config_from_context(ctx, **params)

Expand Down
7 changes: 7 additions & 0 deletions src/core/integrate.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -629,6 +629,7 @@ int System::System::integrate(int n_steps, int reuse_forces) {
propagation.lb_skipped_md_steps = 0;
propagation.ek_skipped_md_steps = 0;
lb.propagate();
lb.ghost_communication_vel();
ek.propagate();
}
} else if (lb_active) {
Expand All @@ -654,6 +655,9 @@ int System::System::integrate(int n_steps, int reuse_forces) {
#ifdef VIRTUAL_SITES_INERTIALESS_TRACERS
if (thermostat->lb and
(propagation.used_propagations & PropagationMode::TRANS_LB_TRACER)) {
if (lb_active) {
lb.ghost_communication_vel();
}
lb_tracers_propagate(*cell_structure, lb, time_step);
}
#endif
Expand All @@ -678,6 +682,9 @@ int System::System::integrate(int n_steps, int reuse_forces) {
}

} // for-loop over integration steps
if (lb_active) {
lb.ghost_communication();
}
lees_edwards->update_box_params(*box_geo, sim_time);
#ifdef CALIPER
CALI_CXX_MARK_LOOP_END(integration_loop);
Expand Down
3 changes: 3 additions & 0 deletions src/core/lb/LBNone.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@ namespace LB {

struct LBNone {
void propagate() { throw NoLBActive{}; }
void ghost_communication() { throw NoLBActive{}; }
void ghost_communication_pdf() { throw NoLBActive{}; }
void ghost_communication_vel() { throw NoLBActive{}; }
double get_agrid() const { throw NoLBActive{}; }
double get_tau() const { throw NoLBActive{}; }
double get_kT() const { throw NoLBActive{}; }
Expand Down
10 changes: 10 additions & 0 deletions src/core/lb/LBWalberla.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,16 @@ Utils::VectorXd<9> LBWalberla::get_pressure_tensor() const {

void LBWalberla::propagate() { lb_fluid->integrate(); }

void LBWalberla::ghost_communication() { lb_fluid->ghost_communication(); }

void LBWalberla::ghost_communication_pdf() {
lb_fluid->ghost_communication_vel();
}

void LBWalberla::ghost_communication_vel() {
lb_fluid->ghost_communication_vel();
}

void LBWalberla::lebc_sanity_checks(unsigned int shear_direction,
unsigned int shear_plane_normal) const {
lb_fluid->check_lebc(shear_direction, shear_plane_normal);
Expand Down
3 changes: 3 additions & 0 deletions src/core/lb/LBWalberla.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,9 @@ struct LBWalberla {
std::vector<Utils::Vector3d>
get_velocities_at_pos(std::vector<Utils::Vector3d> const &pos);
void propagate();
void ghost_communication();
void ghost_communication_pdf();
void ghost_communication_vel();
void veto_time_step(double time_step) const;
void veto_kT(double kT) const;
void sanity_checks(System::System const &system) const;
Expand Down
15 changes: 15 additions & 0 deletions src/core/lb/Solver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,21 @@ void Solver::propagate() {
std::visit([](auto &ptr) { ptr->propagate(); }, *impl->solver);
}

void Solver::ghost_communication() {
check_solver(impl);
std::visit([](auto &ptr) { ptr->ghost_communication(); }, *impl->solver);
}

void Solver::ghost_communication_pdf() {
check_solver(impl);
std::visit([](auto &ptr) { ptr->ghost_communication_pdf(); }, *impl->solver);
}

void Solver::ghost_communication_vel() {
check_solver(impl);
std::visit([](auto &ptr) { ptr->ghost_communication_vel(); }, *impl->solver);
}

void Solver::sanity_checks() const {
if (impl->solver) {
auto const &system = get_system();
Expand Down
15 changes: 15 additions & 0 deletions src/core/lb/Solver.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,21 @@ struct Solver : public System::Leaf<Solver> {
*/
void propagate();

/**
* @brief Perform a full ghost communication.
*/
void ghost_communication();

/**
* @brief Perform a ghost communication of the PDF field.
*/
void ghost_communication_pdf();

/**
* @brief Perform a ghost communication of the velocity field.
*/
void ghost_communication_vel();

/**
* @brief Perform a full initialization of the lattice-Boltzmann system.
* All derived parameters and the fluid are reset to their default values.
Expand Down
1 change: 1 addition & 0 deletions src/core/lb/particle_coupling.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -360,6 +360,7 @@ void System::System::lb_couple_particles() {
auto const ghost_particles = cell_structure->ghost_particles();
LB::ParticleCoupling coupling{*thermostat->lb, lb, *box_geo, *local_geo};
LB::CouplingBookkeeping bookkeeping{*cell_structure};
lb.ghost_communication_vel();
std::vector<Particle *> particles{};
for (auto const *particle_range : {&real_particles, &ghost_particles}) {
for (auto &p : *particle_range) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,10 +45,12 @@ CylindricalLBFluxDensityProfileAtParticlePositions::evaluate(
local_folded_positions.reserve(local_particles.size());
local_flux_densities.reserve(local_particles.size());

auto const &system = System::get_system();
auto &system = System::get_system();
auto const &box_geo = *system.box_geo;
auto const &lb = system.lb;
auto &lb = system.lb;
auto const vel_conv = lb.get_lattice_speed();
lb.ghost_communication_pdf();
lb.ghost_communication_vel();

for (auto const &p : local_particles) {
auto const pos = box_geo.folded_position(traits.position(p));
Expand Down
3 changes: 2 additions & 1 deletion src/core/observables/CylindricalLBVelocityProfile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,9 @@ std::vector<double> CylindricalLBVelocityProfile::operator()(
decltype(sampling_positions) local_positions{};
std::vector<vel_type> local_velocities{};

auto const &lb = System::get_system().lb;
auto &lb = System::get_system().lb;
auto const vel_conv = lb.get_lattice_speed();
lb.ghost_communication_vel();

for (auto const &pos : sampling_positions) {
if (auto const vel = lb.get_interpolated_velocity(pos)) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,11 @@ std::vector<double> CylindricalLBVelocityProfileAtParticlePositions::evaluate(
local_folded_positions.reserve(local_particles.size());
local_velocities.reserve(local_particles.size());

auto const &system = System::get_system();
auto &system = System::get_system();
auto const &box_geo = *system.box_geo;
auto const &lb = system.lb;
auto &lb = system.lb;
auto const vel_conv = lb.get_lattice_speed();
lb.ghost_communication_vel();

for (auto const &p : local_particles) {
auto const pos = box_geo.folded_position(traits.position(p));
Expand Down
3 changes: 2 additions & 1 deletion src/core/observables/LBVelocityProfile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,9 @@ LBVelocityProfile::operator()(boost::mpi::communicator const &comm) const {
decltype(sampling_positions) local_positions{};
std::vector<vel_type> local_velocities{};

auto const &lb = System::get_system().lb;
auto &lb = System::get_system().lb;
auto const vel_conv = lb.get_lattice_speed();
lb.ghost_communication_vel();

for (auto const &pos : sampling_positions) {
if (auto const vel = lb.get_interpolated_velocity(pos)) {
Expand Down
3 changes: 3 additions & 0 deletions src/core/unit_tests/lb_particle_coupling_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -609,6 +609,9 @@ BOOST_AUTO_TEST_CASE(lb_exceptions) {
BOOST_CHECK_THROW(lb.lebc_sanity_checks(0u, 1u), NoLBActive);
BOOST_CHECK_THROW(lb.propagate(), NoLBActive);
BOOST_CHECK_THROW(lb.update_collision_model(), NoLBActive);
BOOST_CHECK_THROW(lb.ghost_communication(), NoLBActive);
BOOST_CHECK_THROW(lb.ghost_communication_pdf(), NoLBActive);
BOOST_CHECK_THROW(lb.ghost_communication_vel(), NoLBActive);
BOOST_CHECK_THROW(lb.on_cell_structure_change(), NoLBActive);
BOOST_CHECK_THROW(lb.on_boxl_change(), NoLBActive);
BOOST_CHECK_THROW(lb.on_node_grid_change(), NoLBActive);
Expand Down
1 change: 1 addition & 0 deletions src/script_interface/walberla/LBFluid.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,7 @@ void LBFluid::do_construct(VariantMap const &params) {
::LB::LBWalberla::update_collision_model(*m_instance, *m_lb_params, lb_kT,
static_cast<unsigned int>(seed));
m_instance->set_external_force(lb_ext_f);
m_instance->ghost_communication();
for (auto &vtk : m_vtk_writers) {
vtk->attach_to_lattice(m_instance, get_latice_to_md_units_conversion());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,12 +42,23 @@ class LBWalberlaBase : public LatticeModel {
public:
~LBWalberlaBase() override = default;

/** @brief Integrate LB for one time step. */
/**
* @brief Integrate LB for one time step.
* The ghost layer may be out-of-date after integration.
* Call @ref ghost_communication() to refresh them before
* calling any getter function that reads from the halo region.
*/
virtual void integrate() = 0;

/** @brief Perform ghost communication of PDF and applied forces. */
/** @brief Perform a full ghost communication. */
virtual void ghost_communication() = 0;

/** @brief Perform a ghost communication of the PDF field. */
virtual void ghost_communication_pdf() = 0;

/** @brief Perform a ghost communication of the velocity field. */
virtual void ghost_communication_vel() = 0;

/** @brief Number of discretized velocities in the PDF. */
virtual std::size_t stencil_size() const noexcept = 0;

Expand Down
Loading

0 comments on commit 03033c4

Please sign in to comment.