Skip to content

Commit

Permalink
Update gpu elixir and async array synchronization for sycl (AMReX-Cod…
Browse files Browse the repository at this point in the history
…es#3498)

Updated the way GpuElixir and GpuAsyncArray synchronize. Instead of
calling Gpu::streamSynchronize(), they now use a host task by default,
since host tasks are part of SYCL 2020. Removed the codeplay-extension version.

In addition, the option SYCL_PARALLEL_LINK_JOBS was added to dpcpp.mak.
Setting it to a positive integer <n> allows the SYCL code to be linked
with <n> threads, speeding up the whole compilation process, especially
when using AOT in SYCL.
  • Loading branch information
cbauinge authored Aug 21, 2023
1 parent dda0d97 commit 07c4332
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 30 deletions.
9 changes: 1 addition & 8 deletions Src/Base/AMReX_GpuAsyncArray.H
Original file line number Diff line number Diff line change
Expand Up @@ -86,26 +86,19 @@ public:
amrex_asyncarray_delete, p));
#endif
#elif defined(AMREX_USE_SYCL)
#ifdef AMREX_USE_CODEPLAY_HOST_TASK
auto* pd = d_data;
auto* ph = h_data;
auto& q = *(Gpu::gpuStream().queue);
try {
q.submit([&] (sycl::handler& h) {
h.codeplay_host_task([=] () {
h.host_task([=] () {
The_Arena()->free(pd);
The_Pinned_Arena()->free(ph);
});
});
} catch (sycl::exception const& ex) {
amrex::Abort(std::string("host_task: ")+ex.what()+"!!!!!");
}
#else
// xxxxx SYCL todo
Gpu::streamSynchronize();
The_Arena()->free(d_data);
The_Pinned_Arena()->free(h_data);
#endif
#endif
}
}
Expand Down
32 changes: 12 additions & 20 deletions Src/Base/AMReX_GpuElixir.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,27 +48,19 @@ Elixir::clear () noexcept
amrex_elixir_delete, (void*)p));
#endif
#elif defined(AMREX_USE_SYCL)
#ifdef AMREX_USE_CODEPLAY_HOST_TASK
auto lpa = std::move(m_pa);
auto& q = *(Gpu::gpuStream().queue);
try {
q.submit([&] (sycl::handler& h) {
h.codeplay_host_task([=] () {
for (auto const& pa : lpa) {
pa.second->free(pa.first);
}
});
auto lpa = std::move(m_pa);
auto& q = *(Gpu::gpuStream().queue);
try {
q.submit([&] (sycl::handler& h) {
h.host_task([=] () {
for (auto const& pa : lpa) {
pa.second->free(pa.first);
}
});
} catch (sycl::exception const& ex) {
amrex::Abort(std::string("host_task: ")+ex.what()+"!!!!!");
}
#else
// xxxxx SYCL todo
Gpu::streamSynchronize();
for (auto const& pa : m_pa) {
pa.second->free(pa.first);
}
#endif
});
} catch (sycl::exception const& ex) {
amrex::Abort(std::string("host_task: ")+ex.what()+"!!!!!");
}
#endif
}
}
Expand Down
8 changes: 6 additions & 2 deletions Tools/GNUMake/comps/dpcpp.mak
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,8 @@ ifeq ($(DEBUG),TRUE)

else

CXXFLAGS += -g1 -O3 # // xxxx SYCL: todo -g in beta6 causes a lot of warning messages
CFLAGS += -g1 -O3 # and makes linking much slower
CXXFLAGS += -gline-tables-only -fdebug-info-for-profiling -O3 # // xxxx SYCL: todo -g in beta6 causes a lot of warning messages
CFLAGS += -gline-tables-only -fdebug-info-for-profiling -O3 # and makes linking much slower
FFLAGS += -g -O3
F90FLAGS += -g -O3

Expand Down Expand Up @@ -125,6 +125,10 @@ endif

LDFLAGS += -fsycl-device-lib=libc,libm-fp32,libm-fp64

ifdef SYCL_PARALLEL_LINK_JOBS
LDFLAGS += -fsycl-max-parallel-link-jobs=$(SYCL_PARALLEL_LINK_JOBS)
endif

ifeq ($(SYCL_AOT),TRUE)
ifndef AMREX_INTEL_ARCH
ifdef INTEL_ARCH
Expand Down

0 comments on commit 07c4332

Please sign in to comment.