Histogram: spell out the ScatterView configuration used for the reduction.

The temporary scatter view for the histogram reduction no longer relies on
the ScatterView defaults. Both the member declaration (outputs.hpp) and its
construction in the Histogram constructor (histogram.cpp) now name the same
explicit type: ScatterSum / ScatterDuplicated / ScatterNonAtomic on
parthenon::DevExecSpace. Per the comment added in outputs.hpp, the defaults
turned out to be extremely slow on MI250X.

The scatter view is constructed from the `result_` member's Kokkos view, i.e.
the array allocated on the line directly above it.

diff --git a/src/outputs/histogram.cpp b/src/outputs/histogram.cpp
index 88c6ced2ecd3..366856f71298 100644
--- a/src/outputs/histogram.cpp
+++ b/src/outputs/histogram.cpp
@@ -207,8 +207,10 @@ Histogram::Histogram(ParameterInput *pin, const std::string &block_name,
   const auto nybins = ndim_ == 2 ? y_edges_.extent_int(0) - 1 : 1;
 
   result_ = ParArray2D(prefix + "result", nybins, nxbins);
-  scatter_result =
-      Kokkos::Experimental::ScatterView(result_.KokkosView());
+  scatter_result = Kokkos::Experimental::ScatterView<
+      Real **, LayoutWrapper, parthenon::DevExecSpace, Kokkos::Experimental::ScatterSum,
+      Kokkos::Experimental::ScatterDuplicated, Kokkos::Experimental::ScatterNonAtomic>(
+      result_.KokkosView());
 
   accumulate_ = pin->GetOrAddBoolean(block_name, prefix + "accumulate", false);
   weight_by_vol_ = pin->GetOrAddBoolean(block_name, prefix + "weight_by_volume", false);
diff --git a/src/outputs/outputs.hpp b/src/outputs/outputs.hpp
index 4fd64236a19c..751b29fb5539 100644
--- a/src/outputs/outputs.hpp
+++ b/src/outputs/outputs.hpp
@@ -250,9 +250,14 @@ struct Histogram {
   int weight_var_component_;
   ParArray2D result_; // resulting histogram
 
-  // temp view for histogram reduction for better performance (switches
-  // between atomics and data duplication depending on the platform)
-  Kokkos::Experimental::ScatterView scatter_result;
+  // temp view for histogram reduction for better performance.
+  // In theory, switches between atomics and data duplication depending on the platform.
+  // In practice, the defaults turned out to be extremely slow on MI250X so we now use the
+  // hardcoded non-atomic version (for now).
+  Kokkos::Experimental::ScatterView<
+      Real **, LayoutWrapper, parthenon::DevExecSpace, Kokkos::Experimental::ScatterSum,
+      Kokkos::Experimental::ScatterDuplicated, Kokkos::Experimental::ScatterNonAtomic>
+      scatter_result;
   Histogram(ParameterInput *pin, const std::string &block_name, const std::string &name);
   void CalcHist(Mesh *pm);
 };