From 744b2a371a54cd0889c2f226a24d51381c5afdf1 Mon Sep 17 00:00:00 2001 From: "ct.clmsn" Date: Mon, 6 Nov 2023 15:24:36 -0600 Subject: [PATCH 01/26] initial support for hpx --- configure.ac | 15 ++++++ threads/Makefile.am | 28 ++++++++-- threads/hpx.cpp | 128 ++++++++++++++++++++++++++++++++++++++++++++ threads/threads.h | 19 ++++++- 4 files changed, 186 insertions(+), 4 deletions(-) create mode 100644 threads/hpx.cpp diff --git a/configure.ac b/configure.ac index b7fd7f1e9..604b8011b 100644 --- a/configure.ac +++ b/configure.ac @@ -296,6 +296,7 @@ AC_LIBTOOL_WIN32_DLL AC_PROG_LIBTOOL AC_PROG_RANLIB AC_PROG_CPP +AC_PROG_CXX AC_CHECK_PROG(OCAMLBUILD, ocamlbuild, ocamlbuild) @@ -677,6 +678,19 @@ if test "$enable_openmp" = "yes"; then AX_OPENMP([], [AC_MSG_ERROR([don't know how to enable OpenMP])]) fi +AC_ARG_ENABLE(hpx, [AC_HELP_STRING([--enable-hpx],[use HPX for parallelism])], enable_hpx=$enableval, enable_hpx=no) + +if test "$enable_hpx" = "yes"; then + AC_SUBST(HPX_LIBS) + AC_SUBST(HPX_CFLAGS) + PKG_CHECK_EXISTS([hpx_component],[],[ + AC_MSG_ERROR([required library HPX not found, check PKG_CONFIG_PATH]) + ]) + PKG_CHECK_MODULES([HPX], [hpx_component]) + AC_SUBST(HPX_CXXFLAGS, ["$HPX_CFLAGS"]) + AC_DEFINE(HAVE_HPX,1,[Define to enable HPX]) +fi + AC_ARG_ENABLE(threads, [AC_HELP_STRING([--enable-threads],[compile FFTW SMP threads library])], enable_threads=$enableval, enable_threads=no) if test "$enable_threads" = "yes"; then @@ -721,6 +735,7 @@ fi AC_SUBST(THREADLIBS) AM_CONDITIONAL(THREADS, test "$enable_threads" = "yes") AM_CONDITIONAL(OPENMP, test "$enable_openmp" = "yes") +AM_CONDITIONAL(HPX, test "$enable_hpx" = "yes") AM_CONDITIONAL(SMP, test "$enable_threads" = "yes" -o "$enable_openmp" = "yes") AM_CONDITIONAL(COMBINED_THREADS, test x"$with_combined_threads" = xyes) diff --git a/threads/Makefile.am b/threads/Makefile.am index f5c170a9a..a02dfa114 100644 --- a/threads/Makefile.am +++ b/threads/Makefile.am @@ -7,14 +7,22 @@ else FFTWOMPLIB = endif +if HPX +FFTWHPXLIB = libfftw3@PREC_SUFFIX@_hpx.la +else +FFTWHPXLIB = +endif + if THREADS if COMBINED_THREADS noinst_LTLIBRARIES = libfftw3@PREC_SUFFIX@_threads.la -else +endif +if OPENMP lib_LTLIBRARIES = libfftw3@PREC_SUFFIX@_threads.la $(FFTWOMPLIB) endif -else -lib_LTLIBRARIES = $(FFTWOMPLIB) +if HPX +lib_LTLIBRARIES = libfftw3@PREC_SUFFIX@_threads.la $(FFTWHPXLIB) +endif endif libfftw3@PREC_SUFFIX@_threads_la_SOURCES = api.c conf.c threads.c \ @@ -26,6 +34,7 @@ if !COMBINED_THREADS libfftw3@PREC_SUFFIX@_threads_la_LIBADD = ../libfftw3@PREC_SUFFIX@.la endif +if OPENMP libfftw3@PREC_SUFFIX@_omp_la_SOURCES = api.c conf.c openmp.c \ threads.h dft-vrank-geq1.c ct.c rdft-vrank-geq1.c hc2hc.c \ vrank-geq1-rdft2.c f77api.c f77funcs.h @@ -34,3 +43,16 @@ libfftw3@PREC_SUFFIX@_omp_la_LDFLAGS = -version-info @SHARED_VERSION_INFO@ if !COMBINED_THREADS libfftw3@PREC_SUFFIX@_omp_la_LIBADD = ../libfftw3@PREC_SUFFIX@.la endif +endif + +if HPX +libfftw3@PREC_SUFFIX@_hpx_la_SOURCES = conf.c hpx.cpp \ +threads.h dft-vrank-geq1.c rdft-vrank-geq1.c hc2hc.c \ +vrank-geq1-rdft2.c f77api.c f77funcs.h +libfftw3@PREC_SUFFIX@_hpx_la_CXXFLAGS = $(HPX_CXXFLAGS) +libfftw3@PREC_SUFFIX@_hpx_la_CFLAGS = $(AM_CFLAGS) +libfftw3@PREC_SUFFIX@_hpx_la_LDFLAGS = -version-info @SHARED_VERSION_INFO@ +if !COMBINED_THREADS +libfftw3@PREC_SUFFIX@_hpx_la_LIBADD = ../libfftw3@PREC_SUFFIX@.la +endif +endif diff --git a/threads/hpx.cpp b/threads/hpx.cpp new file mode 100644 index 000000000..37411e9e7 --- /dev/null +++ b/threads/hpx.cpp @@ -0,0 +1,128 @@ +/* + * Copyright (c) 2003, 2007-14 Matteo Frigo + * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * Copyright (c) 2023 Tactical Computing Labs, LLC (Christopher Taylor) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* openmp.c: thread spawning via HPX */ + +#include "threads/threads.h" +#include "ct.c" +#include "api.c" + +#if !defined(HAVE_HPX) + +#error HPX enabled but not using HPX + +#else + +#include +#include +#include + +#endif + +extern "C" { + +int X(ithreads_init)(void) +{ + return hpx::start( nullptr, 0, nullptr ); +} + +/* Distribute a loop from 0 to loopmax-1 over nthreads threads. + proc(d) is called to execute a block of iterations from d->min + to d->max-1. d->thr_num indicate the number of the thread + that is executing proc (from 0 to nthreads-1), and d->data is + the same as the data parameter passed to X(spawn_loop). + + This function returns only after all the threads have completed. */ +void X(spawn_loop)(int loopmax, int nthr, spawn_function proc, void *data) +{ + int block_size; + spawn_data d; + int i; + + A(loopmax >= 0); + A(nthr > 0); + A(proc); + + if (!loopmax) return; + + /* Choose the block size and number of threads in order to (1) + minimize the critical path and (2) use the fewest threads that + achieve the same critical path (to minimize overhead). + e.g. if loopmax is 5 and nthr is 4, we should use only 3 + threads with block sizes of 2, 2, and 1. */ + block_size = (loopmax + nthr - 1) / nthr; + nthr = (loopmax + block_size - 1) / block_size; + + if (X(spawnloop_callback)) { /* user-defined spawnloop backend */ + spawn_data *sdata; + STACK_MALLOC(spawn_data *, sdata, sizeof(spawn_data) * nthr); + for (i = 0; i < nthr; ++i) { + spawn_data *d = &sdata[i]; + d->max = (d->min = i * block_size) + block_size; + if (d->max > loopmax) + d->max = loopmax; + d->thr_num = i; + d->data = data; + } + X(spawnloop_callback)(proc, sdata, sizeof(spawn_data), nthr, X(spawnloop_callback_data)); + STACK_FREE(sdata); + return; + } + + hpx::threads::run_as_hpx_thread([&d, &data, loopmax, nthr, block_size, &proc]() + { + std::vector> futures; + futures.reserve(nthr); + std::vector sdata; + sdata.reserve(nthr); + + for (int tid = 0; tid < nthr; ++tid) + { + sdata.push_back(d); + futures.push_back(hpx::async([tid, &sdata, &data, &proc, block_size, loopmax]() + { + sdata[tid].max = (sdata[tid].min = tid * block_size) + block_size; + if (sdata[tid].max > loopmax) { + sdata[tid].max = loopmax; + } + sdata[tid].thr_num = tid; + sdata[tid].data = data; + proc(&sdata[tid]); + })); + } + + hpx::wait_all(futures); + }); +} + +void X(threads_cleanup)(void) +{ + hpx::post([]() { hpx::finalize(); }); + hpx::stop(); +} + +/* FIXME [Matteo Frigo 2015-05-25] What does "thread-safe" + mean for openmp? */ +void X(threads_register_planner_hooks)(void) +{ +} + +} // end extern "C" diff --git a/threads/threads.h b/threads/threads.h index e48db3fbc..738a22d3d 100644 --- a/threads/threads.h +++ b/threads/threads.h @@ -17,6 +17,11 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA * */ +#ifdef __cplusplus +#if defined(HAVE_HPX) +#pragma once +#endif +#endif #ifndef __THREADS_H__ #define __THREADS_H__ @@ -32,10 +37,23 @@ typedef struct { typedef void *(*spawn_function) (spawn_data *); +#ifdef __cplusplus +#if defined(HAVE_HPX) +extern "C" { +#endif +#endif + void X(spawn_loop)(int loopmax, int nthreads, spawn_function proc, void *data); int X(ithreads_init)(void); void X(threads_cleanup)(void); +void X(threads_register_planner_hooks)(void); + +#ifdef __cplusplus +#if defined(HAVE_HPX) +} // end extern "C" +#endif +#endif typedef void (*spawnloop_function)(spawn_function, spawn_data *, size_t, int, void *); extern spawnloop_function X(spawnloop_callback); @@ -55,6 +73,5 @@ hc2hc_solver *X(mksolver_hc2hc_threads)(size_t size, INT r, hc2hc_mkinferior mkc void X(threads_conf_standard)(planner *p); void X(threads_register_hooks)(void); void X(threads_unregister_hooks)(void); -void X(threads_register_planner_hooks)(void); #endif /* __THREADS_H__ */ From 78216afe9f5aa290791835baebea83a0c6730eee Mon Sep 17 00:00:00 2001 From: ct-clmsn Date: Tue, 7 Nov 2023 14:25:54 -0500 Subject: [PATCH 02/26] adding hpx to cmake --- CMakeLists.txt | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 16408517d..29e6c9afb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -14,6 +14,7 @@ option (BUILD_SHARED_LIBS "Build shared libraries" ON) option (BUILD_TESTS "Build tests" ON) option (ENABLE_OPENMP "Use OpenMP for multithreading" OFF) +option (ENABLE_HPX "Use HPX for multithreading" OFF) option (ENABLE_THREADS "Use pthread for multithreading" OFF) option (WITH_COMBINED_THREADS "Merge thread library" OFF) @@ -122,6 +123,10 @@ if (Threads_FOUND) set (HAVE_THREADS TRUE) endif () +if(ENABLE_OPENMP AND ENABLE_HPX) + message(FATAL "OpenMP and HPX cannot be enabled at the same time") +endif () + if (ENABLE_OPENMP) find_package (OpenMP) endif () @@ -129,6 +134,13 @@ if (OPENMP_FOUND) set (HAVE_OPENMP TRUE) endif () +if (ENABLE_HPX) + find_package (HPX) +endif () +if (HPX_FOUND) + set (HAVE_HPX TRUE) +endif () + include (CheckCCompilerFlag) if (ENABLE_SSE) @@ -257,6 +269,7 @@ set(fftw_par_SOURCE set (fftw_threads_SOURCE ${fftw_par_SOURCE} threads/threads.c) set (fftw_omp_SOURCE ${fftw_par_SOURCE} threads/openmp.c) +set (fftw_hpx_SOURCE ${fftw_par_SOURCE} threads/hpx.cpp) include_directories (.) @@ -365,6 +378,15 @@ if (OPENMP_FOUND) target_compile_options (${fftw3_lib}_omp PRIVATE ${OpenMP_C_FLAGS}) endif () +if (HPX_FOUND) + add_library (${fftw3_lib}_hpx ${fftw_hpx_SOURCE}) + target_include_directories (${fftw3_lib}_hpx INTERFACE $) + target_link_libraries (${fftw3_lib}_hpx ${fftw3_lib}) + target_link_libraries (${fftw3_lib}_hpx ${CMAKE_THREAD_LIBS_INIT}) + list (APPEND subtargets ${fftw3_lib}_hpx) + target_compile_options (${fftw3_lib}_hpx PRIVATE ${OpenMP_C_FLAGS}) +endif () + foreach(subtarget ${subtargets}) set_target_properties (${subtarget} PROPERTIES SOVERSION 3.6.9 VERSION 3) install (TARGETS ${subtarget} From a6464f76453e5b11e9ae75893b14c6987b07c424 Mon Sep 17 00:00:00 2001 From: ct-clmsn Date: Tue, 7 Nov 2023 14:38:47 -0500 Subject: [PATCH 03/26] fixed cmake issue --- CMakeLists.txt | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 29e6c9afb..adbc01d63 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -379,12 +379,13 @@ if (OPENMP_FOUND) endif () if (HPX_FOUND) - add_library (${fftw3_lib}_hpx ${fftw_hpx_SOURCE}) + add_library (${fftw3_lib}_hpx SHARED ${fftw_hpx_SOURCE}) target_include_directories (${fftw3_lib}_hpx INTERFACE $) + target_link_libraries (${fftw3_lib}_hpx PUBLIC HPX::hpx) target_link_libraries (${fftw3_lib}_hpx ${fftw3_lib}) target_link_libraries (${fftw3_lib}_hpx ${CMAKE_THREAD_LIBS_INIT}) list (APPEND subtargets ${fftw3_lib}_hpx) - target_compile_options (${fftw3_lib}_hpx PRIVATE ${OpenMP_C_FLAGS}) + target_compile_options (${fftw3_lib}_hpx PRIVATE HPX::hpx) endif () foreach(subtarget ${subtargets}) From 7b8e590f6575fb7e266aac8fb46efb182ec42092 Mon Sep 17 00:00:00 2001 From: ct-clmsn Date: Tue, 5 Dec 2023 11:47:45 -0600 Subject: [PATCH 04/26] corrections for linking/loading --- threads/Makefile.am | 4 ++-- threads/hpx.cpp | 2 -- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/threads/Makefile.am b/threads/Makefile.am index a02dfa114..a7497c169 100644 --- a/threads/Makefile.am +++ b/threads/Makefile.am @@ -46,8 +46,8 @@ endif endif if HPX -libfftw3@PREC_SUFFIX@_hpx_la_SOURCES = conf.c hpx.cpp \ -threads.h dft-vrank-geq1.c rdft-vrank-geq1.c hc2hc.c \ +libfftw3@PREC_SUFFIX@_hpx_la_SOURCES = api.c conf.c hpx.cpp \ +threads.h dft-vrank-geq1.c ct.c rdft-vrank-geq1.c hc2hc.c \ vrank-geq1-rdft2.c f77api.c f77funcs.h libfftw3@PREC_SUFFIX@_hpx_la_CXXFLAGS = $(HPX_CXXFLAGS) libfftw3@PREC_SUFFIX@_hpx_la_CFLAGS = $(AM_CFLAGS) diff --git a/threads/hpx.cpp b/threads/hpx.cpp index 37411e9e7..8cb2caa31 100644 --- a/threads/hpx.cpp +++ b/threads/hpx.cpp @@ -22,8 +22,6 @@ /* openmp.c: thread spawning via HPX */ #include "threads/threads.h" -#include "ct.c" -#include "api.c" #if !defined(HAVE_HPX) From e0fe01f5327a953988809d47614414163a717076 Mon Sep 17 00:00:00 2001 From: ct-clmsn Date: Tue, 5 Dec 2023 16:15:24 -0600 Subject: [PATCH 05/26] fixed variable spelling --- mpi/Makefile.am | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/mpi/Makefile.am b/mpi/Makefile.am index 8e43fac5b..c87a393ac 100644 --- a/mpi/Makefile.am +++ b/mpi/Makefile.am @@ -32,11 +32,14 @@ mpi_bench_CFLAGS = $(PTHREAD_CFLAGS) if !COMBINED_THREADS LIBFFTWTHREADS = $(top_builddir)/threads/libfftw3@PREC_SUFFIX@_threads.la endif -else +endif if OPENMP mpi_bench_CFLAGS = $(OPENMP_CFLAGS) LIBFFTWTHREADS = $(top_builddir)/threads/libfftw3@PREC_SUFFIX@_omp.la endif +if HPX +mpi_bench_CFLAGS = $(HPX_CXXFLAGS) +LIBFFTWTHREADS = $(top_builddir)/threads/libfftw3@PREC_SUFFIX@_hpx.la endif mpi_bench_SOURCES = mpi-bench.c $(top_srcdir)/tests/fftw-bench.c $(top_srcdir)/tests/hook.c From cf33f607dd445703c382e8e609540c77c7de21b1 Mon Sep 17 00:00:00 2001 From: ct-clmsn Date: Tue, 5 Dec 2023 16:16:01 -0600 Subject: [PATCH 06/26] added environment variable --- threads/hpx.cpp | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/threads/hpx.cpp b/threads/hpx.cpp index 8cb2caa31..57a31a29e 100644 --- a/threads/hpx.cpp +++ b/threads/hpx.cpp @@ -29,6 +29,7 @@ #else +#include #include #include #include @@ -39,7 +40,14 @@ extern "C" { int X(ithreads_init)(void) { - return hpx::start( nullptr, 0, nullptr ); + const char * nthreads_cstr = std::get("FFTW3_HPX_NTHREADS"); + if(nthreads_cstr == nullptr) { + return hpx::start( nullptr, 0, nullptr ); + } + + std::string count(nthreads_cstr); + std::string thread_arg = "--hpx:threads=" + count; + return hpx::start( nullptr, 1, thread_arg.c_str() ); } /* Distribute a loop from 0 to loopmax-1 over nthreads threads. From 0e5cb7ef92408e28c4dbc75b58c23620e3687c19 Mon Sep 17 00:00:00 2001 From: ct-clmsn Date: Tue, 5 Dec 2023 17:51:25 -0600 Subject: [PATCH 07/26] added missing hpx libraries to mpi-bench --- mpi/Makefile.am | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mpi/Makefile.am b/mpi/Makefile.am index c87a393ac..2a5ddd255 100644 --- a/mpi/Makefile.am +++ b/mpi/Makefile.am @@ -39,7 +39,7 @@ LIBFFTWTHREADS = $(top_builddir)/threads/libfftw3@PREC_SUFFIX@_omp.la endif if HPX mpi_bench_CFLAGS = $(HPX_CXXFLAGS) -LIBFFTWTHREADS = $(top_builddir)/threads/libfftw3@PREC_SUFFIX@_hpx.la +LIBFFTWTHREADS = $(HPX_LIBS) $(top_builddir)/threads/libfftw3@PREC_SUFFIX@_hpx.la endif mpi_bench_SOURCES = mpi-bench.c $(top_srcdir)/tests/fftw-bench.c $(top_srcdir)/tests/hook.c From c07076b0538d2738cbb36263b45e3870d80a857d Mon Sep 17 00:00:00 2001 From: ct-clmsn Date: Wed, 6 Dec 2023 15:32:59 -0600 Subject: [PATCH 08/26] fixed typing error --- threads/hpx.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/threads/hpx.cpp b/threads/hpx.cpp index 57a31a29e..9552a59c6 100644 --- a/threads/hpx.cpp +++ b/threads/hpx.cpp @@ -40,7 +40,7 @@ extern "C" { int X(ithreads_init)(void) { - const char * nthreads_cstr = std::get("FFTW3_HPX_NTHREADS"); + const char * nthreads_cstr = std::getenv("FFTW3_HPX_NTHREADS"); if(nthreads_cstr == nullptr) { return hpx::start( nullptr, 0, nullptr ); } From 524d0963af5a7b21b80fca21193e48f3f78e2d5e Mon Sep 17 00:00:00 2001 From: ct-clmsn Date: Fri, 8 Dec 2023 13:24:38 -0500 Subject: [PATCH 09/26] fixed memory allocation performance issue --- threads/hpx.cpp | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/threads/hpx.cpp b/threads/hpx.cpp index 9552a59c6..f91cd587f 100644 --- a/threads/hpx.cpp +++ b/threads/hpx.cpp @@ -93,16 +93,14 @@ void X(spawn_loop)(int loopmax, int nthr, spawn_function proc, void *data) return; } - hpx::threads::run_as_hpx_thread([&d, &data, loopmax, nthr, block_size, &proc]() + std::vector> futures; + futures.reserve(nthr); + std::vector sdata(nthr, d); + + hpx::threads::run_as_hpx_thread([&d, &data, loopmax, nthr, block_size, &proc, &futures, &sdata]() { - std::vector> futures; - futures.reserve(nthr); - std::vector sdata; - sdata.reserve(nthr); - for (int tid = 0; tid < nthr; ++tid) { - sdata.push_back(d); futures.push_back(hpx::async([tid, &sdata, &data, &proc, block_size, loopmax]() { sdata[tid].max = (sdata[tid].min = tid * block_size) + block_size; From 8d7a58e48e36d7db7ef39c8b443c92f4b3426573 Mon Sep 17 00:00:00 2001 From: ct-clmsn Date: Fri, 8 Dec 2023 13:27:50 -0500 Subject: [PATCH 10/26] fixed memory allocation performance issue --- threads/hpx.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/threads/hpx.cpp b/threads/hpx.cpp index f91cd587f..bda31e0a2 100644 --- a/threads/hpx.cpp +++ b/threads/hpx.cpp @@ -113,8 +113,9 @@ void X(spawn_loop)(int loopmax, int nthr, spawn_function proc, void *data) })); } - hpx::wait_all(futures); }); + + hpx::wait_all(futures); } void X(threads_cleanup)(void) From 5629bbca056fdd63184ba0099365b4159a111b0e Mon Sep 17 00:00:00 2001 From: ct-clmsn Date: Fri, 8 Dec 2023 13:34:02 -0500 Subject: [PATCH 11/26] fixed possible thread scheduling issue --- threads/hpx.cpp | 39 +++++++++++++++++++++------------------ 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/threads/hpx.cpp b/threads/hpx.cpp index bda31e0a2..3720628fc 100644 --- a/threads/hpx.cpp +++ b/threads/hpx.cpp @@ -97,25 +97,28 @@ void X(spawn_loop)(int loopmax, int nthr, spawn_function proc, void *data) futures.reserve(nthr); std::vector sdata(nthr, d); - hpx::threads::run_as_hpx_thread([&d, &data, loopmax, nthr, block_size, &proc, &futures, &sdata]() - { - for (int tid = 0; tid < nthr; ++tid) + hpx::future fut = + hpx::threads::run_as_hpx_thread([&d, &data, loopmax, nthr, block_size, &proc, &futures, &sdata]() { - futures.push_back(hpx::async([tid, &sdata, &data, &proc, block_size, loopmax]() - { - sdata[tid].max = (sdata[tid].min = tid * block_size) + block_size; - if (sdata[tid].max > loopmax) { - sdata[tid].max = loopmax; - } - sdata[tid].thr_num = tid; - sdata[tid].data = data; - proc(&sdata[tid]); - })); - } - - }); - - hpx::wait_all(futures); + for (int tid = 0; tid < nthr; ++tid) + { + futures.push_back(hpx::async([tid, &sdata, &data, &proc, block_size, loopmax]() + { + sdata[tid].max = (sdata[tid].min = tid * block_size) + block_size; + if (sdata[tid].max > loopmax) { + sdata[tid].max = loopmax; + } + sdata[tid].thr_num = tid; + sdata[tid].data = data; + proc(&sdata[tid]); + })); + } + + hpx::wait_all(futures); + return hpx::make_ready_future(); + }); + + fut.wait(); } void X(threads_cleanup)(void) From ead1a6ede3592e2e0676a702684ba3500cdab033 Mon Sep 17 00:00:00 2001 From: ct-clmsn Date: Fri, 8 Dec 2023 21:37:41 -0500 Subject: [PATCH 12/26] alternative to pkg-config added --- configure.ac | 39 +++++++++++++++++++++++++++++++++------ 1 file changed, 33 insertions(+), 6 deletions(-) diff --git a/configure.ac b/configure.ac index 604b8011b..6fe740a18 100644 --- a/configure.ac +++ b/configure.ac @@ -683,12 +683,39 @@ AC_ARG_ENABLE(hpx, [AC_HELP_STRING([--enable-hpx],[use HPX for parallelism])], e if test "$enable_hpx" = "yes"; then AC_SUBST(HPX_LIBS) AC_SUBST(HPX_CFLAGS) - PKG_CHECK_EXISTS([hpx_component],[],[ - AC_MSG_ERROR([required library HPX not found, check PKG_CONFIG_PATH]) - ]) - PKG_CHECK_MODULES([HPX], [hpx_component]) - AC_SUBST(HPX_CXXFLAGS, ["$HPX_CFLAGS"]) - AC_DEFINE(HAVE_HPX,1,[Define to enable HPX]) + + AC_CHECK_PROG(HAS_PKGCONFIG, pkg-config, yes, no, [], []) + + if test "$HAS_PKGCONFIG" = "yes"; then + PKG_CHECK_EXISTS([hpx_component, hpx_component_relwithdebinfo],[],[ + AC_MSG_ERROR([required library HPX not found, check PKG_CONFIG_PATH]) + ]) + PKG_CHECK_MODULES([HPX], [hpx_component]) + AC_SUBST(HPX_CXXFLAGS, ["$HPX_CFLAGS"]) + AC_DEFINE(HAVE_HPX,1,[Define to enable HPX]) + else + AC_LANG_PUSH([C++]) + + AC_SUBST(HAVE_HPX_HPP) + AC_CHECK_HEADER(hpx.hpp,[HAVE_HPX_HPP=1],[HAVE_HPX_HPP=0]) + if test "$HAVE_HPX_HPP" = "0"; then + AC_MSG_ERROR([required library HPX not found, check CXXFLAGS]) + fi + + AC_SUBST(HAVE_HPX) + AC_LINK_IFELSE( + [AC_LANG_PROGRAM([#include ], + [hpx::init(); hpx::finalize();])], + [HAVE_HPX=1], + [HAVE_HPX=0]) + if test "$HAVE_HPX_HPP" = "0"; then + AC_MSG_ERROR([required library HPX not found, check LD_LIBRARY_PATH]) + fi + + HPX_LIBS="-lhpx -lhpx_core" + AC_LANG_POP([C++]) + fi + fi AC_ARG_ENABLE(threads, [AC_HELP_STRING([--enable-threads],[compile FFTW SMP threads library])], enable_threads=$enableval, enable_threads=no) From 3f42610e392f81911361f7dffe770904d859dd9c Mon Sep 17 00:00:00 2001 From: ct-clmsn Date: Fri, 8 Dec 2023 22:11:02 -0500 Subject: [PATCH 13/26] alternative to pkg-config added --- configure.ac | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/configure.ac b/configure.ac index 6fe740a18..e1c9afea8 100644 --- a/configure.ac +++ b/configure.ac @@ -690,25 +690,26 @@ if test "$enable_hpx" = "yes"; then PKG_CHECK_EXISTS([hpx_component, hpx_component_relwithdebinfo],[],[ AC_MSG_ERROR([required library HPX not found, check PKG_CONFIG_PATH]) ]) - PKG_CHECK_MODULES([HPX], [hpx_component]) + PKG_CHECK_MODULES([HPX], [hpx_component]) AC_SUBST(HPX_CXXFLAGS, ["$HPX_CFLAGS"]) AC_DEFINE(HAVE_HPX,1,[Define to enable HPX]) else AC_LANG_PUSH([C++]) AC_SUBST(HAVE_HPX_HPP) - AC_CHECK_HEADER(hpx.hpp,[HAVE_HPX_HPP=1],[HAVE_HPX_HPP=0]) + AC_CHECK_HEADER(hpx/hpx.hpp,[HAVE_HPX_HPP=1],[HAVE_HPX_HPP=0]) if test "$HAVE_HPX_HPP" = "0"; then - AC_MSG_ERROR([required library HPX not found, check CXXFLAGS]) + AC_MSG_ERROR([required HPX not found, check CXXFLAGS]) fi AC_SUBST(HAVE_HPX) AC_LINK_IFELSE( - [AC_LANG_PROGRAM([#include ], + [AC_LANG_PROGRAM([#include ], [hpx::init(); hpx::finalize();])], [HAVE_HPX=1], [HAVE_HPX=0]) - if test "$HAVE_HPX_HPP" = "0"; then + + if test "$HAVE_HPX" = "0"; then AC_MSG_ERROR([required library HPX not found, check LD_LIBRARY_PATH]) fi From 4720423a41333dbb0a13fbf2d18d5069ac5cf995 Mon Sep 17 00:00:00 2001 From: "ct.clmsn" Date: Mon, 11 Dec 2023 09:13:49 -0600 Subject: [PATCH 14/26] fixed bug with pkgconfig detection --- configure.ac | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/configure.ac b/configure.ac index e1c9afea8..6a9dcaf55 100644 --- a/configure.ac +++ b/configure.ac @@ -687,10 +687,19 @@ if test "$enable_hpx" = "yes"; then AC_CHECK_PROG(HAS_PKGCONFIG, pkg-config, yes, no, [], []) if test "$HAS_PKGCONFIG" = "yes"; then - PKG_CHECK_EXISTS([hpx_component, hpx_component_relwithdebinfo],[],[ - AC_MSG_ERROR([required library HPX not found, check PKG_CONFIG_PATH]) - ]) - PKG_CHECK_MODULES([HPX], [hpx_component]) + AC_SUBST(hpxcompfound) + + PKG_CHECK_EXISTS([hpx_component],[hpxcompfound=yes],[hpxcompfound=no]) + + if test "$hpxcompfound" = "no"; then + PKG_CHECK_EXISTS([hpx_component_relwithdebinfo],[],[ + AC_MSG_ERROR([required library HPX not found, check PKG_CONFIG_PATH]) + ]) + PKG_CHECK_MODULES([HPX], [hpx_component_relwithdebinfo]) + else + PKG_CHECK_MODULES([HPX], [hpx_component]) + fi + AC_SUBST(HPX_CXXFLAGS, ["$HPX_CFLAGS"]) AC_DEFINE(HAVE_HPX,1,[Define to enable HPX]) else From a4c93b931aeaea91df5fca967b64ed773e219755 Mon Sep 17 00:00:00 2001 From: "ct.clmsn" Date: Mon, 11 Dec 2023 09:27:29 -0600 Subject: [PATCH 15/26] updated hpx calls --- threads/hpx.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/threads/hpx.cpp b/threads/hpx.cpp index 3720628fc..d024c1a0b 100644 --- a/threads/hpx.cpp +++ b/threads/hpx.cpp @@ -47,7 +47,8 @@ int X(ithreads_init)(void) std::string count(nthreads_cstr); std::string thread_arg = "--hpx:threads=" + count; - return hpx::start( nullptr, 1, thread_arg.c_str() ); + char * args[] = { const_cast(thread_arg.c_str()) }; + return hpx::start( nullptr, 1, args ); } /* Distribute a loop from 0 to loopmax-1 over nthreads threads. @@ -98,7 +99,7 @@ void X(spawn_loop)(int loopmax, int nthr, spawn_function proc, void *data) std::vector sdata(nthr, d); hpx::future fut = - hpx::threads::run_as_hpx_thread([&d, &data, loopmax, nthr, block_size, &proc, &futures, &sdata]() + hpx::run_as_hpx_thread([&d, &data, loopmax, nthr, block_size, &proc, &futures, &sdata]() -> hpx::future { for (int tid = 0; tid < nthr; ++tid) { @@ -115,7 +116,7 @@ void X(spawn_loop)(int loopmax, int nthr, spawn_function proc, void *data) } hpx::wait_all(futures); - return hpx::make_ready_future(); + return hpx::make_ready_future(); }); fut.wait(); From 8a77963c7acb76a3ee94404bcce6f97e0fcdabf0 Mon Sep 17 00:00:00 2001 From: "ct.clmsn" Date: Mon, 11 Dec 2023 09:29:45 -0600 Subject: [PATCH 16/26] fixed runtime initialization --- threads/hpx.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/threads/hpx.cpp b/threads/hpx.cpp index d024c1a0b..78a9aea53 100644 --- a/threads/hpx.cpp +++ b/threads/hpx.cpp @@ -47,8 +47,9 @@ int X(ithreads_init)(void) std::string count(nthreads_cstr); std::string thread_arg = "--hpx:threads=" + count; - char * args[] = { const_cast(thread_arg.c_str()) }; - return hpx::start( nullptr, 1, args ); + hpx::init_params params; + params.cfg = { thread_arg }; + return hpx::start(nullptr, 0, nullptr, params); } /* Distribute a loop from 0 to loopmax-1 over nthreads threads. From 36aa011d76217a94c7f02c57b209e91bd41da9ea Mon Sep 17 00:00:00 2001 From: "ct.clmsn" Date: Mon, 11 Dec 2023 10:27:28 -0600 Subject: [PATCH 17/26] rollback to hpx v1.9 call to run_as* --- threads/hpx.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/threads/hpx.cpp b/threads/hpx.cpp index 78a9aea53..307eab675 100644 --- a/threads/hpx.cpp +++ b/threads/hpx.cpp @@ -100,7 +100,7 @@ void X(spawn_loop)(int loopmax, int nthr, spawn_function proc, void *data) std::vector sdata(nthr, d); hpx::future fut = - hpx::run_as_hpx_thread([&d, &data, loopmax, nthr, block_size, &proc, &futures, &sdata]() -> hpx::future + hpx::threads::run_as_hpx_thread([&d, &data, loopmax, nthr, block_size, &proc, &futures, &sdata]() -> hpx::future { for (int tid = 0; tid < nthr; ++tid) { From 6a9a26fce501f2798807f35fd1deee492134cf30 Mon Sep 17 00:00:00 2001 From: ct-clmsn Date: Wed, 27 Dec 2023 16:26:14 -0500 Subject: [PATCH 18/26] added threading hooks for planner --- threads/hpx.cpp | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/threads/hpx.cpp b/threads/hpx.cpp index 307eab675..adc0c9aad 100644 --- a/threads/hpx.cpp +++ b/threads/hpx.cpp @@ -129,10 +129,28 @@ void X(threads_cleanup)(void) hpx::stop(); } -/* FIXME [Matteo Frigo 2015-05-25] What does "thread-safe" - mean for openmp? */ +static hpx::mutex planner_mutex; +static hpx::mutex install_planner_hooks_mutex; +static std::unique_lock planner_lock = std::unique_lock(planner_mutex);; +static int planner_hooks_installed = 0; + +static void lock_planner_mutex(void) +{ + planner_lock.lock(); +} + +static void unlock_planner_mutex(void) +{ + planner_lock.unlock(); +} + void X(threads_register_planner_hooks)(void) { + std::lock_guard lkg(install_planner_hooks_mutex); + if (!planner_hooks_installed) { + X(set_planner_hooks)(lock_planner_mutex, unlock_planner_mutex); + planner_hooks_installed = 1; + } } } // end extern "C" From bcbb63b4323539016643883f3b0f5e91a91596b0 Mon Sep 17 00:00:00 2001 From: ct-clmsn Date: Thu, 28 Dec 2023 11:30:18 -0500 Subject: [PATCH 19/26] unrolled the parallel loop so the initial thread does work --- threads/hpx.cpp | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/threads/hpx.cpp b/threads/hpx.cpp index adc0c9aad..b0cd9a5f9 100644 --- a/threads/hpx.cpp +++ b/threads/hpx.cpp @@ -102,7 +102,7 @@ void X(spawn_loop)(int loopmax, int nthr, spawn_function proc, void *data) hpx::future fut = hpx::threads::run_as_hpx_thread([&d, &data, loopmax, nthr, block_size, &proc, &futures, &sdata]() -> hpx::future { - for (int tid = 0; tid < nthr; ++tid) + for (int tid = 1; tid < nthr; ++tid) { futures.push_back(hpx::async([tid, &sdata, &data, &proc, block_size, loopmax]() { @@ -116,6 +116,16 @@ void X(spawn_loop)(int loopmax, int nthr, spawn_function proc, void *data) })); } + { + sdata[0].max = (sdata[0].min = 0) + block_size; + if (sdata[0].max > loopmax) { + sdata[0].max = loopmax; + } + sdata[0].thr_num = tid; + sdata[0].data = data; + proc(&sdata[0]); + } + hpx::wait_all(futures); return hpx::make_ready_future(); }); From 4117af1dd1d8b8174ac83b63a0fc38c4215a4c3c Mon Sep 17 00:00:00 2001 From: ct-clmsn Date: Fri, 29 Dec 2023 14:52:39 -0500 Subject: [PATCH 20/26] rm'd extraneous ';' and changed planner_hooks_installed to a bool --- threads/hpx.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/threads/hpx.cpp b/threads/hpx.cpp index b0cd9a5f9..39679fcf4 100644 --- a/threads/hpx.cpp +++ b/threads/hpx.cpp @@ -141,8 +141,8 @@ void X(threads_cleanup)(void) static hpx::mutex planner_mutex; static hpx::mutex install_planner_hooks_mutex; -static std::unique_lock planner_lock = std::unique_lock(planner_mutex);; -static int planner_hooks_installed = 0; +static std::unique_lock planner_lock = std::unique_lock(planner_mutex); +static bool planner_hooks_installed = false static void lock_planner_mutex(void) { @@ -159,7 +159,7 @@ void X(threads_register_planner_hooks)(void) std::lock_guard lkg(install_planner_hooks_mutex); if (!planner_hooks_installed) { X(set_planner_hooks)(lock_planner_mutex, unlock_planner_mutex); - planner_hooks_installed = 1; + planner_hooks_installed = true; } } From e3ea5031c4a60ecb9ca749b9c766dc1b6c06c67b Mon Sep 17 00:00:00 2001 From: ct-clmsn Date: Wed, 3 Jan 2024 13:46:45 -0500 Subject: [PATCH 21/26] added missing header file --- threads/hpx.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/threads/hpx.cpp b/threads/hpx.cpp index 39679fcf4..fab360bb7 100644 --- a/threads/hpx.cpp +++ b/threads/hpx.cpp @@ -22,6 +22,7 @@ /* openmp.c: thread spawning via HPX */ #include "threads/threads.h" +#include "api/api.h" #if !defined(HAVE_HPX) From b94050b018527d18f386c5a78f540ddc0eca0e7d Mon Sep 17 00:00:00 2001 From: Chris Taylor Date: Fri, 5 Jan 2024 18:14:30 +0000 Subject: [PATCH 22/26] semaphore fix --- threads/hpx.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/threads/hpx.cpp b/threads/hpx.cpp index fab360bb7..439d635c8 100644 --- a/threads/hpx.cpp +++ b/threads/hpx.cpp @@ -34,6 +34,7 @@ #include #include #include +#include #endif @@ -122,7 +123,7 @@ void X(spawn_loop)(int loopmax, int nthr, spawn_function proc, void *data) if (sdata[0].max > loopmax) { sdata[0].max = loopmax; } - sdata[0].thr_num = tid; + sdata[0].thr_num = 0; sdata[0].data = data; proc(&sdata[0]); } @@ -140,19 +141,18 @@ void X(threads_cleanup)(void) hpx::stop(); } -static hpx::mutex planner_mutex; +static hpx::counting_semaphore<> planner_semaphore = hpx::counting_semaphore<>(1); static hpx::mutex install_planner_hooks_mutex; -static std::unique_lock planner_lock = std::unique_lock(planner_mutex); -static bool planner_hooks_installed = false +static bool planner_hooks_installed = false; static void lock_planner_mutex(void) { - planner_lock.lock(); + planner_semaphore.acquire(); } static void unlock_planner_mutex(void) { - planner_lock.unlock(); + planner_semaphore.release(); } void X(threads_register_planner_hooks)(void) From 25eae8bd391de2932857c906437f5bf677b1cc6c Mon Sep 17 00:00:00 2001 From: ct-clmsn Date: Tue, 5 Mar 2024 11:03:23 -0500 Subject: [PATCH 23/26] added rv64g (riscv 64 bit) cycle counter --- kernel/cycle.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/kernel/cycle.h b/kernel/cycle.h index 16dfdc98f..e4e217176 100644 --- a/kernel/cycle.h +++ b/kernel/cycle.h @@ -562,3 +562,25 @@ static inline ticks getticks(void) INLINE_ELAPSED(inline) #define HAVE_TICK_COUNTER #endif + +/*----------------------------------------------------------------*/ +/* + * RISC_V 64-bit cycle counter (RV64G) + */ +#if defined(__riscv) && (__riscv_xlen == 64) && !defined(HAVE_TICK_COUNTER) +typedef unsigned long ticks; + +static __inline__ ticks getticks(void) +{ + ticks ret; + + __asm__ __volatile__ ("csrrs %0, 0xc00, x0" : "=r" (dst) ); + + /* no input, nothing else clobbered */ + return ret; +} + +INLINE_ELAPSED(inline) + +#define HAVE_TICK_COUNTER +#endif From 0d8d86ac326dbba091d46234dfde454c3f6ecf31 Mon Sep 17 00:00:00 2001 From: ct-clmsn Date: Tue, 5 Mar 2024 11:09:05 -0500 Subject: [PATCH 24/26] error in assembly, referenced wrong storage variable --- kernel/cycle.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/cycle.h b/kernel/cycle.h index e4e217176..b9d5dd1eb 100644 --- a/kernel/cycle.h +++ b/kernel/cycle.h @@ -574,7 +574,7 @@ static __inline__ ticks getticks(void) { ticks ret; - __asm__ __volatile__ ("csrrs %0, 0xc00, x0" : "=r" (dst) ); + __asm__ __volatile__ ("csrrs %0, 0xc00, x0" : "=r" (ret) ); /* no input, nothing else clobbered */ return ret; From f80f68a4cbb87b505357fe8dd6edc3890da1055b Mon Sep 17 00:00:00 2001 From: ct-clmsn Date: Tue, 5 Mar 2024 11:16:04 -0500 Subject: [PATCH 25/26] updated copyright in file to include authors --- kernel/cycle.h | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/cycle.h b/kernel/cycle.h index b9d5dd1eb..3b2d415f6 100644 --- a/kernel/cycle.h +++ b/kernel/cycle.h @@ -1,6 +1,7 @@ /* * Copyright (c) 2003, 2007-14 Matteo Frigo * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology + * Copyright (c) 2024 Christopher Taylor, Tactical Computing Labs, LLC * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the From 642462fc54af92745155b8070bd527a829cbd76c Mon Sep 17 00:00:00 2001 From: Christoper Taylor Date: Tue, 26 Mar 2024 08:59:31 -0400 Subject: [PATCH 26/26] used preferred pseudo-instruction --- kernel/cycle.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/kernel/cycle.h b/kernel/cycle.h index 3b2d415f6..59ba9517d 100644 --- a/kernel/cycle.h +++ b/kernel/cycle.h @@ -569,19 +569,20 @@ INLINE_ELAPSED(inline) * RISC_V 64-bit cycle counter (RV64G) */ #if defined(__riscv) && (__riscv_xlen == 64) && !defined(HAVE_TICK_COUNTER) + typedef unsigned long ticks; static __inline__ ticks getticks(void) { - ticks ret; + unsigned long cycles; - __asm__ __volatile__ ("csrrs %0, 0xc00, x0" : "=r" (ret) ); + __asm__ __volatile__ ("rdcycle %0" : "=r" (cycles)); /* no input, nothing else clobbered */ - return ret; + return cycles; } INLINE_ELAPSED(inline) - #define HAVE_TICK_COUNTER + #endif