diff --git a/.pyproject_hip.toml b/.pyproject_hip.toml new file mode 100644 index 00000000..817792c9 --- /dev/null +++ b/.pyproject_hip.toml @@ -0,0 +1,239 @@ +[build-system] +build-backend = 'setuptools.build_meta' +requires = [ + 'setuptools>=61.0', + 'setuptools_scm>=6.4', + 'Cython>=3.0', + 'extension-helpers>=1.1', + 'numpy>=2.0', +] + +[project] +# Must match the ``-HIP`` package name the Makefile derives for ``usehip`` builds. +name = 'Triumvirate-HIP' +description = "Three-point clustering measurements in large-scale structure analyses." +authors = [ + {name = 'Mike S Wang'}, + {name = 'Naonori S Sugiyama'}, +] +maintainers = [ + {name = 'Mike S Wang', email = "32841762+MikeSWang@users.noreply.github.com"}, +] +license = {file = "LICENCE"} +classifiers = [ + "License :: OSI Approved :: GNU General Public License v3 (GPLv3)", + "License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)", + "Operating System :: POSIX :: Linux", + "Environment :: GPU", + "Programming Language :: C++", + "Programming Language :: Cython", + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3 :: Only", + "Topic :: Scientific/Engineering :: Astronomy", + "Topic :: Scientific/Engineering :: Information Analysis", + "Topic :: Scientific/Engineering :: Physics", + "Intended Audience :: Science/Research", + "Intended Audience :: Education", +] +dynamic = [ + 'version', + 'readme', +] +requires-python = '>=3.10' +dependencies = [ + 'numpy>=1.23', + 'scipy>=1.13', + 'sympy>=1.12.1', + 'astropy>=4.2', + 'pyyaml>=5.1', + 'tqdm', +] + +[project.optional-dependencies] +mpi = ['mpi4py>=3.1.5'] +nbk = ['nbodykit'] + +[project.urls] +# Home = "https://mikeswang.github.io/Triumvirate" +Documentation = "https://triumvirate.readthedocs.io/" +Source = "https://github.com/MikeSWang/Triumvirate" +Changelog = "https://github.com/MikeSWang/Triumvirate/blob/main/CHANGELOG.md" + +# [tool.setuptools] +# include-package-data = true + +[tool.setuptools.dynamic] +# version = {attr = 'triumvirate.__version__'} 
+readme = {file = "README.rst"} + +# [tool.setuptools.packages.find] +# where = ['src'] +# namespaces = true + +[tool.setuptools.package-data] +'triumvirate' = ["*.pxd", "*.pyx"] +'triumvirate.include' = ["*.hpp"] +'triumvirate.src' = ["*.cpp"] +'triumvirate.resources' = ["*.ini", "*.yml"] + +[tool.setuptools_scm] + +[tool.pytest.ini_options] +minversion = '7.0' +addopts = "--full-trace --verbose --capture=no --runslow" +testpaths = [ + "tests", +] + +[tool.cibuildwheel] +build-frontend = 'build' +skip = [ + 'cp313-*', + 'pp*', + '*-win32', + '*-manylinux_i686', + '*-musllinux*', + '*_ppc64le', + '*_s390x', +] +environment = { PY_OMP='1', PY_CUDA='1', PY_BUILD_PARALLEL='-j' } +# test-requires = "pytest>=7.0" +# test-command = "pytest {project}/tests" + +manylinux-x86_64-image = 'manylinux_2_28' +manylinux-aarch64-image = 'manylinux_2_28' + +# TODO: Modify the following for HIP. +[tool.cibuildwheel.linux] +before-all = [ + "yum install -y gsl-devel", +# Install CUDA Toolkit inside Docker container using package manager, +# matching repository with image OS, and optionally matching CUDA version +# for Pip/Conda consistency. 
+ "yum install -y yum-utils", + "yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel9/x86_64/cuda-rhel9.repo", + "yum install -y cuda-toolkit", + # "yum install -y cuda-toolkit-12-6", +] +environment = { PY_CXX="/usr/local/cuda/bin/nvcc", PY_CXXFLAGS="-I/usr/local/cuda/include", PY_LDFLAGS="-L/usr/local/cuda/lib64", PY_OMP='1', PY_CUDA='1', PY_BUILD_PARALLEL='-j' } + +[tool.autopep8] +in-place = true +recursive = true +aggressive = 3 +max_line_length = 79 +ignore = "E226," + +[tool.numpydoc_validation] +checks = [ + 'all', + 'ES01', + 'EX01', + 'GL01', + 'GL02', + 'GL03', + 'GL06', + 'RT02', + 'SA01', + 'SS06', +] +exclude = [ + 'test_.*', + '\.__init__$', + '\.__repr__$', + '\.__str__$', + '\.__hash__$', + '\.__len__$', + '\.__eq__$', + '\.__le__$', + '\.__lt__$', + '\.__gt__$', + '\.__ge__$', + '\.__getitem__$', +] + +# [tool.docformatter] +# recursive = true +# wrap-summaries = 72 +# wrap-descriptions = 72 +# blank = true +# close-quotes-on-newline = true + +[tool.codespell] +skip = "publication/joss/paper.bib," +ignore-words-list = "ERRO," +count = true +quiet-level = 3 + +[tool.ruff] +target-version = 'py310' +line-length = 79 +exclude = [ + ".eggs", + ".git", + ".git-rewrite", + ".ipynb_checkpoints", + ".nox", + ".pyenv", + ".pytest_cache", + ".ruff_cache", + ".tox", + ".venv", + ".vscode", + "__pypackages__", + "build", + "dist", + "node_modules", + "site-packages", + "venv", +] + +[tool.ruff.lint] +fixable = ['ALL'] +unfixable = [] + +# [tool.ruff.format] +# quote-style = 'preserve' +# indent-style = 'space' +# line-ending = 'auto' +# skip-magic-trailing-comma = true +# docstring-code-format = false +# docstring-code-line-length = 'dynamic' + +[tool.semantic_release] +commit_parser = 'commit_parser:TRVCommitParser' +commit_message = "REL: Bump version to {version}" +tag_format = 'v{version}' +allow_zero_version = true +major_on_zero = false +no_git_verify = false +logging_use_named_masks = false +version_variables = [ 
+ "src/triumvirate/__init__.py:__version__", +] + +[tool.semantic_release.branches.main] +match = 'main' +prerelease = false + +[tool.semantic_release.changelog] +template_dir = "deploy/pkg/tmpl" +exclude_commit_patterns = ['\[dependabot\]', '\[pre-commit.ci\]',] + +[tool.semantic_release.changelog.environment] +block_start_string = '{%' +block_end_string = '%}' +variable_start_string = '{{' +variable_end_string = '}}' +comment_start_string = '{#' +comment_end_string = '#}' +newline_sequence = "\n" +autoescape = true +trim_blocks = false +lstrip_blocks = false +keep_trailing_newline = false + +[tool.semantic_release.commit_author] +env = 'GIT_COMMIT_AUTHOR' +default = "semantic-release " diff --git a/.pyproject_hipcuda.toml b/.pyproject_hipcuda.toml new file mode 100644 index 00000000..d6c703f8 --- /dev/null +++ b/.pyproject_hipcuda.toml @@ -0,0 +1,239 @@ +[build-system] +build-backend = 'setuptools.build_meta' +requires = [ + 'setuptools>=61.0', + 'setuptools_scm>=6.4', + 'Cython>=3.0', + 'extension-helpers>=1.1', + 'numpy>=2.0', +] + +[project] +name = 'Triumvirate-CUDA' +description = "Three-point clustering measurements in large-scale structure analyses." 
+authors = [ + {name = 'Mike S Wang'}, + {name = 'Naonori S Sugiyama'}, +] +maintainers = [ + {name = 'Mike S Wang', email = "32841762+MikeSWang@users.noreply.github.com"}, +] +license = {file = "LICENCE"} +classifiers = [ + "License :: OSI Approved :: GNU General Public License v3 (GPLv3)", + "License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)", + "Operating System :: POSIX :: Linux", + "Environment :: GPU", + "Environment :: GPU :: NVIDIA CUDA", + "Programming Language :: C++", + "Programming Language :: Cython", + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3 :: Only", + "Topic :: Scientific/Engineering :: Astronomy", + "Topic :: Scientific/Engineering :: Information Analysis", + "Topic :: Scientific/Engineering :: Physics", + "Intended Audience :: Science/Research", + "Intended Audience :: Education", +] +dynamic = [ + 'version', + 'readme', +] +requires-python = '>=3.10' +dependencies = [ + 'numpy>=1.23', + 'scipy>=1.13', + 'sympy>=1.12.1', + 'astropy>=4.2', + 'pyyaml>=5.1', + 'tqdm', +] + +[project.optional-dependencies] +mpi = ['mpi4py>=3.1.5'] +nbk = ['nbodykit'] + +[project.urls] +# Home = "https://mikeswang.github.io/Triumvirate" +Documentation = "https://triumvirate.readthedocs.io/" +Source = "https://github.com/MikeSWang/Triumvirate" +Changelog = "https://github.com/MikeSWang/Triumvirate/blob/main/CHANGELOG.md" + +# [tool.setuptools] +# include-package-data = true + +[tool.setuptools.dynamic] +# version = {attr = 'triumvirate.__version__'} +readme = {file = "README.rst"} + +# [tool.setuptools.packages.find] +# where = ['src'] +# namespaces = true + +[tool.setuptools.package-data] +'triumvirate' = ["*.pxd", "*.pyx"] +'triumvirate.include' = ["*.hpp"] +'triumvirate.src' = ["*.cpp"] +'triumvirate.resources' = ["*.ini", "*.yml"] + +[tool.setuptools_scm] + +[tool.pytest.ini_options] +minversion = '7.0' +addopts = "--full-trace --verbose --capture=no --runslow" 
+testpaths = [ + "tests", +] + +[tool.cibuildwheel] +build-frontend = 'build' +skip = [ + 'cp313-*', + 'pp*', + '*-win32', + '*-manylinux_i686', + '*-musllinux*', + '*_ppc64le', + '*_s390x', +] +environment = { PY_OMP='1', PY_CUDA='1', PY_BUILD_PARALLEL='-j' } +# test-requires = "pytest>=7.0" +# test-command = "pytest {project}/tests" + +manylinux-x86_64-image = 'manylinux_2_28' +manylinux-aarch64-image = 'manylinux_2_28' + +# TODO: Modify the following for hybrid CUDA/HIP. +[tool.cibuildwheel.linux] +before-all = [ + "yum install -y gsl-devel", +# Install CUDA Toolkit inside Docker container using package manager, +# matching repository with image OS, and optionally matching CUDA version +# for Pip/Conda consistency. + "yum install -y yum-utils", + "yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel9/x86_64/cuda-rhel9.repo", + "yum install -y cuda-toolkit", + # "yum install -y cuda-toolkit-12-6", +] +environment = { PY_CXX="/usr/local/cuda/bin/nvcc", PY_CXXFLAGS="-I/usr/local/cuda/include", PY_LDFLAGS="-L/usr/local/cuda/lib64", PY_OMP='1', PY_CUDA='1', PY_BUILD_PARALLEL='-j' } + +[tool.autopep8] +in-place = true +recursive = true +aggressive = 3 +max_line_length = 79 +ignore = "E226," + +[tool.numpydoc_validation] +checks = [ + 'all', + 'ES01', + 'EX01', + 'GL01', + 'GL02', + 'GL03', + 'GL06', + 'RT02', + 'SA01', + 'SS06', +] +exclude = [ + 'test_.*', + '\.__init__$', + '\.__repr__$', + '\.__str__$', + '\.__hash__$', + '\.__len__$', + '\.__eq__$', + '\.__le__$', + '\.__lt__$', + '\.__gt__$', + '\.__ge__$', + '\.__getitem__$', +] + +# [tool.docformatter] +# recursive = true +# wrap-summaries = 72 +# wrap-descriptions = 72 +# blank = true +# close-quotes-on-newline = true + +[tool.codespell] +skip = "publication/joss/paper.bib," +ignore-words-list = "ERRO," +count = true +quiet-level = 3 + +[tool.ruff] +target-version = 'py310' +line-length = 79 +exclude = [ + ".eggs", + ".git", + ".git-rewrite", + ".ipynb_checkpoints", + 
".nox", + ".pyenv", + ".pytest_cache", + ".ruff_cache", + ".tox", + ".venv", + ".vscode", + "__pypackages__", + "build", + "dist", + "node_modules", + "site-packages", + "venv", +] + +[tool.ruff.lint] +fixable = ['ALL'] +unfixable = [] + +# [tool.ruff.format] +# quote-style = 'preserve' +# indent-style = 'space' +# line-ending = 'auto' +# skip-magic-trailing-comma = true +# docstring-code-format = false +# docstring-code-line-length = 'dynamic' + +[tool.semantic_release] +commit_parser = 'commit_parser:TRVCommitParser' +commit_message = "REL: Bump version to {version}" +tag_format = 'v{version}' +allow_zero_version = true +major_on_zero = false +no_git_verify = false +logging_use_named_masks = false +version_variables = [ + "src/triumvirate/__init__.py:__version__", +] + +[tool.semantic_release.branches.main] +match = 'main' +prerelease = false + +[tool.semantic_release.changelog] +template_dir = "deploy/pkg/tmpl" +exclude_commit_patterns = ['\[dependabot\]', '\[pre-commit.ci\]',] + +[tool.semantic_release.changelog.environment] +block_start_string = '{%' +block_end_string = '%}' +variable_start_string = '{{' +variable_end_string = '}}' +comment_start_string = '{#' +comment_end_string = '#}' +newline_sequence = "\n" +autoescape = true +trim_blocks = false +lstrip_blocks = false +keep_trailing_newline = false + +[tool.semantic_release.commit_author] +env = 'GIT_COMMIT_AUTHOR' +default = "semantic-release " diff --git a/Makefile b/Makefile index 4e3447e2..4df4d4a2 100644 --- a/Makefile +++ b/Makefile @@ -79,13 +79,32 @@ MAKEFLAGS_JOBS = $(shell echo "${MAKEFLAGS} " | grep -Eo ${PATTERN_JOBS}) ifdef usecuda ifeq ($(strip ${usecuda}), $(filter $(strip ${usecuda}), true 1)) usecuda := true -PKGNAME := ${PKGNAME}-CUDA -LIBNAME := ${LIBNAME}_cuda + PKGNAME := ${PKGNAME}-CUDA + PROGNAME := ${PROGNAME}_cuda + LIBNAME := ${LIBNAME}_cuda else # usecuda != (true|1) unexport usecuda endif # usecuda == (true|1) endif # usecuda +# HIP: enabled with ``usehip=(true|1)``; disabled 
otherwise +ifdef usehip +ifeq ($(strip ${usehip}), $(filter $(strip ${usehip}), true 1)) +usehip := true + ifndef usecuda + PKGNAME := ${PKGNAME}-HIP + PROGNAME := ${PROGNAME}_hip + LIBNAME := ${LIBNAME}_hip + else # usecuda + PKGNAME := ${PKGNAME}-HIPCUDA + PROGNAME := ${PROGNAME}_hipcuda + LIBNAME := ${LIBNAME}_hipcuda + endif # !usecuda +else # usehip != (true|1) +unexport usehip +endif # usehip == (true|1) +endif # usehip + # OpenMP: enabled with ``useomp=(true|1)``; disabled otherwise ifdef useomp ifeq ($(strip ${useomp}), $(filter $(strip ${useomp}), true 1)) @@ -137,18 +156,23 @@ OS := $(shell uname -s) # Assume explicitly GCC compiler by default. [adapt] ifeq (${OS}, Linux) -## If using CUDA, use CUDA compiler. - ifndef usecuda +## If using CUDA/HIP, use CUDA/HIP compiler. + ifdef usehip + CXX ?= hipcc + else ifdef usecuda # !usehip && usecuda + CXX ?= nvcc + else # !usehip && !usecuda CXX ?= g++ - else # usecuda - CXX := nvcc - endif # !usecuda + endif # usehip else ifeq (${OS}, Darwin) ifdef usecuda $(error "CUDA is not supported on macOS.") endif # usecuda + ifdef usehip + $(error "HIP is not supported on macOS.") + endif # usehip ## Use GCC compiler from Homebrew (brew formula 'gcc'). ## The compiler binary may have suffix '-'; @@ -160,12 +184,14 @@ else ifeq (${OS}, Darwin) else # OS -# If using CUDA, use CUDA compiler. - ifndef usecuda +## If using CUDA/HIP, use CUDA/HIP compiler. + ifdef usehip + CXX ?= hipcc + else ifdef usecuda # !usehip && usecuda + CXX ?= nvcc + else # !usehip && !usecuda CXX ?= g++ - else # usecuda - CXX := nvcc - endif # !usecuda + endif # usehip endif # OS @@ -179,12 +205,14 @@ RM ?= rm -f # -- Dependencies -------------------------------------------------------- -# If using CUDA FFT, remove standard FFTW dependency. -ifndef usecuda -DEPS := gsl fftw3 -else # usecuda +# If using cuFFT/hipFFT, remove standard FFTW dependency. 
+ifdef usehip +DEPS := gsl +else ifdef usecuda # !usehip && usecuda DEPS := gsl -endif # !usecuda +else # !usehip && !usecuda +DEPS := gsl fftw3 +endif # usehip # Dependencies are searched for by `pkg-config`. Ensure the set-up of # `pkg-config` matches that of the dependencies (e.g. both are installed @@ -194,10 +222,12 @@ DEP_CXXFLAGS := $(shell pkg-config --silence-errors --cflags-only-other ${DEPS}) DEP_LDFLAGS := $(shell pkg-config --silence-errors --libs-only-other --libs-only-L ${DEPS}) DEP_LDLIBS := $(shell pkg-config --silence-errors --libs-only-l ${DEPS}) -# If using CUDA FFT, add its dependencies. -ifdef usecuda +# If using cuFFT/hipFFT, add its dependencies. +ifdef usehip +DEP_LDLIBS += -lhipfft +else ifdef usecuda # !usehip && usecuda DEP_LDLIBS += -lcufft -lcufftw -endif # usecuda +endif # usehip # -- Dependencies (test) ------------------------------------------------- @@ -215,21 +245,27 @@ DEP_TEST_LDLIBS := $(shell pkg-config --silence-errors --libs-only-l ${DEPS_TEST INCLUDES += -I${DIR_PKG_INCLUDE} ${DEP_INCLUDES} CPPFLAGS += -MMD -MP -D__TRV_VERSION__=\"${PKG_VER}\" -ifndef usecuda +ifdef usehip CXXFLAGS += -std=c++17 -Wall -O3 ${DEP_CXXFLAGS} -else # usecuda +else ifdef usecuda # !usehip && usecuda CXXFLAGS += -std=c++17 -Xcompiler -Wall,-O3 ${DEP_CXXFLAGS} -endif # !usecuda +else # !usehip && !usecuda +CXXFLAGS += -std=c++17 -Wall -O3 ${DEP_CXXFLAGS} +endif # usehip -ifndef usecuda +ifdef usehip LDFLAGS += \ $(addprefix -Wl${COMMA}-rpath${COMMA},$(patsubst -L%,%,${DEP_LDFLAGS})) \ ${DEP_LDFLAGS} -else # usecuda +else ifdef usecuda # !usehip && usecuda LDFLAGS += \ $(addprefix -Xlinker -rpath${COMMA},$(patsubst -L%,%,${DEP_LDFLAGS})) \ ${DEP_LDFLAGS} -endif # !usecuda +else # !usehip && !usecuda +LDFLAGS += \ + $(addprefix -Wl${COMMA}-rpath${COMMA},$(patsubst -L%,%,${DEP_LDFLAGS})) \ + ${DEP_LDFLAGS} +endif # usehip LDLIBS += $(if ${DEP_LDLIBS},${DEP_LDLIBS},-lgsl -lgslcblas -lfftw3 -lm) @@ -240,15 +276,21 @@ PIPOPTS ?= --user INCLUDES_TEST = 
${INCLUDES} ${DEP_TEST_INCLUDES} CXXFLAGS_TEST = ${CXXFLAGS} ${DEP_TEST_CXXFLAGS} -ifndef usecuda + +ifdef usehip LDFLAGS_TEST = -L${DIR_BUILDLIB} ${LDFLAGS} \ $(addprefix -Wl${COMMA}-rpath${COMMA},$(patsubst -L%,%,${DEP_TEST_LDFLAGS})) \ ${DEP_TEST_LDFLAGS} -else # usecuda +else ifdef usecuda # !usehip && usecuda LDFLAGS_TEST = -L${DIR_BUILDLIB} ${LDFLAGS} \ $(addprefix -Xlinker -rpath${COMMA},$(patsubst -L%,%,${DEP_TEST_LDFLAGS})) \ ${DEP_TEST_LDFLAGS} -endif # !usecuda +else # !usehip && !usecuda +LDFLAGS_TEST = -L${DIR_BUILDLIB} ${LDFLAGS} \ + $(addprefix -Wl${COMMA}-rpath${COMMA},$(patsubst -L%,%,${DEP_TEST_LDFLAGS})) \ + ${DEP_TEST_LDFLAGS} +endif # usehip + LDLIBS_TEST = -l${LIBNAME} ${LDLIBS} \ $(if ${DEP_TEST_LDLIBS},${DEP_TEST_LDLIBS},-lgtest -lpthread) @@ -260,38 +302,48 @@ ifdef NERSC_HOST ## GSL library [deprecated] # ifdef GSL_ROOT - # INCLUDES += -I${GSL_ROOT}/include - # ifndef usecuda - # LDFLAGS += -Wl,-rpath,${GSL_ROOT}/lib -L${GSL_ROOT}/lib - # else # usecuda - # LDFLAGS += -Xlinker -rpath,${GSL_ROOT}/lib -L${GSL_ROOT}/lib - # endif # !usecuda + + # INCLUDES += -I${GSL_ROOT}/include + + # ifdef usehip + # LDFLAGS += -Wl,-rpath,${GSL_ROOT}/lib -L${GSL_ROOT}/lib + # else ifdef usecuda # !usehip && usecuda + # LDFLAGS += -Xlinker -rpath,${GSL_ROOT}/lib -L${GSL_ROOT}/lib + # else # !usehip && !usecuda + # LDFLAGS += -Wl,-rpath,${GSL_ROOT}/lib -L${GSL_ROOT}/lib + # endif # usehip + # endif # GSL_ROOT ## FFTW library [deprecated] # ifdef FFTW_ROOT - # INCLUDES += -I${FFTW_INC} + # ifndef usehip # ifndef usecuda - # LDFLAGS += -Wl,-rpath,${FFTW_DIR} -L${FFTW_DIR} - # else # usecuda - # LDFLAGS += -Xlinker -rpath,${FFTW_DIR} -L${FFTW_DIR} + # INCLUDES += -I${FFTW_INC} + # LDFLAGS += -Wl,-rpath,${FFTW_DIR} -L${FFTW_DIR} # endif # !usecuda + # endif # !usehip # endif # FFTW_ROOT -## cuFFT library - ifdef usecuda +## cuFFT/hipFFT library + ifdef usehip + INCLUDES += -I${HIP_PATH}/include + LDFLAGS += -Wl,-rpath,${HIP_PATH}/lib -L${HIP_PATH}/lib + 
else ifdef usecuda # !usehip && usecuda INCLUDES += -I${NVIDIA_PATH}/math_libs/include LDFLAGS += -Xlinker -rpath,${NVIDIA_PATH}/math_libs/lib64 -L${NVIDIA_PATH}/math_libs/lib64 - endif # usecuda + endif # usehip ## GTEST library ifdef GTEST_ROOT INCLUDES_TEST += -I${GTEST_ROOT}/include - ifndef usecuda + ifdef usehip LDFLAGS_TEST += -Wl,-rpath,${GTEST_ROOT}/lib -L${GTEST_ROOT}/lib - else # usecuda + else ifdef usecuda # !usehip && usecuda LDFLAGS_TEST += -Xlinker -rpath,${GTEST_ROOT}/lib -L${GTEST_ROOT}/lib - endif # !usecuda + else # !usehip && !usecuda + LDFLAGS_TEST += -Wl,-rpath,${GTEST_ROOT}/lib -L${GTEST_ROOT}/lib + endif # usehip endif # GTEST_ROOT endif # NERSC_HOST @@ -302,11 +354,13 @@ ifdef DIRAC_HOST ## GTEST library ifdef GTEST_ROOT INCLUDES_TEST += -I${GTEST_ROOT}/include - ifndef usecuda + ifdef usehip LDFLAGS_TEST += -Wl,-rpath,${GTEST_ROOT}/lib -L${GTEST_ROOT}/lib - else # usecuda + else ifdef usecuda # !usehip && usecuda LDFLAGS_TEST += -Xlinker -rpath,${GTEST_ROOT}/lib -L${GTEST_ROOT}/lib - endif # !usecuda + else # !usehip && !usecuda + LDFLAGS_TEST += -Wl,-rpath,${GTEST_ROOT}/lib -L${GTEST_ROOT}/lib + endif # usehip endif # GTEST_ROOT endif # DIRAC_HOST @@ -320,27 +374,34 @@ ifdef useomp ## Assume GCC implementation by default. [adapt] ifeq (${OS}, Linux) -### If using CUDA FFT, add preprocessing flags. - ifndef usecuda +### If using CUDA, add preprocessing flags. + ifdef usehip #### Use GCC implementation. CXXFLAGS_OMP ?= -fopenmp LDFLAGS_OMP ?= -fopenmp # LDLIBS_OMP ?= -lgomp -#### Use Intel implementation. - # CXXFLAGS_OMP ?= -qopenmp - # LDFLAGS_OMP ?= -qopenmp - # # LDLIBS_OMP ?= -liomp5 - - else # usecuda + else ifdef usecuda # !usehip && usecuda #### Use GCC implementation. CXXFLAGS_OMP ?= -Xcompiler -fopenmp LDFLAGS_OMP ?= -Xcompiler -fopenmp LDLIBS_OMP ?= -lgomp - endif # !usecuda + else # !usehip && !usecuda + +#### Use GCC implementation. 
+ CXXFLAGS_OMP ?= -fopenmp + LDFLAGS_OMP ?= -fopenmp + # LDLIBS_OMP ?= -lgomp + +#### Use Intel implementation. + # CXXFLAGS_OMP ?= -qopenmp + # LDFLAGS_OMP ?= -qopenmp + # # LDLIBS_OMP ?= -liomp5 + + endif # usehip else ifeq (${OS}, Darwin) @@ -357,33 +418,40 @@ ifdef useomp else # OS ### Use GCC implementation. -### If using CUDA FFT, add preprocessing flags. - ifndef usecuda +### If using CUDA, add preprocessing flags. + ifdef usehip CXXFLAGS_OMP ?= -fopenmp LDFLAGS_OMP ?= -fopenmp - else # usecuda + else ifdef usecuda # !usehip && usecuda CXXFLAGS_OMP ?= -Xcompiler -fopenmp LDFLAGS_OMP ?= -Xcompiler -fopenmp - endif # !usecuda + else # !usehip && !usecuda + CXXFLAGS_OMP ?= -fopenmp + LDFLAGS_OMP ?= -fopenmp + endif # usehip endif # OS -## If using CUDA FFT, remove macros for FFTW. - ifndef usecuda - CPPFLAGS += -DTRV_USE_OMP -DTRV_USE_FFTWOMP - else # usecuda +## If using cuFFT/hipFFT, remove macros for FFTW. + ifdef usehip CPPFLAGS += -DTRV_USE_OMP - endif # !usecuda + else ifdef usecuda # !usehip && usecuda + CPPFLAGS += -DTRV_USE_OMP + else # !usehip && !usecuda + CPPFLAGS += -DTRV_USE_OMP -DTRV_USE_FFTWOMP + endif # usehip CXXFLAGS += ${CXXFLAGS_OMP} LDFLAGS += ${LDFLAGS_OMP} ## If using CUDA FFT, do not include OpenMP FFTW dependency. 
- ifndef usecuda - LDLIBS += -lfftw3_omp ${LDLIBS_OMP} - else # usecuda + ifdef usehip LDLIBS += ${LDLIBS_OMP} - endif # !usecuda + else ifdef usecuda # !usehip && usecuda + LDLIBS += ${LDLIBS_OMP} + else # !usehip && !usecuda + LDLIBS += -lfftw3_omp ${LDLIBS_OMP} + endif # usehip CPPFLAGS_TEST += CXXFLAGS_TEST += ${CXXFLAGS_OMP} @@ -398,10 +466,12 @@ WOMP := without endif # useomp -# CUDA -ifdef usecuda +# CUDA/HIP +ifdef usehip +CPPFLAGS += -DTRV_USE_HIP +else ifdef usecuda # !usehip && usecuda CPPFLAGS += -DTRV_USE_CUDA -endif # usecuda +endif # usehip # Visual display ifdef usedisp @@ -411,11 +481,13 @@ endif # usedisp # Profiling ifdef useprof ## Linaro MAP profiler -ifndef usecuda +ifdef usehip CXXFLAGS += -g1 -O3 -fno-inline -fno-optimize-sibling-calls -else # usecuda +else ifdef usecuda # !usehip && usecuda CXXFLAGS += -g -O3 -lineinfo -endif # !usecuda +else # !usehip && !usecuda +CXXFLAGS += -g1 -O3 -fno-inline -fno-optimize-sibling-calls +endif # usehip endif # useprof # Parameter debugging @@ -439,14 +511,19 @@ export PY_NO_OMP else # useomp export PY_CXXFLAGS_OMP=${CXXFLAGS_OMP} export PY_LDFLAGS_OMP=${LDFLAGS_OMP} ${LDLIBS_OMP} +ifdef usehip +export PY_OMP=true +endif # usehip ifdef usecuda export PY_OMP=true endif # usecuda endif # !useomp -ifdef usecuda +ifdef usehip +export PY_HIP=true +else ifdef usecuda # !usehip && usecuda export PY_CUDA=true -endif # usecuda +endif # usehip export PY_BUILD_PARALLEL=${MAKEFLAGS_JOBS} @@ -474,27 +551,35 @@ LDLIBS_TEST := $(strip ${LDLIBS_TEST}) # ------------------------------------------------------------------------ SRCS := $(wildcard ${DIR_PKG_SRC}/*.cpp) +ifdef usehip ifndef usecuda -OBJS := $(SRCS:${DIR_PKG_SRC}/%.cpp=${DIR_BUILDOBJ}/%.o) -else # usecuda +OBJS := $(SRCS:${DIR_PKG_SRC}/%.cpp=${DIR_BUILDOBJ}/%_hip.o) +else # usehip && usecuda +OBJS := $(SRCS:${DIR_PKG_SRC}/%.cpp=${DIR_BUILDOBJ}/%_hipcuda.o) +endif # usehip && !usecuda +else ifdef usecuda # !usehip && usecuda OBJS := 
$(SRCS:${DIR_PKG_SRC}/%.cpp=${DIR_BUILDOBJ}/%_cuda.o) -endif # !usecuda -DEPS := $(OBJS:.o=.d) +else # !usehip && !usecuda +OBJS := $(SRCS:${DIR_PKG_SRC}/%.cpp=${DIR_BUILDOBJ}/%.o) +endif # usehip +# Dependency files emitted by ``-MMD -MP``, consumed by ``-include $(DEPS)``. +DEPS := $(OBJS:.o=.d) PROGSRC := ${DIR_PKG_SRCPROG}/${PROGNAME}.cpp +ifdef usehip ifndef usecuda -PROGOBJ := ${DIR_BUILDOBJ}/${PROGNAME}.o -else # usecuda +PROGOBJ := ${DIR_BUILDOBJ}/${PROGNAME}_hip.o +else # usehip && usecuda +PROGOBJ := ${DIR_BUILDOBJ}/${PROGNAME}_hipcuda.o +endif # usehip && !usecuda +else ifdef usecuda # !usehip && usecuda PROGOBJ := ${DIR_BUILDOBJ}/${PROGNAME}_cuda.o -endif # !usecuda +else # !usehip && !usecuda +PROGOBJ := ${DIR_BUILDOBJ}/${PROGNAME}.o +endif # usehip -ifndef usecuda PROGEXE := ${DIR_BUILDBIN}/${PROGNAME} PROGLIB := ${DIR_BUILDLIB}/lib${LIBNAME}.a -else # usecuda -PROGEXE := ${DIR_BUILDBIN}/${PROGNAME}_cuda -PROGLIB := ${DIR_BUILDLIB}/lib${LIBNAME}_cuda.a -endif # !usecuda # -- Installation -------------------------------------------------------- @@ -513,22 +598,34 @@ cpplibinstall: library cppappbuild: executable -ifndef usecuda +ifdef usehip +ifdef usecuda pyinstall: @echo "Installing ${PKGNAME} Python package ${WOMP} OpenMP (in pip dev mode)..." - @cp .pyproject.toml pyproject.toml + @cp .pyproject_hipcuda.toml pyproject.toml python -m pip install ${PIPOPTS} --editable . -vvv -else # usecuda +else # usehip && !usecuda +pyinstall: + @echo "Installing ${PKGNAME} Python package ${WOMP} OpenMP (in pip dev mode)..." + @cp .pyproject_hip.toml pyproject.toml + python -m pip install ${PIPOPTS} --editable . -vvv +endif # usehip && usecuda +else ifdef usecuda pyinstall: @echo "Installing ${PKGNAME} Python package ${WOMP} OpenMP (in pip dev mode)..." @cp .pyproject_cuda.toml pyproject.toml python -m pip install ${PIPOPTS} --editable . -vvv -endif # !usecuda +else # !usehip && !usecuda +pyinstall: + @echo "Installing ${PKGNAME} Python package ${WOMP} OpenMP (in pip dev mode)..." + @cp .pyproject.toml pyproject.toml + python -m pip install ${PIPOPTS} --editable . 
-vvv +endif # !usehip && !usecuda uninstall: cppuninstall pyuninstall cppuninstall: - @echo "Uninstalling Triumvirate(-CUDA) C++ library/program..." + @echo "Uninstalling Triumvirate(-CUDA/HIP/HIPCUDA) C++ library/program..." @echo " removing builds..." @find ${DIR_BUILD} -mindepth 1 -maxdepth 1 ! -name ".git*" -exec rm -r {} + @@ -571,13 +666,21 @@ objects_: ${PROGOBJ}: ${PROGSRC} $(CXX) -c $(CPPFLAGS) $(CXXFLAGS) $< -o $@ -ifndef usecuda -$(OBJS): ${DIR_BUILDOBJ}/%.o: ${DIR_PKG_SRC}/%.cpp | objects_ +ifdef usehip +ifdef usecuda +$(OBJS): ${DIR_BUILDOBJ}/%_hipcuda.o: ${DIR_PKG_SRC}/%.cpp | objects_ $(CXX) -c $(CPPFLAGS) $(CXXFLAGS) $< -o $@ -else # usecuda +else # usehip && !usecuda +$(OBJS): ${DIR_BUILDOBJ}/%_hip.o: ${DIR_PKG_SRC}/%.cpp | objects_ + $(CXX) -c $(CPPFLAGS) $(CXXFLAGS) $< -o $@ +endif # usehip && usecuda +else ifdef usecuda $(OBJS): ${DIR_BUILDOBJ}/%_cuda.o: ${DIR_PKG_SRC}/%.cpp | objects_ $(CXX) -c $(CPPFLAGS) $(CXXFLAGS) $< -o $@ -endif # !usecuda +else # !usehip && !usecuda +$(OBJS): ${DIR_BUILDOBJ}/%.o: ${DIR_PKG_SRC}/%.cpp | objects_ + $(CXX) -c $(CPPFLAGS) $(CXXFLAGS) $< -o $@ +endif # !usehip && !usecuda -include $(DEPS) @@ -654,12 +757,12 @@ clean: buildclean testclean distclean runclean buildclean: cppclean pyclean cppclean: - @echo "Cleaning up Triumvirate(-CUDA) C++ build..." + @echo "Cleaning up Triumvirate(-CUDA/HIP/HIPCUDA) C++ build..." @echo " removing builds..." @find ${DIR_BUILD} -mindepth 1 -maxdepth 1 ! -name ".git*" -exec rm -r {} + pyclean: - @echo "Cleaning up Triumvirate(-CUDA) Python build..." + @echo "Cleaning up Triumvirate(-CUDA/HIP/HIPCUDA) Python build..." @echo " removing Cythonised C/C++ scripts..." @find ${DIR_PKG} -maxdepth 1 -name "*.cpp" -exec rm {} + @echo " removing Cythonised extensions..." @@ -673,7 +776,7 @@ pyclean: @find . -type d -name ".ipynb_checkpoints" -exec rm -r {} + testclean: - @echo "Cleaning up Triumvirate(-CUDA) tests..." + @echo "Cleaning up Triumvirate(-CUDA/HIP/HIPCUDA) tests..." 
@echo " removing test builds and outputs..." @$(RM) -r ${DIR_TESTBUILD}/* ${DIR_TESTOUT}/* @echo " removing pytest cache..." @@ -684,7 +787,7 @@ testclean: @$(RM) -r core distclean: - @echo "Cleaning up Triumvirate(-CUDA) distributions..." + @echo "Cleaning up Triumvirate(-CUDA/HIP/HIPCUDA) distributions..." @echo " removing distribution outputs..." @$(RM) -r ${DIR_DIST}/ @echo " removing wheels..." @@ -694,7 +797,7 @@ distclean: @find . -name "*.egg-info" -exec rm -r {} + runclean: - @echo "Cleaning up Triumvirate(-CUDA) runs..." + @echo "Cleaning up Triumvirate(-CUDA/HIP/HIPCUDA) runs..." @echo " removing compiled bytecode..." @find . -type d -name "__pycache__" -exec rm -r {} + @echo " removing core dumps..."