-
Notifications
You must be signed in to change notification settings - Fork 16
/
make.sh
executable file
·352 lines (322 loc) · 10.5 KB
/
make.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
#!/bin/bash
# Make script for KHARMA
# Used to set sensible default flags and call cmake/make
# Usage:
# ./make.sh [option1] [option2]
#
# clean: BUILD by re-running cmake, restarting the make process from nothing.
# That is, "./make.sh clean" == "make clean" + "make"
# Always use 'clean' when switching Release<->Debug or OpenMP<->CUDA
# cuda: Build for GPU with CUDA
# sycl: Build for GPU with SYCL
# hip: Build for GPU with HIP
# debug: Configure with debug flags: mostly array bounds checks
# Note, though, many sanity checks during the run are
# actually *runtime* parameters e.g. verbose, flag_verbose, etc
# trace: Configure with execution tracing: print at the beginning and end
# of most host-side function calls during a step
# hdf5: Download & compile HDF5, rather than looking for a system version
# cleanhdf5: Reconfigure HDF5 from scratch, rather than just recompiling
# nompi: Disable MPI and don't search/link it
# noimplicit: Disable implicit solver, avoids pulling in Kokkos-kernels
# nocleanup: Disable magnetic field cleaning code for resizing, avoids
# pulling in some unofficial Parthenon code.
# Many machine files have additional options, check machines/machinename.sh
# Number of parallel make processes to use
# Set conservatively as nvcc/nvc++ uses a *lot* of memory
# Set in environment or override in machine file
NPROC=${NPROC:-8}

### Load basic stuff ###
# Prefer the fully-qualified hostname; fall back to the plain one if
# `hostname -f` printed nothing
HOST=$(hostname -f)
if [[ -z "$HOST" ]]; then
  HOST=$(hostname)
fi
# All command-line options joined into one string; the rest of the script
# selects options by substring-matching against it
ARGS="$*"
# Directory holding this script, with symlinks resolved
SOURCE_DIR=$(dirname "$(readlink -f "$0")")

# A machine config in .config overrides our defaults
if [[ -f "$HOME/.config/kharma.sh" ]]; then
  source "$HOME/.config/kharma.sh"
else
  for machine in "$SOURCE_DIR"/machines/*.sh; do
    # When nothing matches, the glob stays a literal pattern: skip it
    # instead of trying to source a file that does not exist
    [[ -e "$machine" ]] || continue
    source "$machine"
  done
fi
# Compile for the host architecture unless HOST_ARCH says otherwise.
# Specifying it is always better, though, for cross-compile/older
# Kokkos support
EXTRA_FLAGS="-DKokkos_ARCH_${HOST_ARCH:-NATIVE}=ON $EXTRA_FLAGS"

# Kokkos does *not* support compiling for multiple devices!
# But if they ever do, DEVICE_ARCH may hold a comma-separated list:
# one -DKokkos_ARCH_<name>=ON flag is prepended per entry.
if [[ -v DEVICE_ARCH ]]; then
  # awk emits one list entry per line; consume them one at a time
  while IFS= read -r device_arch; do
    EXTRA_FLAGS="-DKokkos_ARCH_${device_arch}=ON $EXTRA_FLAGS"
  done < <(awk -F',' '{ for( i=1; i<=NF; i++ ) print $i }' <<<"$DEVICE_ARCH")
fi
# Translate feature keywords found in ARGS into CMake definitions.
# Each table entry is "keyword:definition"; a definition is prepended to
# EXTRA_FLAGS when its keyword appears anywhere in ARGS. Entries are
# processed top to bottom.
for feature_spec in \
  "trace:-DKHARMA_TRACE=1" \
  "nompi:-DKHARMA_DISABLE_MPI=1" \
  "noimplicit:-DKHARMA_DISABLE_IMPLICIT=1" \
  "nocleanup:-DKHARMA_DISABLE_CLEANUP=1" \
  "split_implicit:-DKHARMA_SPLIT_IMPLICIT_SOLVE=1"
do
  feature_keyword=${feature_spec%%:*}
  feature_define=${feature_spec#*:}
  if [[ "$ARGS" == *"$feature_keyword"* ]]; then
    EXTRA_FLAGS="$feature_define $EXTRA_FLAGS"
  fi
done
### Environment Prep ###
# Print a heads-up when python3 resolves to a path containing "conda"
if [[ "$(command -v python3 2>/dev/null)" == *"conda"* ]]; then
  echo "make.sh note:"
  echo "It looks like you have Anaconda loaded."
  echo "This is usually okay, but double-check the line 'Found MPI_CXX:' below!"
  echo
fi

# Save arguments if we've changed them
# Used in run.sh for loading the same modules/etc.
if [[ "$ARGS" == *"clean"* ]]; then
  # Quoted so a checkout path containing whitespace does not turn this
  # into an "ambiguous redirect" error
  echo "$ARGS" > "$SOURCE_DIR/make_args"
fi

# Choose configuration
if [[ "$ARGS" == *"debug"* ]]; then
  TYPE=Debug
else
  TYPE=Release
fi
### Set KHARMA Flags ###
# Move into the directory containing this script and record its absolute
# path. Abort if that fails: everything below uses paths relative to it.
SCRIPT_DIR=$( dirname "$0" )
cd "$SCRIPT_DIR" || exit 1
SCRIPT_DIR=$PWD
# Generally best to set CXX_NATIVE yourself if you want to be sure,
# but we try to be smart about loading the most specific/advanced/
# capable compiler available in PATH.
# Sets CXX_NATIVE, C_NATIVE and (except for the Cray wrappers) OMP_FLAG.
if [[ -z "$CXX_NATIVE" ]]; then
  # If Cray environment is loaded, use their wrappers
  if command -v CC >/dev/null 2>&1; then
    CXX_NATIVE=CC
    C_NATIVE=cc
    # Don't set an OMP flag to use
    # This could call through to any compiler, & sometimes (Frontier)
    # we want no OpenMP at all
  # Prefer Intel oneAPI compiler over legacy, both over generic
  elif command -v icpx >/dev/null 2>&1; then
    CXX_NATIVE=icpx
    C_NATIVE=icx
    OMP_FLAG="-fiopenmp"
  elif command -v icpc >/dev/null 2>&1; then
    CXX_NATIVE=icpc
    C_NATIVE=icc
    OMP_FLAG="-qopenmp"
  # Prefer NVHPC over generic compilers
  elif command -v nvc++ >/dev/null 2>&1; then
    CXX_NATIVE=nvc++
    C_NATIVE=nvc
    OMP_FLAG="-mp"
  # Maybe we overwrote 'c++' to point to something
  # Usually this is GCC on Linux systems, which is fine
  # (probe 'c++' itself, the binary we are about to select, not 'cpp',
  # which is the C preprocessor)
  elif command -v c++ >/dev/null 2>&1; then
    CXX_NATIVE=c++
    C_NATIVE=cc
    OMP_FLAG="-fopenmp"
  # Otherwise, trusty system GCC
  else
    CXX_NATIVE=g++
    C_NATIVE=gcc
    OMP_FLAG="-fopenmp"
  fi
  # clang/++ will never be used automatically;
  # blame Apple, who don't support OpenMP
fi
# HIP compiles get no OpenMP flag: it gets confused
# and thinks we want to use OMP 5.0 offload stuff.
# Every other build has OMP_FLAG prepended to CXXFLAGS.
case "$ARGS" in
  *"hip"*) ;;
  *) export CXXFLAGS="$OMP_FLAG $CXXFLAGS" ;;
esac

# Set compilers
# Options are named different so we can override w/wrapper for CUDA
CXX="$CXX_NATIVE"
CC="$C_NATIVE"
export CXX CC
# CUDA loop options: MANUAL1D_LOOP > MDRANGE_LOOP, TPTTR_LOOP & TPTTRTVR_LOOP don't compile
# Inner loop must be TVR_INNER_LOOP
# OpenMP loop options for KNL:
# Outer: SIMDFOR_LOOP;MANUAL1D_LOOP;MDRANGE_LOOP;TPTTR_LOOP;TPTVR_LOOP;TPTTRTVR_LOOP
# Inner: SIMDFOR_INNER_LOOP;TVR_INNER_LOOP

# Start from the settings shared by the device builds, then let the first
# matching backend keyword (sycl, hip, cuda, nvc++ in that order) adjust
# them. With no backend keyword the host-only layout is used.
OUTER_LAYOUT="MANUAL1D_LOOP"
INNER_LAYOUT="TVR_INNER_LOOP"
ENABLE_OPENMP="ON"
ENABLE_CUDA="OFF"
ENABLE_SYCL="OFF"
ENABLE_HIP="OFF"
case "$ARGS" in
  *"sycl"*)
    ENABLE_SYCL="ON"
    ;;
  *"hip"*)
    ENABLE_OPENMP="OFF"
    ENABLE_HIP="ON"
    ;;
  *"cuda"*)
    # Compile through the bundled nvcc wrapper, which calls the native
    # compiler chosen earlier
    export CXX="$SCRIPT_DIR/bin/nvcc_wrapper"
    if [[ "$ARGS" == *"wrapper_dryrun"* ]]; then
      export CXXFLAGS="-dryrun $CXXFLAGS"
      echo "Dry-running the nvcc wrapper with $CXXFLAGS"
    fi
    export NVCC_WRAPPER_DEFAULT_COMPILER="$CXX_NATIVE"
    ENABLE_CUDA="ON"
    ;;
  *"nvc++"*)
    ENABLE_CUDA="ON"
    ;;
  *)
    OUTER_LAYOUT="MDRANGE_LOOP"
    INNER_LAYOUT="SIMDFOR_INNER_LOOP"
    ;;
esac
# A custom linker program can be requested by setting LINKER; without it
# CXX is used, as the system linker may be older/incompatible
if [[ -n "${LINKER+set}" ]]; then
  EXTRA_FLAGS+=" -DCMAKE_LINKER=$LINKER"
fi

# 'special_link_line' overrides the CMake rule used to link executables.
# NOTE(review): EXTRA_FLAGS is expanded unquoted in the cmake call, so the
# single quotes below are passed literally and the value is split on
# spaces -- confirm this reaches cmake as intended.
case "$ARGS" in
  *"special_link_line"*)
    EXTRA_FLAGS+=" -DCMAKE_CXX_LINK_EXECUTABLE='<CMAKE_LINKER> <FLAGS> <CMAKE_CXX_LINK_FLAGS> <LINK_FLAGS> <OBJECTS> -o <TARGET> <LINK_LIBRARIES>'"
    ;;
esac

# Intel-specific compiler flags
case "$CXX" in
  icpc)
    # Avoid warning on nvcc pragmas Intel doesn't like
    export CXXFLAGS="-Wno-unknown-pragmas $CXXFLAGS"
    ;;
  icpx)
    # Avoid icpx's astonishing DEFAULT -ffast-math
    export CXXFLAGS="-fno-fast-math $CXXFLAGS"
    ;;
esac
### Build HDF5 ###
# If we're building HDF5, do it after we set *all flags*
# Only runs when 'hdf5' and 'clean' were both requested and this is not a
# dry run. Downloads, configures, builds and installs HDF5 into
# $SOURCE_DIR/external/hdf5; output goes to build-hdf5.log in the HDF5
# source directory.
if [[ "$ARGS" == *"hdf5"* && "$ARGS" == *"clean"* && "$ARGS" != *"dryrun"* ]]; then
  H5VER=1.14.2
  H5VERU=1_14_2
  # Abort if we cannot enter external/: the rm/tar/make calls below must
  # not run in some other directory
  cd external || exit 1
  # Allow complete reconfigure (for switching compilers, takes longer)
  if [[ "$ARGS" == *"cleanhdf5"* ]]; then
    rm -rf "hdf5-${H5VER}/"
  fi
  # Download if needed
  if [[ ! -f "hdf5-${H5VER}.tar.gz" ]]; then
    curl "https://hdf-wordpress-1.s3.amazonaws.com/wp-content/uploads/manual/HDF5/HDF5_${H5VERU}/src/hdf5-${H5VER}.tar.gz" -o "hdf5-${H5VER}.tar.gz"
  fi
  # Unpack if needed (or deleted)
  if [[ ! -d "hdf5-${H5VER}/" ]]; then
    tar xf "hdf5-${H5VER}.tar.gz"
  fi
  cd "hdf5-${H5VER}/" || exit 1
  # Pick the C compiler (an MPI wrapper unless 'nompi') for HDF5
  # TODO better ensure we're using C_NATIVE underneath. e.g. MPI_CFLAGS with -cc
  if [[ "$ARGS" == *"nompi"* ]]; then
    HDF_CC=$C_NATIVE
    HDF_EXTRA=""
  else
    if [[ "$C_NATIVE" == *"icx"* ]]; then
      HDF_CC=mpiicx
    elif [[ "$C_NATIVE" == *"icc"* ]]; then
      HDF_CC=mpiicc
    elif [[ "$C_NATIVE" == "cc" ]]; then
      # Cray wrappers include MPI
      HDF_CC=cc
    else
      HDF_CC=mpicc
    fi
    HDF_EXTRA="--enable-parallel"
  fi
  echo Configuring HDF5...
  export CFLAGS="-fPIC $CFLAGS"
  # HDF_EXTRA is left unquoted on purpose: when empty it must vanish
  # rather than become an empty argument
  CC="$HDF_CC" sh configure -C $HDF_EXTRA --prefix="$SOURCE_DIR/external/hdf5" --enable-build-mode=production \
    --disable-dependency-tracking --disable-hl --disable-tests --disable-tools --disable-shared --disable-deprecated-symbols > build-hdf5.log
  sleep 1
  echo "Building HDF5 (probably 30s-2min)"
  # Compiling C takes less memory, so use twice the usual job count.
  # Test NPROC's *value*: `-v $NPROC` would look up a variable named
  # after that value and so never take this branch.
  if [[ -n "$NPROC" ]]; then
    make -j$(( NPROC * 2 )) >> build-hdf5.log 2>&1
  else
    make -j >> build-hdf5.log 2>&1
  fi
  make install >> build-hdf5.log 2>&1
  make clean >> build-hdf5.log 2>&1
  cd ../.. || exit 1
  echo Built HDF5
fi

# Whenever 'hdf5' is requested, search the bundled install location first
if [[ "$ARGS" == *"hdf5"* ]]; then
  PREFIX_PATH="$SOURCE_DIR/external/hdf5;$PREFIX_PATH"
fi
### Build KHARMA ###
# A clean build first preps the sources (submodules, patches) and then
# deletes the build directory
if [[ "$ARGS" == *"clean"* ]]; then
  # Should do this manually when compiling on backend nodes!
  [ -f external/parthenon/CMakeLists.txt ] || git submodule update --recursive --init

  # Patch parthenon to use KHARMA's coordinates, anything incidental
  cd external/parthenon
  # "1" when the second dot-separated field of `git --version` exceeds 35;
  # such versions are asked to apply patches quietly
  git_is_recent=$(( $(git --version | cut -d '.' -f 2) > 35 ))
  apply_opts=()
  if [[ "$git_is_recent" == "1" ]]; then
    apply_opts=(--quiet)
  else
    echo "make.sh note: You may see errors applying patches below. These are normal."
  fi
  git apply "${apply_opts[@]}" ../patches/parthenon-*.patch
  cd -

  # HIP requires device-capable variant functions
  if [[ "$ARGS" == *"hip"* ]]; then
    cd external/variant
    git apply "${apply_opts[@]}" ../patches/variant-hip.patch
    cd -
    # HIP also prefers new Kokkos.
    # TODO work something out if on HIP machines w/o internet
    cd external/parthenon
    git submodule update --remote external/Kokkos
    cd -
  fi

  rm -rf build
fi
# Configure (clean builds only) and compile inside ./build, then copy the
# resulting kharma.* files up one level.
mkdir -p build
# Never run cmake/make from the wrong directory
cd build || exit 1

if [[ "$ARGS" == *"clean"* ]]; then
  # On a dry run, trace the exact cmake invocation
  if [[ "$ARGS" == *"dryrun"* ]]; then
    set -x
  fi
  # EXTRA_FLAGS is intentionally unquoted so it splits into separate
  # cmake arguments
  cmake ..\
    -DCMAKE_C_COMPILER="$CC" \
    -DCMAKE_CXX_COMPILER="$CXX" \
    -DCMAKE_PREFIX_PATH="$PREFIX_PATH;$CMAKE_PREFIX_PATH" \
    -DCMAKE_BUILD_TYPE=$TYPE \
    -DPAR_LOOP_LAYOUT=$OUTER_LAYOUT \
    -DPAR_LOOP_INNER_LAYOUT=$INNER_LAYOUT \
    -DKokkos_ENABLE_OPENMP=$ENABLE_OPENMP \
    -DKokkos_ENABLE_CUDA=$ENABLE_CUDA \
    -DKokkos_ENABLE_SYCL=$ENABLE_SYCL \
    -DKokkos_ENABLE_HIP=$ENABLE_HIP \
    $EXTRA_FLAGS
  # A dry run stops after configuring
  if [[ "$ARGS" == *"dryrun"* ]]; then
    set +x
    exit
  fi
fi

if [[ "$ARGS" != *"dryrun"* ]]; then
  # Stop with make's status on a failed build, rather than copying stale
  # binaries from an earlier build and reporting success
  make -j"$NPROC" || exit $?
  cp kharma/kharma.* ..
fi