diff --git a/CHANGELOG b/CHANGELOG index 3b9ee0843..863c17377 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,6 +1,7 @@ List of features / changes made / release notes, in reverse chronological order. If not stated, FINUFFT is assumed (cuFINUFFT <=1.3 is listed separately). +* new benchmarker perftest/spreadtestndall sweeps all kernel widths (M Barbone). * cufinufft now supports modeord(type 1,2 only): 0 CMCL-style increasing mode order, 1 FFT-style mode order. * New doc page: migration guide from NFFT3 (2d1 case only). diff --git a/docs/devnotes.rst b/docs/devnotes.rst index 0dd042461..f95729a5a 100644 --- a/docs/devnotes.rst +++ b/docs/devnotes.rst @@ -19,7 +19,7 @@ Developer notes * If you add a new option field (recall it must be plain C style only, no special types) to ``include/finufft_opts.h``, don't forget to add it to ``include/finufft.fh``, ``include/finufft_mod.f90``, ``matlab/finufft.mw``, ``python/finufft/_finufft.py``, and the Julia interface, as well a paragraph describing its use in the docs. Also to set its default value in ``src/finufft.cpp``. You will then need to regenerate the docs as in ``docs/README``. -* For testing and performance measuring routines see ``test/README`` and ``perftest/README``. We need more of the latter, eg, something making performance graphs that enable rapid eyeball comparison of various settings/machines. +* For testing and performance measuring routines see ``test/README`` and ``perftest/README``. We need more of the latter, eg, something making performance graphs that enable rapid eyeball comparison of various settings/machines. Marco is working on that. * Continuous Integration (CI). See files for this in ``.github/workflows/``. It currently tests the default ``makefile`` settings in linux, and three other ``make.inc.*`` files covering OSX and Windows (MinGW). CI does not test build the variant OMP=OFF. The dev should test these locally. Likewise, the Julia wrapper is separate and thus not tested in CI. 
We have added ``JenkinsFile`` for the GPU CI via python wrappers. diff --git a/makefile b/makefile index 2d992e3aa..8db73a45e 100644 --- a/makefile +++ b/makefile @@ -269,11 +269,17 @@ perftest/%f: perftest/%.cpp $(DYNLIB) # spreader only test, double/single (good for self-contained work on spreader) ST=perftest/spreadtestnd +STA=perftest/spreadtestndall STF=$(ST)f +STAF=$(STA)f $(ST): $(ST).cpp $(SOBJS) $(SOBJS_PI) $(CXX) $(CXXFLAGS) ${LDFLAGS} $< $(SOBJS) $(SOBJS_PI) $(LIBS) -o $@ $(STF): $(ST).cpp $(SOBJSF) $(SOBJS_PI) $(CXX) $(CXXFLAGS) ${LDFLAGS} -DSINGLE $< $(SOBJSF) $(SOBJS_PI) $(LIBS) -o $@ +$(STA): $(STA).cpp $(SOBJS) $(SOBJS_PI) + $(CXX) $(CXXFLAGS) ${LDFLAGS} $< $(SOBJS) $(SOBJS_PI) $(LIBS) -o $@ +$(STAF): $(STA).cpp $(SOBJSF) $(SOBJS_PI) + $(CXX) $(CXXFLAGS) ${LDFLAGS} -DSINGLE $< $(SOBJSF) $(SOBJS_PI) $(LIBS) -o $@ spreadtest: $(ST) $(STF) # run one thread per core... (escape the $ to get single $ in bash; one big cmd) (export OMP_NUM_THREADS=$$(perftest/mynumcores.sh) ;\ @@ -285,16 +291,19 @@ spreadtest: $(ST) $(STF) $(STF) 1 8e6 8e6 1e-3 ;\ $(STF) 2 8e6 8e6 1e-3 ;\ $(STF) 3 8e6 8e6 1e-3 ) +# smaller test of spreadinterp various tols, precs, kermeths... spreadtestall: $(ST) $(STF) (cd perftest; ./spreadtestall.sh) - +# Marco's sweep through kernel widths (ie tols)... +spreadtestndall: $(STA) $(STAF) + (cd perftest; ./multispreadtestndall.sh) bigtest: perftest/big2d2f @echo "\nRunning >2^31 size example (takes 30 s and 30 GB RAM)..." perftest/big2d2f PERFEXECS := $(basename $(wildcard test/finufft?d_test.cpp)) PERFEXECS += $(PERFEXECS:%=%f) -perftest: $(ST) $(STF) $(PERFEXECS) bigtest +perftest: $(ST) $(STF) $(PERFEXECS) spreadtestndall bigtest # here the tee cmd copies output to screen. 2>&1 grabs both stdout and stderr... 
(cd perftest ;\ ./spreadtestnd.sh 2>&1 | tee results/spreadtestnd_results.txt ;\ diff --git a/perftest/README b/perftest/README index 1f63f7c4f..907553f4d 100644 --- a/perftest/README +++ b/perftest/README @@ -1,11 +1,16 @@ Performance and development test directory for FINUFFT. +spreadtestnd : time spread & interp for given dim, tol, etc. +spreadtestndall : time spread or interp sweeping over all tols (w), given dim. + [note the above two differ in 4th cmd-line arg being "tol" vs "dir"] big2d2f : tests int64_t (8byte int) indexing, ie data size > 2^31. Scripts: +spreadtestall.sh : rapid test of spreadtestnd in all cases. spreadtestnd.sh : performance test of spreader only, in dims 1,2, or 3. nuffttestnd.sh : performance test of NUFFT library, in dims 1,2, or 3. mycpuinfo.sh : prints info about the CPU +multispreadtestndall.sh : runs Marco's w-sweeping scripts all dims, precs. Possibly obsolete scripts (for developers): highaspect3d_test.sh : comparing various pizza-box orientations for speed diff --git a/perftest/multispreadtestndall.sh b/perftest/multispreadtestndall.sh new file mode 100755 index 000000000..90306f199 --- /dev/null +++ b/perftest/multispreadtestndall.sh @@ -0,0 +1,22 @@ +#!/bin/bash +# simple driver for Marco's sweeping-w spreadtest variant, all precs & dims. +# used by the makefile. +# all avail threads for now. +# human has to check the output for now. +# Barnett 6/4/24 + +M=1e7 # number of NU pts; it's a string +N=1e6 # num U grid pts + +./spreadtestndall 1 $M $N 1 1 +./spreadtestndall 1 $M $N 2 1 +./spreadtestndall 2 $M $N 1 1 +./spreadtestndall 2 $M $N 2 1 +./spreadtestndall 3 $M $N 1 1 +./spreadtestndall 3 $M $N 2 1 +./spreadtestndallf 1 $M $N 1 1 +./spreadtestndallf 1 $M $N 2 1 +./spreadtestndallf 2 $M $N 1 1 +./spreadtestndallf 2 $M $N 2 1 +./spreadtestndallf 3 $M $N 1 1 +./spreadtestndallf 3 $M $N 2 1