Skip to content

Commit

Permalink
[RF] Update RooFit backend benchmarks to CHEP 2023 state
Browse files Browse the repository at this point in the history
This commit brings in the exact code that was used to produce the results
presented at CHEP 2023:

https://indico.jlab.org/event/459/contributions/11570/
  • Loading branch information
guitargeek committed May 17, 2023
1 parent c3be502 commit f61656f
Show file tree
Hide file tree
Showing 2 changed files with 93 additions and 96 deletions.
30 changes: 15 additions & 15 deletions root/roofit/roofit/RooFitUnBinnedBenchmarks.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -250,29 +250,29 @@ const auto unit = benchmark::kMillisecond;

#define ARGS UseRealTime()->Unit(unit)

BENCHMARK(BM_RooFit_BDecayWithMixing)->Name("BDecayWithMixing_FitLegacy")->Args({nEvents, 0, 1})->ARGS;
BENCHMARK(BM_RooFit_BDecayWithMixing)->Name("BDecayWithMixing_FitLegacyNumCPU2")->Args({nEvents, 0, 2})->ARGS;
BENCHMARK(BM_RooFit_BDecayWithMixing)->Name("BDecayWithMixing_FitLegacyNumCPU4")->Args({nEvents, 0, 4})->ARGS;
BENCHMARK(BM_RooFit_BDecayWithMixing)->Name("BDecayWithMixing_FitCPU")->Args({nEvents, 1, 1})->ARGS;
#ifdef R__HAS_CUDA
BENCHMARK(BM_RooFit_BDecayWithMixing)->Name("BDecayWithMixing__BatchMode_CUDA")->Args({nEvents, 2, 1})->ARGS;
BENCHMARK(BM_RooFit_BDecayWithMixing)->Name("BDecayWithMixing_FitCUDA")->Args({nEvents, 2, 1})->ARGS;
#endif
BENCHMARK(BM_RooFit_BDecayWithMixing)->Name("BDecayWithMixing__BatchMode_CPU")->Args({nEvents, 1, 1})->ARGS;
BENCHMARK(BM_RooFit_BDecayWithMixing)->Name("BDecayWithMixing__NumCPU_1")->Args({nEvents, 0, 1})->ARGS;
BENCHMARK(BM_RooFit_BDecayWithMixing)->Name("BDecayWithMixing__NumCPU_2")->Args({nEvents, 0, 2})->ARGS;
BENCHMARK(BM_RooFit_BDecayWithMixing)->Name("BDecayWithMixing__NumCPU_4")->Args({nEvents, 0, 4})->ARGS;

BENCHMARK(BM_RooFit_BDecayGaussResolution)->Name("BDecayGaussResolution_FitLegacy")->Args({nEvents, 0, 1})->ARGS;
BENCHMARK(BM_RooFit_BDecayGaussResolution)->Name("BDecayGaussResolution_FitLegacyNumCPU2")->Args({nEvents, 0, 2})->ARGS;
BENCHMARK(BM_RooFit_BDecayGaussResolution)->Name("BDecayGaussResolution_FitLegacyNumCPU4")->Args({nEvents, 0, 4})->ARGS;
BENCHMARK(BM_RooFit_BDecayGaussResolution)->Name("BDecayGaussResolution_FitCPU")->Args({nEvents, 1, 1})->ARGS;
#ifdef R__HAS_CUDA
BENCHMARK(BM_RooFit_BDecayGaussResolution)->Name("BDecayGaussResolution__BatchMode_CUDA")->Args({nEvents, 2, 1})->ARGS;
BENCHMARK(BM_RooFit_BDecayGaussResolution)->Name("BDecayGaussResolution_FitCUDA")->Args({nEvents, 2, 1})->ARGS;
#endif
BENCHMARK(BM_RooFit_BDecayGaussResolution)->Name("BDecayGaussResolution__BatchMode_CPU")->Args({nEvents, 1, 1})->ARGS;
BENCHMARK(BM_RooFit_BDecayGaussResolution)->Name("BDecayGaussResolution__NumCPU_1")->Args({nEvents, 0, 1})->ARGS;
BENCHMARK(BM_RooFit_BDecayGaussResolution)->Name("BDecayGaussResolution__NumCPU_2")->Args({nEvents, 0, 2})->ARGS;
BENCHMARK(BM_RooFit_BDecayGaussResolution)->Name("BDecayGaussResolution__NumCPU_4")->Args({nEvents, 0, 4})->ARGS;

BENCHMARK(BM_RooFit_BDecayDoubleGauss)->Name("BDecayDoubleGauss_FitLegacy")->Args({nEvents, 0, 1})->ARGS;
BENCHMARK(BM_RooFit_BDecayDoubleGauss)->Name("BDecayDoubleGauss_FitLegacyNumCPU2")->Args({nEvents, 0, 2})->ARGS;
BENCHMARK(BM_RooFit_BDecayDoubleGauss)->Name("BDecayDoubleGauss_FitLegacyNumCPU4")->Args({nEvents, 0, 4})->ARGS;
BENCHMARK(BM_RooFit_BDecayDoubleGauss)->Name("BDecayDoubleGauss_FitCPU")->Args({nEvents, 1, 1})->ARGS;
#ifdef R__HAS_CUDA
BENCHMARK(BM_RooFit_BDecayDoubleGauss)->Name("BDecayDoubleGauss__BatchMode_CUDA")->Args({nEvents, 2, 1})->ARGS;
BENCHMARK(BM_RooFit_BDecayDoubleGauss)->Name("BDecayDoubleGauss_FitCUDA")->Args({nEvents, 2, 1})->ARGS;
#endif
BENCHMARK(BM_RooFit_BDecayDoubleGauss)->Name("BDecayDoubleGauss__BatchMode_CPU")->Args({nEvents, 1, 1})->ARGS;
BENCHMARK(BM_RooFit_BDecayDoubleGauss)->Name("BDecayDoubleGauss__NumCPU_1")->Args({nEvents, 0, 1})->ARGS;
BENCHMARK(BM_RooFit_BDecayDoubleGauss)->Name("BDecayDoubleGauss__NumCPU_2")->Args({nEvents, 0, 2})->ARGS;
BENCHMARK(BM_RooFit_BDecayDoubleGauss)->Name("BDecayDoubleGauss__NumCPU_4")->Args({nEvents, 0, 4})->ARGS;

#undef ARGS

Expand Down
159 changes: 78 additions & 81 deletions root/roofit/roofit/benchRooFitBackends_make_plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,87 +4,93 @@
import ROOT
import numpy as np
import json
import sys
from collections import defaultdict

ROOT.gROOT.SetBatch(True)

f = open("benchRooFitBackends.json", "rb")
json_name = sys.argv[1]
base_name = json_name.split(".")[0]

f = open(json_name, "rb")
json_object = json.load(f)["benchmarks"]
f.close()

labels_in_json = [
"Gaus_",
"GausXS_",
"AddPdf_",
"ProdPdf_",
"FitModel_",
]

scalar_time = []
vector_st_time = []
cuda_time = []
timing_dict = defaultdict(dict)

# Old values for ACAT 2021
# scalar_time = np.array([2632, 1069, 9784, 112000, 93000])
# vector_st_time = np.array([234, 116, 908, 12000, 15000])
# vector_mt_time = np.array([82, 39, 238, 3350, 4800])
# cuda_time = np.array([109, 63, 197, 2280, 3550])
labels_in_json = []

for bm in json_object:
name = bm["name"]
if "FitLegacy" in name:
scalar_time.append(bm["real_time"])
if "FitCPU" in name:
vector_st_time.append(bm["real_time"])
if "FitCUDA" in name:
cuda_time.append(bm["real_time"])

scalar_time = np.array(scalar_time)
vector_st_time = np.array(vector_st_time)
cuda_time = np.array(cuda_time)

labels = [
"Gaussian",
"Gaussian\n(with $\sigma$ as observable)",
"Gaussian\nplus exponential",
"Mixture 1:\n(gauss + gauss)\nx (gauss + poly)\n x gamma",
"Mixture 2:\ngamma + gamma\n + gauss + gauss + poly",
]

# We don't benchmark BatchMode("cpu") with multithreading, as this is not
# implemented properly yet. Set results to zeros.
vector_mt_time = np.zeros_like(vector_st_time)
name, backend = bm["name"].split("/")[0].split("_Fit")
timing_dict[name][backend] = bm["real_time"]
if len(labels_in_json) == 0 or labels_in_json[-1] != name:
labels_in_json.append(name)

# labels_in_json = [
# "Gaus",
# "GausXS",
# "AddPdf",
# "ProdPdf",
# "FitModel",
# ]

backends = ["CPU", "CUDA"]
reference_backend = "Legacy"

timings = {}

print(timing_dict)

for b in backends + [reference_backend]:
l = list()

for label in labels_in_json:
l.append(timing_dict[label][b])

timings[b] = np.array(l)

# normalize to ratios
vector_st_time = scalar_time / vector_st_time
# vector_mt_time = scalar_time / vector_mt_time
cuda_time = scalar_time / cuda_time
scalar_time = scalar_time / scalar_time

nx = len(vector_st_time)
labels = [
"Gaussian",
"#splitline{Gaussian}{(with #sigma as observable)}",
"#splitline{Gaussian}{plus exponential}",
"#splitline{Mixture 1:}{#splitline{(gauss + gauss)}{#splitline{x (gauss + poly)}{ x gamma}}}",
"#splitline{#splitline{Mixture 2:}{gamma + gamma}}{ + gauss + gauss + poly}",
]
for b in backends:
timings[b] = timings[reference_backend] / timings[b]

print(timings)

labels_dict = {
"Gaus": "Gaussian",
"GausXS": "#splitline{Gaussian}{(with #sigma as observable)}",
"AddPdf": "#splitline{Gaussian}{plus exponential}",
"ProdPdf": "#splitline{Mixture 1:}{#splitline{(gauss + gauss)}{#splitline{x (gauss + poly)}{ x gamma}}}",
"FitModel": "#splitline{#splitline{Mixture 2:}{gamma + gamma}}{ + gauss + gauss + poly}",
"BDecayWithMixing": "#splitline{RooBMixDecay}{perfect resolution}",
"BDecayGaussResolution": "#splitline{RooBMixDecay}{Gaussian res.}",
"BDecayDoubleGauss": "#splitline{RooBMixDecay}{Double-Gaussian res.}",
}

labels = [labels_dict[l] for l in labels_in_json]

nx = len(timings[reference_backend])
nb = len(backends)

# cb = ROOT.TCanvas("cb","cb",900,400)
cb = ROOT.TCanvas("cb", "cb", int(900 * 2), int(400 * 2))
cb = ROOT.TCanvas("cb", "cb", int(180 * len(labels) * 2), int(400 * 2))
cb.SetGrid()
cb.SetBottomMargin(0.16)

# ROOT.gROOT.GetColor(9).SetRGB(12.2 / 100, 46.7 / 100, 70.6 / 100)
# ROOT.gROOT.GetColor(46).SetRGB(100.0 / 100, 49.8 / 100, 5.5 / 100)
# ROOT.gROOT.GetColor(9).SetRGB(17.3 / 100, 62.7 / 100, 17.3 / 100)

y_max = 35
y_max = 1
for b in backends:
y_max = max(y_max, np.max(timings[b]) + 3)

ROOT.gStyle.SetHistMinimumZero()

h1b = ROOT.TH1F("h1b", "RooFit: speedup in benchmark fits relative to scalar mode (1 million events)", nx, 0, nx)
h1b = ROOT.TH1F(
"h1b", "RooFit: speedup in benchmark fits with BatchMode() relative to old RooFit (1 million events)", nx, 0, nx
)
h1b.SetFillColor(9)
h1b.SetBarWidth(0.8 / 3)
h1b.SetBarWidth(0.8 / nb)
h1b.SetBarOffset(0.1)
h1b.SetStats(0)
h1b.SetMinimum(-0.1)
Expand All @@ -95,52 +101,43 @@
h1b.GetXaxis().SetLabelSize(0.055)
# h1b.GetXaxis().SetLabelOffset(0.1)

for i in range(len(vector_st_time) + 1):
h1b.SetBinContent(i, vector_st_time[i - 1])
h1b.GetXaxis().SetBinLabel(i, labels[i - 1])
# h1b.GetXaxis().ChangeLabel(
# labNum = i,
# labAngle = -1.,
# labSize = -1.,
# labAlign = -1,
# labColor = -1,
# labFont = -1,
# labText = "")
# # labText = labels[i-1])
for i in range(nx):
h1b.SetBinContent(i + 1, timings["CPU"][i])
h1b.GetXaxis().SetBinLabel(i + 1, labels[i])

h1b.Draw("b")

h2b = ROOT.TH1F("h2b", "h2b", nx, 0, nx)

h2b.SetFillColor(46)
h2b.SetBarWidth(0.8 / 3)
h2b.SetBarOffset(0.1 + 0.8 / 3)
h2b.SetBarWidth(0.8 / nb)
h2b.SetBarOffset(0.1 + 2 * 0.8 / nb)
h2b.SetStats(0)
h2b.SetMinimum(-1)
h2b.SetMaximum(y_max)
for i in range(len(vector_mt_time) + 1):
h2b.SetBinContent(i, vector_mt_time[i - 1])
for i in range(nx):
h2b.SetBinContent(i + 1, 0.0)
h2b.Draw("b same")

h3b = ROOT.TH1F("h3b", "h3b", nx, 0, nx)

h3b.SetFillColor(8)
h3b.SetBarWidth(0.8 / 3)
h3b.SetBarOffset(0.1 + 2 * 0.8 / 3)
h3b.SetBarWidth(0.8 / nb)
h3b.SetBarOffset(0.1 + 1 * 0.8 / nb)
h3b.SetStats(0)
h3b.SetMinimum(-1)
h3b.SetMaximum(y_max)
for i in range(len(vector_mt_time) + 1):
h3b.SetBinContent(i, cuda_time[i - 1])
for i in range(nx):
h3b.SetBinContent(i + 1, timings["CUDA"][i])

h3b.Draw("b same")

legend = ROOT.TLegend(0.1, 0.7, 0.46, 0.9)
# legend->SetHeader("The Legend Title","C"); // option "C" allows to center the header
legend.AddEntry(h1b, "Vectorized (single thread)", "f")
legend.AddEntry(h2b, "Vectorized (24 threads)", "f")
legend.AddEntry(h1b, "CPU (AMD Ryzen 9 3900)", "f")
# legend.AddEntry(h2b, "Vectorized (24 threads)", "f")
legend.AddEntry(h3b, "CUDA (RTX 3070 gaming GPU)", "f")
legend.Draw()

cb.SaveAs("benchRooFitBackends.png")
cb.SaveAs("benchRooFitBackends.pdf")
cb.SaveAs(base_name + ".png")
cb.SaveAs(base_name + ".pdf")

0 comments on commit f61656f

Please sign in to comment.