Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[CERN Summer Student Project] TMVA BDT Benchmarking #231

Open
wants to merge 24 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
179 changes: 179 additions & 0 deletions root/tmva/tmva/BoostedDTBenchmarks.cxx
Original file line number Diff line number Diff line change
@@ -0,0 +1,179 @@
/* Authored by Xandru Mifsud (CERN Summer Student) and Lorenzo Moneta (Summer Project Supervisor) */

#include "TSystem.h"
#include "TTree.h"
#include "TFile.h"

#include "TMVA/RReader.hxx"
#include "TMVA/RTensorUtils.hxx"
#include "TMVA/DataLoader.h"
#include "TMVA/Factory.h"
#include "TMVA/MethodBase.h"
#include "TMVA/Types.h"

#include "benchmark/benchmark.h"

#include "MakeRandomTTree.h"

using namespace TMVA::Experimental;
using namespace std;

static void BM_TMVA_BDTTraining(benchmark::State &state){
// Parameters
UInt_t nVars = 4;
UInt_t nEvents = 500;
Bool_t mem_stats = (state.range(0) == 2000) && (state.range(1) == 10) && (state.range(2) == 1);

// Memory benchmark data placeholder
ProcInfo_t pinfo;
Long_t init_mem_res, term_mem_res; init_mem_res = term_mem_res = 0;
double mem_res = 0.0;

// Open output file
TString outfileName( "bdt_bench_train_output.root" );
TFile* outputFile = TFile::Open(outfileName, "RECREATE");

// Set up (generate one extra event for testing)
TTree *sigTree = genTree("sigTree", nEvents, nVars,0.3, 0.5, 100);
TTree *bkgTree = genTree("bkgTree", nEvents, nVars,-0.3, 0.5, 101);

// Prepare a DataLoader instance, registering the signal and background TTrees
auto *dataloader = new TMVA::DataLoader("bdt-bench");
dataloader->AddSignalTree(sigTree);
dataloader->AddBackgroundTree(bkgTree);

// Register variables in dataloader, using naming convention for randomly generated TTrees in MakeRandomTTree.h
for(UInt_t i = 0; i < nVars; i++){
string var_name = "var" + to_string(i);
string var_leaflist = var_name + "/F";

dataloader->AddVariable(var_name.c_str(), 'D');
}

// For each benchmark we specifically ignore this test event such that we exclusively benchmark training.
dataloader->PrepareTrainingAndTestTree("",
Form("SplitMode=Block:nTrain_Signal=%i:nTrain_Background=%i:!V", nEvents, nEvents));

// Benchmarking
UInt_t iter_c = 0;
for(auto _: state){
ROOT::EnableImplicitMT(state.range(2));

// Create factory instance
auto factory = new TMVA::Factory("bdt-bench", outputFile,
"Silent:!DrawProgressBar:AnalysisType=Classification");

// Get current memory usage statistics after setup
if(mem_stats && iter_c == 0){
gSystem->GetProcInfo(&pinfo);
init_mem_res = pinfo.fMemResident;
}

// Construct training options string
string opts = "!V:!H:NTrees=" + to_string(state.range(0)) + ":MaxDepth=" + to_string(state.range(1));

// Train a TMVA method
string key = to_string(state.range(0)) + "_" + to_string(state.range(1)) + "_" + to_string(state.range(2));
auto method = factory->BookMethod(dataloader, TMVA::Types::kBDT, "BDT_" + key, opts);
TMVA::Event::SetIsTraining(kTRUE);
method->TrainMethod();

// Maintain Memory statistics (independent from Google Benchmark)
if(mem_stats && iter_c == 0){
gSystem->GetProcInfo(&pinfo);
term_mem_res = pinfo.fMemResident;
mem_res += (double) (term_mem_res - init_mem_res);
}

TMVA::Event::SetIsTraining(kFALSE);
method->Data()->DeleteAllResults(TMVA::Types::kTraining, method->GetAnalysisType());

// Destroy factory entirely
factory->DeleteAllMethods();
factory->fMethodsMap.clear();
delete factory;

// DEBUG
// cout << "[DEBUG] " << key << ": res_mem_init = " << (double) init_mem_res << ", res_mem_term = " << (double) term_mem_res << endl;

iter_c++;
}

if(mem_stats){
mem_res *= iter_c;
state.counters["Resident Memory"] = benchmark::Counter(mem_res, benchmark::Counter::kAvgIterations);
}

// Teardown
delete sigTree;
delete bkgTree;

outputFile->Close();
delete outputFile;
}
BENCHMARK(BM_TMVA_BDTTraining)->ArgsProduct({{2000, 1000, 400, 100}, {10, 8, 6, 4, 2}, {1, 4, 8, 16}});

static void BM_TMVA_BDTTesting(benchmark::State &state){
// Parameters
UInt_t nVars = 4;
UInt_t nEvents = 500;
Bool_t mem_stats = (state.range(0) == 2000) && (state.range(1) == 10) && (state.range(2) == 1);

// Memory benchmark data placeholder
ProcInfo_t pinfo;
Long_t init_mem_res, term_mem_res; init_mem_res = term_mem_res = 0;
double mem_res = 0.0;

// Open output file
TString outfileName( "bdt_bench_test_output.root" );
TFile* outputFile = TFile::Open(outfileName, "RECREATE");

// Set up
auto inputFile = new TFile("bdt_bench_test_input.root","RECREATE");
TTree *testTree = genTree("testTree", nEvents, nVars,0.3, 0.5, 102, false);
testTree->Write();
delete testTree;
inputFile->Close();
delete inputFile;

ROOT::RDataFrame testDF("testTree","bdt_bench_test_input.root");
auto testTensor = AsTensor<Float_t>(testDF);

// Benchmarking
UInt_t iter_c = 0;
for(auto _: state){
ROOT::EnableImplicitMT(state.range(2));

// Test a TMVA method via RReader
string key = to_string(state.range(0)) + "_" + to_string(state.range(1)) + "_" + to_string(state.range(2));

// Get current memory usage statistics after setup
if(mem_stats && iter_c == 0){
gSystem->GetProcInfo(&pinfo);
init_mem_res = pinfo.fMemResident;
}

RReader model("./bdt-bench/weights/bdt-bench_BDT_" + key + ".weights.xml");
model.Compute(testTensor);

// Maintain Memory statistics (independent from Google Benchmark)
if(mem_stats && iter_c == 0){
gSystem->GetProcInfo(&pinfo);
term_mem_res = pinfo.fMemResident;
mem_res += (double) (term_mem_res - init_mem_res);
}

iter_c++;
}

if(mem_stats){
mem_res *= iter_c;
state.counters["Resident Memory"] = benchmark::Counter(mem_res, benchmark::Counter::kAvgIterations);
}

// Teardown
outputFile->Close();
}
BENCHMARK(BM_TMVA_BDTTesting)->ArgsProduct({{2000, 1000, 400, 100}, {10, 8, 6, 4, 2}, {1, 4, 8, 16}});

BENCHMARK_MAIN();
5 changes: 5 additions & 0 deletions root/tmva/tmva/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,11 @@ if(ROOT_tmva_FOUND)
CrossValidationBenchmarks.cxx
LABEL short
LIBRARIES Core Tree MathCore TMVA)

RB_ADD_GBENCHMARK(BoostedDTBenchmarks
BoostedDTBenchmarks.cxx
LABEL short
LIBRARIES Core Tree TreePlayer MathCore RIO XMLIO ROOTDataFrame TMVA)
endif()

if(ROOT_tmva_FOUND AND ROOT_tmva-cpu_FOUND AND ROOT_imt_FOUND)
Expand Down
41 changes: 41 additions & 0 deletions root/tmva/tmva/MakeRandomTTree.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
#include "TRandom3.h"
#include "TTree.h"

// Utility function for generating a random TTree with Gaussian float data, for the specified number of points and vars
TTree* genTree(std::string name, UInt_t nPoints, const UInt_t nVars, Double_t offset, Double_t scale = 0.3, UInt_t seed = 100,
bool evtCol = true){
// Initialisation
TRandom3 rng(seed);
Float_t vars[nVars]; for(auto& var: vars){ var = 0.0;}
UInt_t id = 0;

// Create new TTree instance
auto data = new TTree(name.c_str(),name.c_str());

// Add a branch corresponding to each variable
for(UInt_t i = 0; i < nVars; i++){
std::string var_name = "var" + std::to_string(i);
std::string var_leaflist = var_name + "/F";

data->Branch(var_name.c_str(), vars + i, var_leaflist.c_str());
}

// And add a branch for the (unique) Event identifier
if(evtCol){
data->Branch("EventNumber", &id, "EventNumber/I");
}

// Populate TTree instance with Gaussian data
for(UInt_t j = 0; j < nPoints; j++){
for(UInt_t i = 0; i < nVars; i++){
vars[i] = rng.Gaus(offset, scale);
}

data->Fill();
id++;
}

// Important: Disconnects the tree from the memory locations of vars[i]
data->ResetBranchAddresses();
return data;
}