From 4bd54a7c4d376ca4cc991c88bc63ed9f6b598bad Mon Sep 17 00:00:00 2001
From: Paul Buehler
Date: Fri, 6 Dec 2024 13:34:51 +0100
Subject: [PATCH 01/19] Fix particle accumulation bug in HepMC + error handling (#13768)

* Reset mParticles before it is filled with a new set of particles
* Return false instead of stopping execution in case of problems

---
 Generators/src/GeneratorHepMC.cxx | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/Generators/src/GeneratorHepMC.cxx b/Generators/src/GeneratorHepMC.cxx
index 4f38b01811f98..2076910b2bf5f 100644
--- a/Generators/src/GeneratorHepMC.cxx
+++ b/Generators/src/GeneratorHepMC.cxx
@@ -159,7 +159,7 @@ Bool_t GeneratorHepMC::generateEvent()
     tries++;
   } while (tries < max_tries);

-  LOG(fatal) << "HepMC event gen failed (Does the file/stream have enough events)?";
+  LOG(error) << "HepMC event gen failed (Does the file/stream have enough events)?";

   /** failure **/
   return false;
@@ -286,6 +286,7 @@ Bool_t GeneratorHepMC::importParticles()
   }

   /** loop over particles **/
+  mParticles.clear();
   auto particles = mEvent->particles();
   for (int i = 0; i < particles.size(); ++i) {

From aa4898eda2d9740b025d4fb5f3751309144ec3d5 Mon Sep 17 00:00:00 2001
From: Chiara Zampolli
Date: Fri, 6 Dec 2024 13:42:09 +0100
Subject: [PATCH 02/19] Change in case we enable the sec vtx in online (#13770)

In online, the strangeness tracking would be off via
export ARGS_EXTRA_PROCESS_o2_secondary_vertexing_workflow='--disable-cascade-finder --disable-3body-finder --disable-strangeness-tracker'
but then the ASYNC part of the FST would fail.

---
 prodtests/full_system_test.sh | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/prodtests/full_system_test.sh b/prodtests/full_system_test.sh
index 8d6a0ca3cf1f9..bc15c9b119f23 100755
--- a/prodtests/full_system_test.sh
+++ b/prodtests/full_system_test.sh
@@ -241,12 +241,17 @@ for STAGE in $STAGES; do
     export HOSTMEMSIZE=1000000000
     export SYNCMODE=1
     export CTFINPUT=0
+    # enabling SECVTX
+    export WORKFLOW_EXTRA_PROCESSING_STEPS+="MATCH_SECVTX"
   elif [[ "$STAGE" = "ASYNC" ]]; then
     export CREATECTFDICT=0
     export GPUTYPE=CPU
     export SYNCMODE=0
     export HOSTMEMSIZE=$TPCTRACKERSCRATCHMEMORY
     export CTFINPUT=1
+    # the following line is needed in case the SECVTX was enabled in the SYNC stage; in this case, it'd have the options:
+    # export ARGS_EXTRA_PROCESS_o2_secondary_vertexing_workflow='--disable-cascade-finder --disable-3body-finder --disable-strangeness-tracker'
+    unset ARGS_EXTRA_PROCESS_o2_secondary_vertexing_workflow
     export WORKFLOW_PARAMETERS="${WORKFLOW_PARAMETERS},AOD"
   else
     export CREATECTFDICT=$SYNCMODEDOCTFDICT
@@ -255,6 +260,8 @@
     export HOSTMEMSIZE=$TPCTRACKERSCRATCHMEMORY
     export CTFINPUT=0
     export WORKFLOW_PARAMETERS="${WORKFLOW_PARAMETERS},CALIB,CTF,EVENT_DISPLAY,${FST_SYNC_EXTRA_WORKFLOW_PARAMETERS}"
+    # enabling SECVTX
+    export WORKFLOW_EXTRA_PROCESSING_STEPS+="MATCH_SECVTX"
     # temporarily enable ZDC reconstruction for calibration validations
     export WORKFLOW_EXTRA_PROCESSING_STEPS+=",ZDC_RECO"
     unset JOBUTILS_JOB_SKIPCREATEDONE

From 736eb5b7be8a39eb4f1ff9e42673f49c2e5a7f78 Mon Sep 17 00:00:00 2001
From: shahoian
Date: Fri, 6 Dec 2024 16:48:06 +0100
Subject: [PATCH 03/19] Override bad covariance instead of throwing exception

---
 .../DCAFitter/include/DCAFitter/DCAFitterN.h | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/Common/DCAFitter/include/DCAFitter/DCAFitterN.h b/Common/DCAFitter/include/DCAFitter/DCAFitterN.h
index 2e36f7588e8be..8ded804751de1 100644
--- 
a/Common/DCAFitter/include/DCAFitter/DCAFitterN.h +++ b/Common/DCAFitter/include/DCAFitter/DCAFitterN.h @@ -41,19 +41,20 @@ struct TrackCovI { // (otherwise for quazi-collinear tracks the X will not be constrained) float cyy = trc.getSigmaY2(), czz = trc.getSigmaZ2(), cyz = trc.getSigmaZY(), cxx = cyy * xerrFactor; float detYZ = cyy * czz - cyz * cyz; - if (detYZ > 0.) { - auto detYZI = 1. / detYZ; - sxx = 1. / cxx; - syy = czz * detYZI; - syz = -cyz * detYZI; - szz = cyy * detYZI; - } else { + if (detYZ <= 0.) { #ifndef GPUCA_GPUCODE - throw std::runtime_error("invalid track covariance"); + printf("overriding invalid track covariance from %s\n", trc.asString().c_str()); #else - printf("invalid track covariance\n"); + printf("overriding invalid track covariance cyy:%e czz:%e cyz:%e\n", cyy, czz, cyz); #endif + cyz = o2::gpu::GPUCommonMath::Sqrt(cyy * czz) * (cyz > 0 ? 0.98f : -0.98f); + detYZ = cyy * czz - cyz * cyz; } + auto detYZI = 1. / detYZ; + sxx = 1. / cxx; + syy = czz * detYZI; + syz = -cyz * detYZI; + szz = cyy * detYZI; } }; From ecd04ae60e2048e0e85aaf6e0cae0f154fc08f5c Mon Sep 17 00:00:00 2001 From: shahoian Date: Fri, 6 Dec 2024 16:48:41 +0100 Subject: [PATCH 04/19] Set the loaded SVParams to DCAFitter of MatchITSTPCQC --- Detectors/GLOQC/src/MatchITSTPCQC.cxx | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/Detectors/GLOQC/src/MatchITSTPCQC.cxx b/Detectors/GLOQC/src/MatchITSTPCQC.cxx index f0345175b9a59..6e14f9eb16c8b 100644 --- a/Detectors/GLOQC/src/MatchITSTPCQC.cxx +++ b/Detectors/GLOQC/src/MatchITSTPCQC.cxx @@ -477,6 +477,21 @@ void MatchITSTPCQC::run(o2::framework::ProcessingContext& ctx) if (mTimestamp == -1 && mDoK0QC) { // we have not yet initialized the SVertexer params; let's do it ctx.inputs().get("SVParam"); + const auto& svparam = o2::vertexing::SVertexerParams::Instance(); + mFitterV0.setUseAbsDCA(svparam.useAbsDCA); + mFitterV0.setMaxR(svparam.maxRIni); + mFitterV0.setMinParamChange(svparam.minParamChange); + mFitterV0.setMinRelChi2Change(svparam.minRelChi2Change); + mFitterV0.setMaxDZIni(svparam.maxDZIni); + mFitterV0.setMaxDXYIni(svparam.maxDXYIni); + mFitterV0.setMaxChi2(svparam.maxChi2); + mFitterV0.setMatCorrType(o2::base::Propagator::MatCorrType(svparam.matCorr)); + mFitterV0.setUsePropagator(svparam.usePropagator); + mFitterV0.setRefitWithMatCorr(svparam.refitWithMatCorr); + mFitterV0.setMaxStep(svparam.maxStep); + mFitterV0.setMaxSnp(svparam.maxSnp); + mFitterV0.setMinXSeed(svparam.minXSeed); + mTimestamp = ctx.services().get().creation; auto grplhcif = o2::base::GRPGeomHelper::instance().getGRPLHCIF(); if (grplhcif->getBeamZ(0) != 1 || grplhcif->getBeamZ(1) != 1) { @@ -962,6 +977,7 @@ void MatchITSTPCQC::run(o2::framework::ProcessingContext& ctx) if (mDoK0QC && mRecoCont.getPrimaryVertices().size() > 0) { // now doing K0S + mFitterV0.setBz(mBz); const auto pvertices = mRecoCont.getPrimaryVertices(); LOG(info) << "****** Number of PVs = " << pvertices.size(); From 8157b0765edc8b9faf38a934c2edc69450b95876 Mon Sep 17 00:00:00 2001 From: Felix Schlepper Date: Sun, 8 Dec 2024 13:26:19 +0100 Subject: [PATCH 05/19] AOD: Missing default init --- .../AOD/include/AODProducerWorkflow/AODProducerWorkflowSpec.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Detectors/AOD/include/AODProducerWorkflow/AODProducerWorkflowSpec.h b/Detectors/AOD/include/AODProducerWorkflow/AODProducerWorkflowSpec.h index eaaf2d9eaedd9..2ab7c531be7a8 100644 --- a/Detectors/AOD/include/AODProducerWorkflow/AODProducerWorkflowSpec.h +++ 
b/Detectors/AOD/include/AODProducerWorkflow/AODProducerWorkflowSpec.h @@ -251,7 +251,7 @@ class AODProducerWorkflowDPL : public Task std::unordered_set mGIDUsedBySVtx; std::unordered_set mGIDUsedByStr; - AODProducerStreamerMask mStreamerMask; + AODProducerStreamerMask mStreamerMask{0}; std::shared_ptr mStreamer; int mNThreads = 1; From f8c8cd5f2e1e34dc89f52df4308f8fac2cc49180 Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Mon, 9 Dec 2024 11:11:05 +0100 Subject: [PATCH 06/19] Do not compile tests which need GLFW if it's not found (#13774) --- Detectors/MUON/MCH/Geometry/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Detectors/MUON/MCH/Geometry/CMakeLists.txt b/Detectors/MUON/MCH/Geometry/CMakeLists.txt index 5e367fa87bbf4..000364af028c6 100644 --- a/Detectors/MUON/MCH/Geometry/CMakeLists.txt +++ b/Detectors/MUON/MCH/Geometry/CMakeLists.txt @@ -12,7 +12,7 @@ add_subdirectory(MisAligner) add_subdirectory(Transformer) add_subdirectory(Creator) -if (BUILD_TESTING) +if (BUILD_TESTING AND GLFW_FOUND) add_subdirectory(Test) endif() From 5c52a4b9a19d91dde2cd82f70fecc3256b461f74 Mon Sep 17 00:00:00 2001 From: Hadi Hassan Date: Mon, 9 Dec 2024 14:51:20 +0200 Subject: [PATCH 07/19] Fixing the FOCAL-E pad geometry (#13764) --- Detectors/FOCAL/simulation/data/simcuts.dat | 10 +++--- .../geometryFiles/geometry_Spaghetti.txt | 2 +- Detectors/FOCAL/simulation/src/Detector.cxx | 36 ++++++++++--------- 3 files changed, 25 insertions(+), 23 deletions(-) diff --git a/Detectors/FOCAL/simulation/data/simcuts.dat b/Detectors/FOCAL/simulation/data/simcuts.dat index 1ef7260bc34be..744f67c3c81f4 100644 --- a/Detectors/FOCAL/simulation/data/simcuts.dat +++ b/Detectors/FOCAL/simulation/data/simcuts.dat @@ -6,11 +6,11 @@ FOC 0 5.e-5 1.e-4 1.e-4 1.e-4 1.e-4 1.e-4 1.e-4 1.e-4 1.e-4 -1. -1 -1 -1 -1 1 -1 3 -1 -1 -1 -1 -1 * Si sensor FOC 1 1.e-5 1.e-5 1.e-5 1.e-5 1.e-5 1.e-5 1.e-5 1.e-5 1.e-5 -1. -1 -1 -1 -1 1 -1 1 -1 -1 -1 -1 -1 +* Si pixel +FOC 2 1.e-5 1.e-5 1.e-5 1.e-5 1.e-5 1.e-5 1.e-5 1.e-5 1.e-5 -1. -1 -1 -1 -1 1 -1 1 -1 -1 -1 -1 -1 * G10 plate -FOC 2 1.e-4 1.e-4 1.e-4 1.e-4 1.e-4 1.e-4 1.e-4 1.e-4 1.e-4 -1. -1 -1 -1 -1 1 -1 3 -1 -1 -1 -1 -1 +FOC 3 1.e-4 1.e-4 1.e-4 1.e-4 1.e-4 1.e-4 1.e-4 1.e-4 1.e-4 -1. -1 -1 -1 -1 1 -1 3 -1 -1 -1 -1 -1 * Alloy -FOC 5 5.e-5 1.e-4 1.e-4 1.e-4 1.e-4 1.e-4 1.e-4 1.e-4 1.e-4 -1. -1 -1 -1 -1 1 -1 3 -1 -1 -1 -1 -1 +FOC 6 5.e-5 1.e-4 1.e-4 1.e-4 1.e-4 1.e-4 1.e-4 1.e-4 1.e-4 -1. -1 -1 -1 -1 1 -1 3 -1 -1 -1 -1 -1 * Aluminium -FOC 10 5.e-5 1.e-4 1.e-4 1.e-4 1.e-4 1.e-4 1.e-4 1.e-4 1.e-4 -1. -1 -1 -1 -1 1 -1 3 -1 -1 -1 -1 -1 -* G10 plate -FOC 12 1.e-4 1.e-4 1.e-4 1.e-4 1.e-4 1.e-4 1.e-4 1.e-4 1.e-4 -1. -1 -1 -1 -1 1 -1 3 -1 -1 -1 -1 -1 +FOC 11 5.e-5 1.e-4 1.e-4 1.e-4 1.e-4 1.e-4 1.e-4 1.e-4 1.e-4 -1. -1 -1 -1 -1 1 -1 3 -1 -1 -1 -1 -1 diff --git a/Detectors/FOCAL/simulation/geometryFiles/geometry_Spaghetti.txt b/Detectors/FOCAL/simulation/geometryFiles/geometry_Spaghetti.txt index ff24f0bfec34a..5209204cc5eca 100644 --- a/Detectors/FOCAL/simulation/geometryFiles/geometry_Spaghetti.txt +++ b/Detectors/FOCAL/simulation/geometryFiles/geometry_Spaghetti.txt @@ -58,7 +58,7 @@ COMMAND_INSERT_PIX_AT_L9 GLOBAL_TOWER_TOL 0. 
Air GLOBAL_TOWER_TOLX 0.02 Air GLOBAL_TOWER_TOLY 0.8 Al - GLOBAL_FOCAL_Z 764.47 + GLOBAL_FOCAL_Z 763.5 GLOBAL_Tower_NX 2 GLOBAL_Tower_NY 11 GLOBAL_MIDDLE_TOWER_OFFSET 5 diff --git a/Detectors/FOCAL/simulation/src/Detector.cxx b/Detectors/FOCAL/simulation/src/Detector.cxx index 58db515b2e719..08df253d49f83 100644 --- a/Detectors/FOCAL/simulation/src/Detector.cxx +++ b/Detectors/FOCAL/simulation/src/Detector.cxx @@ -406,7 +406,7 @@ void Detector::addAlignableVolumes() const //____________________________________________________________________________ void Detector::addAlignableVolumesHCAL() const { - const std::string vpsector = "/cave_1/caveRB24_1/FOCAL_1/HCAL_1"; + const std::string vpsector = "/cave_1/barrel_1/FOCAL_1/HCAL_1"; const std::string snsector = "FOCAL/HCAL"; if (!gGeoManager->SetAlignableEntry(snsector.c_str(), vpsector.c_str())) { @@ -417,7 +417,7 @@ void Detector::addAlignableVolumesHCAL() const //____________________________________________________________________________ void Detector::addAlignableVolumesECAL() const { - const std::string vpsector = "/cave_1/caveRB24_1/FOCAL_1/ECAL_1"; + const std::string vpsector = "/cave_1/barrel_1/FOCAL_1/ECAL_1"; const std::string snsector = "FOCAL/ECAL"; if (!gGeoManager->SetAlignableEntry(snsector.c_str(), vpsector.c_str())) { @@ -485,8 +485,8 @@ void Detector::ConstructGeometry() } else { CreateHCALSpaghetti(); } - const float z0 = 1312.5; // center of caveRB24 mother volume - TVirtualMC::GetMC()->Gspos("FOCAL", 1, "caveRB24", 0, 0, mGeometry->getFOCALZ0() - (mGeometry->getInsertFrontPadLayers() ? 2.0 : 0.0) + (mGeometry->getInsertHCalReadoutMaterial() ? 1.5 : 0.0) - z0, 0, "ONLY"); + // const float z0 = 1312.5; // center of barrel mother volume + TVirtualMC::GetMC()->Gspos("FOCAL", 1, "barrel", 0, 30., mGeometry->getFOCALZ0() - (mGeometry->getInsertFrontPadLayers() ? 2.0 : 0.0) + (mGeometry->getInsertHCalReadoutMaterial() ? 
1.5 : 0.0), 0, "ONLY"); } void Detector::CreateHCALSpaghetti() @@ -795,8 +795,8 @@ void Detector::CreateECALGeometry() // gMC->Gsvolu("EMSC1", "BOX", idtmed[3698], pars, 4);//Left towers (pixels shifted right) // gMC->Gsvolu("EMSC2", "BOX", idtmed[3698], pars, 4);//Right towers (pixels shifted left) - TVirtualMC::GetMC()->Gsvolu("EMSC1", "BOX", ID_AIR, pars, 4); // Left towers (pixels shifted right) - TVirtualMC::GetMC()->Gsvolu("EMSC2", "BOX", ID_AIR, pars, 4); // Right towers (pixels shifted left) + TVirtualMC::GetMC()->Gsvolu("EMSC1", "BOX", getMediumID(ID_AIR), pars, 4); // Left towers (pixels shifted right) + TVirtualMC::GetMC()->Gsvolu("EMSC2", "BOX", getMediumID(ID_AIR), pars, 4); // Right towers (pixels shifted left) // mSensitiveECALPad.push_back("EMSC1"); // mSensitiveECALPad.push_back("EMSC2"); mSensitive.push_back("EMSC1"); @@ -816,7 +816,7 @@ void Detector::CreateECALGeometry() if (icomp->material() == "PureW") { // TVirtualMC::GetMC()->Gsvolu("EW1", "BOX", idtmed[3599], pars, 4); - TVirtualMC::GetMC()->Gsvolu("EW1", "BOX", ID_TUNGSTEN, pars, 4); + TVirtualMC::GetMC()->Gsvolu("EW1", "BOX", getMediumID(ID_TUNGSTEN), pars, 4); // mSensitiveECALPad.push_back("EW1"); mSensitive.push_back("EW1"); gGeoManager->GetVolume("EW1")->SetLineColor(kBlue); @@ -827,7 +827,7 @@ void Detector::CreateECALGeometry() } if (icomp->material() == "Alloy") { // TVirtualMC::GetMC()->Gsvolu("EW1", "BOX", idtmed[3604], pars, 4); - TVirtualMC::GetMC()->Gsvolu("EW1", "BOX", ID_ALLOY, pars, 4); + TVirtualMC::GetMC()->Gsvolu("EW1", "BOX", getMediumID(ID_ALLOY), pars, 4); // mSensitiveECALPad.push_back("EW1"); mSensitive.push_back("EW1"); TVirtualMC::GetMC()->Gspos("EW1", icomp->id() + 1, "EMSC1", @@ -838,7 +838,7 @@ void Detector::CreateECALGeometry() if (icomp->material() == "G10") { // TVirtualMC::GetMC()->Gsvolu("G10RO1", "BOX", idtmed[3601], pars, 4); - TVirtualMC::GetMC()->Gsvolu("G10RO1", "BOX", ID_G10, pars, 4); + TVirtualMC::GetMC()->Gsvolu("G10RO1", "BOX", getMediumID(ID_G10), pars, 4); // mSensitiveECALPad.push_back("G10RO1"); mSensitive.push_back("G10RO1"); gGeoManager->GetVolume("G10RO1")->SetLineColor(kGreen); @@ -850,7 +850,7 @@ void Detector::CreateECALGeometry() if (icomp->material() == "Cu") { // TVirtualMC::GetMC()->Gsvolu("EWCU", "BOX", idtmed[3602], pars, 4); - TVirtualMC::GetMC()->Gsvolu("EWCU", "BOX", ID_COPPER, pars, 4); + TVirtualMC::GetMC()->Gsvolu("EWCU", "BOX", getMediumID(ID_COPPER), pars, 4); // mSensitiveECALPad.push_back("EWCU"); mSensitive.push_back("EWCU"); gGeoManager->GetVolume("EWCU")->SetLineColor(kViolet); @@ -862,7 +862,7 @@ void Detector::CreateECALGeometry() if (icomp->material() == "Air") { // TVirtualMC::GetMC()->Gsvolu("EWAIR1", "BOX", idtmed[3698], pars, 4); - TVirtualMC::GetMC()->Gsvolu("EWAIR1", "BOX", ID_AIR, pars, 4); + TVirtualMC::GetMC()->Gsvolu("EWAIR1", "BOX", getMediumID(ID_AIR), pars, 4); // mSensitiveECALPad.push_back("EWAIR1"); mSensitive.push_back("EWAIR1"); gGeoManager->GetVolume("EWAIR1")->SetLineColor(kGray); @@ -874,7 +874,7 @@ void Detector::CreateECALGeometry() if (icomp->material() == "Ceramic") { // TVirtualMC::GetMC()->Gsvolu("EWAIR1", "BOX", idtmed[3607], pars, 4); - TVirtualMC::GetMC()->Gsvolu("EWAIR1", "BOX", ID_CERAMIC, pars, 4); + TVirtualMC::GetMC()->Gsvolu("EWAIR1", "BOX", getMediumID(ID_CERAMIC), pars, 4); // mSensitiveECALPad.push_back("EWAIR1"); mSensitive.push_back("EWAIR1"); TVirtualMC::GetMC()->Gspos("EWAIR1", icomp->id() + 1, "EMSC1", @@ -885,7 +885,7 @@ void Detector::CreateECALGeometry() if (icomp->material() == "SiPad") { // 
TVirtualMC::GetMC()->Gsvolu("EWSIPAD1", "BOX", idtmed[3600], pars, 4); - TVirtualMC::GetMC()->Gsvolu("EWSIPAD1", "BOX", ID_SIPAD, pars, 4); + TVirtualMC::GetMC()->Gsvolu("EWSIPAD1", "BOX", getMediumID(ID_SIPAD), pars, 4); // mSensitiveECALPad.push_back("EWSIPAD1"); mSensitive.push_back("EWSIPAD1"); gGeoManager->GetVolume("EWSIPAD1")->SetLineColor(kOrange - 7); @@ -900,7 +900,7 @@ void Detector::CreateECALGeometry() // Pixels (sensitive layer) if (icomp->material() == "SiPix") { // TVirtualMC::GetMC()->Gsvolu("EWSIPIX1", "BOX", idtmed[3600], pars, 4); - TVirtualMC::GetMC()->Gsvolu("EWSIPIX1", "BOX", ID_SIPIX, pars, 4); + TVirtualMC::GetMC()->Gsvolu("EWSIPIX1", "BOX", getMediumID(ID_SIPIX), pars, 4); // mSensitiveECALPix.push_back("EWSIPIX1"); mSensitive.push_back("EWSIPIX1"); gGeoManager->GetVolume("EWSIPIX1")->SetLineColor(kPink); @@ -915,7 +915,7 @@ void Detector::CreateECALGeometry() // Passive silicon if (icomp->material() == "Si") { // TVirtualMC::GetMC()->Gsvolu("EWSI1", "BOX", idtmed[3610], pars, 4); - TVirtualMC::GetMC()->Gsvolu("EWSI1", "BOX", ID_SIINSENS, pars, 4); + TVirtualMC::GetMC()->Gsvolu("EWSI1", "BOX", getMediumID(ID_SIINSENS), pars, 4); // mSensitiveECALPix.push_back("EWSI1"); mSensitive.push_back("EWSI1"); gGeoManager->GetVolume("EWSI1")->SetLineColor(kPink); @@ -954,7 +954,7 @@ void Detector::CreateECALGeometry() fcal_pars[3] = 0.; // TVirtualMC::GetMC()->Gsvolu("ECAL", "BOX", idtmed[3698], fcal_pars, 4); - TVirtualMC::GetMC()->Gsvolu("ECAL", "BOX", ID_AIR, fcal_pars, 4); + TVirtualMC::GetMC()->Gsvolu("ECAL", "BOX", getMediumID(ID_AIR), fcal_pars, 4); // mSensitiveECALPad.push_back("ECAL"); mSensitive.push_back("ECAL"); @@ -964,7 +964,9 @@ void Detector::CreateECALGeometry() TGeoVolume* volumeSiPad = new TGeoVolume("volSiPad", siPadBox, gGeoManager->GetMedium(getMediumID(ID_SIPAD))); volumeSiPad->SetLineColor(kOrange + 7); // mSensitiveECALPad.push_back(volumeSiPad->GetName()); - mSensitive.push_back(volumeSiPad->GetName()); + if (geom->getInsertFrontPadLayers()) { + mSensitive.push_back(volumeSiPad->GetName()); + } double xp, yp, zp; int itowerx, itowery; From 24e05f9d9f82045330f8a64c941b089c18441797 Mon Sep 17 00:00:00 2001 From: Piotr Konopka Date: Mon, 9 Dec 2024 23:04:34 +0100 Subject: [PATCH 08/19] QC-1253 Mergers: Shorter latency with multiple layers (#13782) If we run multiple layers of Mergers, the merged object arrival time can be described as: merger cycle duration * number of layers (it can be shorter due to randomized timer shifts at startup). As a consequence, adding each new layer adds the latency to the merger topology. Assuming that the deployed Mergers are not expendable, we can rely on expecting the right number of input messages to know that each Merger in the lower layer produced an update, so we can publish the merged object. As an effect, we get lower latency. 
--- .../Mergers/include/Mergers/FullHistoryMerger.h | 1 + .../Mergers/include/Mergers/IntegratingMerger.h | 1 + .../Mergers/include/Mergers/MergerConfig.h | 8 +++++--- Utilities/Mergers/src/FullHistoryMerger.cxx | 17 ++++++++++++++++- Utilities/Mergers/src/IntegratingMerger.cxx | 13 ++++++++++++- .../Mergers/src/MergerInfrastructureBuilder.cxx | 15 +++++++++++---- .../Mergers/test/mergersBenchmarkTopology.cxx | 6 +++--- 7 files changed, 49 insertions(+), 12 deletions(-) diff --git a/Utilities/Mergers/include/Mergers/FullHistoryMerger.h b/Utilities/Mergers/include/Mergers/FullHistoryMerger.h index a62f5acf685fc..03e62d040a8b2 100644 --- a/Utilities/Mergers/include/Mergers/FullHistoryMerger.h +++ b/Utilities/Mergers/include/Mergers/FullHistoryMerger.h @@ -71,6 +71,7 @@ class FullHistoryMerger : public framework::Task void mergeCache(); void publish(framework::DataAllocator& allocator); void clear(); + bool shouldFinishCycle(const framework::InputRecord& inputs) const; }; } // namespace o2::mergers diff --git a/Utilities/Mergers/include/Mergers/IntegratingMerger.h b/Utilities/Mergers/include/Mergers/IntegratingMerger.h index 8a6cecc437d1e..7a8da4a3f0d29 100644 --- a/Utilities/Mergers/include/Mergers/IntegratingMerger.h +++ b/Utilities/Mergers/include/Mergers/IntegratingMerger.h @@ -60,6 +60,7 @@ class IntegratingMerger : public framework::Task void publishMovingWindow(framework::DataAllocator& allocator); static void merge(ObjectStore& mMergedDelta, ObjectStore&& other); void clear(); + bool shouldFinishCycle(const framework::InputRecord&) const; private: header::DataHeader::SubSpecificationType mSubSpec; diff --git a/Utilities/Mergers/include/Mergers/MergerConfig.h b/Utilities/Mergers/include/Mergers/MergerConfig.h index 1f626cd4345c5..dcaf2682eaa05 100644 --- a/Utilities/Mergers/include/Mergers/MergerConfig.h +++ b/Utilities/Mergers/include/Mergers/MergerConfig.h @@ -41,7 +41,7 @@ enum class MergedObjectTimespan { // when InputObjectsTimespan::FullHistory is set. LastDifference, // Generalisation of the two above. Resets all objects in Mergers after n cycles (0 - infinite). - // The the above will be removed once we switch to NCycles in QC. + // The above will be removed once we switch to NCycles in QC. NCycles }; @@ -52,7 +52,8 @@ enum class PublishMovingWindow { }; enum class PublicationDecision { - EachNSeconds, // Merged object is published each N seconds. This can evolve over time, thus we expect pairs specifying N:duration1, M:duration2... + EachNSeconds, // Merged object is published each N seconds. This can evolve over time, thus we expect pairs specifying N:duration1, M:duration2... + EachNArrivals, // Merged object is published whenever we receive N new input objects. }; enum class TopologySize { @@ -66,6 +67,7 @@ enum class ParallelismType { RoundRobin // Mergers receive their input messages in round robin order. Useful when there is one InputSpec with a wildcard. }; +// fixme: this way of configuring mergers should be refactored, it does not make sense that we share `param`s across for different enum values. 
template struct ConfigEntry { V value; @@ -82,7 +84,7 @@ class PublicationDecisionParameter PublicationDecisionParameter(size_t param) : decision({{param, 1}}) {} PublicationDecisionParameter(const std::vector>& decision) : decision(decision) {} - std::vector> decision; + std::vector> decision; }; // todo rework configuration in a way that user cannot create an invalid configuration diff --git a/Utilities/Mergers/src/FullHistoryMerger.cxx b/Utilities/Mergers/src/FullHistoryMerger.cxx index 079f557ec83d5..1ae566d7953db 100644 --- a/Utilities/Mergers/src/FullHistoryMerger.cxx +++ b/Utilities/Mergers/src/FullHistoryMerger.cxx @@ -76,7 +76,7 @@ void FullHistoryMerger::run(framework::ProcessingContext& ctx) } } - if (ctx.inputs().isValid("timer-publish") && !mFirstObjectSerialized.first.empty()) { + if (shouldFinishCycle(ctx.inputs())) { mCyclesSinceReset++; mergeCache(); publish(ctx.outputs()); @@ -88,6 +88,21 @@ void FullHistoryMerger::run(framework::ProcessingContext& ctx) } } +bool FullHistoryMerger::shouldFinishCycle(const framework::InputRecord& inputs) const +{ + if (mFirstObjectSerialized.first.empty()) { + return false; + } + + if (mConfig.publicationDecision.value == PublicationDecision::EachNSeconds) { + return inputs.isValid("timer-publish"); + } else if (mConfig.publicationDecision.value == PublicationDecision::EachNArrivals) { + return mUpdatesReceived > 0 && mUpdatesReceived % mConfig.publicationDecision.param.decision.begin()->first == 0; + } else { + throw std::runtime_error("unsupported publication decision parameter"); + } +} + void FullHistoryMerger::endOfStream(framework::EndOfStreamContext& eosContext) { mergeCache(); diff --git a/Utilities/Mergers/src/IntegratingMerger.cxx b/Utilities/Mergers/src/IntegratingMerger.cxx index 749becd463a5d..e9cfec1cfe8cc 100644 --- a/Utilities/Mergers/src/IntegratingMerger.cxx +++ b/Utilities/Mergers/src/IntegratingMerger.cxx @@ -68,11 +68,22 @@ void IntegratingMerger::run(framework::ProcessingContext& ctx) } } - if (ctx.inputs().isValid("timer-publish")) { + if (shouldFinishCycle(ctx.inputs())) { finishCycle(ctx.outputs()); } } +bool IntegratingMerger::shouldFinishCycle(const framework::InputRecord& inputs) const +{ + if (mConfig.publicationDecision.value == PublicationDecision::EachNSeconds) { + return inputs.isValid("timer-publish"); + } else if (mConfig.publicationDecision.value == PublicationDecision::EachNArrivals) { + return mDeltasMerged > 0 && mDeltasMerged % mConfig.publicationDecision.param.decision.begin()->first == 0; + } else { + throw std::runtime_error("unsupported publication decision parameter"); + } +} + void IntegratingMerger::finishCycle(DataAllocator& outputs) { mCyclesSinceReset++; diff --git a/Utilities/Mergers/src/MergerInfrastructureBuilder.cxx b/Utilities/Mergers/src/MergerInfrastructureBuilder.cxx index 233631f0fe80b..9fcb6aaa482dd 100644 --- a/Utilities/Mergers/src/MergerInfrastructureBuilder.cxx +++ b/Utilities/Mergers/src/MergerInfrastructureBuilder.cxx @@ -128,7 +128,8 @@ framework::WorkflowSpec MergerInfrastructureBuilder::generateInfrastructure() auto layerInputs = mInputs; // preparing some numbers - auto mergersPerLayer = computeNumberOfMergersPerLayer(layerInputs.size()); + const auto mergersPerLayer = computeNumberOfMergersPerLayer(layerInputs.size()); + const bool expendable = std::ranges::any_of(mConfig.labels, [](const auto& label) { return label.value == "expendable"; }); // topology generation MergerBuilder mergerBuilder; @@ -150,7 +151,6 @@ framework::WorkflowSpec 
MergerInfrastructureBuilder::generateInfrastructure() // we also expect moving windows to be published only by the last layer layerConfig.publishMovingWindow = {PublishMovingWindow::No}; } - mergerBuilder.setConfig(layerConfig); framework::Inputs nextLayerInputs; auto inputsRangeBegin = layerInputs.begin(); @@ -162,13 +162,19 @@ framework::WorkflowSpec MergerInfrastructureBuilder::generateInfrastructure() auto inputsRangeEnd = inputsRangeBegin + inputsPerMerger + (m < inputsPerMergerRemainder); mergerBuilder.setInputSpecs(framework::Inputs(inputsRangeBegin, inputsRangeEnd)); - inputsRangeBegin = inputsRangeEnd; + if (layer > 1 && !expendable) { + // we optimize the latency of higher Merger layers by publishing an object as soon as we get the expected number of inputs. + // we can do that safely only if tasks are not expendable, i.e. we are guaranteed that workflow stops if a Merger crashes. + const auto inputNumber = std::distance(inputsRangeBegin, inputsRangeEnd); + assert(inputNumber != 0); + layerConfig.publicationDecision = {PublicationDecision::EachNArrivals, inputNumber}; + } if (layer == mergersPerLayer.size() - 1) { // the last layer => use the specified external OutputSpec mergerBuilder.setOutputSpec(mOutputSpecIntegral); } - + mergerBuilder.setConfig(layerConfig); auto merger = mergerBuilder.buildSpec(); auto input = DataSpecUtils::matchingInput(merger.outputs.at(0)); @@ -176,6 +182,7 @@ framework::WorkflowSpec MergerInfrastructureBuilder::generateInfrastructure() nextLayerInputs.push_back(input); workflow.emplace_back(std::move(merger)); + inputsRangeBegin = inputsRangeEnd; } layerInputs = nextLayerInputs; // todo: could be optimised with pointers } diff --git a/Utilities/Mergers/test/mergersBenchmarkTopology.cxx b/Utilities/Mergers/test/mergersBenchmarkTopology.cxx index 4cf8e84a37e63..beb9a572b6685 100644 --- a/Utilities/Mergers/test/mergersBenchmarkTopology.cxx +++ b/Utilities/Mergers/test/mergersBenchmarkTopology.cxx @@ -71,14 +71,14 @@ WorkflowSpec defineDataProcessing(ConfigContext const& config) for (size_t p = 0; p < objectsProducers; p++) { mergersInputs.push_back({ "mo", "TST", "HISTO", static_cast(p + 1), - Lifetime::Timeframe }); + Lifetime::Sporadic }); DataProcessorSpec producer{ "producer-histo" + std::to_string(p), Inputs{}, Outputs{ { { "mo" }, "TST", "HISTO", static_cast(p + 1), - Lifetime::Timeframe } }, + Lifetime::Sporadic } }, AlgorithmSpec{ (AlgorithmSpec::ProcessCallback)[ p, periodus = int(1000000 / objectsRate), objectsBins, objectsProducers ]( ProcessingContext& processingContext) mutable { static auto lastTime = steady_clock::now(); @@ -115,7 +115,7 @@ WorkflowSpec defineDataProcessing(ConfigContext const& config) DataProcessorSpec printer{ "printer-bins", Inputs{ - { "histo", "TST", "HISTO", 0 } + { "histo", "TST", "HISTO", 0, Lifetime::Sporadic } }, Outputs{}, AlgorithmSpec{ From f4f8f438c5024c4f7da2be5ec5f177f71c94ae30 Mon Sep 17 00:00:00 2001 From: Matteo Concas Date: Tue, 10 Dec 2024 09:15:40 +0100 Subject: [PATCH 09/19] Improve logging in case of OOM (#13786) --- .../ITS/tracking/include/ITStracking/TimeFrame.h | 1 + Detectors/ITSMFT/ITS/tracking/src/TimeFrame.cxx | 14 ++++++++++++++ Detectors/ITSMFT/ITS/tracking/src/Tracker.cxx | 15 +++++++++++---- 3 files changed, 26 insertions(+), 4 deletions(-) diff --git a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TimeFrame.h b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TimeFrame.h index fa4f33782d16a..0237f4ce9579b 100644 --- a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TimeFrame.h 
+++ b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TimeFrame.h
@@ -259,6 +259,7 @@ class TimeFrame
   void printCellLUTonLayer(int i);
   void printTrackletLUTs();
   void printCellLUTs();
+  void printROFInfo(const int rofId);

   IndexTableUtils mIndexTableUtils;

diff --git a/Detectors/ITSMFT/ITS/tracking/src/TimeFrame.cxx b/Detectors/ITSMFT/ITS/tracking/src/TimeFrame.cxx
index f614de9b9f26a..40a540015d75f 100644
--- a/Detectors/ITSMFT/ITS/tracking/src/TimeFrame.cxx
+++ b/Detectors/ITSMFT/ITS/tracking/src/TimeFrame.cxx
@@ -608,5 +608,19 @@ void TimeFrame::printNClsPerROF()
     std::cout << std::endl;
   }
 }
+
+void TimeFrame::printROFInfo(const int rofId)
+{
+  std::cout << "ROF " << rofId << " dump:" << std::endl;
+  for (int iLayer{0}; iLayer < mClusters.size(); ++iLayer) {
+    std::cout << "Layer " << iLayer << " has: " << getClustersOnLayer(rofId, iLayer).size() << " clusters." << std::endl;
+  }
+  std::cout << "Number of seeding vertices: " << getPrimaryVertices(rofId).size() << std::endl;
+  int iVertex{0};
+  for (auto& v : getPrimaryVertices(rofId)) {
+    std::cout << "\t vertex " << iVertex++ << ": x=" << v.getX() << " y=" << v.getY() << " z=" << v.getZ() << " has " << v.getNContributors() << " contributors." << std::endl;
+  }
+}
+
 } // namespace its
 } // namespace o2

diff --git a/Detectors/ITSMFT/ITS/tracking/src/Tracker.cxx b/Detectors/ITSMFT/ITS/tracking/src/Tracker.cxx
index 721452bf0361d..7b06f7c7bcc76 100644
--- a/Detectors/ITSMFT/ITS/tracking/src/Tracker.cxx
+++ b/Detectors/ITSMFT/ITS/tracking/src/Tracker.cxx
@@ -35,6 +35,7 @@ namespace o2
 {
 namespace its
 {
+using o2::its::constants::GB;

 Tracker::Tracker(o2::its::TrackerTraits* traits)
 {
@@ -74,12 +75,15 @@ void Tracker::clustersToTracks(std::function logger, std::f
                        &Tracker::computeTracklets, "Tracklet finding", [](std::string) {}, iteration, iROFs, iVertex);
       nTracklets += mTraits->getTFNumberOfTracklets();
       if (!mTimeFrame->checkMemory(mTrkParams[iteration].MaxMemory)) {
-        error(fmt::format("Too much memory used during trackleting in iteration {}, check the detector status and/or the selections.", iteration));
+        mTimeFrame->printROFInfo(iROFs);
+        error(fmt::format("Too much memory used during trackleting in iteration {} in ROF span {}-{}: {:.2f} GB. Current limit is {:.2f} GB, check the detector status and/or the selections.",
+                          iteration, iROFs, iROFs + mTrkParams[iteration].nROFsPerIterations, mTimeFrame->getArtefactsMemory() / GB, mTrkParams[iteration].MaxMemory / GB));
         break;
       }
       float trackletsPerCluster = mTraits->getTFNumberOfClusters() > 0 ? float(mTraits->getTFNumberOfTracklets()) / mTraits->getTFNumberOfClusters() : 0.f;
       if (trackletsPerCluster > mTrkParams[iteration].TrackletsPerClusterLimit) {
-        error(fmt::format("Too many tracklets per cluster ({}) in iteration {}, check the detector status and/or the selections. Current limit is {}", trackletsPerCluster, iteration, mTrkParams[iteration].TrackletsPerClusterLimit));
+        error(fmt::format("Too many tracklets per cluster ({}) in iteration {} in ROF span {}-{}, check the detector status and/or the selections. 
Current limit is {}", + trackletsPerCluster, iteration, iROFs, iROFs + mTrkParams[iteration].nROFsPerIterations, mTrkParams[iteration].TrackletsPerClusterLimit)); break; } @@ -87,12 +91,15 @@ void Tracker::clustersToTracks(std::function logger, std::f &Tracker::computeCells, "Cell finding", [](std::string) {}, iteration); nCells += mTraits->getTFNumberOfCells(); if (!mTimeFrame->checkMemory(mTrkParams[iteration].MaxMemory)) { - error(fmt::format("Too much memory used during cell finding in iteration {}, check the detector status and/or the selections.", iteration)); + mTimeFrame->printROFInfo(iROFs); + error(fmt::format("Too much memory used during cell finding in iteration {} in ROF span {}-{}: {:.2f} GB. Current limit is {:.2f} GB, check the detector status and/or the selections.", + iteration, iROFs, iROFs + mTrkParams[iteration].nROFsPerIterations, mTimeFrame->getArtefactsMemory() / GB, mTrkParams[iteration].MaxMemory / GB)); break; } float cellsPerCluster = mTraits->getTFNumberOfClusters() > 0 ? float(mTraits->getTFNumberOfCells()) / mTraits->getTFNumberOfClusters() : 0.f; if (cellsPerCluster > mTrkParams[iteration].CellsPerClusterLimit) { - error(fmt::format("Too many cells per cluster ({}) in iteration {}, check the detector status and/or the selections. Current limit is {}", cellsPerCluster, iteration, mTrkParams[iteration].CellsPerClusterLimit)); + error(fmt::format("Too many cells per cluster ({}) in iteration {} in ROF span {}-{}, check the detector status and/or the selections. Current limit is {}", + cellsPerCluster, iteration, iROFs, iROFs + mTrkParams[iteration].nROFsPerIterations, mTrkParams[iteration].CellsPerClusterLimit)); break; } From d06c2cfad74dcdb03db3ffe50490f0350cbf40ae Mon Sep 17 00:00:00 2001 From: Marvin Hemmer <53471402+mhemmer-cern@users.noreply.github.com> Date: Tue, 10 Dec 2024 15:56:48 +0100 Subject: [PATCH 10/19] [Run2,Converter] Add V0 on the fly table (#13765) - Add the newly added on the fly V0 table that is produced in the Run2 to Run3 converter task in AliPhysics. - The columns for the Run2OTFV0s no longer have the C in their name --- .../include/Framework/AnalysisDataModel.h | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/Framework/Core/include/Framework/AnalysisDataModel.h b/Framework/Core/include/Framework/AnalysisDataModel.h index 8aba0e3a87ed7..c050977833937 100644 --- a/Framework/Core/include/Framework/AnalysisDataModel.h +++ b/Framework/Core/include/Framework/AnalysisDataModel.h @@ -1672,6 +1672,26 @@ DECLARE_SOA_COLUMN(SPDFiredFastOrL0, spdFiredFastOrL0, uint16_t); //! Fired DECLARE_SOA_COLUMN(SPDFiredFastOrL1, spdFiredFastOrL1, uint16_t); //! Fired FASTOR signals in the first layer of the SPD (online) DECLARE_SOA_COLUMN(V0TriggerChargeA, v0TriggerChargeA, uint16_t); //! V0A trigger charge DECLARE_SOA_COLUMN(V0TriggerChargeC, v0TriggerChargeC, uint16_t); //! V0C trigger charge +namespace oftv0 +{ +DECLARE_SOA_INDEX_COLUMN(Collision, collision); //! Collision index +DECLARE_SOA_INDEX_COLUMN_FULL(PosTrack, posTrack, int, Tracks, "_Pos"); //! Positive track +DECLARE_SOA_INDEX_COLUMN_FULL(NegTrack, negTrack, int, Tracks, "_Neg"); //! Negative track +DECLARE_SOA_COLUMN(Px, px, float); //! momentum in x +DECLARE_SOA_COLUMN(Py, py, float); //! momentum in y +DECLARE_SOA_COLUMN(Pz, pz, float); //! momentum in z +DECLARE_SOA_COLUMN(E, e, float); //! energy +DECLARE_SOA_COLUMN(Qt, qt, float); //! Qt of AP +DECLARE_SOA_COLUMN(Alpha, alpha, float); //! Alpha of AP +DECLARE_SOA_COLUMN(X, x, float); //! 
conversion point x coordinate
+DECLARE_SOA_COLUMN(Y, y, float);             //! conversion point y coordinate
+DECLARE_SOA_COLUMN(Z, z, float);             //! conversion point z coordinate
+DECLARE_SOA_COLUMN(Chi2NDF, chi2NDF, float); //! chi^2 over NDF from KFParticle
+DECLARE_SOA_COLUMN(PsiPair, psiPair, float); //! Psi pair
+DECLARE_SOA_COLUMN(DCAr, dcaR, float);       //! DCA in radial direction
+DECLARE_SOA_COLUMN(DCAz, dcaZ, float);       //! DCA in z direction
+DECLARE_SOA_COLUMN(Mass, mass, float);       //! mass of the conversion. Do NOT use for cut!
+} // namespace oftv0
 } // namespace run2

 DECLARE_SOA_TABLE(Run2BCInfos, "AOD", "RUN2BCINFO", run2::EventCuts, //! Legacy information for Run 2 event selection
@@ -1682,6 +1702,18 @@ DECLARE_SOA_TABLE(Run2BCInfos, "AOD", "RUN2BCINFO", run2::EventCuts, //! Legacy
                   run2::V0TriggerChargeA, run2::V0TriggerChargeC);
 using Run2BCInfo = Run2BCInfos::iterator;

+DECLARE_SOA_TABLE(Run2OTFV0s, "AOD", "Run2OTFV0", //! Run 2 V0 on the fly table
+                  o2::soa::Index<>,
+                  run2::oftv0::CollisionId, run2::oftv0::PosTrackId, run2::oftv0::NegTrackId,
+                  run2::oftv0::Px, run2::oftv0::Py, run2::oftv0::Pz, run2::oftv0::E,
+                  run2::oftv0::Qt, run2::oftv0::Alpha,
+                  run2::oftv0::X, run2::oftv0::Y, run2::oftv0::Z,
+                  run2::oftv0::Chi2NDF, run2::oftv0::PsiPair,
+                  run2::oftv0::DCAr, run2::oftv0::DCAz,
+                  run2::oftv0::Mass);
+
+using Run2OTFV0 = Run2OTFV0s::iterator;
+
 // ---- MC tables ----
 namespace mccollision
 {

From c5cbdc49b86416c5689a7413f0308d282efbd3b7 Mon Sep 17 00:00:00 2001
From: shahoian
Date: Tue, 10 Dec 2024 16:06:37 +0100
Subject: [PATCH 11/19] DCAFitterN: log-throttling for err.messages + user policy for bad CovMat

Due to linearization errors, the covariance matrix of the track propagated to some point
may become non-positive definite. In this case an error will be logged (logarithmically
throttled), the relevant correlation coefficient of the cov.matrix is redefined to cure the
position part of the cov.matrix, and further program flow depends on the user settings for
DCAFitterN::setBadCovPolicy(v):

DCAFitterN::setBadCovPolicy(DCAFitterN::Discard) : abandon fit (default)
DCAFitterN::setBadCovPolicy(DCAFitterN::Override) : continue fit with overridden cov.matrix
DCAFitterN::setBadCovPolicy(DCAFitterN::OverrideAndFlag) : continue fit with overridden cov.matrix
but set the propagation failure flag (can be checked using the isPropagationFailure(int cand = 0) method).

---
 Common/DCAFitter/README.md                   |  19 ++-
 .../DCAFitter/include/DCAFitter/DCAFitterN.h | 117 +++++++++++++++---
 2 files changed, 114 insertions(+), 22 deletions(-)

diff --git a/Common/DCAFitter/README.md b/Common/DCAFitter/README.md
index bbb079d144748..1699ffb4f8aca 100644
--- a/Common/DCAFitter/README.md
+++ b/Common/DCAFitter/README.md
@@ -2,7 +2,7 @@
 \page refDetectorsVertexing DCAFitter
 /doxy -->

-## DCAFitterN
+# DCAFitterN

 Templated class to fit the Point of Closest Approach (PCA) of secondary vertex with N prongs. Allows minimization of either absolute or weighted Distances of Closest Approach (DCA) of N tracks to their common PCA.

@@ -74,7 +74,22 @@ Extra method `setWeightedFinalPCA(bool)` is provided for the "mixed" mode: if `s
 but the final V0 position will be calculated using weighted average.
 One can also recalculate the V0 position by the weighted average method by calling explicitly `ft.recalculatePCAWithErrors(int icand=0)`,
 w/o prior call of `setWeightedFinalPCA(true)`: this will update the position returned by the `getPCACandidate(int cand = 0)`. 
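A minimal usage sketch (an illustration only; it assumes two `o2::track::TrackParCov` prongs `tr0` and `tr1` and the solenoid field `bz` are available in the caller's scope):

```cpp
o2::vertexing::DCAFitterN<2> ft;  // 2-prong fitter
ft.setBz(bz);                     // magnetic field in kGauss
ft.setUseAbsDCA(true);            // minimize absolute DCAs rather than weighted ones
int nCand = ft.process(tr0, tr1); // number of PCA candidates found (0 on failure)
if (nCand) {
  const auto& pca = ft.getPCACandidate(); // best estimate of the V0 position
}
```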
-The covariance matrix of the V0 position is calculated as an inversed sum of tracks inversed covariances at respective `X_dca` points.
+The covariance matrix of the V0 position is calculated as an inverted sum of the tracks' inverse covariances at respective `X_dca` points.

 See ``O2/Common/DCAFitter/test/testDCAFitterN.cxx`` for a more extended example.

 Currently only 2 and 3 prongs permitted, though this can be changed by modifying ``DCAFitterN::NMax`` constant.
+
+## Error handling
+
+It may happen that the track propagation to the proximity of the PCA fails at various stages of the fit. In this case the fit is abandoned and the failure flag is set; it can be checked using the
+`isPropagationFailure(int cand = 0)` method.
+
+Also, due to the linearization errors the covariance matrix of the track propagated to some point may become non-positive definite.
+In this case the relevant correlation coefficient of the cov.matrix is redefined to cure the position part of the cov.matrix and further program flow depends on the user settings for `DCAFitterN::setBadCovPolicy(v)`:
+
+`DCAFitterN::setBadCovPolicy(DCAFitterN::Discard);` : abandon fit (default)
+
+`DCAFitterN::setBadCovPolicy(DCAFitterN::Override);` : continue fit with overridden cov.matrix
+
+`DCAFitterN::setBadCovPolicy(DCAFitterN::OverrideAndFlag);` : continue fit with overridden cov.matrix but set the propagation failure flag (can be checked using the same `isPropagationFailure(int cand = 0)` method).
+

diff --git a/Common/DCAFitter/include/DCAFitter/DCAFitterN.h b/Common/DCAFitter/include/DCAFitter/DCAFitterN.h
index 8ded804751de1..6bd143eae44d6 100644
--- a/Common/DCAFitter/include/DCAFitter/DCAFitterN.h
+++ b/Common/DCAFitter/include/DCAFitter/DCAFitterN.h
@@ -26,35 +26,32 @@ namespace o2
 {
 namespace vertexing
 {
+///__________________________________________________________________________________
 ///< Inverse cov matrix (augmented by a dummy X error) of the point defined by the track
 struct TrackCovI {
   float sxx, syy, syz, szz;
-  GPUd() TrackCovI(const o2::track::TrackParCov& trc, float xerrFactor = 1.) { set(trc, xerrFactor); }
-  GPUdDefault() TrackCovI() = default;
-  GPUd() void set(const o2::track::TrackParCov& trc, float xerrFactor = 1)
+
+  GPUd() bool set(const o2::track::TrackParCov& trc, float xerrFactor = 1.f)
   {
     // we assign Y error to X for DCA calculation
     // (otherwise for quasi-collinear tracks the X will not be constrained)
     float cyy = trc.getSigmaY2(), czz = trc.getSigmaZ2(), cyz = trc.getSigmaZY(), cxx = cyy * xerrFactor;
     float detYZ = cyy * czz - cyz * cyz;
+    bool res = true;
     if (detYZ <= 0.) {
-#ifndef GPUCA_GPUCODE
-      printf("overriding invalid track covariance from %s\n", trc.asString().c_str());
-#else
-      printf("overriding invalid track covariance cyy:%e czz:%e cyz:%e\n", cyy, czz, cyz);
-#endif
       cyz = o2::gpu::GPUCommonMath::Sqrt(cyy * czz) * (cyz > 0 ? 0.98f : -0.98f);
       detYZ = cyy * czz - cyz * cyz;
+      res = false;
    }
    auto detYZI = 1. / detYZ;
    sxx = 1. 
/ cxx; syy = czz * detYZI; syz = -cyz * detYZI; szz = cyy * detYZI; + return res; } }; @@ -74,6 +71,27 @@ struct TrackDeriv { } }; +///__________________________________________________________________________ +///< Log log-throttling helper +struct LogLogThrottler { + size_t evCount{0}; + size_t evCountPrev{0}; + size_t logCount{0}; + + GPUdi() bool needToLog() + { + if (size_t(o2::gpu::GPUCommonMath::Log(++evCount)) + 1 > logCount) { + logCount++; + return true; + } + return false; + } + + GPUdi() size_t getNMuted() const { return evCount - evCountPrev - 1; } + + GPUdi() void clear() { evCount = evCountPrev = logCount = 0; } +}; + template class DCAFitterN { @@ -100,6 +118,12 @@ class DCAFitterN using ArrTrPos = o2::gpu::gpustd::array; // container of Track positions public: + enum BadCovPolicy { // if encountering non-positive defined cov. matrix, the choice is: + Discard = 0, // stop evaluation + Override = 1, // override correlation coef. to have cov.matrix pos.def and continue + OverrideAndFlag = 2 // override correlation coef. to have cov.matrix pos.def, set mPropFailed flag of corresponding candidate to true and continue (up to the user to check the flag) + }; + static constexpr int getNProngs() { return N; } DCAFitterN() = default; @@ -300,6 +324,9 @@ class DCAFitterN pnt[2] = tr.getZ(); } + void setBadCovPolicy(BadCovPolicy v) { mBadCovPolicy = v; } + BadCovPolicy getBadCovPolicy() const { return mBadCovPolicy; } + private: // vectors of 1st derivatives of track local residuals over X parameters o2::gpu::gpustd::array, N> mDResidDx; @@ -325,11 +352,15 @@ class DCAFitterN o2::gpu::gpustd::array mNIters; // number of iterations for each seed o2::gpu::gpustd::array mTrPropDone{}; // Flag that the tracks are fully propagated to PCA o2::gpu::gpustd::array mPropFailed{}; // Flag that some propagation failed for this PCA candidate + LogLogThrottler mLoggerBadCov{}; + LogLogThrottler mLoggerBadInv{}; + LogLogThrottler mLoggerBadProp{}; MatSym3D mWeightInv; // inverse weight of single track, [sum{M^T E M}]^-1 in EQ.T o2::gpu::gpustd::array mOrder{0}; int mCurHyp = 0; int mCrossIDCur = 0; int mCrossIDAlt = -1; + BadCovPolicy mBadCovPolicy{BadCovPolicy::Discard}; // what to do in case of non-pos-def. cov. matrix, see BadCovPolicy enum bool mAllowAltPreference = true; // if the fit converges to alternative PCA seed, abandon the current one bool mUseAbsDCA = false; // use abs. 
distance minimization rather than chi2
  bool mWeightedFinalPCA = false; // recalculate PCA as a cov-matrix weighted mean, even if absDCA method was used
@@ -678,7 +709,23 @@ GPUd() bool DCAFitterN::recalculatePCAWithErrors(int cand)
   mCurHyp = mOrder[cand];
   if (mUseAbsDCA) {
     for (int i = N; i--;) {
-      mTrcEInv[mCurHyp][i].set(mCandTr[mCurHyp][i], XerrFactor); // prepare inverse cov.matrices at starting point
+      if (!mTrcEInv[mCurHyp][i].set(mCandTr[mCurHyp][i], XerrFactor)) { // prepare inverse cov.matrices at starting point
+        if (mLoggerBadCov.needToLog()) {
+#ifndef GPUCA_GPUCODE
+          printf("fitter %d: error (%ld muted): overrode invalid track covariance from %s\n",
+                 mFitterID, mLoggerBadCov.getNMuted(), mCandTr[mCurHyp][i].asString().c_str());
+#else
+          printf("fitter %d: error (%ld muted): overrode invalid track covariance cyy:%e czz:%e cyz:%e\n",
+                 mFitterID, mLoggerBadCov.getNMuted(), mCandTr[mCurHyp][i].getSigmaY2(), mCandTr[mCurHyp][i].getSigmaZ2(), mCandTr[mCurHyp][i].getSigmaZY());
+#endif
+          mLoggerBadCov.evCountPrev = mLoggerBadCov.evCount;
+        }
+        if (mBadCovPolicy == Discard) {
+          return false;
+        } else if (mBadCovPolicy == OverrideAndFlag) {
+          mPropFailed[mCurHyp] = true;
+        } // otherwise, just use overridden errors w/o flagging
+      }
    }
    if (!calcPCACoefs()) {
      mCurHyp = saveCurHyp;
@@ -885,7 +932,23 @@ GPUd() bool DCAFitterN::minimizeChi2()
      return false;
    }
    setTrackPos(mTrPos[mCurHyp][i], mCandTr[mCurHyp][i]);      // prepare positions
-    mTrcEInv[mCurHyp][i].set(mCandTr[mCurHyp][i], XerrFactor); // prepare inverse cov.matrices at starting point
+    if (!mTrcEInv[mCurHyp][i].set(mCandTr[mCurHyp][i], XerrFactor)) { // prepare inverse cov.matrices at starting point
+      if (mLoggerBadCov.needToLog()) {
+#ifndef GPUCA_GPUCODE
+        printf("fitter %d: error (%ld muted): overrode invalid track covariance from %s\n",
+               mFitterID, mLoggerBadCov.getNMuted(), mCandTr[mCurHyp][i].asString().c_str());
+#else
+        printf("fitter %d: error (%ld muted): overrode invalid track covariance cyy:%e czz:%e cyz:%e\n",
+               mFitterID, mLoggerBadCov.getNMuted(), mCandTr[mCurHyp][i].getSigmaY2(), mCandTr[mCurHyp][i].getSigmaZ2(), mCandTr[mCurHyp][i].getSigmaZY());
+#endif
+        mLoggerBadCov.evCountPrev = mLoggerBadCov.evCount;
+      }
+      if (mBadCovPolicy == Discard) {
+        return false;
+      } else if (mBadCovPolicy == OverrideAndFlag) {
+        mPropFailed[mCurHyp] = true;
+      } // otherwise, just use overridden errors w/o flagging
+    }
  }

  if (mMaxDZIni > 0 && !roughDZCut()) { // apply rough cut on tracks Z difference
@@ -905,11 +968,10 @@ GPUd() bool DCAFitterN::minimizeChi2()
    // do Newton-Raphson iteration with corrections = - dchi2/d{x0..xN} * [ d^2chi2/d{x0..xN}^2 ]^-1
    if (!mD2Chi2Dx2.Invert()) {
-#ifndef GPUCA_GPUCODE_DEVICE
-      LOG(error) << "InversionFailed";
-#else
-      printf("InversionFailed\n");
-#endif
+      if (mLoggerBadInv.needToLog()) {
+        printf("fitter %d: error (%ld muted): Inversion failed\n", mFitterID, mLoggerBadInv.getNMuted());
+        mLoggerBadInv.evCountPrev = mLoggerBadInv.evCount;
+      }
      return false;
    }
    VecND dx = mD2Chi2Dx2 * mDChi2Dx;
@@ -962,11 +1024,10 @@ GPUd() bool DCAFitterN::minimizeChi2NoErr()
    // do Newton-Raphson iteration with corrections = - dchi2/d{x0..xN} * [ d^2chi2/d{x0..xN}^2 ]^-1
    if (!mD2Chi2Dx2.Invert()) {
-#ifndef GPUCA_GPUCODE_DEVICE
-      LOG(error) << "InversionFailed";
-#else
-      printf("InversionFailed\n");
-#endif
+      if (mLoggerBadInv.needToLog()) {
+        printf("fitter %d: error (%ld muted): Inversion failed\n", mFitterID, mLoggerBadInv.getNMuted());
+        mLoggerBadInv.evCountPrev = mLoggerBadInv.evCount;
+      }
      return false;
    }
    VecND dx 
= mD2Chi2Dx2 * mDChi2Dx; @@ -1109,6 +1170,14 @@ GPUdi() bool DCAFitterN::propagateParamToX(o2::track::TrackPar& t, f } if (!res) { mPropFailed[mCurHyp] = true; + if (mLoggerBadProp.needToLog()) { +#ifndef GPUCA_GPUCODE + printf("fitter %d: error (%ld muted): propagation failed for %s\n", mFitterID, mLoggerBadProp.getNMuted(), t.asString().c_str()); +#else + printf("fitter %d: error (%ld muted): propagation failed\n", mFitterID, mLoggerBadProp.getNMuted()); +#endif + mLoggerBadProp.evCountPrev = mLoggerBadProp.evCount; + } } return res; } @@ -1127,6 +1196,14 @@ GPUdi() bool DCAFitterN::propagateToX(o2::track::TrackParCov& t, flo } if (!res) { mPropFailed[mCurHyp] = true; + if (mLoggerBadProp.needToLog()) { +#ifndef GPUCA_GPUCODE + printf("fitter %d: error (%ld muted): propagation failed for %s\n", mFitterID, mLoggerBadProp.getNMuted(), t.asString().c_str()); +#else + printf("fitter %d: error (%ld muted): propagation failed\n", mFitterID, mLoggerBadProp.getNMuted()); +#endif + mLoggerBadProp.evCountPrev = mLoggerBadProp.evCount; + } } return res; } From 47d098dedfbf8cd5c8933340c3aefb40a34b53c8 Mon Sep 17 00:00:00 2001 From: Marek Mytkowski <101755133+mytkom@users.noreply.github.com> Date: Wed, 11 Dec 2024 08:56:02 +0100 Subject: [PATCH 12/19] Get column value by column label in Framework Core ASoA (#13498) --- Framework/Core/include/Framework/ASoA.h | 78 +++++++++++++++++++ .../Core/include/Framework/BinningPolicy.h | 1 - Framework/Core/test/benchmark_ASoA.cxx | 60 ++++++++++++++ 3 files changed, 138 insertions(+), 1 deletion(-) diff --git a/Framework/Core/include/Framework/ASoA.h b/Framework/Core/include/Framework/ASoA.h index 84c6e3ae705fb..cfece12767612 100644 --- a/Framework/Core/include/Framework/ASoA.h +++ b/Framework/Core/include/Framework/ASoA.h @@ -30,6 +30,8 @@ #include #include #include +#include +#include #include #include @@ -2172,6 +2174,82 @@ std::tuple getRowData(arrow::Table* table, T rowIterator, { return std::make_tuple(getSingleRowData(table, rowIterator, ci, ai, globalIndex)...); } + +template +R getColumnValue(const T& rowIterator) +{ + return static_cast(static_cast(rowIterator).get()); +} + +template +using ColumnGetterFunction = R (*)(const T&); + +template +concept dynamic_with_common_getter = is_dynamic_column && + // lambda is callable without additional free args + framework::pack_size(typename T::bindings_t{}) == framework::pack_size(typename T::callable_t::args{}) && + requires(T t) { + { t.get() } -> std::convertible_to; + }; + +template +concept persistent_with_common_getter = is_persistent_v && requires(T t) { + { t.get() } -> std::convertible_to; +}; + +template C> +ColumnGetterFunction createGetterPtr(const std::string_view& targetColumnLabel) +{ + return targetColumnLabel == C::columnLabel() ? 
&getColumnValue : nullptr; +} + +template C> +ColumnGetterFunction createGetterPtr(const std::string_view& targetColumnLabel) +{ + std::string_view columnLabel(C::columnLabel()); + + // allows user to use consistent formatting (with prefix) of all column labels + // by default there isn't 'f' prefix for dynamic column labels + if (targetColumnLabel.starts_with("f") && targetColumnLabel.substr(1) == columnLabel) { + return &getColumnValue; + } + + // check also exact match if user is aware of prefix missing + if (targetColumnLabel == columnLabel) { + return &getColumnValue; + } + + return nullptr; +} + +template +ColumnGetterFunction getColumnGetterByLabel(o2::framework::pack, const std::string_view& targetColumnLabel) +{ + ColumnGetterFunction func; + + (void)((func = createGetterPtr(targetColumnLabel), func) || ...); + + if (!func) { + throw framework::runtime_error_f("Getter for \"%s\" not found", targetColumnLabel); + } + + return func; +} + +template +using with_common_getter_t = typename std::conditional || dynamic_with_common_getter, std::true_type, std::false_type>::type; + +template +ColumnGetterFunction getColumnGetterByLabel(const std::string_view& targetColumnLabel) +{ + using TypesWithCommonGetter = o2::framework::selected_pack_multicondition, typename T::columns_t>; + + if (targetColumnLabel.size() == 0) { + throw framework::runtime_error("columnLabel: must not be empty"); + } + + return getColumnGetterByLabel(TypesWithCommonGetter{}, targetColumnLabel); +} } // namespace row_helpers } // namespace o2::soa diff --git a/Framework/Core/include/Framework/BinningPolicy.h b/Framework/Core/include/Framework/BinningPolicy.h index ea04aa3b5a5b3..b5e9ba546c4d9 100644 --- a/Framework/Core/include/Framework/BinningPolicy.h +++ b/Framework/Core/include/Framework/BinningPolicy.h @@ -12,7 +12,6 @@ #ifndef FRAMEWORK_BINNINGPOLICY_H #define FRAMEWORK_BINNINGPOLICY_H -#include "Framework/ASoA.h" #include "Framework/HistogramSpec.h" // only for VARIABLE_WIDTH #include "Framework/Pack.h" diff --git a/Framework/Core/test/benchmark_ASoA.cxx b/Framework/Core/test/benchmark_ASoA.cxx index 8dfac9e735c0b..4001e2a725a15 100644 --- a/Framework/Core/test/benchmark_ASoA.cxx +++ b/Framework/Core/test/benchmark_ASoA.cxx @@ -29,6 +29,7 @@ DECLARE_SOA_COLUMN_FULL(X, x, float, "x"); DECLARE_SOA_COLUMN_FULL(Y, y, float, "y"); DECLARE_SOA_COLUMN_FULL(Z, z, float, "z"); DECLARE_SOA_DYNAMIC_COLUMN(Sum, sum, [](float x, float y) { return x + y; }); +DECLARE_SOA_DYNAMIC_COLUMN(SumFreeArgs, sumFreeArgs, [](float x, float y, float freeArg) { return x + y + freeArg; }); } // namespace test DECLARE_SOA_TABLE(TestTable, "AOD", "TESTTBL", test::X, test::Y, test::Z, test::Sum); @@ -290,6 +291,36 @@ static void BM_ASoADynamicColumnPresent(benchmark::State& state) BENCHMARK(BM_ASoADynamicColumnPresent)->Range(8, 8 << maxrange); +static void BM_ASoADynamicColumnPresentGetGetterByLabel(benchmark::State& state) +{ + // Seed with a real random value, if available + std::default_random_engine e1(1234567891); + std::uniform_real_distribution uniform_dist(0, 1); + + TableBuilder builder; + auto rowWriter = builder.persist({"x", "y", "z"}); + for (auto i = 0; i < state.range(0); ++i) { + rowWriter(0, uniform_dist(e1), uniform_dist(e1), uniform_dist(e1)); + } + auto table = builder.finalize(); + + using Test = o2::soa::InPlaceTable<"A/0"_h, test::X, test::Y, test::Z, test::Sum>; + + for (auto _ : state) { + Test tests{table}; + float sum = 0; + auto xGetter = o2::soa::row_helpers::getColumnGetterByLabel("x"); + auto yGetter = 
o2::soa::row_helpers::getColumnGetterByLabel("y"); + for (auto& test : tests) { + sum += xGetter(test) + yGetter(test); + } + benchmark::DoNotOptimize(sum); + } + state.SetBytesProcessed(state.iterations() * state.range(0) * sizeof(float) * 2); +} + +BENCHMARK(BM_ASoADynamicColumnPresentGetGetterByLabel)->Range(8, 8 << maxrange); + static void BM_ASoADynamicColumnCall(benchmark::State& state) { // Seed with a real random value, if available @@ -317,4 +348,33 @@ static void BM_ASoADynamicColumnCall(benchmark::State& state) } BENCHMARK(BM_ASoADynamicColumnCall)->Range(8, 8 << maxrange); +static void BM_ASoADynamicColumnCallGetGetterByLabel(benchmark::State& state) +{ + // Seed with a real random value, if available + std::default_random_engine e1(1234567891); + std::uniform_real_distribution uniform_dist(0, 1); + + TableBuilder builder; + auto rowWriter = builder.persist({"x", "y", "z"}); + for (auto i = 0; i < state.range(0); ++i) { + rowWriter(0, uniform_dist(e1), uniform_dist(e1), uniform_dist(e1)); + } + auto table = builder.finalize(); + + // SumFreeArgs presence checks if dynamic columns get() is handled correctly during compilation + using Test = o2::soa::InPlaceTable<"A/0"_h, test::X, test::Y, test::Sum, test::SumFreeArgs>; + + Test tests{table}; + for (auto _ : state) { + float sum = 0; + auto sumGetter = o2::soa::row_helpers::getColumnGetterByLabel("Sum"); + for (auto& test : tests) { + sum += sumGetter(test); + } + benchmark::DoNotOptimize(sum); + } + state.SetBytesProcessed(state.iterations() * state.range(0) * sizeof(float) * 2); +} +BENCHMARK(BM_ASoADynamicColumnCallGetGetterByLabel)->Range(8, 8 << maxrange); + BENCHMARK_MAIN(); From 1432c4b740493e20967a5dd35f1ddb64f83d733c Mon Sep 17 00:00:00 2001 From: Maximiliano Puccio Date: Wed, 11 Dec 2024 14:25:51 +0100 Subject: [PATCH 13/19] Add additional selections on ITS tracks (#13779) --- .../ITSMFT/ITS/tracking/include/ITStracking/Configuration.h | 2 ++ Detectors/ITSMFT/ITS/tracking/src/TrackerTraits.cxx | 5 ++++- Detectors/ITSMFT/ITS/tracking/src/TrackingInterface.cxx | 2 ++ 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Configuration.h b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Configuration.h index 976d01f1d476b..82d737153c827 100644 --- a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Configuration.h +++ b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Configuration.h @@ -94,6 +94,8 @@ struct TrackingParameters { unsigned long MaxMemory = 12000000000UL; float MaxChi2ClusterAttachment = 60.f; float MaxChi2NDF = 30.f; + float MinPt = 0.f; + unsigned char StartLayerMask = 0x7F; bool FindShortTracks = false; bool PerPrimaryVertexProcessing = false; bool SaveTimeBenchmarks = false; diff --git a/Detectors/ITSMFT/ITS/tracking/src/TrackerTraits.cxx b/Detectors/ITSMFT/ITS/tracking/src/TrackerTraits.cxx index c7367b781ba5b..b39d2f5b32d4f 100644 --- a/Detectors/ITSMFT/ITS/tracking/src/TrackerTraits.cxx +++ b/Detectors/ITSMFT/ITS/tracking/src/TrackerTraits.cxx @@ -573,6 +573,9 @@ void TrackerTraits::findRoads(const int iteration) const int minimumLayer{startLevel - 1}; std::vector trackSeeds; for (int startLayer{mTrkParams[iteration].CellsPerRoad() - 1}; startLayer >= minimumLayer; --startLayer) { + if ((mTrkParams[iteration].StartLayerMask & (1 << (startLayer + 2))) == 0) { + continue; + } CA_DEBUGGER(std::cout << "\t\t > Starting processing layer " << startLayer << std::endl); std::vector lastCellId, updatedCellId; std::vector lastCellSeed, updatedCellSeed; 
@@ -615,7 +618,7 @@ void TrackerTraits::findRoads(const int iteration) temporaryTrack.resetCovariance(); temporaryTrack.setChi2(0); fitSuccess = fitTrack(temporaryTrack, mTrkParams[0].NLayers - 1, -1, -1, mTrkParams[0].MaxChi2ClusterAttachment, mTrkParams[0].MaxChi2NDF, 50.f); - if (!fitSuccess) { + if (!fitSuccess || temporaryTrack.getPt() < mTrkParams[iteration].MinPt) { continue; } tracks[trackIndex++] = temporaryTrack; diff --git a/Detectors/ITSMFT/ITS/tracking/src/TrackingInterface.cxx b/Detectors/ITSMFT/ITS/tracking/src/TrackingInterface.cxx index 5b8a9bb1cb0f2..89ec83d0f6527 100644 --- a/Detectors/ITSMFT/ITS/tracking/src/TrackingInterface.cxx +++ b/Detectors/ITSMFT/ITS/tracking/src/TrackingInterface.cxx @@ -47,6 +47,8 @@ void ITSTrackingInterface::initialise() trackParams[2].TrackletMinPt = 0.1f; trackParams[2].CellDeltaTanLambdaSigma *= 4.; trackParams[2].MinTrackLength = 4; + trackParams[2].MinPt = 0.2f; + trackParams[2].StartLayerMask = (1 << 6) + (1 << 3); if (o2::its::TrackerParamConfig::Instance().doUPCIteration) { trackParams[3].TrackletMinPt = 0.1f; trackParams[3].CellDeltaTanLambdaSigma *= 4.; From 3552bce15beb33b94dcce97d6ca129c1e68912cb Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Wed, 11 Dec 2024 16:57:42 +0100 Subject: [PATCH 14/19] Make sure only the signposts matching the provided string are enabled (#13790) --- Framework/Core/src/runDataProcessing.cxx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Framework/Core/src/runDataProcessing.cxx b/Framework/Core/src/runDataProcessing.cxx index 0861ae1a65a0c..4bfbc3232822a 100644 --- a/Framework/Core/src/runDataProcessing.cxx +++ b/Framework/Core/src/runDataProcessing.cxx @@ -2786,7 +2786,7 @@ void enableSignposts(std::string const& signpostsToEnable) } auto fullName = prefix + std::string{selectedName, last ? 
last - selectedName : strlen(selectedName)};
-    if (strncmp(name, fullName.data(), fullName.size()) == 0) {
+    if (fullName == name) {
      LOGP(info, "Enabling signposts for stream \"{}\" with depth {}.", fullName, maxDepth);
      _o2_log_set_stacktrace(log, maxDepth);
      return false;

From a21ae94631d1e232c7892ff5989a4a51bb88efbc Mon Sep 17 00:00:00 2001
From: shahoian
Date: Wed, 11 Dec 2024 21:37:43 +0100
Subject: [PATCH 15/19] Fix inverted check in rawtf-reader max-tf-per-file check

---
 Detectors/Raw/TFReaderDD/src/TFReaderSpec.cxx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Detectors/Raw/TFReaderDD/src/TFReaderSpec.cxx b/Detectors/Raw/TFReaderDD/src/TFReaderSpec.cxx
index 58a2a775537d4..ef59d94b26048 100644
--- a/Detectors/Raw/TFReaderDD/src/TFReaderSpec.cxx
+++ b/Detectors/Raw/TFReaderDD/src/TFReaderSpec.cxx
@@ -431,7 +431,7 @@ void TFReaderSpec::TFBuilder()
        } else {
          break;
        }
-       if (mInput.maxTFsPerFile > 0 && mInput.maxTFsPerFile >= locID) { // go to next file
+       if (mInput.maxTFsPerFile > 0 && locID >= mInput.maxTFsPerFile) { // go to next file
          break;
        }
      }

From 560a1e913011ef26392d4e8b37a01d4999906760 Mon Sep 17 00:00:00 2001
From: Giulio Eulisse <10544+ktf@users.noreply.github.com>
Date: Wed, 11 Dec 2024 21:51:49 +0100
Subject: [PATCH 16/19] DPL: extra signposts for InitTask callback (#13793)

---
 Framework/Core/src/DataProcessingDevice.cxx | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/Framework/Core/src/DataProcessingDevice.cxx b/Framework/Core/src/DataProcessingDevice.cxx
index f4123de250e79..da92c73e1e16a 100644
--- a/Framework/Core/src/DataProcessingDevice.cxx
+++ b/Framework/Core/src/DataProcessingDevice.cxx
@@ -1008,6 +1008,9 @@ void DataProcessingDevice::InitTask()
   auto ref = ServiceRegistryRef{mServiceRegistry};
   auto& deviceContext = ref.get();
   auto& context = ref.get();
+
+  O2_SIGNPOST_ID_FROM_POINTER(cid, device, &context);
+  O2_SIGNPOST_START(device, cid, "InitTask", "Entering InitTask callback.");
   auto& spec = getRunningDevice(mRunningDevice, mServiceRegistry);
   auto distinct = DataRelayerHelpers::createDistinctRouteIndex(spec.inputs);
   auto& state = ref.get();
@@ -1098,10 +1101,13 @@ void DataProcessingDevice::InitTask()
   // We will get there.
   this->fillContext(mServiceRegistry.get(ServiceRegistry::globalDeviceSalt()), deviceContext);

+  O2_SIGNPOST_END(device, cid, "InitTask", "Exiting InitTask callback waiting for the remaining region callbacks.");
+
   auto hasPendingEvents = [&mutex = mRegionInfoMutex, &pendingRegionInfos = mPendingRegionInfos](DeviceContext& deviceContext) {
     std::lock_guard lock(mutex);
     return (pendingRegionInfos.empty() == false) || deviceContext.expectedRegionCallbacks > 0;
   };
+  O2_SIGNPOST_START(device, cid, "InitTask", "Waiting for registration events.");
   /// We now run an event loop also in InitTask. This is needed to:
   /// * Make sure region registration callbacks are invoked
   ///   on the main thread.
@@ -1111,10 +1117,12 @@ void DataProcessingDevice::InitTask()
     uv_run(state.loop, UV_RUN_ONCE);
     // Handle callbacks if any
     {
+      O2_SIGNPOST_EVENT_EMIT(device, cid, "InitTask", "Memory registration event received.");
       std::lock_guard lock(mRegionInfoMutex);
       handleRegionCallbacks(mServiceRegistry, mPendingRegionInfos);
     }
   }
+  O2_SIGNPOST_END(device, cid, "InitTask", "Done waiting for registration events.");
 }

 void DataProcessingDevice::fillContext(DataProcessorContext& context, DeviceContext& deviceContext)

From 4e92d0dd05f78d7530cdeb1f0a3a33a722bd0f25 Mon Sep 17 00:00:00 2001
From: Marian Ivanov
Date: Thu, 12 Dec 2024 10:24:27 +0100
Subject: [PATCH 17/19] O2-4592 - adding TOF dX and dZ to the track QA table with int8 precision (#13787)

* O2-4592 - adding TOF dX and dZ to the track QA table with relative sigma precision nBins*2

* O2-4592 - typo fix

* O2-4592 - clang-format

* AOD: TrackQA_002

Signed-off-by: Felix Schlepper

* AOD: Activate TrackQA_002

Signed-off-by: Felix Schlepper

* O2-4592 - patch fix from Felix (not full fix yet)

* O2-4592 - typo fix - getting TOF source

---------

Signed-off-by: Felix Schlepper
Co-authored-by: miranov25
Co-authored-by: Felix Schlepper
---
 .../AODProducerWorkflowSpec.h                 |  2 ++
 Detectors/AOD/src/AODProducerWorkflowSpec.cxx | 31 ++++++++++++++-----
 .../include/Framework/AnalysisDataModel.h     | 16 ++++++++--
 Framework/Core/include/Framework/DataTypes.h  |  1 +
 4 files changed, 40 insertions(+), 10 deletions(-)

diff --git a/Detectors/AOD/include/AODProducerWorkflow/AODProducerWorkflowSpec.h b/Detectors/AOD/include/AODProducerWorkflow/AODProducerWorkflowSpec.h
index 2ab7c531be7a8..05c2975d6413a 100644
--- a/Detectors/AOD/include/AODProducerWorkflow/AODProducerWorkflowSpec.h
+++ b/Detectors/AOD/include/AODProducerWorkflow/AODProducerWorkflowSpec.h
@@ -433,6 +433,8 @@ class AODProducerWorkflowDPL : public Task
     int8_t dRefGloSnp{std::numeric_limits::min()};
     int8_t dRefGloTgl{std::numeric_limits::min()};
     int8_t dRefGloQ2Pt{std::numeric_limits::min()};
+    int8_t dTofdX{std::numeric_limits::min()};
+    int8_t dTofdZ{std::numeric_limits::min()};
   };

   // helper struct for addToFwdTracksTable()
diff --git a/Detectors/AOD/src/AODProducerWorkflowSpec.cxx b/Detectors/AOD/src/AODProducerWorkflowSpec.cxx
index 8ee456634c1e1..96ad2459d581c 100644
--- a/Detectors/AOD/src/AODProducerWorkflowSpec.cxx
+++ b/Detectors/AOD/src/AODProducerWorkflowSpec.cxx
@@ -381,7 +381,9 @@ void AODProducerWorkflowDPL::addToTracksQATable(TracksQACursorType& tracksQACurs
     trackQAInfoHolder.dRefGloZ,
     trackQAInfoHolder.dRefGloSnp,
     trackQAInfoHolder.dRefGloTgl,
-    trackQAInfoHolder.dRefGloQ2Pt);
+    trackQAInfoHolder.dRefGloQ2Pt,
+    trackQAInfoHolder.dTofdX,
+    trackQAInfoHolder.dTofdZ);
 }

 template
@@ -2567,6 +2569,12 @@ AODProducerWorkflowDPL::TrackQA AODProducerWorkflowDPL::processBarrelTrackQA(int
     trackQAHolder.tpcdcaR = 100. * dcaInfo[0] / sqrt(1. + trackPar.getQ2Pt() * trackPar.getQ2Pt());
     trackQAHolder.tpcdcaZ = 100. * dcaInfo[1] / sqrt(1.
+ trackPar.getQ2Pt() * trackPar.getQ2Pt()); } + // This allows to safely clamp any float to one byte, using the + // minmal/maximum values as under-/overflow borders and rounding to the nearest integer + auto safeInt8Clamp = [](auto value) -> int8_t { + using ValType = decltype(value); + return static_cast(TMath::Nint(std::clamp(value, static_cast(std::numeric_limits::min()), static_cast(std::numeric_limits::max())))); + }; /// get tracklet byteMask uint8_t clusterCounters[8] = {0}; { @@ -2597,6 +2605,16 @@ AODProducerWorkflowDPL::TrackQA AODProducerWorkflowDPL::processBarrelTrackQA(int trackQAHolder.tpcdEdxTot1R = uint8_t(tpcOrig.getdEdx().dEdxTotOROC1 * dEdxNorm); trackQAHolder.tpcdEdxTot2R = uint8_t(tpcOrig.getdEdx().dEdxTotOROC2 * dEdxNorm); trackQAHolder.tpcdEdxTot3R = uint8_t(tpcOrig.getdEdx().dEdxTotOROC3 * dEdxNorm); + /// + float scaleTOF{0}; + auto contributorsGIDA = data.getSingleDetectorRefs(trackIndex); + if (contributorsGIDA[GIndex::Source::TOF].isIndexSet()) { // ITS-TPC-TRD-TOF, ITS-TPC-TOF, TPC-TRD-TOF, TPC-TOF + const auto& tofMatch = data.getTOFMatch(trackIndex); + const float qpt = trackPar.getQ2Pt(); + scaleTOF = std::sqrt(o2::aod::track::trackQAScaledTOF[0] * o2::aod::track::trackQAScaledTOF[0] + qpt * qpt * o2::aod::track::trackQAScaledTOF[1] * o2::aod::track::trackQAScaledTOF[1]) / (2. * o2::aod::track::trackQAScaleBins); + trackQAHolder.dTofdX = safeInt8Clamp(tofMatch.getDXatTOF() / scaleTOF); + trackQAHolder.dTofdZ = safeInt8Clamp(tofMatch.getDZatTOF() / scaleTOF); + } // Add matching information at a reference point (defined by // o2::aod::track::trackQARefRadius) in the same frame as the global track @@ -2622,13 +2640,6 @@ AODProducerWorkflowDPL::TrackQA AODProducerWorkflowDPL::processBarrelTrackQA(int return o2::aod::track::trackQAScaleBins / std::sqrt(o2::aod::track::trackQAScaleGloP0[i] * o2::aod::track::trackQAScaleGloP0[i] + (o2::aod::track::trackQAScaleGloP1[i] * x) * (o2::aod::track::trackQAScaleGloP1[i] * x)); }; - // This allows to safely clamp any float to one byte, using the - // minmal/maximum values as under-/overflow borders and rounding to the nearest integer - auto safeInt8Clamp = [](auto value) -> int8_t { - using ValType = decltype(value); - return static_cast(TMath::Nint(std::clamp(value, static_cast(std::numeric_limits::min()), static_cast(std::numeric_limits::max())))); - }; - // Calculate deltas for contributors trackQAHolder.dRefContY = safeInt8Clamp((itsCopy.getY() - tpcCopy.getY()) * scaleCont(0)); trackQAHolder.dRefContZ = safeInt8Clamp((itsCopy.getZ() - tpcCopy.getZ()) * scaleCont(1)); @@ -2641,6 +2652,7 @@ AODProducerWorkflowDPL::TrackQA AODProducerWorkflowDPL::processBarrelTrackQA(int trackQAHolder.dRefGloSnp = safeInt8Clamp(((itsCopy.getSnp() + tpcCopy.getSnp()) * 0.5f - gloCopy.getSnp()) * scaleGlo(2)); trackQAHolder.dRefGloTgl = safeInt8Clamp(((itsCopy.getTgl() + tpcCopy.getTgl()) * 0.5f - gloCopy.getTgl()) * scaleGlo(3)); trackQAHolder.dRefGloQ2Pt = safeInt8Clamp(((itsCopy.getQ2Pt() + tpcCopy.getQ2Pt()) * 0.5f - gloCopy.getQ2Pt()) * scaleGlo(4)); + // if (O2_ENUM_TEST_BIT(mStreamerMask, AODProducerStreamerMask::TrackQA)) { (*mStreamer) << "trackQA" @@ -2684,6 +2696,9 @@ AODProducerWorkflowDPL::TrackQA AODProducerWorkflowDPL::processBarrelTrackQA(int << "trackQAHolder.dRefGloSnp=" << trackQAHolder.dRefGloSnp << "trackQAHolder.dRefGloTgl=" << trackQAHolder.dRefGloTgl << "trackQAHolder.dRefGloQ2Pt=" << trackQAHolder.dRefGloQ2Pt + << "trackQAHolder.dTofdX=" << trackQAHolder.dTofdX + << "trackQAHolder.dTofdZ=" << trackQAHolder.dTofdZ 
+ << "scaleTOF=" << scaleTOF << "\n"; } } diff --git a/Framework/Core/include/Framework/AnalysisDataModel.h b/Framework/Core/include/Framework/AnalysisDataModel.h index c050977833937..a2c5ff88965d3 100644 --- a/Framework/Core/include/Framework/AnalysisDataModel.h +++ b/Framework/Core/include/Framework/AnalysisDataModel.h @@ -686,6 +686,8 @@ DECLARE_SOA_COLUMN(DeltaRefGloParamZ, deltaRefGloParamZ, int8_t); //! No DECLARE_SOA_COLUMN(DeltaRefGloParamSnp, deltaRefGloParamSnp, int8_t); //! Normalized delta of global track to average contributors matched tracks at reference point in the same frame Snp DECLARE_SOA_COLUMN(DeltaRefGloParamTgl, deltaRefGloParamTgl, int8_t); //! Normalized delta of global track to average contributors matched tracks at reference point in the same frame Tgl DECLARE_SOA_COLUMN(DeltaRefGloParamQ2Pt, deltaRefGloParamQ2Pt, int8_t); //! Normalized delta of global track to average contributors matched tracks at reference point in the same frame Q2Pt +DECLARE_SOA_COLUMN(DeltaTOFdX, deltaTOFdX, int8_t); //! +DECLARE_SOA_COLUMN(DeltaTOFdZ, deltaTOFdZ, int8_t); //! DECLARE_SOA_DYNAMIC_COLUMN(IsDummy, isDummy, //! indicates if the propagation of the contrib. tracks was successful and residuals are available [](int8_t cY, int8_t cZ, int8_t cSnp, int8_t cTgl, int8_t cQ2Pt, int8_t gY, int8_t gZ, int8_t gSnp, int8_t gTgl, int8_t gQ2Pt) -> bool { @@ -709,7 +711,17 @@ DECLARE_SOA_TABLE_VERSIONED(TracksQA_001, "AOD", "TRACKQA", 1, //! trackQA infor trackqa::IsDummy); -using TracksQAVersion = TracksQA_001; +DECLARE_SOA_TABLE_VERSIONED(TracksQA_002, "AOD", "TRACKQA", 2, //! trackQA information - version 2 - including contributor residuals of matched tracks at reference radius + TOF delta information + o2::soa::Index<>, trackqa::TrackId, trackqa::TPCTime0, trackqa::TPCDCAR, trackqa::TPCDCAZ, trackqa::TPCClusterByteMask, + trackqa::TPCdEdxMax0R, trackqa::TPCdEdxMax1R, trackqa::TPCdEdxMax2R, trackqa::TPCdEdxMax3R, + trackqa::TPCdEdxTot0R, trackqa::TPCdEdxTot1R, trackqa::TPCdEdxTot2R, trackqa::TPCdEdxTot3R, + trackqa::DeltaRefContParamY, trackqa::DeltaRefContParamZ, trackqa::DeltaRefContParamSnp, trackqa::DeltaRefContParamTgl, trackqa::DeltaRefContParamQ2Pt, + trackqa::DeltaRefGloParamY, trackqa::DeltaRefGloParamZ, trackqa::DeltaRefGloParamSnp, trackqa::DeltaRefGloParamTgl, trackqa::DeltaRefGloParamQ2Pt, + trackqa::DeltaTOFdX, trackqa::DeltaTOFdZ, + trackqa::IsDummy); + +using TracksQAVersion = TracksQA_002; using TracksQA = TracksQAVersion::iterator; namespace fwdtrack @@ -1651,7 +1663,7 @@ using Tracked3body = Tracked3Bodys::iterator; namespace origins { DECLARE_SOA_COLUMN(DataframeID, dataframeID, uint64_t); //! Data frame ID (what is usually found in directory name in the AO2D.root, i.e. DF_XXX) -} // namespace origin +} // namespace origins DECLARE_SOA_TABLE(Origins, "AOD", "ORIGIN", //! 
Table which contains the IDs of all dataframes merged into this dataframe o2::soa::Index<>, origins::DataframeID); diff --git a/Framework/Core/include/Framework/DataTypes.h b/Framework/Core/include/Framework/DataTypes.h index 9d829159718d8..682237c9ae632 100644 --- a/Framework/Core/include/Framework/DataTypes.h +++ b/Framework/Core/include/Framework/DataTypes.h @@ -130,6 +130,7 @@ constexpr std::array trackQAScaleContP0{0.257192, 0.0775375, 0.0042428 constexpr std::array trackQAScaleContP1{0.189371, 0.409071, 0.00694444, 0.00720038, 0.0806902}; constexpr std::array trackQAScaleGloP0{0.130985, 0.0775375, 0.00194703, 0.000405458, 0.0160007}; constexpr std::array trackQAScaleGloP1{0.183731, 0.409071, 0.00621802, 0.00624881, 0.0418957}; +constexpr std::array trackQAScaledTOF{1.1, 0.33}; } // namespace o2::aod::track namespace o2::aod::fwdtrack From deecd31316d6b1d2eee2e0ac51026446d7d1f803 Mon Sep 17 00:00:00 2001 From: Mario Ciacco Date: Thu, 12 Dec 2024 11:50:57 +0100 Subject: [PATCH 18/19] Bump trackextra table to the latest version (#13785) * Bump trackextra table to the latest version * update table version in test --- Detectors/AOD/src/AODProducerWorkflowSpec.cxx | 2 +- Framework/Core/include/Framework/AnalysisDataModel.h | 6 ++++-- Framework/Core/src/AODReaderHelpers.cxx | 2 ++ Framework/Core/test/test_AnalysisTask.cxx | 2 +- 4 files changed, 8 insertions(+), 4 deletions(-) diff --git a/Detectors/AOD/src/AODProducerWorkflowSpec.cxx b/Detectors/AOD/src/AODProducerWorkflowSpec.cxx index 96ad2459d581c..58648527f2856 100644 --- a/Detectors/AOD/src/AODProducerWorkflowSpec.cxx +++ b/Detectors/AOD/src/AODProducerWorkflowSpec.cxx @@ -337,7 +337,7 @@ void AODProducerWorkflowDPL::addToTracksExtraTable(TracksExtraCursorType& tracks extraInfoHolder.itsClusterSizes, extraInfoHolder.tpcNClsFindable, extraInfoHolder.tpcNClsFindableMinusFound, - // extraInfoHolder.tpcNClsFindableMinusPID, + extraInfoHolder.tpcNClsFindableMinusPID, extraInfoHolder.tpcNClsFindableMinusCrossedRows, extraInfoHolder.tpcNClsShared, extraInfoHolder.trdPattern, diff --git a/Framework/Core/include/Framework/AnalysisDataModel.h b/Framework/Core/include/Framework/AnalysisDataModel.h index a2c5ff88965d3..b40468039243b 100644 --- a/Framework/Core/include/Framework/AnalysisDataModel.h +++ b/Framework/Core/include/Framework/AnalysisDataModel.h @@ -638,8 +638,8 @@ DECLARE_SOA_EXTENDED_TABLE(TracksExtra_002, StoredTracksExtra_002, "EXTRACKEXTRA DECLARE_SOA_TABLE(Run2TrackExtras, "AOD", "RUN2TRACKEXTRA", track::ITSSignal); -using StoredTracksExtra = StoredTracksExtra_001; -using TracksExtra = TracksExtra_001; +using StoredTracksExtra = StoredTracksExtra_002; +using TracksExtra = TracksExtra_002; using Track = Tracks::iterator; using TrackIU = TracksIU::iterator; @@ -1893,6 +1893,8 @@ DECLARE_EQUIVALENT_FOR_INDEX_NG("TRACK_IU/0", "TRACKEXTRA/0"); DECLARE_EQUIVALENT_FOR_INDEX_NG("TRACK/0", "TRACKEXTRA/1"); DECLARE_EQUIVALENT_FOR_INDEX_NG("TRACK_IU/0", "TRACKEXTRA/1"); DECLARE_EQUIVALENT_FOR_INDEX_NG("TRACKEXTRA/0", "TRACKEXTRA/1"); +DECLARE_EQUIVALENT_FOR_INDEX_NG("TRACKEXTRA/0", "TRACKEXTRA/2"); +DECLARE_EQUIVALENT_FOR_INDEX_NG("TRACKEXTRA/1", "TRACKEXTRA/2"); DECLARE_EQUIVALENT_FOR_INDEX_NG("HMPID/0", "HMPID/1"); DECLARE_EQUIVALENT_FOR_INDEX_NG("MFTTracks/0", "MFTTracks/1"); } // namespace soa diff --git a/Framework/Core/src/AODReaderHelpers.cxx b/Framework/Core/src/AODReaderHelpers.cxx index 7f2b50186ac0f..f6513b5facea3 100644 --- a/Framework/Core/src/AODReaderHelpers.cxx +++ b/Framework/Core/src/AODReaderHelpers.cxx @@ -166,6 +166,8 @@ 
AlgorithmSpec AODReaderHelpers::aodSpawnerCallback(std::vector& reque
       outputs.adopt(Output{origin, description, version}, maker.template operator()>());
     } else if (version == 1U) {
       outputs.adopt(Output{origin, description, version}, maker.template operator()>());
+    } else if (version == 2U) {
+      outputs.adopt(Output{origin, description, version}, maker.template operator()>());
     }
   } else if (description == header::DataDescription{"EXMFTTRACK"}) {
     if (version == 0U) {
diff --git a/Framework/Core/test/test_AnalysisTask.cxx b/Framework/Core/test/test_AnalysisTask.cxx
index 9153aa8c40098..ff0f7da1acaab 100644
--- a/Framework/Core/test/test_AnalysisTask.cxx
+++ b/Framework/Core/test/test_AnalysisTask.cxx
@@ -209,7 +209,7 @@ TEST_CASE("AdaptorCompilation")
   REQUIRE(task2.inputs.size() == 10);
   REQUIRE(task2.inputs[2].binding == "TracksExtension");
   REQUIRE(task2.inputs[1].binding == "Tracks");
-  REQUIRE(task2.inputs[4].binding == "TracksExtra_001Extension");
+  REQUIRE(task2.inputs[4].binding == "TracksExtra_002Extension");
   REQUIRE(task2.inputs[3].binding == "TracksExtra");
   REQUIRE(task2.inputs[6].binding == "TracksCovExtension");
   REQUIRE(task2.inputs[5].binding == "TracksCov");

From 319d2cf6baa17cd623ed57323e714c726f618aec Mon Sep 17 00:00:00 2001
From: Giulio Eulisse <10544+ktf@users.noreply.github.com>
Date: Thu, 12 Dec 2024 12:18:23 +0100
Subject: [PATCH 19/19] DPL Analysis: add RNTuple arrow::Dataset support

As part of the changes, move the actual logic which serialises /
deserialises things to plugins so that we do not need to depend on
RNTuple in production code.
---
 Framework/AnalysisSupport/CMakeLists.txt      |  10 +
 .../AnalysisSupport/src/RNTuplePlugin.cxx     | 816 +++++++++++++++++
 Framework/AnalysisSupport/src/TTreePlugin.cxx | 862 ++++++++++++++++++
 Framework/Core/CMakeLists.txt                 |   3 +
 Framework/Core/include/Framework/Plugins.h    |   8 +
 .../include/Framework/RootArrowFilesystem.h   | 174 +---
 Framework/Core/src/Plugin.cxx                 |  69 ++
 Framework/Core/src/RootArrowFilesystem.cxx    | 721 +--------------
 Framework/Core/test/test_Root2ArrowTable.cxx  | 105 ++-
 9 files changed, 1941 insertions(+), 827 deletions(-)
 create mode 100644 Framework/AnalysisSupport/src/RNTuplePlugin.cxx
 create mode 100644 Framework/AnalysisSupport/src/TTreePlugin.cxx

diff --git a/Framework/AnalysisSupport/CMakeLists.txt b/Framework/AnalysisSupport/CMakeLists.txt
index 5fb1282469711..05cd8075a49d8 100644
--- a/Framework/AnalysisSupport/CMakeLists.txt
+++ b/Framework/AnalysisSupport/CMakeLists.txt
@@ -24,6 +24,16 @@ o2_add_library(FrameworkAnalysisSupport
                PRIVATE_INCLUDE_DIRECTORIES ${CMAKE_CURRENT_LIST_DIR}/src
                PUBLIC_LINK_LIBRARIES O2::Framework ${EXTRA_TARGETS} ROOT::TreePlayer)

+o2_add_library(FrameworkAnalysisRNTupleSupport
+               SOURCES src/RNTuplePlugin.cxx
+               PRIVATE_INCLUDE_DIRECTORIES ${CMAKE_CURRENT_LIST_DIR}/src
+               PUBLIC_LINK_LIBRARIES O2::Framework ${EXTRA_TARGETS} ROOT::ROOTNTuple)
+
+o2_add_library(FrameworkAnalysisTTreeSupport
+               SOURCES src/TTreePlugin.cxx
+               PRIVATE_INCLUDE_DIRECTORIES ${CMAKE_CURRENT_LIST_DIR}/src
+               PUBLIC_LINK_LIBRARIES O2::Framework ${EXTRA_TARGETS} ROOT::TreePlayer)
+
 o2_add_test(DataInputDirector NAME test_Framework_test_DataInputDirector
             SOURCES test/test_DataInputDirector.cxx
             COMPONENT_NAME Framework
diff --git a/Framework/AnalysisSupport/src/RNTuplePlugin.cxx b/Framework/AnalysisSupport/src/RNTuplePlugin.cxx
new file mode 100644
index 0000000000000..5e73c1790e63a
--- /dev/null
+++ b/Framework/AnalysisSupport/src/RNTuplePlugin.cxx
@@ -0,0 +1,816 @@
+// Copyright 2019-2020 CERN and copyright holders of ALICE
O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +#include "Framework/RuntimeError.h" +#include "Framework/RootArrowFilesystem.h" +#include "Framework/Plugins.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +template class + std::unique_ptr; + +namespace o2::framework +{ + +class RNTupleFileWriteOptions : public arrow::dataset::FileWriteOptions +{ + public: + RNTupleFileWriteOptions(std::shared_ptr format) + : FileWriteOptions(format) + { + } +}; + +// A filesystem which allows me to get a RNTuple +class RNTupleFileSystem : public VirtualRootFileSystemBase +{ + public: + ~RNTupleFileSystem() override; + + std::shared_ptr GetSubFilesystem(arrow::dataset::FileSource source) override + { + return std::dynamic_pointer_cast(shared_from_this()); + }; + virtual ROOT::Experimental::RNTuple* GetRNTuple(arrow::dataset::FileSource source) = 0; +}; + +class SingleRNTupleFileSystem : public RNTupleFileSystem +{ + public: + SingleRNTupleFileSystem(ROOT::Experimental::RNTuple* tuple) + : RNTupleFileSystem(), + mTuple(tuple) + { + } + + arrow::Result GetFileInfo(std::string const& path) override; + + std::string type_name() const override + { + return "rntuple"; + } + + ROOT::Experimental::RNTuple* GetRNTuple(arrow::dataset::FileSource) override + { + // Simply return the only TTree we have + return mTuple; + } + + private: + ROOT::Experimental::RNTuple* mTuple; +}; + +arrow::Result SingleRNTupleFileSystem::GetFileInfo(std::string const& path) +{ + arrow::dataset::FileSource source(path, shared_from_this()); + arrow::fs::FileInfo result; + result.set_path(path); + result.set_type(arrow::fs::FileType::File); + return result; +} + +class RNTupleFileFragment : public arrow::dataset::FileFragment +{ + public: + RNTupleFileFragment(arrow::dataset::FileSource source, + std::shared_ptr format, + arrow::compute::Expression partition_expression, + std::shared_ptr physical_schema) + : FileFragment(std::move(source), std::move(format), std::move(partition_expression), std::move(physical_schema)) + { + } +}; + +class RNTupleFileFormat : public arrow::dataset::FileFormat +{ + size_t& mTotCompressedSize; + size_t& mTotUncompressedSize; + + public: + RNTupleFileFormat(size_t& totalCompressedSize, size_t& totalUncompressedSize) + : FileFormat({}), + mTotCompressedSize(totalCompressedSize), + mTotUncompressedSize(totalUncompressedSize) + { + } + + ~RNTupleFileFormat() override = default; + + std::string type_name() const override + { + return "rntuple"; + } + + bool Equals(const FileFormat& other) const override + { + return other.type_name() == this->type_name(); + } + + arrow::Result IsSupported(const arrow::dataset::FileSource& source) const override + { + auto fs = std::dynamic_pointer_cast(source.filesystem()); + auto subFs = fs->GetSubFilesystem(source); + if (std::dynamic_pointer_cast(subFs)) { + return true; + } + return false; + } + + arrow::Result> Inspect(const arrow::dataset::FileSource& source) const override; + + arrow::Result ScanBatchesAsync( + const 
std::shared_ptr& options, + const std::shared_ptr& fragment) const override; + + std::shared_ptr DefaultWriteOptions() override; + + arrow::Result> MakeWriter(std::shared_ptr destination, + std::shared_ptr schema, + std::shared_ptr options, + arrow::fs::FileLocator destination_locator) const override; + arrow::Result> MakeFragment( + arrow::dataset::FileSource source, arrow::compute::Expression partition_expression, + std::shared_ptr physical_schema) override; +}; + +struct RootNTupleVisitor : public ROOT::Experimental::Detail::RFieldVisitor { + void VisitArrayField(const ROOT::Experimental::RArrayField& field) override + { + int size = field.GetLength(); + RootNTupleVisitor valueVisitor{}; + auto valueField = field.GetSubFields()[0]; + valueField->AcceptVisitor(valueVisitor); + auto type = valueVisitor.datatype; + this->datatype = arrow::fixed_size_list(type, size); + } + + void VisitRVecField(const ROOT::Experimental::RRVecField& field) override + { + RootNTupleVisitor valueVisitor{}; + auto valueField = field.GetSubFields()[0]; + valueField->AcceptVisitor(valueVisitor); + auto type = valueVisitor.datatype; + this->datatype = arrow::list(type); + } + + void VisitField(const ROOT::Experimental::RFieldBase& field) override + { + throw o2::framework::runtime_error_f("Unknown field %s with type %s", field.GetFieldName().c_str(), field.GetTypeName().c_str()); + } + + void VisitIntField(const ROOT::Experimental::RField& field) override + { + this->datatype = arrow::int32(); + } + + void VisitBoolField(const ROOT::Experimental::RField& field) override + { + this->datatype = arrow::boolean(); + } + + void VisitFloatField(const ROOT::Experimental::RField& field) override + { + this->datatype = arrow::float32(); + } + + void VisitDoubleField(const ROOT::Experimental::RField& field) override + { + this->datatype = arrow::float64(); + } + std::shared_ptr datatype; +}; +} // namespace o2::framework + +auto arrowTypeFromRNTuple(ROOT::Experimental::RFieldBase const& field, int size) +{ + o2::framework::RootNTupleVisitor visitor; + field.AcceptVisitor(visitor); + return visitor.datatype; +} + +namespace o2::framework +{ +std::unique_ptr rootFieldFromArrow(std::shared_ptr field, std::string name) +{ + using namespace ROOT::Experimental; + switch (field->type()->id()) { + case arrow::Type::BOOL: + return std::make_unique>(name); + case arrow::Type::UINT8: + return std::make_unique>(name); + case arrow::Type::UINT16: + return std::make_unique>(name); + case arrow::Type::UINT32: + return std::make_unique>(name); + case arrow::Type::UINT64: + return std::make_unique>(name); + case arrow::Type::INT8: + return std::make_unique>(name); + case arrow::Type::INT16: + return std::make_unique>(name); + case arrow::Type::INT32: + return std::make_unique>(name); + case arrow::Type::INT64: + return std::make_unique>(name); + case arrow::Type::FLOAT: + return std::make_unique>(name); + case arrow::Type::DOUBLE: + return std::make_unique>(name); + default: + throw runtime_error("Unsupported arrow column type"); + } +} + +class RNTupleFileWriter : public arrow::dataset::FileWriter +{ + std::shared_ptr mWriter; + bool firstBatch = true; + std::vector> valueArrays; + std::vector> valueTypes; + std::vector valueCount; + + public: + RNTupleFileWriter(std::shared_ptr schema, std::shared_ptr options, + std::shared_ptr destination, + arrow::fs::FileLocator destination_locator) + : FileWriter(schema, options, destination, destination_locator) + { + using namespace ROOT::Experimental; + + auto model = RNTupleModel::CreateBare(); + 
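+    // Note: a bare model carries no default REntry; values are instead bound
+    // to an explicitly created entry in Write() below, which avoids copying
+    // each value through a default entry first.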
// Let's create a model from the physical schema + for (auto i = 0u; i < schema->fields().size(); ++i) { + auto& field = schema->field(i); + + // Construct all the needed branches. + switch (field->type()->id()) { + case arrow::Type::FIXED_SIZE_LIST: { + auto list = std::static_pointer_cast(field->type()); + auto valueField = field->type()->field(0); + model->AddField(std::make_unique(field->name(), rootFieldFromArrow(valueField, "_0"), list->list_size())); + } break; + case arrow::Type::LIST: { + auto valueField = field->type()->field(0); + model->AddField(std::make_unique(field->name(), rootFieldFromArrow(valueField, "_0"))); + } break; + default: { + model->AddField(rootFieldFromArrow(field, field->name())); + } break; + } + } + auto fileStream = std::dynamic_pointer_cast(destination_); + auto* file = dynamic_cast(fileStream->GetDirectory()); + mWriter = RNTupleWriter::Append(std::move(model), destination_locator_.path, *file, {}); + } + + arrow::Status Write(const std::shared_ptr& batch) override + { + if (firstBatch) { + firstBatch = false; + } + + // Support writing empty tables + if (batch->columns().empty() || batch->num_rows() == 0) { + return arrow::Status::OK(); + } + + for (auto i = 0u; i < batch->columns().size(); ++i) { + auto column = batch->column(i); + auto& field = batch->schema()->field(i); + + valueArrays.push_back(nullptr); + valueTypes.push_back(nullptr); + valueCount.push_back(1); + + switch (field->type()->id()) { + case arrow::Type::FIXED_SIZE_LIST: { + auto list = std::static_pointer_cast(column); + auto listType = std::static_pointer_cast(field->type()); + if (field->type()->field(0)->type()->id() == arrow::Type::BOOL) { + auto boolArray = std::static_pointer_cast(list->values()); + int64_t length = boolArray->length(); + arrow::UInt8Builder builder; + auto ok = builder.Reserve(length); + + for (int64_t i = 0; i < length; ++i) { + if (boolArray->IsValid(i)) { + // Expand each boolean value (true/false) to uint8 (1/0) + uint8_t value = boolArray->Value(i) ? 1 : 0; + auto ok = builder.Append(value); + } else { + // Append null for invalid entries + auto ok = builder.AppendNull(); + } + } + valueArrays.back() = *builder.Finish(); + valueTypes.back() = valueArrays.back()->type(); + } else { + valueArrays.back() = list->values(); + valueTypes.back() = field->type()->field(0)->type(); + } + valueCount.back() = listType->list_size(); + } break; + case arrow::Type::LIST: { + auto list = std::static_pointer_cast(column); + valueArrays.back() = list; + valueTypes.back() = field->type()->field(0)->type(); + valueCount.back() = -1; + } break; + case arrow::Type::BOOL: { + // We unpack the array + auto boolArray = std::static_pointer_cast(column); + int64_t length = boolArray->length(); + arrow::UInt8Builder builder; + auto ok = builder.Reserve(length); + + for (int64_t i = 0; i < length; ++i) { + if (boolArray->IsValid(i)) { + // Expand each boolean value (true/false) to uint8 (1/0) + uint8_t value = boolArray->Value(i) ? 
1 : 0; + auto ok = builder.Append(value); + } else { + // Append null for invalid entries + auto ok = builder.AppendNull(); + } + } + valueArrays.back() = *builder.Finish(); + valueTypes.back() = valueArrays.back()->type(); + } break; + default: + valueArrays.back() = column; + valueTypes.back() = field->type(); + break; + } + } + + int64_t pos = 0; + + auto entry = mWriter->CreateEntry(); + + while (pos < batch->num_rows()) { + for (size_t ci = 0; ci < batch->columns().size(); ++ci) { + auto type = batch->column(ci)->type(); + auto field = batch->schema()->field(ci); + auto token = entry->GetToken(field->name()); + + switch (type->id()) { + case arrow::Type::LIST: { + auto list = std::static_pointer_cast(valueArrays[ci]); + auto value_slice = list->value_slice(pos); + + valueCount[ci] = value_slice->length(); + auto bindValue = [&vc = valueCount, ci, token](auto array, std::unique_ptr& entry) -> void { + using value_type = std::decay_t::value_type; + auto v = std::make_shared>((value_type*)array->raw_values(), vc[ci]); + entry->BindValue(token, v); + }; + switch (valueTypes[ci]->id()) { + case arrow::Type::FLOAT: { + bindValue(std::static_pointer_cast(value_slice), entry); + } break; + case arrow::Type::DOUBLE: { + bindValue(std::static_pointer_cast(value_slice), entry); + } break; + case arrow::Type::INT8: { + bindValue(std::static_pointer_cast(value_slice), entry); + } break; + case arrow::Type::INT16: { + bindValue(std::static_pointer_cast(value_slice), entry); + } break; + case arrow::Type::INT32: { + bindValue(std::static_pointer_cast(value_slice), entry); + } break; + case arrow::Type::INT64: { + bindValue(std::static_pointer_cast(value_slice), entry); + } break; + case arrow::Type::UINT8: { + bindValue(std::static_pointer_cast(value_slice), entry); + } break; + case arrow::Type::UINT16: { + bindValue(std::static_pointer_cast(value_slice), entry); + } break; + case arrow::Type::UINT32: { + bindValue(std::static_pointer_cast(value_slice), entry); + } break; + case arrow::Type::UINT64: { + bindValue(std::static_pointer_cast(value_slice), entry); + } break; + default: { + throw runtime_error("Unsupported kind of VLA"); + } break; + } + } break; + case arrow::Type::FIXED_SIZE_LIST: { + entry->BindRawPtr(token, (void*)(valueArrays[ci]->data()->buffers[1]->data() + pos * valueCount[ci] * valueTypes[ci]->byte_width())); + } break; + case arrow::Type::BOOL: { + // Not sure we actually need this + entry->BindRawPtr(token, (bool*)(valueArrays[ci]->data()->buffers[1]->data() + pos * 1)); + } break; + default: + // By default we consider things scalars. + entry->BindRawPtr(token, (void*)(valueArrays[ci]->data()->buffers[1]->data() + pos * valueTypes[ci]->byte_width())); + break; + } + } + mWriter->Fill(*entry); + ++pos; + } + // mWriter->CommitCluster(); + + return arrow::Status::OK(); + } + + arrow::Future<> + FinishInternal() override + { + return {}; + }; +}; + +arrow::Result> RNTupleFileFormat::Inspect(const arrow::dataset::FileSource& source) const +{ + + auto fs = std::dynamic_pointer_cast(source.filesystem()); + // Actually get the TTree from the ROOT file. 
+ auto ntupleFs = std::dynamic_pointer_cast(fs->GetSubFilesystem(source)); + if (!ntupleFs.get()) { + throw runtime_error_f("Unknown filesystem %s\n", source.filesystem()->type_name().c_str()); + } + ROOT::Experimental::RNTuple* rntuple = ntupleFs->GetRNTuple(source); + + auto inspector = ROOT::Experimental::RNTupleInspector::Create(rntuple); + + auto reader = ROOT::Experimental::RNTupleReader::Open(rntuple); + + auto& tupleField0 = reader->GetModel().GetFieldZero(); + std::vector> fields; + for (auto& tupleField : tupleField0.GetSubFields()) { + auto field = std::make_shared(tupleField->GetFieldName(), arrowTypeFromRNTuple(*tupleField, tupleField->GetValueSize())); + fields.push_back(field); + } + + return std::make_shared(fields); +} + +arrow::Result RNTupleFileFormat::ScanBatchesAsync( + const std::shared_ptr& options, + const std::shared_ptr& fragment) const +{ + auto dataset_schema = options->dataset_schema; + auto ntupleFragment = std::dynamic_pointer_cast(fragment); + + auto generator = [pool = options->pool, ntupleFragment, dataset_schema, &totalCompressedSize = mTotCompressedSize, + &totalUncompressedSize = mTotUncompressedSize]() -> arrow::Future> { + using namespace ROOT::Experimental; + std::vector> columns; + std::vector> fields = dataset_schema->fields(); + + auto containerFS = std::dynamic_pointer_cast(ntupleFragment->source().filesystem()); + auto fs = std::dynamic_pointer_cast(containerFS->GetSubFilesystem(ntupleFragment->source())); + + int64_t rows = -1; + ROOT::Experimental::RNTuple* rntuple = fs->GetRNTuple(ntupleFragment->source()); + auto reader = ROOT::Experimental::RNTupleReader::Open(rntuple); + auto& model = reader->GetModel(); + for (auto& physicalField : fields) { + auto bulk = model.CreateBulk(physicalField->name()); + + auto listType = std::dynamic_pointer_cast(physicalField->type()); + + auto& descriptor = reader->GetDescriptor(); + auto totalEntries = reader->GetNEntries(); + + if (rows == -1) { + rows = totalEntries; + } + if (rows != totalEntries) { + throw runtime_error_f("Unmatching number of rows for branch %s", physicalField->name().c_str()); + } + arrow::Status status; + int readEntries = 0; + std::shared_ptr array; + if (physicalField->type() == arrow::boolean() || + (listType && physicalField->type()->field(0)->type() == arrow::boolean())) { + if (listType) { + std::unique_ptr builder = nullptr; + auto status = arrow::MakeBuilder(pool, physicalField->type()->field(0)->type(), &builder); + if (!status.ok()) { + throw runtime_error("Cannot create value builder"); + } + auto listBuilder = std::make_unique(pool, std::move(builder), listType->list_size()); + auto valueBuilder = listBuilder.get()->value_builder(); + // boolean array special case: we need to use builder to create the bitmap + status = valueBuilder->Reserve(totalEntries * listType->list_size()); + status &= listBuilder->Reserve(totalEntries); + if (!status.ok()) { + throw runtime_error("Failed to reserve memory for array builder"); + } + auto clusterIt = descriptor.FindClusterId(0, 0); + // No adoption for now... 
+ // bulk.AdoptBuffer(buffer, totalEntries) + while (clusterIt != kInvalidDescriptorId) { + auto& index = descriptor.GetClusterDescriptor(clusterIt); + auto mask = std::make_unique(index.GetNEntries()); + std::fill(mask.get(), mask.get() + index.GetNEntries(), true); + void* ptr = bulk.ReadBulk(RClusterIndex(clusterIt, index.GetFirstEntryIndex()), mask.get(), index.GetNEntries()); + int readLast = index.GetNEntries(); + readEntries += readLast; + status &= static_cast(valueBuilder)->AppendValues(reinterpret_cast(ptr), readLast * listType->list_size()); + clusterIt = descriptor.FindNextClusterId(clusterIt); + } + status &= static_cast(listBuilder.get())->AppendValues(readEntries); + if (!status.ok()) { + throw runtime_error("Failed to append values to array"); + } + status &= listBuilder->Finish(&array); + if (!status.ok()) { + throw runtime_error("Failed to create array"); + } + } else if (listType == nullptr) { + std::unique_ptr builder = nullptr; + auto status = arrow::MakeBuilder(pool, physicalField->type(), &builder); + if (!status.ok()) { + throw runtime_error("Cannot create builder"); + } + auto valueBuilder = static_cast(builder.get()); + // boolean array special case: we need to use builder to create the bitmap + status = valueBuilder->Reserve(totalEntries); + if (!status.ok()) { + throw runtime_error("Failed to reserve memory for array builder"); + } + auto clusterIt = descriptor.FindClusterId(0, 0); + while (clusterIt != kInvalidDescriptorId) { + auto& index = descriptor.GetClusterDescriptor(clusterIt); + auto mask = std::make_unique(index.GetNEntries()); + std::fill(mask.get(), mask.get() + index.GetNEntries(), true); + void* ptr = bulk.ReadBulk(RClusterIndex(clusterIt, index.GetFirstEntryIndex()), mask.get(), index.GetNEntries()); + int readLast = index.GetNEntries(); + readEntries += readLast; + status &= valueBuilder->AppendValues(reinterpret_cast(ptr), readLast); + clusterIt = descriptor.FindNextClusterId(clusterIt); + } + if (!status.ok()) { + throw runtime_error("Failed to append values to array"); + } + status &= valueBuilder->Finish(&array); + if (!status.ok()) { + throw runtime_error("Failed to create array"); + } + } + } else { + // other types: use serialized read to build arrays directly. + auto typeSize = physicalField->type()->byte_width(); + // FIXME: for now... + auto bytes = 0; + auto branchSize = bytes ? 
bytes : 1000000; + auto&& result = arrow::AllocateResizableBuffer(branchSize, pool); + if (!result.ok()) { + throw runtime_error("Cannot allocate values buffer"); + } + std::shared_ptr arrowValuesBuffer = std::move(result).ValueUnsafe(); + auto ptr = arrowValuesBuffer->mutable_data(); + if (ptr == nullptr) { + throw runtime_error("Invalid buffer"); + } + + std::unique_ptr offsetBuffer = nullptr; + + std::shared_ptr arrowOffsetBuffer; + std::span offsets; + int size = 0; + uint32_t totalSize = 0; + int64_t listSize = 1; + if (auto fixedSizeList = std::dynamic_pointer_cast(physicalField->type())) { + listSize = fixedSizeList->list_size(); + typeSize = fixedSizeList->field(0)->type()->byte_width(); + auto clusterIt = descriptor.FindClusterId(0, 0); + while (clusterIt != kInvalidDescriptorId) { + auto& index = descriptor.GetClusterDescriptor(clusterIt); + auto mask = std::make_unique(index.GetNEntries()); + std::fill(mask.get(), mask.get() + index.GetNEntries(), true); + void* inPtr = bulk.ReadBulk(RClusterIndex(clusterIt, index.GetFirstEntryIndex()), mask.get(), index.GetNEntries()); + + int readLast = index.GetNEntries(); + if (listSize == -1) { + size = offsets[readEntries + readLast] - offsets[readEntries]; + } else { + size = readLast * listSize; + } + readEntries += readLast; + memcpy(ptr, inPtr, size * typeSize); + ptr += (ptrdiff_t)(size * typeSize); + clusterIt = descriptor.FindNextClusterId(clusterIt); + } + } else if (auto vlaListType = std::dynamic_pointer_cast(physicalField->type())) { + listSize = -1; + typeSize = vlaListType->field(0)->type()->byte_width(); + offsetBuffer = std::make_unique(TBuffer::EMode::kWrite, 4 * 1024 * 1024); + result = arrow::AllocateResizableBuffer((totalEntries + 1) * (int64_t)sizeof(int), pool); + if (!result.ok()) { + throw runtime_error("Cannot allocate offset buffer"); + } + arrowOffsetBuffer = std::move(result).ValueUnsafe(); + + // Offset bulk + auto offsetBulk = model.CreateBulk(physicalField->name()); + // Actual values are in a different place... + bulk = model.CreateBulk(physicalField->name()); + auto clusterIt = descriptor.FindClusterId(0, 0); + auto* ptrOffset = reinterpret_cast(arrowOffsetBuffer->mutable_data()); + auto* tPtrOffset = reinterpret_cast(ptrOffset); + offsets = std::span{tPtrOffset, tPtrOffset + totalEntries + 1}; + + auto copyOffsets = [&arrowValuesBuffer, &pool, &ptrOffset, &ptr, &totalSize](auto inPtr, size_t total) { + using value_type = typename std::decay_t::value_type; + for (size_t i = 0; i < total; i++) { + *ptrOffset++ = totalSize; + totalSize += inPtr[i].size(); + } + *ptrOffset = totalSize; + auto&& result = arrow::AllocateResizableBuffer(totalSize * sizeof(value_type), pool); + if (!result.ok()) { + throw runtime_error("Cannot allocate values buffer"); + } + arrowValuesBuffer = std::move(result).ValueUnsafe(); + ptr = (uint8_t*)(arrowValuesBuffer->mutable_data()); + // Calculate the size of the buffer here. 
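+            // Second pass: totalSize is now known and the values buffer has
+            // been allocated above, so each RVec payload can be copied
+            // contiguously after its predecessor.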
+ for (size_t i = 0; i < total; i++) { + int vlaSizeInBytes = inPtr[i].size() * sizeof(value_type); + if (vlaSizeInBytes == 0) { + continue; + } + memcpy(ptr, inPtr[i].data(), vlaSizeInBytes); + ptr += vlaSizeInBytes; + } + }; + + while (clusterIt != kInvalidDescriptorId) { + auto& index = descriptor.GetClusterDescriptor(clusterIt); + auto mask = std::make_unique(index.GetNEntries()); + std::fill(mask.get(), mask.get() + index.GetNEntries(), true); + int readLast = index.GetNEntries(); + switch (vlaListType->field(0)->type()->id()) { + case arrow::Type::FLOAT: { + copyOffsets((ROOT::Internal::VecOps::RVec*)offsetBulk.ReadBulk(RClusterIndex(clusterIt, index.GetFirstEntryIndex()), mask.get(), index.GetNEntries()), readLast); + } break; + case arrow::Type::DOUBLE: { + copyOffsets((ROOT::Internal::VecOps::RVec*)offsetBulk.ReadBulk(RClusterIndex(clusterIt, index.GetFirstEntryIndex()), mask.get(), index.GetNEntries()), readLast); + } break; + case arrow::Type::INT8: { + copyOffsets((ROOT::Internal::VecOps::RVec*)offsetBulk.ReadBulk(RClusterIndex(clusterIt, index.GetFirstEntryIndex()), mask.get(), index.GetNEntries()), readLast); + } break; + case arrow::Type::INT16: { + copyOffsets((ROOT::Internal::VecOps::RVec*)offsetBulk.ReadBulk(RClusterIndex(clusterIt, index.GetFirstEntryIndex()), mask.get(), index.GetNEntries()), readLast); + } break; + case arrow::Type::INT32: { + copyOffsets((ROOT::Internal::VecOps::RVec*)offsetBulk.ReadBulk(RClusterIndex(clusterIt, index.GetFirstEntryIndex()), mask.get(), index.GetNEntries()), readLast); + } break; + case arrow::Type::INT64: { + copyOffsets((ROOT::Internal::VecOps::RVec*)offsetBulk.ReadBulk(RClusterIndex(clusterIt, index.GetFirstEntryIndex()), mask.get(), index.GetNEntries()), readLast); + } break; + case arrow::Type::UINT8: { + copyOffsets((ROOT::Internal::VecOps::RVec*)offsetBulk.ReadBulk(RClusterIndex(clusterIt, index.GetFirstEntryIndex()), mask.get(), index.GetNEntries()), readLast); + } break; + case arrow::Type::UINT16: { + copyOffsets((ROOT::Internal::VecOps::RVec*)offsetBulk.ReadBulk(RClusterIndex(clusterIt, index.GetFirstEntryIndex()), mask.get(), index.GetNEntries()), readLast); + } break; + case arrow::Type::UINT32: { + copyOffsets((ROOT::Internal::VecOps::RVec*)offsetBulk.ReadBulk(RClusterIndex(clusterIt, index.GetFirstEntryIndex()), mask.get(), index.GetNEntries()), readLast); + } break; + case arrow::Type::UINT64: { + copyOffsets((ROOT::Internal::VecOps::RVec*)offsetBulk.ReadBulk(RClusterIndex(clusterIt, index.GetFirstEntryIndex()), mask.get(), index.GetNEntries()), readLast); + } break; + default: { + throw runtime_error("Unsupported kind of VLA"); + } break; + } + + readEntries += readLast; + clusterIt = descriptor.FindNextClusterId(clusterIt); + } + } else { + auto clusterIt = descriptor.FindClusterId(0, 0); + while (clusterIt != kInvalidDescriptorId) { + auto& index = descriptor.GetClusterDescriptor(clusterIt); + auto mask = std::make_unique(index.GetNEntries()); + std::fill(mask.get(), mask.get() + index.GetNEntries(), true); + void* inPtr = bulk.ReadBulk(RClusterIndex(clusterIt, index.GetFirstEntryIndex()), mask.get(), index.GetNEntries()); + + int readLast = index.GetNEntries(); + if (listSize == -1) { + size = offsets[readEntries + readLast] - offsets[readEntries]; + } else { + size = readLast * listSize; + } + readEntries += readLast; + memcpy(ptr, inPtr, size * typeSize); + ptr += (ptrdiff_t)(size * typeSize); + clusterIt = descriptor.FindNextClusterId(clusterIt); + } + } + switch (listSize) { + case -1: { + auto varray = 
std::make_shared(physicalField->type()->field(0)->type(), totalSize, arrowValuesBuffer); + array = std::make_shared(physicalField->type(), readEntries, arrowOffsetBuffer, varray); + } break; + case 1: { + totalSize = readEntries * listSize; + array = std::make_shared(physicalField->type(), readEntries, arrowValuesBuffer); + + } break; + default: { + totalSize = readEntries * listSize; + auto varray = std::make_shared(physicalField->type()->field(0)->type(), totalSize, arrowValuesBuffer); + array = std::make_shared(physicalField->type(), readEntries, varray); + } + } + } + columns.push_back(array); + } + + auto batch = arrow::RecordBatch::Make(dataset_schema, rows, columns); + return batch; + }; + + return generator; +} + +arrow::Result> RNTupleFileFormat::MakeWriter(std::shared_ptr destination, + std::shared_ptr schema, + std::shared_ptr options, + arrow::fs::FileLocator destination_locator) const +{ + auto writer = std::make_shared(schema, options, destination, destination_locator); + return std::dynamic_pointer_cast(writer); +} + +arrow::Result> RNTupleFileFormat::MakeFragment( + arrow::dataset::FileSource source, arrow::compute::Expression partition_expression, + std::shared_ptr physical_schema) +{ + std::shared_ptr format = std::make_shared(mTotCompressedSize, mTotUncompressedSize); + + auto fragment = std::make_shared(std::move(source), std::move(format), + std::move(partition_expression), + std::move(physical_schema)); + return std::dynamic_pointer_cast(fragment); +} + +RNTupleFileSystem::~RNTupleFileSystem() = default; + +std::shared_ptr + RNTupleFileFormat::DefaultWriteOptions() +{ + return std::make_shared(shared_from_this()); +} + +struct RNTuplePluginContext { + size_t totalCompressedSize = 0; + size_t totalUncompressedSize = 0; + std::shared_ptr format = nullptr; +}; + +struct RNTupleObjectReadingImplementation : public RootArrowFactoryPlugin { + RootArrowFactory* create() override + { + auto context = new RNTuplePluginContext; + context->format = std::make_shared(context->totalCompressedSize, context->totalUncompressedSize); + return new RootArrowFactory{ + .options = [context]() { return context->format->DefaultWriteOptions(); }, + .format = [context]() { return context->format; }, + .getSubFilesystem = [](void* handle) { + auto rntuple = (ROOT::Experimental::RNTuple*)handle; + return std::shared_ptr(new SingleRNTupleFileSystem(rntuple)); }, + }; + } +}; + +DEFINE_DPL_PLUGINS_BEGIN +DEFINE_DPL_PLUGIN_INSTANCE(RNTupleObjectReadingImplementation, RootObjectReadingImplementation); +DEFINE_DPL_PLUGINS_END +} // namespace o2::framework diff --git a/Framework/AnalysisSupport/src/TTreePlugin.cxx b/Framework/AnalysisSupport/src/TTreePlugin.cxx new file mode 100644 index 0000000000000..e376ed8b96268 --- /dev/null +++ b/Framework/AnalysisSupport/src/TTreePlugin.cxx @@ -0,0 +1,862 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. 
+ +#include "Framework/RootArrowFilesystem.h" +#include "Framework/Plugins.h" +#include "Framework/Signpost.h" +#include "Framework/Endian.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +O2_DECLARE_DYNAMIC_LOG(root_arrow_fs); + +namespace o2::framework +{ + +class TTreeFileWriteOptions : public arrow::dataset::FileWriteOptions +{ + public: + TTreeFileWriteOptions(std::shared_ptr format) + : FileWriteOptions(format) + { + } +}; + +// A filesystem which allows me to get a TTree +class TTreeFileSystem : public VirtualRootFileSystemBase +{ + public: + ~TTreeFileSystem() override; + + std::shared_ptr GetSubFilesystem(arrow::dataset::FileSource source) override + { + return std::dynamic_pointer_cast(shared_from_this()); + }; + + arrow::Result> OpenOutputStream( + const std::string& path, + const std::shared_ptr& metadata) override; + + virtual TTree* GetTree(arrow::dataset::FileSource source) = 0; +}; + +class SingleTreeFileSystem : public TTreeFileSystem +{ + public: + SingleTreeFileSystem(TTree* tree) + : TTreeFileSystem(), + mTree(tree) + { + } + + arrow::Result GetFileInfo(std::string const& path) override; + + std::string type_name() const override + { + return "ttree"; + } + + TTree* GetTree(arrow::dataset::FileSource) override + { + // Simply return the only TTree we have + return mTree; + } + + private: + TTree* mTree; +}; + +arrow::Result SingleTreeFileSystem::GetFileInfo(std::string const& path) +{ + arrow::dataset::FileSource source(path, shared_from_this()); + arrow::fs::FileInfo result; + result.set_path(path); + result.set_type(arrow::fs::FileType::File); + return result; +} + +class TTreeFileFragment : public arrow::dataset::FileFragment +{ + public: + TTreeFileFragment(arrow::dataset::FileSource source, + std::shared_ptr format, + arrow::compute::Expression partition_expression, + std::shared_ptr physical_schema) + : FileFragment(std::move(source), std::move(format), std::move(partition_expression), std::move(physical_schema)) + { + } +}; + +class TTreeFileFormat : public arrow::dataset::FileFormat +{ + size_t& mTotCompressedSize; + size_t& mTotUncompressedSize; + + public: + TTreeFileFormat(size_t& totalCompressedSize, size_t& totalUncompressedSize) + : FileFormat({}), + mTotCompressedSize(totalCompressedSize), + mTotUncompressedSize(totalUncompressedSize) + { + } + + ~TTreeFileFormat() override = default; + + std::string type_name() const override + { + return "ttree"; + } + + bool Equals(const FileFormat& other) const override + { + return other.type_name() == this->type_name(); + } + + arrow::Result IsSupported(const arrow::dataset::FileSource& source) const override + { + auto fs = std::dynamic_pointer_cast(source.filesystem()); + auto subFs = fs->GetSubFilesystem(source); + if (std::dynamic_pointer_cast(subFs)) { + return true; + } + return false; + } + + arrow::Result> Inspect(const arrow::dataset::FileSource& source) const override; + /// \brief Create a FileFragment for a FileSource. 
+ arrow::Result> MakeFragment( + arrow::dataset::FileSource source, arrow::compute::Expression partition_expression, + std::shared_ptr physical_schema) override; + + arrow::Result> MakeWriter(std::shared_ptr destination, std::shared_ptr schema, std::shared_ptr options, arrow::fs::FileLocator destination_locator) const override; + + std::shared_ptr DefaultWriteOptions() override; + + arrow::Result ScanBatchesAsync( + const std::shared_ptr& options, + const std::shared_ptr& fragment) const override; +}; + +// An arrow outputstream which allows to write to a TTree. Eventually +// with a prefix for the branches. +class TTreeOutputStream : public arrow::io::OutputStream +{ + public: + TTreeOutputStream(TTree*, std::string branchPrefix); + + arrow::Status Close() override; + + arrow::Result Tell() const override; + + arrow::Status Write(const void* data, int64_t nbytes) override; + + bool closed() const override; + + TBranch* CreateBranch(char const* branchName, char const* sizeBranch); + + TTree* GetTree() + { + return mTree; + } + + private: + TTree* mTree; + std::string mBranchPrefix; +}; + +// An arrow outputstream which allows to write to a ttree +// @a branch prefix is to be used to identify a set of branches which all belong to +// the same table. +TTreeOutputStream::TTreeOutputStream(TTree* f, std::string branchPrefix) + : mTree(f), + mBranchPrefix(std::move(branchPrefix)) +{ +} + +arrow::Status TTreeOutputStream::Close() +{ + if (mTree->GetCurrentFile() == nullptr) { + return arrow::Status::Invalid("Cannot close a tree not attached to a file"); + } + mTree->GetCurrentFile()->Close(); + return arrow::Status::OK(); +} + +arrow::Result TTreeOutputStream::Tell() const +{ + return arrow::Result(arrow::Status::NotImplemented("Cannot move")); +} + +arrow::Status TTreeOutputStream::Write(const void* data, int64_t nbytes) +{ + return arrow::Status::NotImplemented("Cannot write raw bytes to a TTree"); +} + +bool TTreeOutputStream::closed() const +{ + // A standalone tree is never closed. + if (mTree->GetCurrentFile() == nullptr) { + return false; + } + return mTree->GetCurrentFile()->IsOpen() == false; +} + +TBranch* TTreeOutputStream::CreateBranch(char const* branchName, char const* sizeBranch) +{ + return mTree->Branch((mBranchPrefix + "/" + branchName).c_str(), (char*)nullptr, (mBranchPrefix + sizeBranch).c_str()); +} + +struct TTreePluginContext { + size_t totalCompressedSize = 0; + size_t totalUncompressedSize = 0; + std::shared_ptr format = nullptr; +}; + +struct TTreeObjectReadingImplementation : public RootArrowFactoryPlugin { + RootArrowFactory* create() override + { + auto context = new TTreePluginContext; + context->format = std::make_shared(context->totalCompressedSize, context->totalUncompressedSize); + return new RootArrowFactory{ + .options = [context]() { return context->format->DefaultWriteOptions(); }, + .format = [context]() { return context->format; }, + .getSubFilesystem = [](void* handle) { + auto tree = (TTree*)handle; + return std::shared_ptr(new SingleTreeFileSystem(tree)); }, + }; + } +}; + +arrow::Result TTreeFileFormat::ScanBatchesAsync( + const std::shared_ptr& options, + const std::shared_ptr& fragment) const +{ + // Get the fragment as a TTreeFragment. This might be PART of a TTree. 
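+  // The generator handed back below is pulled lazily by the Arrow dataset
+  // machinery: each invocation materialises one RecordBatch, bulk-reading
+  // every requested branch of the underlying tree.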
+ auto treeFragment = std::dynamic_pointer_cast(fragment); + // This is the schema we want to read + auto dataset_schema = options->dataset_schema; + + auto generator = [pool = options->pool, treeFragment, dataset_schema, &totalCompressedSize = mTotCompressedSize, + &totalUncompressedSize = mTotUncompressedSize]() -> arrow::Future> { + auto schema = treeFragment->format()->Inspect(treeFragment->source()); + + std::vector> columns; + std::vector> fields = dataset_schema->fields(); + auto physical_schema = *treeFragment->ReadPhysicalSchema(); + + static TBufferFile buffer{TBuffer::EMode::kWrite, 4 * 1024 * 1024}; + auto containerFS = std::dynamic_pointer_cast(treeFragment->source().filesystem()); + auto fs = std::dynamic_pointer_cast(containerFS->GetSubFilesystem(treeFragment->source())); + + int64_t rows = -1; + TTree* tree = fs->GetTree(treeFragment->source()); + for (auto& field : fields) { + // The field actually on disk + auto physicalField = physical_schema->GetFieldByName(field->name()); + TBranch* branch = tree->GetBranch(physicalField->name().c_str()); + assert(branch); + buffer.Reset(); + auto totalEntries = branch->GetEntries(); + if (rows == -1) { + rows = totalEntries; + } + if (rows != totalEntries) { + throw runtime_error_f("Unmatching number of rows for branch %s", branch->GetName()); + } + arrow::Status status; + int readEntries = 0; + std::shared_ptr array; + auto listType = std::dynamic_pointer_cast(physicalField->type()); + if (physicalField->type() == arrow::boolean() || + (listType && physicalField->type()->field(0)->type() == arrow::boolean())) { + if (listType) { + std::unique_ptr builder = nullptr; + auto status = arrow::MakeBuilder(pool, physicalField->type()->field(0)->type(), &builder); + if (!status.ok()) { + throw runtime_error("Cannot create value builder"); + } + auto listBuilder = std::make_unique(pool, std::move(builder), listType->list_size()); + auto valueBuilder = listBuilder.get()->value_builder(); + // boolean array special case: we need to use builder to create the bitmap + status = valueBuilder->Reserve(totalEntries * listType->list_size()); + status &= listBuilder->Reserve(totalEntries); + if (!status.ok()) { + throw runtime_error("Failed to reserve memory for array builder"); + } + while (readEntries < totalEntries) { + auto readLast = branch->GetBulkRead().GetBulkEntries(readEntries, buffer); + readEntries += readLast; + status &= static_cast(valueBuilder)->AppendValues(reinterpret_cast(buffer.GetCurrent()), readLast * listType->list_size()); + } + status &= static_cast(listBuilder.get())->AppendValues(readEntries); + if (!status.ok()) { + throw runtime_error("Failed to append values to array"); + } + status &= listBuilder->Finish(&array); + if (!status.ok()) { + throw runtime_error("Failed to create array"); + } + } else if (listType == nullptr) { + std::unique_ptr builder = nullptr; + auto status = arrow::MakeBuilder(pool, physicalField->type(), &builder); + if (!status.ok()) { + throw runtime_error("Cannot create builder"); + } + auto valueBuilder = static_cast(builder.get()); + // boolean array special case: we need to use builder to create the bitmap + status = valueBuilder->Reserve(totalEntries); + if (!status.ok()) { + throw runtime_error("Failed to reserve memory for array builder"); + } + while (readEntries < totalEntries) { + auto readLast = branch->GetBulkRead().GetBulkEntries(readEntries, buffer); + readEntries += readLast; + status &= valueBuilder->AppendValues(reinterpret_cast(buffer.GetCurrent()), readLast); + } + if (!status.ok()) { 
+ throw runtime_error("Failed to append values to array"); + } + status &= valueBuilder->Finish(&array); + if (!status.ok()) { + throw runtime_error("Failed to create array"); + } + } + } else { + // other types: use serialized read to build arrays directly. + auto typeSize = physicalField->type()->byte_width(); + // This is needed for branches which have not been persisted. + auto bytes = branch->GetTotBytes(); + auto branchSize = bytes ? bytes : 1000000; + auto&& result = arrow::AllocateResizableBuffer(branchSize, pool); + if (!result.ok()) { + throw runtime_error("Cannot allocate values buffer"); + } + std::shared_ptr arrowValuesBuffer = std::move(result).ValueUnsafe(); + auto ptr = arrowValuesBuffer->mutable_data(); + if (ptr == nullptr) { + throw runtime_error("Invalid buffer"); + } + + std::unique_ptr offsetBuffer = nullptr; + + uint32_t offset = 0; + int count = 0; + std::shared_ptr arrowOffsetBuffer; + std::span offsets; + int size = 0; + uint32_t totalSize = 0; + TBranch* mSizeBranch = nullptr; + int64_t listSize = 1; + if (auto fixedSizeList = std::dynamic_pointer_cast(physicalField->type())) { + listSize = fixedSizeList->list_size(); + typeSize = fixedSizeList->field(0)->type()->byte_width(); + } else if (auto vlaListType = std::dynamic_pointer_cast(physicalField->type())) { + listSize = -1; + typeSize = vlaListType->field(0)->type()->byte_width(); + } + if (listSize == -1) { + mSizeBranch = branch->GetTree()->GetBranch((std::string{branch->GetName()} + "_size").c_str()); + offsetBuffer = std::make_unique(TBuffer::EMode::kWrite, 4 * 1024 * 1024); + result = arrow::AllocateResizableBuffer((totalEntries + 1) * (int64_t)sizeof(int), pool); + if (!result.ok()) { + throw runtime_error("Cannot allocate offset buffer"); + } + arrowOffsetBuffer = std::move(result).ValueUnsafe(); + unsigned char* ptrOffset = arrowOffsetBuffer->mutable_data(); + auto* tPtrOffset = reinterpret_cast(ptrOffset); + offsets = std::span{tPtrOffset, tPtrOffset + totalEntries + 1}; + + // read sizes first + while (readEntries < totalEntries) { + auto readLast = mSizeBranch->GetBulkRead().GetEntriesSerialized(readEntries, *offsetBuffer); + readEntries += readLast; + for (auto i = 0; i < readLast; ++i) { + offsets[count++] = (int)offset; + offset += swap32_(reinterpret_cast(offsetBuffer->GetCurrent())[i]); + } + } + offsets[count] = (int)offset; + totalSize = offset; + readEntries = 0; + } + + while (readEntries < totalEntries) { + auto readLast = branch->GetBulkRead().GetEntriesSerialized(readEntries, buffer); + if (listSize == -1) { + size = offsets[readEntries + readLast] - offsets[readEntries]; + } else { + size = readLast * listSize; + } + readEntries += readLast; + swapCopy(ptr, buffer.GetCurrent(), size, typeSize); + ptr += (ptrdiff_t)(size * typeSize); + } + if (listSize >= 1) { + totalSize = readEntries * listSize; + } + std::shared_ptr varray; + switch (listSize) { + case -1: + varray = std::make_shared(physicalField->type()->field(0)->type(), totalSize, arrowValuesBuffer); + array = std::make_shared(physicalField->type(), readEntries, arrowOffsetBuffer, varray); + break; + case 1: + array = std::make_shared(physicalField->type(), readEntries, arrowValuesBuffer); + break; + default: + varray = std::make_shared(physicalField->type()->field(0)->type(), totalSize, arrowValuesBuffer); + array = std::make_shared(physicalField->type(), readEntries, varray); + } + } + + branch->SetStatus(false); + branch->DropBaskets("all"); + branch->Reset(); + branch->GetTransientBuffer(0)->Expand(0); + + 
columns.push_back(array); + } + auto batch = arrow::RecordBatch::Make(dataset_schema, rows, columns); + totalCompressedSize += tree->GetZipBytes(); + totalUncompressedSize += tree->GetTotBytes(); + return batch; + }; + return generator; +} + +char const* rootSuffixFromArrow(arrow::Type::type id) +{ + switch (id) { + case arrow::Type::BOOL: + return "/O"; + case arrow::Type::UINT8: + return "/b"; + case arrow::Type::UINT16: + return "/s"; + case arrow::Type::UINT32: + return "/i"; + case arrow::Type::UINT64: + return "/l"; + case arrow::Type::INT8: + return "/B"; + case arrow::Type::INT16: + return "/S"; + case arrow::Type::INT32: + return "/I"; + case arrow::Type::INT64: + return "/L"; + case arrow::Type::FLOAT: + return "/F"; + case arrow::Type::DOUBLE: + return "/D"; + default: + throw runtime_error("Unsupported arrow column type"); + } +} + +arrow::Result> TTreeFileSystem::OpenOutputStream( + const std::string& path, + const std::shared_ptr& metadata) +{ + arrow::dataset::FileSource source{path, shared_from_this()}; + auto prefix = metadata->Get("branch_prefix"); + if (prefix.ok()) { + return std::make_shared(GetTree(source), *prefix); + } + return std::make_shared(GetTree(source), ""); +} + +namespace +{ +struct BranchInfo { + std::string name; + TBranch* ptr; + bool mVLA; +}; +} // namespace + +auto arrowTypeFromROOT(EDataType type, int size) +{ + auto typeGenerator = [](std::shared_ptr const& type, int size) -> std::shared_ptr { + switch (size) { + case -1: + return arrow::list(type); + case 1: + return std::move(type); + default: + return arrow::fixed_size_list(type, size); + } + }; + + switch (type) { + case EDataType::kBool_t: + return typeGenerator(arrow::boolean(), size); + case EDataType::kUChar_t: + return typeGenerator(arrow::uint8(), size); + case EDataType::kUShort_t: + return typeGenerator(arrow::uint16(), size); + case EDataType::kUInt_t: + return typeGenerator(arrow::uint32(), size); + case EDataType::kULong64_t: + return typeGenerator(arrow::uint64(), size); + case EDataType::kChar_t: + return typeGenerator(arrow::int8(), size); + case EDataType::kShort_t: + return typeGenerator(arrow::int16(), size); + case EDataType::kInt_t: + return typeGenerator(arrow::int32(), size); + case EDataType::kLong64_t: + return typeGenerator(arrow::int64(), size); + case EDataType::kFloat_t: + return typeGenerator(arrow::float32(), size); + case EDataType::kDouble_t: + return typeGenerator(arrow::float64(), size); + default: + throw o2::framework::runtime_error_f("Unsupported branch type: %d", static_cast(type)); + } +} + +arrow::Result> TTreeFileFormat::Inspect(const arrow::dataset::FileSource& source) const +{ + arrow::Schema schema{{}}; + auto fs = std::dynamic_pointer_cast(source.filesystem()); + // Actually get the TTree from the ROOT file. 
+  auto treeFs = std::dynamic_pointer_cast<TTreeFileSystem>(fs->GetSubFilesystem(source));
+  if (!treeFs.get()) {
+    throw runtime_error_f("Unknown filesystem %s\n", source.filesystem()->type_name().c_str());
+  }
+  TTree* tree = treeFs->GetTree(source);
+
+  auto branches = tree->GetListOfBranches();
+  auto n = branches->GetEntries();
+
+  std::vector<BranchInfo> branchInfos;
+  for (auto i = 0; i < n; ++i) {
+    auto branch = static_cast<TBranch*>(branches->At(i));
+    auto name = std::string{branch->GetName()};
+    auto pos = name.find("_size");
+    if (pos != std::string::npos) {
+      name.erase(pos);
+      branchInfos.emplace_back(BranchInfo{name, (TBranch*)nullptr, true});
+    } else {
+      auto lookup = std::find_if(branchInfos.begin(), branchInfos.end(), [&](BranchInfo const& bi) {
+        return bi.name == name;
+      });
+      if (lookup == branchInfos.end()) {
+        branchInfos.emplace_back(BranchInfo{name, branch, false});
+      } else {
+        lookup->ptr = branch;
+      }
+    }
+  }
+
+  std::vector<std::shared_ptr<arrow::Field>> fields;
+  tree->SetCacheSize(25000000);
+  for (auto& bi : branchInfos) {
+    static TClass* cls;
+    EDataType type;
+    bi.ptr->GetExpectedType(cls, type);
+    auto listSize = -1;
+    if (!bi.mVLA) {
+      listSize = static_cast<TLeaf*>(bi.ptr->GetListOfLeaves()->At(0))->GetLenStatic();
+    }
+    auto field = std::make_shared<arrow::Field>(bi.ptr->GetName(), arrowTypeFromROOT(type, listSize));
+    fields.push_back(field);
+
+    tree->AddBranchToCache(bi.ptr);
+    if (strncmp(bi.ptr->GetName(), "fIndexArray", strlen("fIndexArray")) == 0) {
+      std::string sizeBranchName = bi.ptr->GetName();
+      sizeBranchName += "_size";
+      auto* sizeBranch = (TBranch*)tree->GetBranch(sizeBranchName.c_str());
+      if (sizeBranch) {
+        tree->AddBranchToCache(sizeBranch);
+      }
+    }
+  }
+  tree->StopCacheLearningPhase();
+
+  return std::make_shared<arrow::Schema>(fields);
+}
+
+/// \brief Create a FileFragment for a FileSource.
+arrow::Result<std::shared_ptr<arrow::dataset::FileFragment>> TTreeFileFormat::MakeFragment(
+  arrow::dataset::FileSource source, arrow::compute::Expression partition_expression,
+  std::shared_ptr<arrow::Schema> physical_schema)
+{
+  std::shared_ptr<arrow::dataset::FileFormat> format = std::make_shared<TTreeFileFormat>(mTotCompressedSize, mTotUncompressedSize);
+
+  auto fragment = std::make_shared<TTreeFileFragment>(std::move(source), std::move(format),
+                                                      std::move(partition_expression),
+                                                      std::move(physical_schema));
+  return std::dynamic_pointer_cast<arrow::dataset::FileFragment>(fragment);
+}
+
+class TTreeFileWriter : public arrow::dataset::FileWriter
+{
+  std::vector<TBranch*> branches;
+  std::vector<TBranch*> sizesBranches;
+  std::vector<std::shared_ptr<arrow::Array>> valueArrays;
+  std::vector<std::shared_ptr<arrow::Array>> sizeArrays;
+  std::vector<std::shared_ptr<arrow::DataType>> valueTypes;
+
+  std::vector<int64_t> valuesIdealBasketSize;
+  std::vector<int64_t> sizeIdealBasketSize;
+
+  std::vector<int> typeSizes;
+  std::vector<int64_t> listSizes;
+  bool firstBasket = true;
+
+  // This is to create a basket size according to the first batch.
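+  // A basket is ROOT's unit of buffering and compression; sizing baskets from
+  // the first batch lets a whole batch fit in a single basket.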
+ void finaliseBasketSize(std::shared_ptr firstBatch) + { + O2_SIGNPOST_ID_FROM_POINTER(sid, root_arrow_fs, this); + O2_SIGNPOST_START(root_arrow_fs, sid, "finaliseBasketSize", "First batch with %lli rows received and %zu columns", + firstBatch->num_rows(), firstBatch->columns().size()); + for (size_t i = 0; i < branches.size(); i++) { + auto* branch = branches[i]; + auto* sizeBranch = sizesBranches[i]; + + int valueSize = valueTypes[i]->byte_width(); + if (listSizes[i] == 1) { + O2_SIGNPOST_EVENT_EMIT(root_arrow_fs, sid, "finaliseBasketSize", "Branch %s exists and uses %d bytes per entry for %lli entries.", + branch->GetName(), valueSize, firstBatch->num_rows()); + assert(sizeBranch == nullptr); + branch->SetBasketSize(1024 + firstBatch->num_rows() * valueSize); + } else if (listSizes[i] == -1) { + O2_SIGNPOST_EVENT_EMIT(root_arrow_fs, sid, "finaliseBasketSize", "Branch %s exists and uses %d bytes per entry.", + branch->GetName(), valueSize); + // This should probably lookup the + auto column = firstBatch->GetColumnByName(schema_->field(i)->name()); + auto list = std::static_pointer_cast(column); + O2_SIGNPOST_EVENT_EMIT(root_arrow_fs, sid, "finaliseBasketSize", "Branch %s needed. Associated size branch %s and there are %lli entries of size %d in that list.", + branch->GetName(), sizeBranch->GetName(), list->length(), valueSize); + branch->SetBasketSize(1024 + firstBatch->num_rows() * valueSize * list->length()); + sizeBranch->SetBasketSize(1024 + firstBatch->num_rows() * 4); + } else { + O2_SIGNPOST_EVENT_EMIT(root_arrow_fs, sid, "finaliseBasketSize", "Branch %s needed. There are %lli entries per array of size %d in that list.", + branch->GetName(), listSizes[i], valueSize); + assert(sizeBranch == nullptr); + branch->SetBasketSize(1024 + firstBatch->num_rows() * valueSize * listSizes[i]); + } + + auto field = firstBatch->schema()->field(i); + if (field->name().starts_with("fIndexArray")) { + // One int per array to keep track of the size + int idealBasketSize = 4 * firstBatch->num_rows() + 1024 + field->type()->byte_width() * firstBatch->num_rows(); // minimal additional size needed, otherwise we get 2 baskets + int basketSize = std::max(32000, idealBasketSize); // keep a minimum value + sizeBranch->SetBasketSize(basketSize); + branch->SetBasketSize(basketSize); + } + } + O2_SIGNPOST_END(root_arrow_fs, sid, "finaliseBasketSize", "Done"); + } + + public: + // Create the TTree based on the physical_schema, not the one in the batch. + // The write method will have to reconcile the two schemas. + TTreeFileWriter(std::shared_ptr schema, std::shared_ptr options, + std::shared_ptr destination, + arrow::fs::FileLocator destination_locator) + : FileWriter(schema, options, destination, destination_locator) + { + // Batches have the same number of entries for each column. + auto directoryStream = std::dynamic_pointer_cast(destination_); + auto treeStream = std::dynamic_pointer_cast(destination_); + + if (directoryStream.get()) { + TDirectoryFile* dir = directoryStream->GetDirectory(); + dir->cd(); + auto* tree = new TTree(destination_locator_.path.c_str(), ""); + treeStream = std::make_shared(tree, ""); + } else if (treeStream.get()) { + // We already have a tree stream, let's derive a new one + // with the destination_locator_.path as prefix for the branches + // This way we can multiplex multiple tables in the same tree. 
+ auto tree = treeStream->GetTree(); + treeStream = std::make_shared(tree, destination_locator_.path); + } else { + // I could simply set a prefix here to merge to an already existing tree. + throw std::runtime_error("Unsupported backend."); + } + + for (auto i = 0u; i < schema->fields().size(); ++i) { + auto& field = schema->field(i); + listSizes.push_back(1); + + int valuesIdealBasketSize = 0; + // Construct all the needed branches. + switch (field->type()->id()) { + case arrow::Type::FIXED_SIZE_LIST: { + listSizes.back() = std::static_pointer_cast(field->type())->list_size(); + valuesIdealBasketSize = 1024 + valueTypes.back()->byte_width() * listSizes.back(); + valueTypes.push_back(field->type()->field(0)->type()); + sizesBranches.push_back(nullptr); + std::string leafList = fmt::format("{}[{}]{}", field->name(), listSizes.back(), rootSuffixFromArrow(valueTypes.back()->id())); + branches.push_back(treeStream->CreateBranch(field->name().c_str(), leafList.c_str())); + } break; + case arrow::Type::LIST: { + valueTypes.push_back(field->type()->field(0)->type()); + std::string leafList = fmt::format("{}[{}_size]{}", field->name(), field->name(), rootSuffixFromArrow(valueTypes.back()->id())); + listSizes.back() = -1; // VLA, we need to calculate it on the fly; + std::string sizeLeafList = field->name() + "_size/I"; + sizesBranches.push_back(treeStream->CreateBranch((field->name() + "_size").c_str(), sizeLeafList.c_str())); + branches.push_back(treeStream->CreateBranch(field->name().c_str(), leafList.c_str())); + // Notice that this could be replaced by a better guess of the + // average size of the list elements, but this is not trivial. + } break; + default: { + valueTypes.push_back(field->type()); + std::string leafList = field->name() + rootSuffixFromArrow(valueTypes.back()->id()); + sizesBranches.push_back(nullptr); + branches.push_back(treeStream->CreateBranch(field->name().c_str(), leafList.c_str())); + } break; + } + } + // We create the branches from the schema + } + + arrow::Status Write(const std::shared_ptr& batch) override + { + if (firstBasket) { + firstBasket = false; + finaliseBasketSize(batch); + } + + // Support writing empty tables + if (batch->columns().empty() || batch->num_rows() == 0) { + return arrow::Status::OK(); + } + + // Batches have the same number of entries for each column. + auto directoryStream = std::dynamic_pointer_cast(destination_); + TTree* tree = nullptr; + if (directoryStream.get()) { + TDirectoryFile* dir = directoryStream->GetDirectory(); + tree = (TTree*)dir->Get(destination_locator_.path.c_str()); + } + auto treeStream = std::dynamic_pointer_cast(destination_); + + if (!tree) { + // I could simply set a prefix here to merge to an already existing tree. + throw std::runtime_error("Unsupported backend."); + } + + for (auto i = 0u; i < batch->columns().size(); ++i) { + auto column = batch->column(i); + auto& field = batch->schema()->field(i); + + valueArrays.push_back(nullptr); + + switch (field->type()->id()) { + case arrow::Type::FIXED_SIZE_LIST: { + auto list = std::static_pointer_cast(column); + valueArrays.back() = list->values(); + } break; + case arrow::Type::LIST: { + auto list = std::static_pointer_cast(column); + valueArrays.back() = list; + } break; + case arrow::Type::BOOL: { + // In case of arrays of booleans, we need to go back to their + // char based representation for ROOT to save them. 
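+          // Arrow packs booleans as a bitmap, one bit per value, while a ROOT
+          // "/O" branch stores one byte per value, hence the expansion below.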
+ auto boolArray = std::static_pointer_cast(column); + + int64_t length = boolArray->length(); + arrow::UInt8Builder builder; + auto ok = builder.Reserve(length); + + for (int64_t i = 0; i < length; ++i) { + if (boolArray->IsValid(i)) { + // Expand each boolean value (true/false) to uint8 (1/0) + uint8_t value = boolArray->Value(i) ? 1 : 0; + auto ok = builder.Append(value); + } else { + // Append null for invalid entries + auto ok = builder.AppendNull(); + } + } + valueArrays.back() = *builder.Finish(); + } break; + default: + valueArrays.back() = column; + } + } + + int64_t pos = 0; + while (pos < batch->num_rows()) { + for (size_t bi = 0; bi < branches.size(); ++bi) { + auto* branch = branches[bi]; + auto* sizeBranch = sizesBranches[bi]; + auto array = batch->column(bi); + auto& field = batch->schema()->field(bi); + auto& listSize = listSizes[bi]; + auto valueType = valueTypes[bi]; + auto valueArray = valueArrays[bi]; + + switch (field->type()->id()) { + case arrow::Type::LIST: { + auto list = std::static_pointer_cast(array); + listSize = list->value_length(pos); + uint8_t const* buffer = std::static_pointer_cast(valueArray)->values()->data() + array->offset() + list->value_offset(pos) * valueType->byte_width(); + branch->SetAddress((void*)buffer); + sizeBranch->SetAddress(&listSize); + }; + break; + case arrow::Type::FIXED_SIZE_LIST: + default: { + uint8_t const* buffer = std::static_pointer_cast(valueArray)->values()->data() + array->offset() + pos * listSize * valueType->byte_width(); + branch->SetAddress((void*)buffer); + }; + } + } + tree->Fill(); + ++pos; + } + return arrow::Status::OK(); + } + + arrow::Future<> FinishInternal() override + { + auto treeStream = std::dynamic_pointer_cast(destination_); + TTree* tree = treeStream->GetTree(); + tree->Write("", TObject::kOverwrite); + tree->SetDirectory(nullptr); + + return {}; + }; +}; +arrow::Result> TTreeFileFormat::MakeWriter(std::shared_ptr destination, std::shared_ptr schema, std::shared_ptr options, arrow::fs::FileLocator destination_locator) const +{ + auto writer = std::make_shared(schema, options, destination, destination_locator); + return std::dynamic_pointer_cast(writer); +} + +std::shared_ptr TTreeFileFormat::DefaultWriteOptions() +{ + std::shared_ptr options( + new TTreeFileWriteOptions(shared_from_this())); + return options; +} + +TTreeFileSystem::~TTreeFileSystem() = default; + +DEFINE_DPL_PLUGINS_BEGIN +DEFINE_DPL_PLUGIN_INSTANCE(TTreeObjectReadingImplementation, RootObjectReadingImplementation); +DEFINE_DPL_PLUGINS_END +} // namespace o2::framework diff --git a/Framework/Core/CMakeLists.txt b/Framework/Core/CMakeLists.txt index 5cdd1241ecfb0..84a4a7f8b8c69 100644 --- a/Framework/Core/CMakeLists.txt +++ b/Framework/Core/CMakeLists.txt @@ -159,6 +159,8 @@ o2_add_library(Framework FairMQ::FairMQ ROOT::Tree ROOT::Hist + ROOT::ROOTNTuple + ROOT::ROOTNTupleUtil O2::FrameworkFoundation O2::CommonConstants O2::Headers @@ -299,6 +301,7 @@ add_executable(o2-test-framework-root target_link_libraries(o2-test-framework-root PRIVATE O2::Framework) target_link_libraries(o2-test-framework-root PRIVATE O2::Catch2) target_link_libraries(o2-test-framework-root PRIVATE ROOT::ROOTDataFrame) +target_link_libraries(o2-test-framework-root PRIVATE ROOT::ROOTNTuple) set_property(TARGET o2-test-framework-root PROPERTY RUNTIME_OUTPUT_DIRECTORY ${outdir}) add_test(NAME framework:root COMMAND o2-test-framework-root --skip-benchmarks) add_test(NAME framework:crash COMMAND sh -e -c "PATH=${CMAKE_RUNTIME_OUTPUT_DIRECTORY}:$PATH 
${CMAKE_CURRENT_LIST_DIR}/test/test_AllCrashTypes.sh")
diff --git a/Framework/Core/include/Framework/Plugins.h b/Framework/Core/include/Framework/Plugins.h
index 23d55a512e1fa..925943c6bffc3 100644
--- a/Framework/Core/include/Framework/Plugins.h
+++ b/Framework/Core/include/Framework/Plugins.h
@@ -36,6 +36,14 @@ enum struct DplPluginKind : int {
   // set, you might want to load metadata from it and attach it to the
   // configuration.
   Capability,
+  // A RootObjectReadingCapability is used to discover if there is a way
+  // to read and understand an object serialised with ROOT.
+  RootObjectReadingCapability,
+
+  // A RootObjectReadingImplementation is actually used to read said object
+  // using the arrow dataset API.
+  RootObjectReadingImplementation,
+
   // A plugin which was not initialised properly.
   Unknown
 };
diff --git a/Framework/Core/include/Framework/RootArrowFilesystem.h b/Framework/Core/include/Framework/RootArrowFilesystem.h
index 48d817bc9ddf2..8744656e7d55d 100644
--- a/Framework/Core/include/Framework/RootArrowFilesystem.h
+++ b/Framework/Core/include/Framework/RootArrowFilesystem.h
@@ -11,6 +11,7 @@
 #ifndef O2_FRAMEWORK_ROOT_ARROW_FILESYSTEM_H_
 #define O2_FRAMEWORK_ROOT_ARROW_FILESYSTEM_H_
+#include
 #include
 #include
 #include
@@ -18,23 +19,12 @@
 #include
 class TFile;
-class TBranch;
-class TTree;
 class TBufferFile;
 class TDirectoryFile;
 namespace o2::framework
 {
-class TTreeFileWriteOptions : public arrow::dataset::FileWriteOptions
-{
- public:
-  TTreeFileWriteOptions(std::shared_ptr<arrow::dataset::FileFormat> format)
-    : FileWriteOptions(format)
-  {
-  }
-};
-
 // This is to avoid having to implement a bunch of unimplemented methods
 // for all the possible virtual filesystem we can invent on top of ROOT
 // data structures.
@@ -79,46 +69,43 @@ class VirtualRootFileSystemBase : public arrow::fs::FileSystem
     const std::shared_ptr<const arrow::KeyValueMetadata>& metadata) override;
 };
-// A filesystem which allows me to get a TTree
-class TTreeFileSystem : public VirtualRootFileSystemBase
-{
- public:
-  ~TTreeFileSystem() override;
-
-  std::shared_ptr<VirtualRootFileSystemBase> GetSubFilesystem(arrow::dataset::FileSource source) override
-  {
-    return std::dynamic_pointer_cast<VirtualRootFileSystemBase>(shared_from_this());
-  };
-
-  arrow::Result<std::shared_ptr<arrow::io::OutputStream>> OpenOutputStream(
-    const std::string& path,
-    const std::shared_ptr<const arrow::KeyValueMetadata>& metadata) override;
-
-  virtual TTree* GetTree(arrow::dataset::FileSource source) = 0;
+struct RootArrowFactory final {
+  std::function<std::shared_ptr<arrow::dataset::FileWriteOptions>()> options = nullptr;
+  std::function<std::shared_ptr<arrow::dataset::FileFormat>()> format = nullptr;
+  std::function<std::shared_ptr<VirtualRootFileSystemBase>(void*)> getSubFilesystem = nullptr;
 };
-class SingleTreeFileSystem : public TTreeFileSystem
-{
- public:
-  SingleTreeFileSystem(TTree* tree)
-    : TTreeFileSystem(),
-      mTree(tree)
-  {
-  }
+struct RootArrowFactoryPlugin {
+  virtual RootArrowFactory* create() = 0;
+};
-  std::string type_name() const override
-  {
-    return "ttree";
-  }
+// A registry for all the possible ways of encoding a table in a TFile
+struct RootObjectReadingCapability {
+  // The unique name of this capability
+  std::string name = "unknown";
+  // Given a TFile, return the object which this capability supports.
+  // Use a void* in order not to expose the kind of object to the
+  // generic reading code. This is also where we load the plugin
+  // which will be used for the actual creation.
+  std::function<void*(TDirectoryFile*, std::string const&)> getHandle;
+  // Same as the above, but uses a TBufferFile as storage
+  std::function<void*(TBufferFile*, std::string const&)> getBufferHandle;
+  // This must be implemented to load the actual RootArrowFactory plugin which
+  // implements this capability. This way the detection of the file format
+  // (via getHandle) does not need to know about the actual code which performs
+  // the serialization (and might depend on e.g. RNTuple).
+  std::function<RootArrowFactory&()> factory;
+};
-  TTree* GetTree(arrow::dataset::FileSource) override
-  {
-    // Simply return the only TTree we have
-    return mTree;
-  }
+struct RootObjectReadingCapabilityPlugin {
+  virtual RootObjectReadingCapability* create() = 0;
+};
- private:
-  TTree* mTree;
+// This acts as a registry of all the capabilities (i.e. the ability to
+// associate a given object in a root file with the serialization plugin) and
+// the factory (i.e. the serialization plugin)
+struct RootObjectReadingFactory {
+  std::vector<RootObjectReadingCapability> capabilities;
 };
 class TFileFileSystem : public VirtualRootFileSystemBase
@@ -126,7 +113,7 @@ class TFileFileSystem : public VirtualRootFileSystemBase
  public:
   arrow::Result<arrow::fs::FileInfo> GetFileInfo(const std::string& path) override;
-  TFileFileSystem(TDirectoryFile* f, size_t readahead);
+  TFileFileSystem(TDirectoryFile* f, size_t readahead, RootObjectReadingFactory&);
   std::string type_name() const override
   {
@@ -147,12 +134,13 @@
  private:
   TDirectoryFile* mFile;
+  RootObjectReadingFactory& mObjectFactory;
 };
 class TBufferFileFS : public VirtualRootFileSystemBase
 {
  public:
-  TBufferFileFS(TBufferFile* f);
+  TBufferFileFS(TBufferFile* f, RootObjectReadingFactory&);
   arrow::Result<arrow::fs::FileInfo> GetFileInfo(const std::string& path) override;
   std::string type_name() const override
   {
@@ -165,68 +153,7 @@
  private:
   TBufferFile* mBuffer;
   std::shared_ptr<VirtualRootFileSystemBase> mFilesystem;
-};
-
-class TTreeFileFragment : public arrow::dataset::FileFragment
-{
- public:
-  TTreeFileFragment(arrow::dataset::FileSource source,
-                    std::shared_ptr<arrow::dataset::FileFormat> format,
-                    arrow::compute::Expression partition_expression,
-                    std::shared_ptr<arrow::Schema> physical_schema)
-    : FileFragment(std::move(source), std::move(format), std::move(partition_expression), std::move(physical_schema))
-  {
-  }
-};
-
-class TTreeFileFormat : public arrow::dataset::FileFormat
-{
-  size_t& mTotCompressedSize;
-  size_t& mTotUncompressedSize;
-
- public:
-  TTreeFileFormat(size_t& totalCompressedSize, size_t& totalUncompressedSize)
-    : FileFormat({}),
-      mTotCompressedSize(totalCompressedSize),
-      mTotUncompressedSize(totalUncompressedSize)
-  {
-  }
-
-  ~TTreeFileFormat() override = default;
-
-  std::string type_name() const override
-  {
-    return "ttree";
-  }
-
-  bool Equals(const FileFormat& other) const override
-  {
-    return other.type_name() == this->type_name();
-  }
-
-  arrow::Result<bool> IsSupported(const arrow::dataset::FileSource& source) const override
-  {
-    auto fs = std::dynamic_pointer_cast<VirtualRootFileSystemBase>(source.filesystem());
-    auto subFs = fs->GetSubFilesystem(source);
-    if (std::dynamic_pointer_cast<TTreeFileSystem>(subFs)) {
-      return true;
-    }
-    return false;
-  }
-
-  arrow::Result<std::shared_ptr<arrow::Schema>> Inspect(const arrow::dataset::FileSource& source) const override;
-  /// \brief Create a FileFragment for a FileSource.
- arrow::Result> MakeFragment( - arrow::dataset::FileSource source, arrow::compute::Expression partition_expression, - std::shared_ptr physical_schema) override; - - arrow::Result> MakeWriter(std::shared_ptr destination, std::shared_ptr schema, std::shared_ptr options, arrow::fs::FileLocator destination_locator) const override; - - std::shared_ptr DefaultWriteOptions() override; - - arrow::Result ScanBatchesAsync( - const std::shared_ptr& options, - const std::shared_ptr& fragment) const override; + RootObjectReadingFactory& mObjectFactory; }; // An arrow outputstream which allows to write to a TDirectoryFile. @@ -255,33 +182,6 @@ class TDirectoryFileOutputStream : public arrow::io::OutputStream TDirectoryFile* mDirectory; }; -// An arrow outputstream which allows to write to a TTree. Eventually -// with a prefix for the branches. -class TTreeOutputStream : public arrow::io::OutputStream -{ - public: - TTreeOutputStream(TTree*, std::string branchPrefix); - - arrow::Status Close() override; - - arrow::Result Tell() const override; - - arrow::Status Write(const void* data, int64_t nbytes) override; - - bool closed() const override; - - TBranch* CreateBranch(char const* branchName, char const* sizeBranch); - - TTree* GetTree() - { - return mTree; - } - - private: - TTree* mTree; - std::string mBranchPrefix; -}; - } // namespace o2::framework #endif // O2_FRAMEWORK_ROOT_ARROW_FILESYSTEM_H_ diff --git a/Framework/Core/src/Plugin.cxx b/Framework/Core/src/Plugin.cxx index 0d225b81c0581..46248ab467366 100644 --- a/Framework/Core/src/Plugin.cxx +++ b/Framework/Core/src/Plugin.cxx @@ -11,10 +11,15 @@ #include "Framework/Plugins.h" #include "Framework/ConfigParamDiscovery.h" #include "Framework/ConfigParamRegistry.h" +#include "Framework/RootArrowFilesystem.h" #include "Framework/Logger.h" #include "Framework/Capability.h" #include "Framework/Signpost.h" #include "Framework/VariantJSONHelpers.h" +#include "Framework/PluginManager.h" +#include +#include +#include #include #include @@ -168,11 +173,75 @@ struct DiscoverAODOptionsInCommandLine : o2::framework::ConfigDiscoveryPlugin { } }; +struct ImplementationContext { + std::vector implementations; +}; + +std::function getHandleByClass(char const* classname) +{ + return [classname](TDirectoryFile* file, std::string const& path) { return file->GetObjectChecked(path.c_str(), TClass::GetClass(classname)); }; +} + +std::function getBufferHandleByClass(char const* classname) +{ + return [classname](TBufferFile* buffer, std::string const& path) { buffer->Reset(); return buffer->ReadObjectAny(TClass::GetClass(classname)); }; +} + +void lazyLoadFactory(std::vector& implementations, char const* specs) +{ + // Lazy loading of the plugin so that we do not bring in RNTuple / TTree if not needed + if (implementations.empty()) { + std::vector plugins; + auto morePlugins = PluginManager::parsePluginSpecString(specs); + for (auto& extra : morePlugins) { + plugins.push_back(extra); + } + PluginManager::loadFromPlugin(plugins, implementations); + if (implementations.empty()) { + return; + } + } +} + +struct RNTupleObjectReadingCapability : o2::framework::RootObjectReadingCapabilityPlugin { + RootObjectReadingCapability* create() override + { + auto context = new ImplementationContext; + + return new RootObjectReadingCapability{ + .name = "rntuple", + .getHandle = getHandleByClass("ROOT::Experimental::RNTuple"), + .getBufferHandle = getBufferHandleByClass("ROOT::Experimental::RNTuple"), + .factory = [context]() -> RootArrowFactory& { + 
lazyLoadFactory(context->implementations, "O2FrameworkAnalysisRNTupleSupport:RNTupleObjectReadingImplementation"); + return context->implementations.back(); + }}; + } +}; + +struct TTreeObjectReadingCapability : o2::framework::RootObjectReadingCapabilityPlugin { + RootObjectReadingCapability* create() override + { + auto context = new ImplementationContext; + + return new RootObjectReadingCapability{ + .name = "ttree", + .getHandle = getHandleByClass("TTree"), + .getBufferHandle = getBufferHandleByClass("TTree"), + .factory = [context]() -> RootArrowFactory& { + lazyLoadFactory(context->implementations, "O2FrameworkAnalysisTTreeSupport:TTreeObjectReadingImplementation"); + return context->implementations.back(); + }}; + } +}; + DEFINE_DPL_PLUGINS_BEGIN DEFINE_DPL_PLUGIN_INSTANCE(DiscoverMetadataInAODCapability, Capability); DEFINE_DPL_PLUGIN_INSTANCE(DiscoverMetadataInCommandLineCapability, Capability); DEFINE_DPL_PLUGIN_INSTANCE(DiscoverAODOptionsInCommandLineCapability, Capability); DEFINE_DPL_PLUGIN_INSTANCE(DiscoverMetadataInCommandLine, ConfigDiscovery); DEFINE_DPL_PLUGIN_INSTANCE(DiscoverAODOptionsInCommandLine, ConfigDiscovery); +DEFINE_DPL_PLUGIN_INSTANCE(RNTupleObjectReadingCapability, RootObjectReadingCapability); +DEFINE_DPL_PLUGIN_INSTANCE(TTreeObjectReadingCapability, RootObjectReadingCapability); DEFINE_DPL_PLUGINS_END } // namespace o2::framework diff --git a/Framework/Core/src/RootArrowFilesystem.cxx b/Framework/Core/src/RootArrowFilesystem.cxx index 5f2d21d942d37..545ba6f0afb71 100644 --- a/Framework/Core/src/RootArrowFilesystem.cxx +++ b/Framework/Core/src/RootArrowFilesystem.cxx @@ -9,9 +9,7 @@ // granted to it by virtue of its status as an Intergovernmental Organization // or submit itself to any jurisdiction. #include "Framework/RootArrowFilesystem.h" -#include "Framework/Endian.h" #include "Framework/RuntimeError.h" -#include "Framework/Signpost.h" #include #include #include @@ -19,93 +17,48 @@ #include #include #include -#include #include -#include #include #include #include #include #include #include -#include #include -#include -#include +template class + std::shared_ptr; -O2_DECLARE_DYNAMIC_LOG(root_arrow_fs); - -namespace -{ -struct BranchInfo { - std::string name; - TBranch* ptr; - bool mVLA; -}; -} // namespace - -auto arrowTypeFromROOT(EDataType type, int size) -{ - auto typeGenerator = [](std::shared_ptr const& type, int size) -> std::shared_ptr { - switch (size) { - case -1: - return arrow::list(type); - case 1: - return std::move(type); - default: - return arrow::fixed_size_list(type, size); - } - }; - - switch (type) { - case EDataType::kBool_t: - return typeGenerator(arrow::boolean(), size); - case EDataType::kUChar_t: - return typeGenerator(arrow::uint8(), size); - case EDataType::kUShort_t: - return typeGenerator(arrow::uint16(), size); - case EDataType::kUInt_t: - return typeGenerator(arrow::uint32(), size); - case EDataType::kULong64_t: - return typeGenerator(arrow::uint64(), size); - case EDataType::kChar_t: - return typeGenerator(arrow::int8(), size); - case EDataType::kShort_t: - return typeGenerator(arrow::int16(), size); - case EDataType::kInt_t: - return typeGenerator(arrow::int32(), size); - case EDataType::kLong64_t: - return typeGenerator(arrow::int64(), size); - case EDataType::kFloat_t: - return typeGenerator(arrow::float32(), size); - case EDataType::kDouble_t: - return typeGenerator(arrow::float64(), size); - default: - throw o2::framework::runtime_error_f("Unsupported branch type: %d", static_cast(type)); - } -} namespace o2::framework { 
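// Illustration (not part of this patch): with the two capabilities above
// registered, resolving the backend for an object stored in a TFile reduces
// to probing each capability in registration order and asking the matching
// one for its lazily loaded RootArrowFactory; `file` and `path` here stand
// for any TDirectoryFile* and object path:
//
//   RootObjectReadingFactory factory; // filled via PluginManager::loadFromPlugin
//   for (auto& capability : factory.capabilities) {
//     if (void* handle = capability.getHandle(file, path)) {
//       auto fs = capability.factory().getSubFilesystem(handle);
//       // fs now wraps the TTree or RNTuple behind a common interface
//       break;
//     }
//   }
//
// TFileFileSystem::GetSubFilesystem below implements this probing loop.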
using arrow::Status;
-TFileFileSystem::TFileFileSystem(TDirectoryFile* f, size_t readahead)
+TFileFileSystem::TFileFileSystem(TDirectoryFile* f, size_t readahead, RootObjectReadingFactory& factory)
   : VirtualRootFileSystemBase(),
-    mFile(f)
+    mFile(f),
+    mObjectFactory(factory)
 {
   ((TFile*)mFile)->SetReadaheadSize(50 * 1024 * 1024);
 }
 std::shared_ptr<VirtualRootFileSystemBase> TFileFileSystem::GetSubFilesystem(arrow::dataset::FileSource source)
 {
-  auto tree = (TTree*)mFile->GetObjectChecked(source.path().c_str(), TClass::GetClass<TTree>());
-  if (tree) {
-    return std::shared_ptr<VirtualRootFileSystemBase>(new SingleTreeFileSystem(tree));
+  // We use a plugin to create the actual objects inside the
+  // file, so that we can support TTree and RNTuple at the same time
+  // without having to depend on both.
+  for (auto& capability : mObjectFactory.capabilities) {
+    void* handle = capability.getHandle(mFile, source.path());
+    if (handle) {
+      return capability.factory().getSubFilesystem(handle);
+    }
   }
   auto directory = (TDirectoryFile*)mFile->GetObjectChecked(source.path().c_str(), TClass::GetClass<TDirectoryFile>());
   if (directory) {
-    return std::shared_ptr<VirtualRootFileSystemBase>(new TFileFileSystem(directory, 50 * 1024 * 1024));
+    return std::shared_ptr<VirtualRootFileSystemBase>(new TFileFileSystem(directory, 50 * 1024 * 1024, mObjectFactory));
   }
   throw runtime_error_f("Unsupported file layout");
 }
@@ -120,10 +73,14 @@ arrow::Result<arrow::fs::FileInfo> TFileFileSystem::GetFileInfo(const std::strin
   auto fs = GetSubFilesystem(source);
   // For now we only support single trees.
-  if (std::dynamic_pointer_cast<TTreeFileSystem>(fs)) {
-    result.set_type(arrow::fs::FileType::File);
+  if (std::dynamic_pointer_cast<TFileFileSystem>(fs)) {
+    result.set_type(arrow::fs::FileType::Directory);
     return result;
   }
+  // Everything else is a file, if it was created.
+  if (fs.get()) {
+    result.set_type(arrow::fs::FileType::File);
+  }
   return result;
 }
@@ -137,7 +94,7 @@ arrow::Result<std::shared_ptr<arrow::io::OutputStream>> TFileFileSystem::OpenOut
   auto* dir = dynamic_cast<TDirectoryFile*>(this->GetFile()->Get(path.c_str()));
   if (!dir) {
-    throw runtime_error_f("Unable to open directory %s in file %s", path.c_str(), GetFile()->GetName());
+    return arrow::Status::Invalid(fmt::format("Unable to open directory {} in file {}", path.c_str(), GetFile()->GetName()));
   }
   auto stream = std::make_shared<TDirectoryFileOutputStream>(dir);
   return stream;
@@ -219,81 +176,6 @@ arrow::Result<std::shared_ptr<arrow::io::RandomAccessFile>> VirtualRootFileSystemBas
   return arrow::Status::NotImplemented("No random access file system");
 }
-arrow::Result<std::shared_ptr<arrow::Schema>> TTreeFileFormat::Inspect(const arrow::dataset::FileSource& source) const
-{
-  arrow::Schema schema{{}};
-  auto fs = std::dynamic_pointer_cast<VirtualRootFileSystemBase>(source.filesystem());
-  // Actually get the TTree from the ROOT file.
- auto treeFs = std::dynamic_pointer_cast(fs->GetSubFilesystem(source)); - if (!treeFs.get()) { - throw runtime_error_f("Unknown filesystem %s\n", source.filesystem()->type_name().c_str()); - } - TTree* tree = treeFs->GetTree(source); - - auto branches = tree->GetListOfBranches(); - auto n = branches->GetEntries(); - - std::vector branchInfos; - for (auto i = 0; i < n; ++i) { - auto branch = static_cast(branches->At(i)); - auto name = std::string{branch->GetName()}; - auto pos = name.find("_size"); - if (pos != std::string::npos) { - name.erase(pos); - branchInfos.emplace_back(BranchInfo{name, (TBranch*)nullptr, true}); - } else { - auto lookup = std::find_if(branchInfos.begin(), branchInfos.end(), [&](BranchInfo const& bi) { - return bi.name == name; - }); - if (lookup == branchInfos.end()) { - branchInfos.emplace_back(BranchInfo{name, branch, false}); - } else { - lookup->ptr = branch; - } - } - } - - std::vector> fields; - tree->SetCacheSize(25000000); - for (auto& bi : branchInfos) { - static TClass* cls; - EDataType type; - bi.ptr->GetExpectedType(cls, type); - auto listSize = -1; - if (!bi.mVLA) { - listSize = static_cast(bi.ptr->GetListOfLeaves()->At(0))->GetLenStatic(); - } - auto field = std::make_shared(bi.ptr->GetName(), arrowTypeFromROOT(type, listSize)); - fields.push_back(field); - - tree->AddBranchToCache(bi.ptr); - if (strncmp(bi.ptr->GetName(), "fIndexArray", strlen("fIndexArray")) == 0) { - std::string sizeBranchName = bi.ptr->GetName(); - sizeBranchName += "_size"; - auto* sizeBranch = (TBranch*)tree->GetBranch(sizeBranchName.c_str()); - if (sizeBranch) { - tree->AddBranchToCache(sizeBranch); - } - } - } - tree->StopCacheLearningPhase(); - - return std::make_shared(fields); -} - -/// \brief Create a FileFragment for a FileSource. -arrow::Result> TTreeFileFormat::MakeFragment( - arrow::dataset::FileSource source, arrow::compute::Expression partition_expression, - std::shared_ptr physical_schema) -{ - std::shared_ptr format = std::make_shared(mTotCompressedSize, mTotUncompressedSize); - - auto fragment = std::make_shared(std::move(source), std::move(format), - std::move(partition_expression), - std::move(physical_schema)); - return std::dynamic_pointer_cast(fragment); -} - // An arrow outputstream which allows to write to a ttree TDirectoryFileOutputStream::TDirectoryFileOutputStream(TDirectoryFile* f) : mDirectory(f) @@ -321,544 +203,14 @@ bool TDirectoryFileOutputStream::closed() const return mDirectory->GetFile()->IsOpen() == false; } -// An arrow outputstream which allows to write to a ttree -// @a branch prefix is to be used to identify a set of branches which all belong to -// the same table. -TTreeOutputStream::TTreeOutputStream(TTree* f, std::string branchPrefix) - : mTree(f), - mBranchPrefix(std::move(branchPrefix)) -{ -} - -arrow::Status TTreeOutputStream::Close() -{ - if (mTree->GetCurrentFile() == nullptr) { - return arrow::Status::Invalid("Cannot close a tree not attached to a file"); - } - mTree->GetCurrentFile()->Close(); - return arrow::Status::OK(); -} - -arrow::Result TTreeOutputStream::Tell() const -{ - return arrow::Result(arrow::Status::NotImplemented("Cannot move")); -} - -arrow::Status TTreeOutputStream::Write(const void* data, int64_t nbytes) -{ - return arrow::Status::NotImplemented("Cannot write raw bytes to a TTree"); -} - -bool TTreeOutputStream::closed() const -{ - // A standalone tree is never closed. 
- if (mTree->GetCurrentFile() == nullptr) { - return false; - } - return mTree->GetCurrentFile()->IsOpen() == false; -} - -TBranch* TTreeOutputStream::CreateBranch(char const* branchName, char const* sizeBranch) -{ - return mTree->Branch((mBranchPrefix + "/" + branchName).c_str(), (char*)nullptr, (mBranchPrefix + sizeBranch).c_str()); -} - -char const* rootSuffixFromArrow(arrow::Type::type id) -{ - switch (id) { - case arrow::Type::BOOL: - return "/O"; - case arrow::Type::UINT8: - return "/b"; - case arrow::Type::UINT16: - return "/s"; - case arrow::Type::UINT32: - return "/i"; - case arrow::Type::UINT64: - return "/l"; - case arrow::Type::INT8: - return "/B"; - case arrow::Type::INT16: - return "/S"; - case arrow::Type::INT32: - return "/I"; - case arrow::Type::INT64: - return "/L"; - case arrow::Type::FLOAT: - return "/F"; - case arrow::Type::DOUBLE: - return "/D"; - default: - throw runtime_error("Unsupported arrow column type"); - } -} - -class TTreeFileWriter : public arrow::dataset::FileWriter -{ - std::vector branches; - std::vector sizesBranches; - std::vector> valueArrays; - std::vector> sizeArrays; - std::vector> valueTypes; - - std::vector valuesIdealBasketSize; - std::vector sizeIdealBasketSize; - - std::vector typeSizes; - std::vector listSizes; - bool firstBasket = true; - - // This is to create a batsket size according to the first batch. - void finaliseBasketSize(std::shared_ptr firstBatch) - { - O2_SIGNPOST_ID_FROM_POINTER(sid, root_arrow_fs, this); - O2_SIGNPOST_START(root_arrow_fs, sid, "finaliseBasketSize", "First batch with %lli rows received and %zu columns", - firstBatch->num_rows(), firstBatch->columns().size()); - for (size_t i = 0; i < branches.size(); i++) { - auto* branch = branches[i]; - auto* sizeBranch = sizesBranches[i]; - - int valueSize = valueTypes[i]->byte_width(); - if (listSizes[i] == 1) { - O2_SIGNPOST_EVENT_EMIT(root_arrow_fs, sid, "finaliseBasketSize", "Branch %s exists and uses %d bytes per entry for %lli entries.", - branch->GetName(), valueSize, firstBatch->num_rows()); - assert(sizeBranch == nullptr); - branch->SetBasketSize(1024 + firstBatch->num_rows() * valueSize); - } else if (listSizes[i] == -1) { - O2_SIGNPOST_EVENT_EMIT(root_arrow_fs, sid, "finaliseBasketSize", "Branch %s exists and uses %d bytes per entry.", - branch->GetName(), valueSize); - // This should probably lookup the - auto column = firstBatch->GetColumnByName(schema_->field(i)->name()); - auto list = std::static_pointer_cast(column); - O2_SIGNPOST_EVENT_EMIT(root_arrow_fs, sid, "finaliseBasketSize", "Branch %s needed. Associated size branch %s and there are %lli entries of size %d in that list.", - branch->GetName(), sizeBranch->GetName(), list->length(), valueSize); - branch->SetBasketSize(1024 + firstBatch->num_rows() * valueSize * list->length()); - sizeBranch->SetBasketSize(1024 + firstBatch->num_rows() * 4); - } else { - O2_SIGNPOST_EVENT_EMIT(root_arrow_fs, sid, "finaliseBasketSize", "Branch %s needed. 
There are %lli entries per array of size %d in that list.", - branch->GetName(), listSizes[i], valueSize); - assert(sizeBranch == nullptr); - branch->SetBasketSize(1024 + firstBatch->num_rows() * valueSize * listSizes[i]); - } - - auto field = firstBatch->schema()->field(i); - if (field->name().starts_with("fIndexArray")) { - // One int per array to keep track of the size - int idealBasketSize = 4 * firstBatch->num_rows() + 1024 + field->type()->byte_width() * firstBatch->num_rows(); // minimal additional size needed, otherwise we get 2 baskets - int basketSize = std::max(32000, idealBasketSize); // keep a minimum value - sizeBranch->SetBasketSize(basketSize); - branch->SetBasketSize(basketSize); - } - } - O2_SIGNPOST_END(root_arrow_fs, sid, "finaliseBasketSize", "Done"); - } - - public: - // Create the TTree based on the physical_schema, not the one in the batch. - // The write method will have to reconcile the two schemas. - TTreeFileWriter(std::shared_ptr schema, std::shared_ptr options, - std::shared_ptr destination, - arrow::fs::FileLocator destination_locator) - : FileWriter(schema, options, destination, destination_locator) - { - // Batches have the same number of entries for each column. - auto directoryStream = std::dynamic_pointer_cast(destination_); - auto treeStream = std::dynamic_pointer_cast(destination_); - - if (directoryStream.get()) { - TDirectoryFile* dir = directoryStream->GetDirectory(); - dir->cd(); - auto* tree = new TTree(destination_locator_.path.c_str(), ""); - treeStream = std::make_shared(tree, ""); - } else if (treeStream.get()) { - // We already have a tree stream, let's derive a new one - // with the destination_locator_.path as prefix for the branches - // This way we can multiplex multiple tables in the same tree. - auto tree = treeStream->GetTree(); - treeStream = std::make_shared(tree, destination_locator_.path); - } else { - // I could simply set a prefix here to merge to an already existing tree. - throw std::runtime_error("Unsupported backend."); - } - - for (auto i = 0u; i < schema->fields().size(); ++i) { - auto& field = schema->field(i); - listSizes.push_back(1); - - int valuesIdealBasketSize = 0; - // Construct all the needed branches. - switch (field->type()->id()) { - case arrow::Type::FIXED_SIZE_LIST: { - listSizes.back() = std::static_pointer_cast(field->type())->list_size(); - valuesIdealBasketSize = 1024 + valueTypes.back()->byte_width() * listSizes.back(); - valueTypes.push_back(field->type()->field(0)->type()); - sizesBranches.push_back(nullptr); - std::string leafList = fmt::format("{}[{}]{}", field->name(), listSizes.back(), rootSuffixFromArrow(valueTypes.back()->id())); - branches.push_back(treeStream->CreateBranch(field->name().c_str(), leafList.c_str())); - } break; - case arrow::Type::LIST: { - valueTypes.push_back(field->type()->field(0)->type()); - std::string leafList = fmt::format("{}[{}_size]{}", field->name(), field->name(), rootSuffixFromArrow(valueTypes.back()->id())); - listSizes.back() = -1; // VLA, we need to calculate it on the fly; - std::string sizeLeafList = field->name() + "_size/I"; - sizesBranches.push_back(treeStream->CreateBranch((field->name() + "_size").c_str(), sizeLeafList.c_str())); - branches.push_back(treeStream->CreateBranch(field->name().c_str(), leafList.c_str())); - // Notice that this could be replaced by a better guess of the - // average size of the list elements, but this is not trivial. 
- } break; - default: { - valueTypes.push_back(field->type()); - std::string leafList = field->name() + rootSuffixFromArrow(valueTypes.back()->id()); - sizesBranches.push_back(nullptr); - branches.push_back(treeStream->CreateBranch(field->name().c_str(), leafList.c_str())); - } break; - } - } - // We create the branches from the schema - } - - arrow::Status Write(const std::shared_ptr& batch) override - { - if (firstBasket) { - firstBasket = false; - finaliseBasketSize(batch); - } - - // Support writing empty tables - if (batch->columns().empty() || batch->num_rows() == 0) { - return arrow::Status::OK(); - } - - // Batches have the same number of entries for each column. - auto directoryStream = std::dynamic_pointer_cast(destination_); - TTree* tree = nullptr; - if (directoryStream.get()) { - TDirectoryFile* dir = directoryStream->GetDirectory(); - tree = (TTree*)dir->Get(destination_locator_.path.c_str()); - } - auto treeStream = std::dynamic_pointer_cast(destination_); - - if (!tree) { - // I could simply set a prefix here to merge to an already existing tree. - throw std::runtime_error("Unsupported backend."); - } - - for (auto i = 0u; i < batch->columns().size(); ++i) { - auto column = batch->column(i); - auto& field = batch->schema()->field(i); - - valueArrays.push_back(nullptr); - - switch (field->type()->id()) { - case arrow::Type::FIXED_SIZE_LIST: { - auto list = std::static_pointer_cast(column); - valueArrays.back() = list->values(); - } break; - case arrow::Type::LIST: { - auto list = std::static_pointer_cast(column); - valueArrays.back() = list; - } break; - case arrow::Type::BOOL: { - // In case of arrays of booleans, we need to go back to their - // char based representation for ROOT to save them. - auto boolArray = std::static_pointer_cast(column); - - int64_t length = boolArray->length(); - arrow::UInt8Builder builder; - auto ok = builder.Reserve(length); - - for (int64_t i = 0; i < length; ++i) { - if (boolArray->IsValid(i)) { - // Expand each boolean value (true/false) to uint8 (1/0) - uint8_t value = boolArray->Value(i) ? 
1 : 0; - auto ok = builder.Append(value); - } else { - // Append null for invalid entries - auto ok = builder.AppendNull(); - } - } - valueArrays.back() = *builder.Finish(); - } break; - default: - valueArrays.back() = column; - } - } - - int64_t pos = 0; - while (pos < batch->num_rows()) { - for (size_t bi = 0; bi < branches.size(); ++bi) { - auto* branch = branches[bi]; - auto* sizeBranch = sizesBranches[bi]; - auto array = batch->column(bi); - auto& field = batch->schema()->field(bi); - auto& listSize = listSizes[bi]; - auto valueType = valueTypes[bi]; - auto valueArray = valueArrays[bi]; - - switch (field->type()->id()) { - case arrow::Type::LIST: { - auto list = std::static_pointer_cast(array); - listSize = list->value_length(pos); - uint8_t const* buffer = std::static_pointer_cast(valueArray)->values()->data() + array->offset() + list->value_offset(pos) * valueType->byte_width(); - branch->SetAddress((void*)buffer); - sizeBranch->SetAddress(&listSize); - }; - break; - case arrow::Type::FIXED_SIZE_LIST: - default: { - uint8_t const* buffer = std::static_pointer_cast(valueArray)->values()->data() + array->offset() + pos * listSize * valueType->byte_width(); - branch->SetAddress((void*)buffer); - }; - } - } - tree->Fill(); - ++pos; - } - return arrow::Status::OK(); - } - - arrow::Future<> FinishInternal() override - { - auto treeStream = std::dynamic_pointer_cast(destination_); - TTree* tree = treeStream->GetTree(); - tree->Write("", TObject::kOverwrite); - tree->SetDirectory(nullptr); - - return {}; - }; -}; - -arrow::Result> TTreeFileFormat::MakeWriter(std::shared_ptr destination, std::shared_ptr schema, std::shared_ptr options, arrow::fs::FileLocator destination_locator) const -{ - auto writer = std::make_shared(schema, options, destination, destination_locator); - return std::dynamic_pointer_cast(writer); -} - -std::shared_ptr TTreeFileFormat::DefaultWriteOptions() -{ - std::shared_ptr options( - new TTreeFileWriteOptions(shared_from_this())); - return options; -} - -arrow::Result TTreeFileFormat::ScanBatchesAsync( - const std::shared_ptr& options, - const std::shared_ptr& fragment) const -{ - // Get the fragment as a TTreeFragment. This might be PART of a TTree. 
- auto treeFragment = std::dynamic_pointer_cast(fragment); - // This is the schema we want to read - auto dataset_schema = options->dataset_schema; - - auto generator = [pool = options->pool, treeFragment, dataset_schema, &totalCompressedSize = mTotCompressedSize, - &totalUncompressedSize = mTotUncompressedSize]() -> arrow::Future> { - auto schema = treeFragment->format()->Inspect(treeFragment->source()); - - std::vector> columns; - std::vector> fields = dataset_schema->fields(); - auto physical_schema = *treeFragment->ReadPhysicalSchema(); - - static TBufferFile buffer{TBuffer::EMode::kWrite, 4 * 1024 * 1024}; - auto containerFS = std::dynamic_pointer_cast(treeFragment->source().filesystem()); - auto fs = std::dynamic_pointer_cast(containerFS->GetSubFilesystem(treeFragment->source())); - - int64_t rows = -1; - TTree* tree = fs->GetTree(treeFragment->source()); - for (auto& field : fields) { - // The field actually on disk - auto physicalField = physical_schema->GetFieldByName(field->name()); - TBranch* branch = tree->GetBranch(physicalField->name().c_str()); - assert(branch); - buffer.Reset(); - auto totalEntries = branch->GetEntries(); - if (rows == -1) { - rows = totalEntries; - } - if (rows != totalEntries) { - throw runtime_error_f("Unmatching number of rows for branch %s", branch->GetName()); - } - arrow::Status status; - int readEntries = 0; - std::shared_ptr array; - auto listType = std::dynamic_pointer_cast(physicalField->type()); - if (physicalField->type() == arrow::boolean() || - (listType && physicalField->type()->field(0)->type() == arrow::boolean())) { - if (listType) { - std::unique_ptr builder = nullptr; - auto status = arrow::MakeBuilder(pool, physicalField->type()->field(0)->type(), &builder); - if (!status.ok()) { - throw runtime_error("Cannot create value builder"); - } - auto listBuilder = std::make_unique(pool, std::move(builder), listType->list_size()); - auto valueBuilder = listBuilder.get()->value_builder(); - // boolean array special case: we need to use builder to create the bitmap - status = valueBuilder->Reserve(totalEntries * listType->list_size()); - status &= listBuilder->Reserve(totalEntries); - if (!status.ok()) { - throw runtime_error("Failed to reserve memory for array builder"); - } - while (readEntries < totalEntries) { - auto readLast = branch->GetBulkRead().GetBulkEntries(readEntries, buffer); - readEntries += readLast; - status &= static_cast(valueBuilder)->AppendValues(reinterpret_cast(buffer.GetCurrent()), readLast * listType->list_size()); - } - status &= static_cast(listBuilder.get())->AppendValues(readEntries); - if (!status.ok()) { - throw runtime_error("Failed to append values to array"); - } - status &= listBuilder->Finish(&array); - if (!status.ok()) { - throw runtime_error("Failed to create array"); - } - } else if (listType == nullptr) { - std::unique_ptr builder = nullptr; - auto status = arrow::MakeBuilder(pool, physicalField->type(), &builder); - if (!status.ok()) { - throw runtime_error("Cannot create builder"); - } - auto valueBuilder = static_cast(builder.get()); - // boolean array special case: we need to use builder to create the bitmap - status = valueBuilder->Reserve(totalEntries); - if (!status.ok()) { - throw runtime_error("Failed to reserve memory for array builder"); - } - while (readEntries < totalEntries) { - auto readLast = branch->GetBulkRead().GetBulkEntries(readEntries, buffer); - readEntries += readLast; - status &= valueBuilder->AppendValues(reinterpret_cast(buffer.GetCurrent()), readLast); - } - if (!status.ok()) { 
- throw runtime_error("Failed to append values to array"); - } - status &= valueBuilder->Finish(&array); - if (!status.ok()) { - throw runtime_error("Failed to create array"); - } - } - } else { - // other types: use serialized read to build arrays directly. - auto typeSize = physicalField->type()->byte_width(); - // This is needed for branches which have not been persisted. - auto bytes = branch->GetTotBytes(); - auto branchSize = bytes ? bytes : 1000000; - auto&& result = arrow::AllocateResizableBuffer(branchSize, pool); - if (!result.ok()) { - throw runtime_error("Cannot allocate values buffer"); - } - std::shared_ptr arrowValuesBuffer = std::move(result).ValueUnsafe(); - auto ptr = arrowValuesBuffer->mutable_data(); - if (ptr == nullptr) { - throw runtime_error("Invalid buffer"); - } - - std::unique_ptr offsetBuffer = nullptr; - - uint32_t offset = 0; - int count = 0; - std::shared_ptr arrowOffsetBuffer; - std::span offsets; - int size = 0; - uint32_t totalSize = 0; - TBranch* mSizeBranch = nullptr; - int64_t listSize = 1; - if (auto fixedSizeList = std::dynamic_pointer_cast(physicalField->type())) { - listSize = fixedSizeList->list_size(); - typeSize = fixedSizeList->field(0)->type()->byte_width(); - } else if (auto vlaListType = std::dynamic_pointer_cast(physicalField->type())) { - listSize = -1; - typeSize = vlaListType->field(0)->type()->byte_width(); - } - if (listSize == -1) { - mSizeBranch = branch->GetTree()->GetBranch((std::string{branch->GetName()} + "_size").c_str()); - offsetBuffer = std::make_unique(TBuffer::EMode::kWrite, 4 * 1024 * 1024); - result = arrow::AllocateResizableBuffer((totalEntries + 1) * (int64_t)sizeof(int), pool); - if (!result.ok()) { - throw runtime_error("Cannot allocate offset buffer"); - } - arrowOffsetBuffer = std::move(result).ValueUnsafe(); - unsigned char* ptrOffset = arrowOffsetBuffer->mutable_data(); - auto* tPtrOffset = reinterpret_cast(ptrOffset); - offsets = std::span{tPtrOffset, tPtrOffset + totalEntries + 1}; - - // read sizes first - while (readEntries < totalEntries) { - auto readLast = mSizeBranch->GetBulkRead().GetEntriesSerialized(readEntries, *offsetBuffer); - readEntries += readLast; - for (auto i = 0; i < readLast; ++i) { - offsets[count++] = (int)offset; - offset += swap32_(reinterpret_cast(offsetBuffer->GetCurrent())[i]); - } - } - offsets[count] = (int)offset; - totalSize = offset; - readEntries = 0; - } - - while (readEntries < totalEntries) { - auto readLast = branch->GetBulkRead().GetEntriesSerialized(readEntries, buffer); - if (listSize == -1) { - size = offsets[readEntries + readLast] - offsets[readEntries]; - } else { - size = readLast * listSize; - } - readEntries += readLast; - swapCopy(ptr, buffer.GetCurrent(), size, typeSize); - ptr += (ptrdiff_t)(size * typeSize); - } - if (listSize >= 1) { - totalSize = readEntries * listSize; - } - std::shared_ptr varray; - switch (listSize) { - case -1: - varray = std::make_shared(physicalField->type()->field(0)->type(), totalSize, arrowValuesBuffer); - array = std::make_shared(physicalField->type(), readEntries, arrowOffsetBuffer, varray); - break; - case 1: - array = std::make_shared(physicalField->type(), readEntries, arrowValuesBuffer); - break; - default: - varray = std::make_shared(physicalField->type()->field(0)->type(), totalSize, arrowValuesBuffer); - array = std::make_shared(physicalField->type(), readEntries, varray); - } - } - - branch->SetStatus(false); - branch->DropBaskets("all"); - branch->Reset(); - branch->GetTransientBuffer(0)->Expand(0); - - 
columns.push_back(array); - } - auto batch = arrow::RecordBatch::Make(dataset_schema, rows, columns); - totalCompressedSize += tree->GetZipBytes(); - totalUncompressedSize += tree->GetTotBytes(); - return batch; - }; - return generator; -} - -arrow::Result> TTreeFileSystem::OpenOutputStream( - const std::string& path, - const std::shared_ptr& metadata) -{ - arrow::dataset::FileSource source{path, shared_from_this()}; - auto prefix = metadata->Get("branch_prefix"); - if (prefix.ok()) { - return std::make_shared(GetTree(source), *prefix); - } - return std::make_shared(GetTree(source), ""); -} - -TBufferFileFS::TBufferFileFS(TBufferFile* f) +TBufferFileFS::TBufferFileFS(TBufferFile* f, RootObjectReadingFactory& factory) : VirtualRootFileSystemBase(), mBuffer(f), - mFilesystem(nullptr) + mFilesystem(nullptr), + mObjectFactory(factory) { } -TTreeFileSystem::~TTreeFileSystem() = default; - arrow::Result TBufferFileFS::GetFileInfo(const std::string& path) { arrow::fs::FileInfo result; @@ -871,19 +223,26 @@ arrow::Result TBufferFileFS::GetFileInfo(const std::string& return result; } - // For now we only support single trees. - if (std::dynamic_pointer_cast(mFilesystem)) { - result.set_type(arrow::fs::FileType::File); + auto info = mFilesystem->GetFileInfo(path); + if (!info.ok()) { return result; } + + result.set_type(info->type()); return result; } std::shared_ptr TBufferFileFS::GetSubFilesystem(arrow::dataset::FileSource source) { - if (!mFilesystem.get()) { - auto tree = ((TTree*)mBuffer->ReadObject(TTree::Class())); - mFilesystem = std::make_shared(tree); + // We use a plugin to create the actual objects inside the + // file, so that we can support TTree and RNTuple at the same time + // without having to depend on both. + for (auto& capability : mObjectFactory.capabilities) { + void* handle = capability.getBufferHandle(mBuffer, source.path()); + if (handle) { + mFilesystem = capability.factory().getSubFilesystem(handle); + break; + } } return mFilesystem; } diff --git a/Framework/Core/test/test_Root2ArrowTable.cxx b/Framework/Core/test/test_Root2ArrowTable.cxx index 8440e942903a5..8eb3a9825f0f7 100644 --- a/Framework/Core/test/test_Root2ArrowTable.cxx +++ b/Framework/Core/test/test_Root2ArrowTable.cxx @@ -14,6 +14,7 @@ #include "Framework/TableBuilder.h" #include "Framework/RootTableBuilderHelpers.h" #include "Framework/ASoA.h" +#include "Framework/PluginManager.h" #include "../src/ArrowDebugHelpers.h" #include @@ -26,6 +27,13 @@ #include #include #include +#include +#include +#include +#include +#include +#include +#include #include #include @@ -232,10 +240,31 @@ TEST_CASE("RootTree2Fragment") file->WriteObjectAny(&t1, t1.Class()); auto* fileRead = new TBufferFile(TBuffer::kRead, file->BufferSize(), file->Buffer(), false, nullptr); - size_t totalSizeCompressed = 0; - size_t totalSizeUncompressed = 0; - auto format = std::make_shared(totalSizeCompressed, totalSizeUncompressed); - auto fs = std::make_shared(fileRead); + std::vector capabilitiesSpecs = { + "O2Framework:RNTupleObjectReadingCapability", + "O2Framework:TTreeObjectReadingCapability", + }; + + std::vector plugins; + for (auto spec : capabilitiesSpecs) { + auto morePlugins = PluginManager::parsePluginSpecString(spec); + for (auto& extra : morePlugins) { + plugins.push_back(extra); + } + } + REQUIRE(plugins.size() == 2); + + RootObjectReadingFactory factory; + std::vector configDiscoverySpec = {}; + PluginManager::loadFromPlugin(plugins, factory.capabilities); + REQUIRE(factory.capabilities.size() == 2); + 
+  REQUIRE(factory.capabilities[0].name == "rntuple");
+  REQUIRE(factory.capabilities[1].name == "ttree");
+
+  // Plugins are hardcoded for now...
+  auto format = factory.capabilities[1].factory().format();
+
+  auto fs = std::make_shared<TBufferFileFS>(fileRead, factory);

   arrow::dataset::FileSource source("p", fs);
   REQUIRE(format->IsSupported(source) == true);
@@ -439,10 +468,34 @@
   }
   f->Write();

-  size_t totalSizeCompressed = 0;
-  size_t totalSizeUncompressed = 0;
-  auto format = std::make_shared<TTreeFileFormat>(totalSizeCompressed, totalSizeUncompressed);
-  auto fs = std::make_shared<TFileFileSystem>(f, 50 * 1024 * 1024);
+  std::vector<char const*> capabilitiesSpecs = {
+    "O2Framework:RNTupleObjectReadingCapability",
+    "O2Framework:TTreeObjectReadingCapability",
+  };
+
+  RootObjectReadingFactory factory;
+
+  std::vector<LoadablePlugin> plugins;
+  for (auto spec : capabilitiesSpecs) {
+    auto morePlugins = PluginManager::parsePluginSpecString(spec);
+    for (auto& extra : morePlugins) {
+      plugins.push_back(extra);
+    }
+  }
+  REQUIRE(plugins.size() == 2);
+
+  PluginManager::loadFromPlugin<RootObjectReadingCapability, RootObjectReadingCapabilityPlugin>(plugins, factory.capabilities);
+
+  REQUIRE(factory.capabilities.size() == 2);
+  REQUIRE(factory.capabilities[0].name == "rntuple");
+  REQUIRE(factory.capabilities[1].name == "ttree");
+
+  // Plugins are hardcoded for now...
+  auto rNtupleFormat = factory.capabilities[0].factory().format();
+  auto format = factory.capabilities[1].factory().format();
+
+  auto fs = std::make_shared<TFileFileSystem>(f, 50 * 1024 * 1024, factory);
+
   arrow::dataset::FileSource source("DF_2/tracks", fs);
   REQUIRE(format->IsSupported(source) == true);
   auto schemaOpt = format->Inspect(source);
@@ -464,7 +517,7 @@
   validateContents(*result);

   auto* output = new TMemFile("foo", "RECREATE");
-  auto outFs = std::make_shared<TFileFileSystem>(output, 0);
+  auto outFs = std::make_shared<TFileFileSystem>(output, 0, factory);

   // Open a stream at toplevel
   auto destination = outFs->OpenOutputStream("/", {});
@@ -503,4 +556,38 @@
     REQUIRE((*resultWritten)->num_rows() == 100);
     validateContents(*resultWritten);
   }
+  arrow::fs::FileLocator rnTupleLocator{outFs, "/rntuple"};
+  // We write an RNTuple in the same TMemFile, using /rntuple as a location
+  auto rntupleDestination = std::dynamic_pointer_cast<TDirectoryFileOutputStream>(*destination);
+
+  {
+    auto rNtupleWriter = rNtupleFormat->MakeWriter(*destination, schema, {}, rnTupleLocator);
+    auto rNtupleSuccess = rNtupleWriter->get()->Write(*result);
+    REQUIRE(rNtupleSuccess.ok());
+  }
+
+  // And now we can read back the RNTuple into a RecordBatch
+  arrow::dataset::FileSource writtenRntupleSource("/rntuple", outFs);
+  auto newRNTupleFS = outFs->GetSubFilesystem(writtenRntupleSource);
+
+  REQUIRE(rNtupleFormat->IsSupported(writtenRntupleSource) == true);
+
+  auto rntupleSchemaOpt = rNtupleFormat->Inspect(writtenRntupleSource);
+  REQUIRE(rntupleSchemaOpt.ok());
+  auto rntupleSchemaWritten = *rntupleSchemaOpt;
+  REQUIRE(validateSchema(rntupleSchemaWritten));
+
+  auto rntupleFragmentWritten = rNtupleFormat->MakeFragment(writtenRntupleSource, {}, rntupleSchemaWritten);
+  REQUIRE(rntupleFragmentWritten.ok());
+  auto rntupleOptionsWritten = std::make_shared<arrow::dataset::ScanOptions>();
+  rntupleOptionsWritten->dataset_schema = rntupleSchemaWritten;
+  auto rntupleScannerWritten = rNtupleFormat->ScanBatchesAsync(rntupleOptionsWritten, *rntupleFragmentWritten);
+  REQUIRE(rntupleScannerWritten.ok());
+  auto rntupleBatchesWritten = (*rntupleScannerWritten)();
+  auto rntupleResultWritten = rntupleBatchesWritten.result();
+  REQUIRE(rntupleResultWritten.ok());
+  REQUIRE((*rntupleResultWritten)->columns().size() == 10);
+  REQUIRE(validateSchema((*rntupleResultWritten)->schema()));
+  REQUIRE((*rntupleResultWritten)->num_rows() == 100);
+  REQUIRE(validateContents(*rntupleResultWritten));
 }
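
For reference, the bulk-read pattern in the removed generator code above is easier to follow in isolation. The following is a minimal sketch, not part of the patch: it reads a flat float branch with ROOT's bulk API into an Arrow buffer and wraps it in an arrow::PrimitiveArray. It assumes a persisted, flat branch and a little-endian host; the name readFloatBranch is illustrative only and not an O2 or ROOT API.

  #include <memory>
  #include <stdexcept>

  #include <arrow/api.h>

  #include <ROOT/TBulkBranchRead.hxx>
  #include <TBranch.h>
  #include <TBufferFile.h>
  #include <TTree.h>

  std::shared_ptr<arrow::Array> readFloatBranch(TBranch* branch)
  {
    int64_t totalEntries = branch->GetTree()->GetEntries();
    auto res = arrow::AllocateResizableBuffer(totalEntries * (int64_t)sizeof(float));
    if (!res.ok()) {
      throw std::runtime_error("Cannot allocate values buffer");
    }
    std::shared_ptr<arrow::Buffer> values = std::move(res).ValueUnsafe();
    auto* out = reinterpret_cast<uint32_t*>(values->mutable_data());

    TBufferFile buffer(TBuffer::EMode::kWrite, 4 * 1024 * 1024);
    int64_t readEntries = 0;
    while (readEntries < totalEntries) {
      // One basket worth of entries, still in ROOT's big-endian on-disk layout.
      auto readLast = branch->GetBulkRead().GetEntriesSerialized(readEntries, buffer);
      if (readLast < 0) {
        throw std::runtime_error("Bulk read failed");
      }
      auto* in = reinterpret_cast<uint32_t*>(buffer.GetCurrent());
      for (int i = 0; i < readLast; ++i) {
        out[readEntries + i] = __builtin_bswap32(in[i]); // big-endian -> host order
      }
      readEntries += readLast;
    }
    return std::make_shared<arrow::PrimitiveArray>(arrow::float32(), totalEntries, values);
  }

This mirrors what the removed code did generically for any fixed-width type via swapCopy(); keeping the payload in a single preallocated buffer is what lets the branch content become an Arrow array without a per-entry builder.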
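
A companion sketch for the variable-length-array path: the offsets buffer is a running sum over the per-entry sizes (read in the patch from the companion "<branch>_size" branch), with one extra trailing entry, and values plus offsets then combine into an arrow::ListArray. Again a sketch only: makeFloatListArray and its parameters are illustrative, and valuesBuffer is assumed to already hold the byte-swapped float payload as produced in the previous sketch.

  #include <cstdint>
  #include <memory>
  #include <stdexcept>
  #include <vector>

  #include <arrow/api.h>

  std::shared_ptr<arrow::Array> makeFloatListArray(const std::vector<int32_t>& sizes,
                                                   std::shared_ptr<arrow::Buffer> valuesBuffer)
  {
    auto res = arrow::AllocateBuffer((int64_t)(sizes.size() + 1) * sizeof(int32_t));
    if (!res.ok()) {
      throw std::runtime_error("Cannot allocate offset buffer");
    }
    std::shared_ptr<arrow::Buffer> offsetsBuffer = std::move(res).ValueUnsafe();
    auto* offsets = reinterpret_cast<int32_t*>(offsetsBuffer->mutable_data());

    int32_t offset = 0;
    for (size_t i = 0; i < sizes.size(); ++i) {
      offsets[i] = offset; // start of entry i in the flattened values
      offset += sizes[i];
    }
    offsets[sizes.size()] = offset; // end marker: total number of values

    auto values = std::make_shared<arrow::PrimitiveArray>(arrow::float32(), offset, valuesBuffer);
    return std::make_shared<arrow::ListArray>(arrow::list(arrow::float32()),
                                              (int64_t)sizes.size(), offsetsBuffer, values);
  }

The N+1 offsets layout is exactly what the removed code builds in arrowOffsetBuffer before constructing the arrow::ListArray in the listSize == -1 branch of the switch.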