Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Davidl root reader t directory #579

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 25 additions & 4 deletions include/podio/ROOTReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,17 @@ class ROOTReader {
///
/// @param filenames The filenames of all input files that should be read
void openFiles(const std::vector<std::string>& filenames);

/// Open trees for reading from the specified TDirectory.
///
/// This can be used with a TMemFile for in-memory operation via streaming.
/// The specified directory should contain all trees including metadata
/// and category trees.
///
/// NOTE(review): the reader appears to store plain (non-owning) pointers to
/// the trees it finds in dir, so presumably dir has to stay valid for as long
/// as this reader is used -- confirm and document the lifetime requirement.
///
/// @param dir The TDirectory to look for the podio trees in.
void openTDirectory(TDirectory *dir);

/// Read the next data entry for a given category.
///
Expand Down Expand Up @@ -127,16 +138,25 @@ class ROOTReader {
}

private:

void readMetaData();

/// Helper struct to group together all the necessary state to read / process
/// a given category. A "category" in this case describes all frames with the
/// same name which are constrained by the ROOT file structure that we use to
/// have the same contents. It encapsulates all state that is necessary for
/// reading from a TTree / TChain (i.e. collection infos, branches, ...)
struct CategoryInfo {
/// constructor from chain for more convenient map insertion
CategoryInfo(std::unique_ptr<TChain>&& c) : chain(std::move(c)) {
}
std::unique_ptr<TChain> chain{nullptr}; ///< The TChain with the data
CategoryInfo() : chain("unused"){}

// The copy constructor and assignment operators are explicitly deleted
// here since TChain has these declared private and therefore inaccessible.
CategoryInfo(const podio::ROOTReader::CategoryInfo&) = delete;
CategoryInfo& operator=(const podio::ROOTReader::CategoryInfo&) = delete;

TChain chain; ///< The TChain with the data (if reading from files)
TTree *tree = {nullptr}; ///< The TTree with the data (use this, not chain!)
unsigned entry{0}; ///< The next entry to read
std::vector<std::pair<std::string, detail::CollectionInfo>> storedClasses{}; ///< The stored collections in this
///< category
Expand Down Expand Up @@ -166,7 +186,8 @@ class ROOTReader {
podio::CollectionReadBuffers getCollectionBuffers(CategoryInfo& catInfo, size_t iColl, bool reloadBranches,
unsigned int localEntry);

std::unique_ptr<TChain> m_metaChain{nullptr}; ///< The metadata tree
TTree* m_metaTree{nullptr}; ///< The metadata tree (use this to access)
TChain m_metaChain{"unused"}; ///< A TChain (only used if reading from files. m_metaTree will point to this if needed)
std::unordered_map<std::string, CategoryInfo> m_categories{}; ///< All categories
std::vector<std::string> m_availCategories{}; ///< All available categories from this file

Expand Down
149 changes: 95 additions & 54 deletions src/ROOTReader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,11 @@
namespace podio {

std::tuple<std::vector<root_utils::CollectionBranches>, std::vector<std::pair<std::string, detail::CollectionInfo>>>
createCollectionBranches(TChain* chain, const podio::CollectionIDTable& idTable,
createCollectionBranches(TTree* tree, const podio::CollectionIDTable& idTable,
const std::vector<root_utils::CollectionInfoT>& collInfo);

std::tuple<std::vector<root_utils::CollectionBranches>, std::vector<std::pair<std::string, detail::CollectionInfo>>>
createCollectionBranchesIndexBased(TChain* chain, const podio::CollectionIDTable& idTable,
createCollectionBranchesIndexBased(TTree* tree, const podio::CollectionIDTable& idTable,
const std::vector<root_utils::CollectionInfoT>& collInfo);

GenericParameters ROOTReader::readEntryParameters(ROOTReader::CategoryInfo& catInfo, bool reloadBranches,
Expand All @@ -35,7 +35,7 @@ GenericParameters ROOTReader::readEntryParameters(ROOTReader::CategoryInfo& catI
// Make sure to have a valid branch pointer after switching trees in the chain
// as well as on the first event
if (reloadBranches) {
paramBranches.data = root_utils::getBranch(catInfo.chain.get(), root_utils::paramBranchName);
paramBranches.data = root_utils::getBranch(catInfo.tree, root_utils::paramBranchName);
}
auto* branch = paramBranches.data;

Expand All @@ -58,22 +58,30 @@ std::unique_ptr<ROOTFrameData> ROOTReader::readEntry(const std::string& name, co
}

std::unique_ptr<ROOTFrameData> ROOTReader::readEntry(ROOTReader::CategoryInfo& catInfo) {
if (!catInfo.chain) {
if (!catInfo.tree) {
return nullptr;
}
if (catInfo.entry >= catInfo.chain->GetEntries()) {
if (catInfo.entry >= catInfo.tree->GetEntries()) {
return nullptr;
}

// After switching trees in the chain, branch pointers get invalidated so
// they need to be reassigned.
// NOTE: root 6.22/06 requires that we get completely new branches here,
// with 6.20/04 we could just re-set them
const auto preTreeNo = catInfo.chain->GetTreeNumber();
const auto localEntry = catInfo.chain->LoadTree(catInfo.entry);
const auto treeChange = catInfo.chain->GetTreeNumber() != preTreeNo;
// Also need to make sure to handle the first event
const auto reloadBranches = treeChange || localEntry == 0;
// Initialize assuming catInfo.tree is a TTree and not a TChain
auto localEntry = catInfo.entry;
auto reloadBranches = (localEntry == 0);

// Handle case when catInfo.tree actually points to a TChain
if(catInfo.tree->IsA() == TChain::Class()){
// After switching trees in the chain, branch pointers get invalidated so
// they need to be reassigned.
// NOTE: root 6.22/06 requires that we get completely new branches here,
// with 6.20/04 we could just re-set them
auto chain = static_cast<TChain*>(catInfo.tree);
const auto preTreeNo = chain->GetTreeNumber();
localEntry = chain->LoadTree(catInfo.entry);
const auto treeChange = chain->GetTreeNumber() != preTreeNo;
// Also need to make sure to handle the first event
reloadBranches = treeChange || localEntry == 0;
}

ROOTFrameData::BufferMap buffers;
for (size_t i = 0; i < catInfo.storedClasses.size(); ++i) {
Expand All @@ -99,7 +107,7 @@ podio::CollectionReadBuffers ROOTReader::getCollectionBuffers(ROOTReader::Catego
auto collBuffers = maybeBuffers.value_or(podio::CollectionReadBuffers{});

if (reloadBranches) {
root_utils::resetBranches(catInfo.chain.get(), branches, name);
root_utils::resetBranches(catInfo.tree, branches, name);
}

// set the addresses and read the data
Expand All @@ -121,21 +129,21 @@ ROOTReader::CategoryInfo& ROOTReader::getCategoryInfo(const std::string& categor
return it->second;
}

// Use a nullptr TChain to signify an invalid category request
// Create empty CategoryInfo to signify an invalid category request
// TODO: Warn / log
static auto invalidCategory = CategoryInfo{nullptr};
static auto invalidCategory = CategoryInfo();

return invalidCategory;
}

void ROOTReader::initCategory(CategoryInfo& catInfo, const std::string& category) {
catInfo.table = std::make_shared<podio::CollectionIDTable>();
auto* table = catInfo.table.get();
auto* tableBranch = root_utils::getBranch(m_metaChain.get(), root_utils::idTableName(category));
auto* tableBranch = root_utils::getBranch(m_metaTree, root_utils::idTableName(category));
tableBranch->SetAddress(&table);
tableBranch->GetEntry(0);

auto* collInfoBranch = root_utils::getBranch(m_metaChain.get(), root_utils::collInfoName(category));
auto* collInfoBranch = root_utils::getBranch(m_metaTree, root_utils::collInfoName(category));

auto collInfo = new std::vector<root_utils::CollectionInfoT>();
if (m_fileVersion < podio::version::Version{0, 16, 4}) {
Expand All @@ -157,22 +165,22 @@ void ROOTReader::initCategory(CategoryInfo& catInfo, const std::string& category
// from older versions
if (m_fileVersion < podio::version::Version{0, 16, 99}) {
std::tie(catInfo.branches, catInfo.storedClasses) =
createCollectionBranchesIndexBased(catInfo.chain.get(), *catInfo.table, *collInfo);
createCollectionBranchesIndexBased(catInfo.tree, *catInfo.table, *collInfo);
} else {
std::tie(catInfo.branches, catInfo.storedClasses) =
createCollectionBranches(catInfo.chain.get(), *catInfo.table, *collInfo);
createCollectionBranches(catInfo.tree, *catInfo.table, *collInfo);
}

delete collInfo;

// Finally set up the branches for the parameters
root_utils::CollectionBranches paramBranches{};
paramBranches.data = root_utils::getBranch(catInfo.chain.get(), root_utils::paramBranchName);
paramBranches.data = root_utils::getBranch(catInfo.tree, root_utils::paramBranchName);
catInfo.branches.push_back(paramBranches);
}

std::vector<std::string> getAvailableCategories(TChain* metaChain) {
auto* branches = metaChain->GetListOfBranches();
std::vector<std::string> getAvailableCategories(TTree* metaTree) {
auto* branches = metaTree->GetListOfBranches();
std::vector<std::string> brNames;
brNames.reserve(branches->GetEntries());

Expand All @@ -189,54 +197,87 @@ std::vector<std::string> getAvailableCategories(TChain* metaChain) {
return brNames;
}

/// @brief Read version and data model from the m_metaTree
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The docstring needs to be in the header file in our doxygen configuration.

void ROOTReader::readMetaData() {
podio::version::Version* versionPtr{nullptr};
if (auto* versionBranch = root_utils::getBranch(m_metaTree, root_utils::versionBranchName)) {
versionBranch->SetAddress(&versionPtr);
versionBranch->GetEntry(0);
}
m_fileVersion = versionPtr ? *versionPtr : podio::version::Version{0, 0, 0};
delete versionPtr;

if (auto* edmDefBranch = root_utils::getBranch(m_metaTree, root_utils::edmDefBranchName)) {
auto* datamodelDefs = new DatamodelDefinitionHolder::MapType{};
edmDefBranch->SetAddress(&datamodelDefs);
edmDefBranch->GetEntry(0);
m_datamodelHolder = DatamodelDefinitionHolder(std::move(*datamodelDefs));
delete datamodelDefs;
}
}

void ROOTReader::openFile(const std::string& filename) {
  // A single file is simply the one-element case of reading several files.
  const std::vector<std::string> singleFile{filename};
  openFiles(singleFile);
}

void ROOTReader::openFiles(const std::vector<std::string>& filenames) {
m_metaChain = std::make_unique<TChain>(root_utils::metaTreeName);
m_metaChain.SetName(root_utils::metaTreeName);
// NOTE: We simply assume that the meta data doesn't change throughout the
// chain! This essentially boils down to the assumption that all files that
// are read this way were written with the same settings.
// Reading all files is done to check that all files exist
for (const auto& filename : filenames) {
if (!m_metaChain->Add(filename.c_str(), -1)) {
if (!m_metaChain.Add(filename.c_str(), -1)) {
throw std::runtime_error("File " + filename + " couldn't be found or the \"" + root_utils::metaTreeName +
"\" tree couldn't be read.");
}
}

podio::version::Version* versionPtr{nullptr};
if (auto* versionBranch = root_utils::getBranch(m_metaChain.get(), root_utils::versionBranchName)) {
versionBranch->SetAddress(&versionPtr);
versionBranch->GetEntry(0);
}
m_fileVersion = versionPtr ? *versionPtr : podio::version::Version{0, 0, 0};
delete versionPtr;
// Make m_metaTree point to m_metaChain. It is done this way in order
// to support both cases when files are used or a memory-resident TTree
// is used which cannot be part of a TChain.
m_metaTree = &m_metaChain;

if (auto* edmDefBranch = root_utils::getBranch(m_metaChain.get(), root_utils::edmDefBranchName)) {
auto* datamodelDefs = new DatamodelDefinitionHolder::MapType{};
edmDefBranch->SetAddress(&datamodelDefs);
edmDefBranch->GetEntry(0);
m_datamodelHolder = DatamodelDefinitionHolder(std::move(*datamodelDefs));
delete datamodelDefs;
}
// Read in version and data model info
readMetaData();

// Do some work up front for setting up categories and setup all the chains
// and record the available categories. The rest of the setup follows on
// demand when the category is first read
m_availCategories = ::podio::getAvailableCategories(m_metaChain.get());
m_availCategories = ::podio::getAvailableCategories(m_metaTree);
for (const auto& cat : m_availCategories) {
auto [it, _] = m_categories.try_emplace(cat, std::make_unique<TChain>(cat.c_str()));
auto [it, _] = m_categories.try_emplace(cat);
it->second.chain.SetName(cat.c_str());
for (const auto& fn : filenames) {
it->second.chain->Add(fn.c_str());
it->second.chain.Add(fn.c_str());
}
it->second.tree = &it->second.chain; // Make the tree point to our internal chain
}
}

void ROOTReader::openTDirectory(TDirectory* dir) {
  // Set up reading from trees that live in an already opened TDirectory
  // (instead of building TChains from file names).
  //
  // Throws std::runtime_error if dir is a nullptr or if it does not contain
  // the metadata tree; without it nothing else can be read.
  if (dir == nullptr) {
    throw std::runtime_error("Cannot read podio trees from a nullptr TDirectory.");
  }

  // dynamic_cast yields a nullptr both when there is no object with that name
  // and when the object is not a TTree, so one check covers both cases.
  auto* metaTree = dynamic_cast<TTree*>(dir->Get(root_utils::metaTreeName));
  if (metaTree == nullptr) {
    throw std::runtime_error(std::string("The \"") + root_utils::metaTreeName +
                             "\" tree couldn't be found in the given TDirectory.");
  }
  m_metaTree = metaTree;

  // Read in version and data model info
  readMetaData();

  // Do some work up front: record the available categories and remember the
  // tree that belongs to each of them. The rest of the setup follows on
  // demand when the category is first read.
  m_availCategories = ::podio::getAvailableCategories(m_metaTree);
  for (const auto& cat : m_availCategories) {
    // Categories that are listed in the metadata but have no tree in dir are
    // not registered.
    if (auto* tree = dynamic_cast<TTree*>(dir->Get(cat.c_str()))) {
      // Only the tree pointer is set; the category's internal chain stays
      // unused when reading from a directory.
      m_categories.try_emplace(cat).first->second.tree = tree;
    }
  }
}

unsigned ROOTReader::getEntries(const std::string& name) const {
if (auto it = m_categories.find(name); it != m_categories.end()) {
return it->second.chain->GetEntries();
return it->second.tree->GetEntries();
}

return 0;
Expand All @@ -252,7 +293,7 @@ std::vector<std::string_view> ROOTReader::getAvailableCategories() const {
}

std::tuple<std::vector<root_utils::CollectionBranches>, std::vector<std::pair<std::string, detail::CollectionInfo>>>
createCollectionBranchesIndexBased(TChain* chain, const podio::CollectionIDTable& idTable,
createCollectionBranchesIndexBased(TTree* tree, const podio::CollectionIDTable& idTable,
const std::vector<root_utils::CollectionInfoT>& collInfo) {

size_t collectionIndex{0};
Expand All @@ -275,23 +316,23 @@ createCollectionBranchesIndexBased(TChain* chain, const podio::CollectionIDTable
if (isSubsetColl) {
// Only one branch will exist and we can trivially get its name
auto brName = root_utils::refBranch(name, 0);
branches.refs.push_back(root_utils::getBranch(chain, brName.c_str()));
branches.refs.push_back(root_utils::getBranch(tree, brName.c_str()));
branches.refNames.emplace_back(std::move(brName));
} else {
// This branch is guaranteed to exist since only collections that are
// also written to file are in the info metadata that we work with here
branches.data = root_utils::getBranch(chain, name.c_str());
branches.data = root_utils::getBranch(tree, name.c_str());

const auto buffers = collection->getBuffers();
for (size_t i = 0; i < buffers.references->size(); ++i) {
auto brName = root_utils::refBranch(name, i);
branches.refs.push_back(root_utils::getBranch(chain, brName.c_str()));
branches.refs.push_back(root_utils::getBranch(tree, brName.c_str()));
branches.refNames.emplace_back(std::move(brName));
}

for (size_t i = 0; i < buffers.vectorMembers->size(); ++i) {
auto brName = root_utils::vecBranch(name, i);
branches.vecs.push_back(root_utils::getBranch(chain, brName.c_str()));
branches.vecs.push_back(root_utils::getBranch(tree, brName.c_str()));
branches.vecNames.emplace_back(std::move(brName));
}
}
Expand All @@ -304,7 +345,7 @@ createCollectionBranchesIndexBased(TChain* chain, const podio::CollectionIDTable
}

std::tuple<std::vector<root_utils::CollectionBranches>, std::vector<std::pair<std::string, detail::CollectionInfo>>>
createCollectionBranches(TChain* chain, const podio::CollectionIDTable& idTable,
createCollectionBranches(TTree* tree, const podio::CollectionIDTable& idTable,
const std::vector<root_utils::CollectionInfoT>& collInfo) {

size_t collectionIndex{0};
Expand All @@ -322,22 +363,22 @@ createCollectionBranches(TChain* chain, const podio::CollectionIDTable& idTable,
if (isSubsetColl) {
// Only one branch will exist and we can trivially get its name
auto brName = root_utils::subsetBranch(name);
branches.refs.push_back(root_utils::getBranch(chain, brName.c_str()));
branches.refs.push_back(root_utils::getBranch(tree, brName.c_str()));
branches.refNames.emplace_back(std::move(brName));
} else {
// This branch is guaranteed to exist since only collections that are
// also written to file are in the info metadata that we work with here
branches.data = root_utils::getBranch(chain, name.c_str());
branches.data = root_utils::getBranch(tree, name.c_str());

const auto relVecNames = podio::DatamodelRegistry::instance().getRelationNames(collType);
for (const auto& relName : relVecNames.relations) {
auto brName = root_utils::refBranch(name, relName);
branches.refs.push_back(root_utils::getBranch(chain, brName.c_str()));
branches.refs.push_back(root_utils::getBranch(tree, brName.c_str()));
branches.refNames.emplace_back(std::move(brName));
}
for (const auto& vecName : relVecNames.vectorMembers) {
auto brName = root_utils::refBranch(name, vecName);
branches.vecs.push_back(root_utils::getBranch(chain, brName.c_str()));
branches.vecs.push_back(root_utils::getBranch(tree, brName.c_str()));
branches.vecNames.emplace_back(std::move(brName));
}
}
Expand Down
1 change: 1 addition & 0 deletions tests/CTestCustom.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ if ((NOT "@FORCE_RUN_ALL_TESTS@" STREQUAL "ON") AND (NOT "@USE_SANITIZER@" STREQ
check_benchmark_outputs
read_frame_legacy_root
read_frame_root_multiple
read_frame_root_tdirectory
write_python_frame_root
read_python_frame_root
read_and_write_frame_root
Expand Down
2 changes: 2 additions & 0 deletions tests/root_io/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ set(root_dependent_tests
write_frame_root.cpp
read_python_frame_root.cpp
read_frame_root_multiple.cpp
read_frame_root_tdirectory.cpp
read_and_write_frame_root.cpp
)
if(ENABLE_RNTUPLE)
Expand All @@ -23,6 +24,7 @@ endforeach()
set_tests_properties(
read_frame_root
read_frame_root_multiple
read_frame_root_tdirectory
read_and_write_frame_root

PROPERTIES
Expand Down
Loading
Loading