Skip to content

Commit

Permalink
Improve logging in case of OOM (AliceO2Group#13786)
Browse files Browse the repository at this point in the history
  • Loading branch information
mconcas authored Dec 10, 2024
1 parent 24e05f9 commit f4f8f43
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,7 @@ class TimeFrame
void printCellLUTonLayer(int i);
void printTrackletLUTs();
void printCellLUTs();
void printROFInfo(const int rofId);

IndexTableUtils mIndexTableUtils;

Expand Down
14 changes: 14 additions & 0 deletions Detectors/ITSMFT/ITS/tracking/src/TimeFrame.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -608,5 +608,19 @@ void TimeFrame::printNClsPerROF()
std::cout << std::endl;
}
}

/// Print a short summary of one ROF to std::cout: the number of clusters found on
/// each layer, the number of primary ("seeding") vertices and, for every vertex,
/// its x/y/z coordinates and number of contributors.
/// \param rofId index of the ROF to dump; forwarded unchanged to getClustersOnLayer()
///              and getPrimaryVertices(). No range check is done here.
void TimeFrame::printROFInfo(const int rofId)
{
  // std::endl is kept on every line on purpose: this dump is emitted right before a
  // memory-limit error is reported, so each line is flushed as soon as it is written.
  std::cout << "ROF " << rofId << " dump:" << std::endl;

  // Convert the container size once so the loop compares int with int instead of
  // mixing a signed index with the unsigned size_type returned by size().
  const int nLayers = static_cast<int>(mClusters.size());
  for (int iLayer{0}; iLayer < nLayers; ++iLayer) {
    std::cout << "Layer " << iLayer << " has: " << getClustersOnLayer(rofId, iLayer).size() << " clusters." << std::endl;
  }

  // Fetch the vertices once and reuse the result for both the count and the listing.
  // Binding to const auto& works whether the getter returns a view by value or a reference.
  const auto& vertices = getPrimaryVertices(rofId);
  std::cout << "Number of seeding vertices: " << vertices.size() << std::endl;

  int iVertex{0};
  for (const auto& vertex : vertices) {
    // The two spaces before "y=" reproduce the original output byte-for-byte.
    std::cout << "\t vertex " << iVertex++ << ": x=" << vertex.getX() << "  y=" << vertex.getY() << " z=" << vertex.getZ() << " has " << vertex.getNContributors() << " contributors." << std::endl;
  }
}

} // namespace its
} // namespace o2
15 changes: 11 additions & 4 deletions Detectors/ITSMFT/ITS/tracking/src/Tracker.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ namespace o2
{
namespace its
{
using o2::its::constants::GB;

Tracker::Tracker(o2::its::TrackerTraits* traits)
{
Expand Down Expand Up @@ -74,25 +75,31 @@ void Tracker::clustersToTracks(std::function<void(std::string s)> logger, std::f
&Tracker::computeTracklets, "Tracklet finding", [](std::string) {}, iteration, iROFs, iVertex);
nTracklets += mTraits->getTFNumberOfTracklets();
if (!mTimeFrame->checkMemory(mTrkParams[iteration].MaxMemory)) {
error(fmt::format("Too much memory used during trackleting in iteration {}, check the detector status and/or the selections.", iteration));
mTimeFrame->printROFInfo(iROFs);
error(fmt::format("Too much memory used during trackleting in iteration {} in ROF span {}-{}: {:.2f} GB. Current limit is {:.2f} GB, check the detector status and/or the selections.",
iteration, iROFs, iROFs + mTrkParams[iteration].nROFsPerIterations, mTimeFrame->getArtefactsMemory() / GB, mTrkParams[iteration].MaxMemory / GB));
break;
}
float trackletsPerCluster = mTraits->getTFNumberOfClusters() > 0 ? float(mTraits->getTFNumberOfTracklets()) / mTraits->getTFNumberOfClusters() : 0.f;
if (trackletsPerCluster > mTrkParams[iteration].TrackletsPerClusterLimit) {
error(fmt::format("Too many tracklets per cluster ({}) in iteration {}, check the detector status and/or the selections. Current limit is {}", trackletsPerCluster, iteration, mTrkParams[iteration].TrackletsPerClusterLimit));
error(fmt::format("Too many tracklets per cluster ({}) in iteration {} in ROF span {}-{}:, check the detector status and/or the selections. Current limit is {}",
trackletsPerCluster, iteration, iROFs, iROFs + mTrkParams[iteration].nROFsPerIterations, mTrkParams[iteration].TrackletsPerClusterLimit));
break;
}

timeCells += evaluateTask(
&Tracker::computeCells, "Cell finding", [](std::string) {}, iteration);
nCells += mTraits->getTFNumberOfCells();
if (!mTimeFrame->checkMemory(mTrkParams[iteration].MaxMemory)) {
error(fmt::format("Too much memory used during cell finding in iteration {}, check the detector status and/or the selections.", iteration));
mTimeFrame->printROFInfo(iROFs);
error(fmt::format("Too much memory used during cell finding in iteration {} in ROF span {}-{}: {:.2f} GB. Current limit is {:.2f} GB, check the detector status and/or the selections.",
iteration, iROFs, iROFs + mTrkParams[iteration].nROFsPerIterations, mTimeFrame->getArtefactsMemory() / GB, mTrkParams[iteration].MaxMemory / GB));
break;
}
float cellsPerCluster = mTraits->getTFNumberOfClusters() > 0 ? float(mTraits->getTFNumberOfCells()) / mTraits->getTFNumberOfClusters() : 0.f;
if (cellsPerCluster > mTrkParams[iteration].CellsPerClusterLimit) {
error(fmt::format("Too many cells per cluster ({}) in iteration {}, check the detector status and/or the selections. Current limit is {}", cellsPerCluster, iteration, mTrkParams[iteration].CellsPerClusterLimit));
error(fmt::format("Too many cells per cluster ({}) in iteration {} in ROF span {}-{}, check the detector status and/or the selections. Current limit is {}",
cellsPerCluster, iteration, iROFs, iROFs + mTrkParams[iteration].nROFsPerIterations, mTrkParams[iteration].CellsPerClusterLimit));
break;
}

Expand Down

0 comments on commit f4f8f43

Please sign in to comment.