From c4d2ad8fa95be19d0c097b6d2ad09250eba793c8 Mon Sep 17 00:00:00 2001
From: Shamser Ahmed
Date: Thu, 27 Jun 2024 09:49:59 +0100
Subject: [PATCH] HPCC-32138 Capture all temp file stats (including read stats).

Implement a generic mergeStats function that will remap to the stat names before setting them

Signed-off-by: Shamser Ahmed
---
Review notes (this section is not part of the commit message):
- This copy of the patch had lost its line breaks and every token written
  inside angle brackets. The template parameter lists, the std::map
  arguments, Shared<INTERFACE> and the added <map> include follow from the
  code itself. The two context includes (<vector>, <initializer_list>), the
  exact indentation and the author's e-mail address could not be recovered
  from the text - TODO confirm against the tree before applying.
- mergeRemappedStats() walks the kinds listed in `mapping`. A kind that is a
  key in `remap` is merged into `stats` under the kind it maps to; any other
  kind is merged under its own kind. A null source is a no-op. The two
  shorter overloads pass stats.queryMapping() as the mapping.
- CSpill now records StNumSpills with setStatistic(StNumSpills, 1), where the
  removed getStatistic() returned 1 for that kind - NOTE(review): confirm
  this keeps the intended count if `stats` is shared by several spill files.

 system/jlib/jstats.h                          | 29 +++++++++++++++++++
 .../hashdistrib/thhashdistribslave.cpp        | 21 ++------------
 thorlcr/thorutil/thormisc.cpp                 |  5 ++++
 thorlcr/thorutil/thormisc.hpp                 |  3 ++
 4 files changed, 40 insertions(+), 18 deletions(-)

diff --git a/system/jlib/jstats.h b/system/jlib/jstats.h
index 253d640c94d..c98d66be9bb 100644
--- a/system/jlib/jstats.h
+++ b/system/jlib/jstats.h
@@ -23,6 +23,7 @@
 #include "jmutex.hpp"
 #include <vector>
 #include <initializer_list>
+#include <map>
 
 #include "jstatcodes.h"
 
@@ -874,6 +875,34 @@ void setStat(CRuntimeStatisticCollection & stats, INTERFACE * source, StatisticK
 template <class INTERFACE>
 void setStat(CRuntimeStatisticCollection & stats, const Shared<INTERFACE> & source, StatisticKind kind) { setStat(stats, source.get(), kind); }
 
+template <class INTERFACE>
+void mergeRemappedStats(CRuntimeStatisticCollection & stats, INTERFACE * source, const StatisticsMapping & mapping, const std::map<StatisticKind, StatisticKind> & remap)
+{
+    if (!source)
+        return;
+    unsigned max = mapping.numStatistics();
+    for (unsigned i=0; i < max; i++)
+    {
+        StatisticKind kind = mapping.getKind(i);
+        if (auto matched=remap.find(kind); matched != remap.end())
+            stats.mergeStatistic(matched->second, source->getStatistic(kind));
+        else
+            stats.mergeStatistic(kind, source->getStatistic(kind));
+    }
+}
+
+template <class INTERFACE>
+void mergeRemappedStats(CRuntimeStatisticCollection & stats, INTERFACE * source, const std::map<StatisticKind, StatisticKind> & remap)
+{
+    mergeRemappedStats(stats, source, stats.queryMapping(), remap);
+}
+
+template <class INTERFACE>
+void mergeRemappedStats(CRuntimeStatisticCollection & stats, const Shared<INTERFACE> & source, const std::map<StatisticKind, StatisticKind> & remap)
+{
+    mergeRemappedStats(stats, source.get(), stats.queryMapping(), remap);
+}
+
 //---------------------------------------------------------------------------------------------------------------------
 //A class for minimizing the overhead of collecting timestamps.
 
diff --git a/thorlcr/activities/hashdistrib/thhashdistribslave.cpp b/thorlcr/activities/hashdistrib/thhashdistribslave.cpp
index 0b814f94b63..8de5547d344 100644
--- a/thorlcr/activities/hashdistrib/thhashdistribslave.cpp
+++ b/thorlcr/activities/hashdistrib/thhashdistribslave.cpp
@@ -2760,26 +2760,11 @@ class CSpill : implements IRowWriter, public CSimpleInterface
         ::Release(writer);
         writer = NULL;
         spillFileIO->flush();
-        mergeStats(stats, this);
-        spillFile->noteSize(getStatistic(StSizeSpillFile));
+        mergeRemappedStats(stats, spillFileIO, diskToSpillStatsMap);
+        stats.setStatistic(StNumSpills, 1);
+        spillFile->noteSize(spillFileIO->getStatistic(StSizeDiskWrite));
         spillFileIO.clear();
     }
-    inline __int64 getStatistic(StatisticKind kind) const
-    {
-        switch (kind)
-        {
-            case StSizeSpillFile:
-                return spillFileIO->getStatistic(StSizeDiskWrite);
-            case StTimeSortElapsed:
-                return spillFileIO->getStatistic(StTimeDiskWriteIO);
-            case StSizeDiskWrite:
-                return 0; // Return file size as StSizeSpillFile kind. To avoid confusion, StSizeDiskWrite will not be returned
-            case StNumSpills:
-                return 1;
-            default:
-                return spillFileIO->getStatistic(kind);
-        }
-    }
 // IRowWriter
     virtual void putRow(const void *row) override
     {
diff --git a/thorlcr/thorutil/thormisc.cpp b/thorlcr/thorutil/thormisc.cpp
index 48dc1231ee1..f69488918a7 100644
--- a/thorlcr/thorutil/thormisc.cpp
+++ b/thorlcr/thorutil/thormisc.cpp
@@ -95,6 +95,11 @@ const StatisticsMapping soapcallActivityStatistics({}, basicActivityStatistics,
 const StatisticsMapping hashDedupActivityStatistics({StNumSpills, StSizeSpillFile, StTimeSortElapsed, StSizePeakTempDisk}, diskWriteRemoteStatistics, basicActivityStatistics);
 const StatisticsMapping hashDistribActivityStatistics({StNumLocalRows, StNumRemoteRows, StSizeRemoteWrite}, basicActivityStatistics);
 
+const std::map<StatisticKind, StatisticKind> diskToSpillStatsMap
+={ {StSizeDiskWrite, StSizeSpillFile},
+   {StTimeDiskWriteIO, StTimeSortElapsed}
+ };
+
 MODULE_INIT(INIT_PRIORITY_STANDARD)
 {
     ClusterMPAllocator.setown(createMPtagRangeAllocator(MPTAG_THORGLOBAL_BASE,MPTAG_THORGLOBAL_COUNT));
diff --git a/thorlcr/thorutil/thormisc.hpp b/thorlcr/thorutil/thormisc.hpp
index cb259a7053a..45232edef3e 100644
--- a/thorlcr/thorutil/thormisc.hpp
+++ b/thorlcr/thorutil/thormisc.hpp
@@ -162,6 +162,9 @@ extern graph_decl const StatisticsMapping indexReadFileStatistics;
 extern graph_decl const StatisticsMapping hashDedupActivityStatistics;
 extern graph_decl const StatisticsMapping hashDistribActivityStatistics;
 
+// Maps disk related stats to spill stats
+extern graph_decl const std::map<StatisticKind, StatisticKind> diskToSpillStatsMap;
+
 class BooleanOnOff
 {
     bool &tf;