Skip to content

Commit

Permalink
HPCC-32031 Generate summary information in workunit to speed up file …
Browse files Browse the repository at this point in the history
…list operations

Additional changes following review/regression test

Signed-off-by: Richard Chapman <[email protected]>
  • Loading branch information
richardkchapman committed Jun 26, 2024
1 parent 342ff2b commit e587736
Show file tree
Hide file tree
Showing 6 changed files with 38 additions and 9 deletions.
1 change: 1 addition & 0 deletions common/pkgfiles/referencedfilelist.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -968,6 +968,7 @@ bool ReferencedFileList::addFilesFromQuery(IConstWorkUnit *cw, const IHpccPackag
SummaryMap files;
if (cw->getSummary(SummaryType::ReadFile, files) &&
cw->getSummary(SummaryType::ReadIndex, files))
// MORE - should this also include ReadPersist?
{
for (const auto& [lName, isOpt] : files)
{
Expand Down
3 changes: 3 additions & 0 deletions common/workunit/workunit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8731,9 +8731,12 @@ static const char *summaryTypeName(SummaryType type)
{
case SummaryType::ReadFile: return "ReadFile";
case SummaryType::ReadIndex: return "ReadIndex";
case SummaryType::ReadPersist: return "ReadPersist";
case SummaryType::WriteFile: return "WriteFile";
case SummaryType::WriteIndex: return "WriteIndex";
case SummaryType::WritePersist: return "WritePersist";
case SummaryType::SpillFile: return "SpillFile";
case SummaryType::JobTemp: return "JobTemp";
case SummaryType::Service: return "Service";
default:
throwUnexpected();
Expand Down
7 changes: 6 additions & 1 deletion common/workunit/workunit.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1187,11 +1187,16 @@ enum class SummaryType
First,
ReadFile = First,
ReadIndex,
ReadPersist,
WriteFile,
WriteIndex,
WritePersist,
SpillFile,
JobTemp,
Service,
NumItems
// Keep these at the end
NumItems,
None = NumItems
};

struct ncasecomp {
Expand Down
15 changes: 9 additions & 6 deletions ecl/hqlcpp/hqlcpp.ipp
Original file line number Diff line number Diff line change
Expand Up @@ -2146,14 +2146,17 @@ protected:
SummaryMap summaries[(int) SummaryType::NumItems];
// Record that the file/service `name` is referenced with the given summary `type`.
//
// name  - key under which the reference is recorded in the per-type map.
// type  - which summary map to update; SummaryType::None means "do not record".
// isOpt - whether this particular reference is optional.
//
// The stored flag is true only while every reference seen so far was optional:
// an optional reference never overwrites an existing entry, whereas a
// non-optional reference always forces the entry to false.
void noteSummaryInfo(const char *name, SummaryType type, bool isOpt)
{
    // None aliases NumItems, which is one past the last valid index of
    // summaries[], so it must be rejected before the array is indexed.
    if (type == SummaryType::None)
        return;

    SummaryMap &map = summaries[static_cast<int>(type)];
    if (!isOpt)
    {
        map[name] = false;
        return;
    }

    // Optional reference: only add it if the name has not been seen yet, so a
    // previously recorded non-optional reference is not downgraded.
    if (map.find(name) == map.end())
        map[name] = true;
}
};

Expand Down
14 changes: 12 additions & 2 deletions ecl/hqlcpp/hqlhtcpp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10952,15 +10952,18 @@ ABoundActivity * HqlCppTranslator::doBuildActivityOutput(BuildCtx & ctx, IHqlExp
Owned<ABoundActivity> boundDataset = buildCachedActivity(ctx, dataset);
ThorActivityKind kind = TAKdiskwrite;
const char * activityArgName = "DiskWrite";
SummaryType summaryType = SummaryType::WriteFile;
if (expr->getOperator() == no_spill)
{
kind = TAKspill;
activityArgName = "Spill";
summaryType = SummaryType::SpillFile;
}
else if (pipe)
{
kind = TAKpipewrite;
activityArgName = "PipeWrite";
summaryType = SummaryType::None;
}
else if (csvAttr)
{
Expand All @@ -10973,7 +10976,14 @@ ABoundActivity * HqlCppTranslator::doBuildActivityOutput(BuildCtx & ctx, IHqlExp
activityArgName = "XmlWrite";
}
else if (expr->hasAttribute(_spill_Atom))
{
kind = TAKspillwrite;
summaryType = SummaryType::SpillFile;
}
if (expr->hasAttribute(jobTempAtom))
summaryType = SummaryType::JobTemp;
else if (expr->hasAttribute(_workflowPersist_Atom))
summaryType = SummaryType::WritePersist;

bool useImplementationClass = options.minimizeActivityClasses && targetRoxie() && expr->hasAttribute(_spill_Atom);
Owned<ActivityInstance> instance = new ActivityInstance(*this, ctx, kind, expr, activityArgName);
Expand Down Expand Up @@ -11071,7 +11081,7 @@ ABoundActivity * HqlCppTranslator::doBuildActivityOutput(BuildCtx & ctx, IHqlExp
if (filename && filename->getOperator() != no_pipe)
{
bool isDynamic = expr->hasAttribute(resultAtom) || hasDynamicFilename(expr);
buildFilenameFunction(*instance, instance->startctx, WaFilename, "getFileName", filename, isDynamic, SummaryType::WriteFile, false);
buildFilenameFunction(*instance, instance->startctx, WaFilename, "getFileName", filename, isDynamic, summaryType, false);
if (!filename->isConstant())
constFilename = false;
}
Expand Down Expand Up @@ -11173,7 +11183,7 @@ ABoundActivity * HqlCppTranslator::doBuildActivityOutput(BuildCtx & ctx, IHqlExp
{
assertex(tempCount.get() && !hasDynamic(expr));
instance->addConstructorParameter(tempCount);
addFilenameConstructorParameter(*instance, WaFilename, filename, SummaryType::WriteFile);
addFilenameConstructorParameter(*instance, WaFilename, filename, summaryType);
}

instance->addSignedAttribute(expr->queryAttribute(_signed_Atom));
Expand Down
7 changes: 7 additions & 0 deletions ecl/hqlcpp/hqlsource.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1184,7 +1184,14 @@ void SourceBuilder::buildFilenameMember()
case TAKindexgroupaggregate:
summaryType = SummaryType::ReadIndex;
break;
case TAKspillread:
summaryType = SummaryType::SpillFile;
break;
}
if (tableExpr->hasAttribute(jobTempAtom))
summaryType = SummaryType::JobTemp;
else if (tableExpr->hasAttribute(_workflowPersist_Atom))
summaryType = SummaryType::ReadPersist;
translator.buildFilenameFunction(*instance, instance->startctx, WaFilename, "getFileName", nameExpr, translator.hasDynamicFilename(tableExpr), summaryType, tableExpr->hasAttribute(optAtom));
}

Expand Down

0 comments on commit e587736

Please sign in to comment.