Skip to content

Commit

Permalink
HPCC-32031 Generate summary information in workunit to speed up file …
Browse files Browse the repository at this point in the history
…list operations

Track other flags besides isOpt, so that the summary information can also be used by roxie queryHash()

Signed-off-by: Richard Chapman <[email protected]>
  • Loading branch information
richardkchapman committed Jun 26, 2024
1 parent b6df4c9 commit bddfe0c
Show file tree
Hide file tree
Showing 7 changed files with 55 additions and 36 deletions.
4 changes: 2 additions & 2 deletions common/pkgfiles/referencedfilelist.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -969,11 +969,11 @@ bool ReferencedFileList::addFilesFromQuery(IConstWorkUnit *cw, const IHpccPackag
if (cw->getSummary(SummaryType::ReadFile, files) &&
cw->getSummary(SummaryType::ReadIndex, files))
{
for (const auto& [lName, isOpt] : files)
for (const auto& [lName, summaryFlags] : files)
{
const char *logicalName = lName.c_str();
StringArray subfileNames;
unsigned flags = isOpt ? RefFileOptional : RefFileNotOptional;
unsigned flags = (summaryFlags & SummaryFlags::IsOpt) ? RefFileOptional : RefFileNotOptional;
if (pkg)
{
const char *pkgid = pkg->locateSuperFile(logicalName);
Expand Down
17 changes: 11 additions & 6 deletions common/workunit/workunit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8756,10 +8756,15 @@ bool CLocalWorkUnit::getSummary(SummaryType type, SummaryMap &map) const
const char *name = s.item(idx);
if (name && *name)
{
bool isOpt = name[0]=='?';
name++;
if (!isOpt || map.find(name) == map.end())
map[name] = isOpt;
char *end = nullptr;
SummaryFlags flags = (SummaryFlags) strtol(name, &end, 16);
if (*end!=':')
return false; // unrecognized format
name = end+1;
if (map.find(name) == map.end())
map[name] = flags;
else
map[name] = map[name] & flags;
}
}
return true;
Expand All @@ -8768,11 +8773,11 @@ bool CLocalWorkUnit::getSummary(SummaryType type, SummaryMap &map) const
void CLocalWorkUnit::setSummary(SummaryType type, const SummaryMap &map)
{
StringBuffer list;
for (const auto& [name, isOpt] : map)
for (const auto& [name, flags] : map)
{
if (list.length())
list.append('\n');
list.appendf("%c%s", isOpt ? '?': '+', name.c_str());
list.appendf("%01x:%s", (unsigned) flags, name.c_str());
}
CriticalBlock block(crit);
IPropertyTree *summaries = ensurePTree(p, "Summaries");
Expand Down
10 changes: 9 additions & 1 deletion common/workunit/workunit.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1198,13 +1198,21 @@ enum class SummaryType
None = NumItems
};

// Flags recorded against each name held in a workunit summary map.
// Each enumerator is a distinct bit so that several flags can be combined in one value;
// the underlying storage type is byte.
enum SummaryFlags : byte
{
// No flags set.
None = 0,
// The name was referenced as optional.
IsOpt = 0x01,
// The name was referenced with the signed attribute.
IsSigned = 0x02,
};
// NOTE(review): presumably provides the bitwise operators (|, &, |=) for SummaryFlags - confirm against the macro definition.
BITMASK_ENUM(SummaryFlags);

// Ordering functor for std::string keys: lhs sorts before rhs when stricmp
// reports lhs as less than rhs, so keys differing only by case compare equal.
struct ncasecomp {
    bool operator() (const std::string& lhs, const std::string& rhs) const {
        const char *left = lhs.c_str();
        const char *right = rhs.c_str();
        const int order = stricmp(left, right);
        return order < 0;
    }
};

typedef std::map<std::string, bool, ncasecomp> SummaryMap;
typedef std::map<std::string, SummaryFlags, ncasecomp> SummaryMap;

interface IWorkUnit;
interface IUserDescriptor;
Expand Down
14 changes: 8 additions & 6 deletions ecl/hqlcpp/hqlckey.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,8 @@ class KeyedJoinInfo : public CInterface
bool needToExtractJoinFields() const { return extractJoinFieldsTransform != NULL; }
bool hasPostFilter() const { return monitors->queryExtraFilter() || fileFilter; }
bool requireActivityForKey() const { return hasComplexIndex; }
bool isKeySigned() { return key->hasAttribute(_signed_Atom); }
bool isFileSigned() { return file && file->hasAttribute(_signed_Atom); }

void reportFailureReason(IHqlExpression * cond) { monitors->reportFailureReason(cond); }
bool useValueSets() const { return createValueSets; }
Expand Down Expand Up @@ -1192,7 +1194,7 @@ void HqlCppTranslator::buildKeyedJoinExtra(ActivityInstance & instance, IHqlExpr

//virtual const char * getFileName() = 0; // Returns filename of raw file fpos'es refer into
if (info->isFullJoin())
buildFilenameFunction(instance, instance.createctx, WaFilename, "getFileName", info->queryFileFilename(), hasDynamicFilename(info->queryFile()), SummaryType::ReadFile, info->isKeyOpt());
buildFilenameFunction(instance, instance.createctx, WaFilename, "getFileName", info->queryFileFilename(), hasDynamicFilename(info->queryFile()), SummaryType::ReadFile, info->isKeyOpt(), info->isFileSigned());

//virtual bool diskAccessRequired() = 0;
if (info->isFullJoin())
Expand Down Expand Up @@ -1229,7 +1231,7 @@ void HqlCppTranslator::buildKeyJoinIndexReadHelper(ActivityInstance & instance,
info->buildExtractIndexReadFields(instance.startctx);

//virtual const char * getIndexFileName() = 0;
buildFilenameFunction(instance, instance.startctx, WaIndexname, "getIndexFileName", info->queryKeyFilename(), hasDynamicFilename(info->queryKey()), SummaryType::ReadIndex, info->isKeyOpt());
buildFilenameFunction(instance, instance.startctx, WaIndexname, "getIndexFileName", info->queryKeyFilename(), hasDynamicFilename(info->queryKey()), SummaryType::ReadIndex, info->isKeyOpt(), info->isKeySigned());

//virtual IOutputMetaData * queryIndexRecordSize() = 0;
LinkedHqlExpr indexExpr = info->queryOriginalKey();
Expand Down Expand Up @@ -1489,7 +1491,7 @@ ABoundActivity * HqlCppTranslator::doBuildActivityKeyedDistribute(BuildCtx & ctx
doBuildUnsignedFunction(instance->classctx, "getFlags", flags.str()+1);

//virtual const char * getIndexFileName() = 0;
buildFilenameFunction(*instance, instance->startctx, WaIndexname, "getIndexFileName", keyFilename, dynamic, SummaryType::ReadIndex, info.isKeyOpt());
buildFilenameFunction(*instance, instance->startctx, WaIndexname, "getIndexFileName", keyFilename, dynamic, SummaryType::ReadIndex, info.isKeyOpt(), info.isKeySigned());

//virtual IOutputMetaData * queryIndexRecordSize() = 0;
LinkedHqlExpr indexExpr = info.queryRawKey();
Expand Down Expand Up @@ -1583,7 +1585,7 @@ ABoundActivity * HqlCppTranslator::doBuildActivityKeyDiff(BuildCtx & ctx, IHqlEx
noteAllFieldsUsed(updated);

//virtual const char * getOutputName() = 0;
buildFilenameFunction(*instance, instance->startctx, WaOutputFilename, "getOutputName", output, hasDynamicFilename(expr), SummaryType::WriteFile, false);
buildFilenameFunction(*instance, instance->startctx, WaOutputFilename, "getOutputName", output, hasDynamicFilename(expr), SummaryType::WriteFile, false, expr->hasAttribute(_signed_Atom));

//virtual int getSequence() = 0;
doBuildSequenceFunc(instance->classctx, querySequence(expr), false);
Expand Down Expand Up @@ -1626,10 +1628,10 @@ ABoundActivity * HqlCppTranslator::doBuildActivityKeyPatch(BuildCtx & ctx, IHqlE
noteAllFieldsUsed(original);

//virtual const char * getPatchName() = 0;
buildFilenameFunction(*instance, instance->startctx, WaPatchFilename, "getPatchName", patch, true, SummaryType::ReadFile, false);
buildFilenameFunction(*instance, instance->startctx, WaPatchFilename, "getPatchName", patch, true, SummaryType::ReadFile, false, false);

//virtual const char * getOutputName() = 0;
buildFilenameFunction(*instance, instance->startctx, WaOutputFilename, "getOutputName", output, hasDynamicFilename(expr), SummaryType::WriteIndex, false);
buildFilenameFunction(*instance, instance->startctx, WaOutputFilename, "getOutputName", output, hasDynamicFilename(expr), SummaryType::WriteIndex, false, false);

//virtual int getSequence() = 0;
doBuildSequenceFunc(instance->classctx, querySequence(expr), false);
Expand Down
18 changes: 10 additions & 8 deletions ecl/hqlcpp/hqlcpp.ipp
Original file line number Diff line number Diff line change
Expand Up @@ -1887,7 +1887,7 @@ public:
void doBuildUserFunctionReturn(BuildCtx & ctx, ITypeInfo * type, IHqlExpression * value);

void addFilenameConstructorParameter(ActivityInstance & instance, WuAttr attr, IHqlExpression * expr, SummaryType summaryType);
void buildFilenameFunction(ActivityInstance & instance, BuildCtx & classctx, WuAttr attr, const char * name, IHqlExpression * expr, bool isDynamic, SummaryType summaryType, bool isOpt);
void buildFilenameFunction(ActivityInstance & instance, BuildCtx & classctx, WuAttr attr, const char * name, IHqlExpression * expr, bool isDynamic, SummaryType summaryType, bool isOpt, bool isSigned);
void buildRefFilenameFunction(ActivityInstance & instance, BuildCtx & classctx, WuAttr attr, const char * name, IHqlExpression * dataset);
void createAccessFunctions(StringBuffer & helperFunc, BuildCtx & declarectx, unsigned prio, const char * interfaceName, const char * object);

Expand All @@ -1911,7 +1911,7 @@ protected:
void buildIteratorNext(BuildCtx & ctx, IHqlExpression * iter, IHqlExpression * row);
bool shouldEvaluateSelectAsAlias(BuildCtx & ctx, IHqlExpression * expr);
IWUResult * createWorkunitResult(int sequence, IHqlExpression * nameExpr);
void noteFilename(ActivityInstance & instance, WuAttr attr, IHqlExpression * expr, bool isDynamic, SummaryType summaryType, bool isOpt);
void noteFilename(ActivityInstance & instance, WuAttr attr, IHqlExpression * expr, bool isDynamic, SummaryType summaryType, bool isOpt, bool isSigned);
bool checkGetResultContext(BuildCtx & ctx, IHqlExpression * expr, CHqlBoundExpr & tgt);
void buildGetResultInfo(BuildCtx & ctx, IHqlExpression * expr, CHqlBoundExpr * boundTarget, const CHqlBoundTarget * targetAssign);
void buildGetResultSetInfo(BuildCtx & ctx, IHqlExpression * expr, CHqlBoundExpr * boundTarget, const CHqlBoundTarget * targetAssign);
Expand Down Expand Up @@ -2144,18 +2144,20 @@ protected:

// These are used to generate workunit summary info, to avoid having to walk the xgmml to get it
SummaryMap summaries[(int) SummaryType::NumItems];
void noteSummaryInfo(const char *name, SummaryType type, bool isOpt)
void noteSummaryInfo(const char *name, SummaryType type, bool isOpt, bool isSigned)
{
if (type != SummaryType::None)
{
SummaryMap &map = summaries[(int) type];
SummaryFlags flags = SummaryFlags::None;
if (isOpt)
{
if (map.find(name) == map.end())
map[name] = true;
}
flags |= SummaryFlags::IsOpt;
if (isSigned)
flags |= SummaryFlags::IsSigned;
if (map.find(name) == map.end())
map[name] = flags;
else
map[name] = false;
map[name] = map[name] & flags;
}
}
};
Expand Down
20 changes: 10 additions & 10 deletions ecl/hqlcpp/hqlhtcpp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3386,17 +3386,17 @@ void HqlCppTranslator::addFilenameConstructorParameter(ActivityInstance & instan
{
OwnedHqlExpr folded = foldHqlExpression(expr);
instance.addConstructorParameter(folded);
noteFilename(instance, attr, folded, false, summaryType, false);
noteFilename(instance, attr, folded, false, summaryType, false, false);
}

void HqlCppTranslator::buildFilenameFunction(ActivityInstance & instance, BuildCtx & classctx, WuAttr attr, const char * name, IHqlExpression * expr, bool isDynamic, SummaryType summaryType, bool isOpt)
void HqlCppTranslator::buildFilenameFunction(ActivityInstance & instance, BuildCtx & classctx, WuAttr attr, const char * name, IHqlExpression * expr, bool isDynamic, SummaryType summaryType, bool isOpt, bool isSigned)
{
OwnedHqlExpr folded = foldHqlExpression(expr);
doBuildVarStringFunction(classctx, name, folded);
noteFilename(instance, attr, folded, isDynamic, summaryType, isOpt);
noteFilename(instance, attr, folded, isDynamic, summaryType, isOpt, isSigned);
}

void HqlCppTranslator::noteFilename(ActivityInstance & instance, WuAttr attr, IHqlExpression * expr, bool isDynamic, SummaryType summaryType, bool isOpt)
void HqlCppTranslator::noteFilename(ActivityInstance & instance, WuAttr attr, IHqlExpression * expr, bool isDynamic, SummaryType summaryType, bool isOpt, bool isSigned)
{
if (options.addFilesnamesToGraph)
{
Expand All @@ -3417,7 +3417,7 @@ void HqlCppTranslator::noteFilename(ActivityInstance & instance, WuAttr attr, IH
StringBuffer propValue;
folded->queryValue()->getStringValue(propValue);
instance.addAttribute(attr, propValue);
noteSummaryInfo(propValue, summaryType, isOpt);
noteSummaryInfo(propValue, summaryType, isOpt, isSigned);
}
}
if (isDynamic)
Expand Down Expand Up @@ -3477,7 +3477,7 @@ void HqlCppTranslator::buildRefFilenameFunction(ActivityInstance & instance, Bui
break;
}

buildFilenameFunction(instance, classctx, attr, name, filename, hasDynamicFilename(table), summaryType, table->hasAttribute(optAtom));
buildFilenameFunction(instance, classctx, attr, name, filename, hasDynamicFilename(table), summaryType, table->hasAttribute(optAtom), table->hasAttribute(_signed_Atom));
}

void HqlCppTranslator::buildConnectInputOutput(BuildCtx & ctx, ActivityInstance * instance, ABoundActivity * table, unsigned outputIndex, unsigned inputIndex, const char * label, bool nWay)
Expand Down Expand Up @@ -10669,7 +10669,7 @@ ABoundActivity * HqlCppTranslator::doBuildActivityOutputIndex(BuildCtx & ctx, IH
buildInstancePrefix(instance);

//virtual const char * getFileName() { return "x.d00"; }
buildFilenameFunction(*instance, instance->startctx, WaFilename, "getFileName", filename, hasDynamicFilename(expr), SummaryType::WriteIndex, false);
buildFilenameFunction(*instance, instance->startctx, WaFilename, "getFileName", filename, hasDynamicFilename(expr), SummaryType::WriteIndex, false, expr->hasAttribute(_signed_Atom));

//virtual unsigned getFlags() = 0;
IHqlExpression * updateAttr = expr->queryAttribute(updateAtom);
Expand Down Expand Up @@ -10720,7 +10720,7 @@ ABoundActivity * HqlCppTranslator::doBuildActivityOutputIndex(BuildCtx & ctx, IH

IHqlExpression * indexNameAttr = expr->queryAttribute(indexAtom);
if (indexNameAttr)
buildFilenameFunction(*instance, instance->startctx, WaDistributeIndexname, "getDistributeIndexName", indexNameAttr->queryChild(0), hasDynamicFilename(expr), SummaryType::ReadIndex, false);
buildFilenameFunction(*instance, instance->startctx, WaDistributeIndexname, "getDistributeIndexName", indexNameAttr->queryChild(0), hasDynamicFilename(expr), SummaryType::ReadIndex, false, expr->hasAttribute(_signed_Atom));

buildExpiryHelper(instance->createctx, expr->queryAttribute(expireAtom));
buildUpdateHelper(instance->createctx, *instance, dataset, updateAttr);
Expand Down Expand Up @@ -11081,7 +11081,7 @@ ABoundActivity * HqlCppTranslator::doBuildActivityOutput(BuildCtx & ctx, IHqlExp
if (filename && filename->getOperator() != no_pipe)
{
bool isDynamic = expr->hasAttribute(resultAtom) || hasDynamicFilename(expr);
buildFilenameFunction(*instance, instance->startctx, WaFilename, "getFileName", filename, isDynamic, summaryType, false);
buildFilenameFunction(*instance, instance->startctx, WaFilename, "getFileName", filename, isDynamic, summaryType, false, expr->hasAttribute(_signed_Atom));
if (!filename->isConstant())
constFilename = false;
}
Expand Down Expand Up @@ -18070,7 +18070,7 @@ ABoundActivity * HqlCppTranslator::doBuildActivitySOAP(BuildCtx & ctx, IHqlExpre
StringBuffer serviceName;
getUTF8Value(serviceName, service);
instance->addAttribute(WaServiceName, serviceName);
noteSummaryInfo(serviceName, SummaryType::Service, false);
noteSummaryInfo(serviceName, SummaryType::Service, false, false);
}

enum class ReqFormat { NONE, XML, JSON, FORM_ENCODED };
Expand Down
8 changes: 5 additions & 3 deletions ecl/hqlcpp/hqlsource.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1188,11 +1188,13 @@ void SourceBuilder::buildFilenameMember()
summaryType = SummaryType::SpillFile;
break;
}
if (tableExpr->hasAttribute(jobTempAtom))
if (tableExpr->hasAttribute(_spill_Atom))
summaryType = SummaryType::SpillFile;
else if (tableExpr->hasAttribute(jobTempAtom))
summaryType = SummaryType::JobTemp;
else if (tableExpr->hasAttribute(_workflowPersist_Atom))
summaryType = SummaryType::PersistFile;
translator.buildFilenameFunction(*instance, instance->startctx, WaFilename, "getFileName", nameExpr, translator.hasDynamicFilename(tableExpr), summaryType, tableExpr->hasAttribute(optAtom));
translator.buildFilenameFunction(*instance, instance->startctx, WaFilename, "getFileName", nameExpr, translator.hasDynamicFilename(tableExpr), summaryType, tableExpr->hasAttribute(optAtom), tableExpr->hasAttribute(_signed_Atom));
}

void SourceBuilder::buildReadMembers(IHqlExpression * expr)
Expand Down Expand Up @@ -4861,7 +4863,7 @@ ABoundActivity * HqlCppTranslator::doBuildActivityXmlRead(BuildCtx & ctx, IHqlEx
fieldUsage->noteAll();

//---- virtual const char * getFileName() { return "x.d00"; } ----
buildFilenameFunction(*instance, instance->startctx, WaFilename, "getFileName", filename, hasDynamicFilename(tableExpr), SummaryType::ReadIndex, tableExpr->hasAttribute(optAtom));
buildFilenameFunction(*instance, instance->startctx, WaFilename, "getFileName", filename, hasDynamicFilename(tableExpr), SummaryType::ReadIndex, tableExpr->hasAttribute(optAtom), tableExpr->hasAttribute(_signed_Atom));
buildEncryptHelper(instance->startctx, tableExpr->queryAttribute(encryptAtom));

bool usesContents = false;
Expand Down

0 comments on commit bddfe0c

Please sign in to comment.