Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

HPCC-33022 Fix pathological dali load time #19350

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 75 additions & 0 deletions dali/daliadmin/daadmin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2472,6 +2472,81 @@ void xmlSize(const char *filename, double pc)
}
}

void loadXMLTest(const char *filename, bool parseOnly, bool useLowMemPTree, bool saveFormattedXML)
{
OwnedIFile iFile = createIFile(filename);
OwnedIFileIO iFileIO = iFile->open(IFOread);
if (!iFileIO)
{
WARNLOG("File '%s' not found", filename);
return;
}

class CDummyPTreeMaker : public CSimpleInterfaceOf<IPTreeMaker>
{
StringBuffer xpath;
unsigned level = 0;
public:
virtual IPropertyTree *queryRoot() override { return nullptr; }
virtual IPropertyTree *queryCurrentNode() override { return nullptr; }
virtual void reset() override { }
virtual IPropertyTree *create(const char *tag) override { return nullptr; }
// IPTreeNotifyEvent impl.
virtual void beginNode(const char *tag, bool sequence, offset_t startOffset) override { }
virtual void newAttribute(const char *name, const char *value) override { }
virtual void beginNodeContent(const char *tag) override { }
virtual void endNode(const char *tag, unsigned length, const void *value, bool binary, offset_t endOffset) override { }
};

byte flags=ipt_none;
PTreeReaderOptions readFlags=ptr_ignoreWhiteSpace;
Owned<IPTreeMaker> iMaker;
if (!parseOnly)
{
PROGLOG("Creating property tree from file: %s", filename);
byte flags = ipt_none;
if (useLowMemPTree)
{
PROGLOG("Using low memory property trees");
flags = ipt_lowmem;
}
iMaker.setown(createPTreeMaker(flags));
}
else
{
PROGLOG("Reading property tree from file (without creating it): %s", filename);
iMaker.setown(new CDummyPTreeMaker());
}

offset_t fSize = iFileIO->size();
OwnedIFileIOStream stream = createIOStream(iFileIO);
OwnedIFileIOStream progressedIFileIOStream = createProgressIFileIOStream(stream, fSize, "Load progress", 1);
Owned<IPTreeReader> reader = createXMLStreamReader(*progressedIFileIOStream, *iMaker, readFlags);

ProcessInfo memInfo(ReadMemoryInfo);
__uint64 rss = memInfo.getActiveResidentMemory();
CCycleTimer timer;
reader->load();
memInfo.update(ReadMemoryInfo);
__uint64 rssUsed = memInfo.getActiveResidentMemory() - rss;
reader.clear();
progressedIFileIOStream.clear();
PROGLOG("Load took: %.2f - RSS consumed: %.2f MB", (float)timer.elapsedMs()/1000, (float)rssUsed/0x100000);

if (!parseOnly && saveFormattedXML)
{
assertex(iMaker->queryRoot());
StringBuffer outFilename(filename);
outFilename.append(".out.xml");
PROGLOG("Saving to %s", outFilename.str());
timer.reset();
saveXML(outFilename, iMaker->queryRoot(), 2);
PROGLOG("Save took: %.2f", (float)timer.elapsedMs()/1000);
}

::LINK(iMaker->queryRoot()); // intentionally leak (avoid time clearing up)
}

void translateToXpath(const char *logicalfile, DfsXmlBranchKind tailType)
{
CDfsLogicalFileName lfn;
Expand Down
1 change: 1 addition & 0 deletions dali/daliadmin/daadmin.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ namespace daadmin

extern DALIADMIN_API void setDaliConnectTimeoutMs(unsigned timeoutMs);
extern DALIADMIN_API void xmlSize(const char *filename, double pc);
extern DALIADMIN_API void loadXMLTest(const char *filename, bool parseOnly, bool useLowMemPTree, bool saveFormattedXML);
extern DALIADMIN_API void translateToXpath(const char *logicalfile, DfsXmlBranchKind tailType = DXB_File);

extern DALIADMIN_API void exportToFile(const char *path, const char *filename, bool safe = false);
Expand Down
15 changes: 15 additions & 0 deletions dali/daliadmin/daliadmin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,9 @@ void usage(const char *exe)
printf(" mpping <server-ip> -- time MP connect\n");
printf(" daliping [ <num> ] -- time dali server connect\n");
printf(" getxref <destxmlfile> -- get all XREF information\n");
printf(" loadxml <srcxmlfile> [--lowmem[=<true|false]] -- use lowmem AtomPTree's\n"
" [--parseonly[=<true|false]] -- parse the xml file, don't load it into dali\n"
" [--savexml[=<true|false]] -- save and time the parsed xml\n");
printf(" dalilocks [ <ip-pattern> ] [ files ] -- get all locked files/xpaths\n");
printf(" unlock <xpath or logicalfile> <[path|file]> -- unlocks either matching xpath(s) or matching logical file(s), can contain wildcards\n");
printf(" validatestore [fix=<true|false>]\n"
Expand Down Expand Up @@ -226,6 +229,18 @@ int main(int argc, const char* argv[])
}
else if (strieq(cmd, "remotetest"))
remoteTest(params.item(1), false);
else if (strieq(cmd, "loadxml"))
{
bool useLowMemPTree = false;
bool saveFormatedTree = false;
bool parseOnly = getComponentConfigSP()->getPropBool("@parseonly");
if (!parseOnly)
{
useLowMemPTree = getComponentConfigSP()->getPropBool("@lowmem");
saveFormatedTree = getComponentConfigSP()->getPropBool("@savexml");
}
loadXMLTest(params.item(1), parseOnly, useLowMemPTree, saveFormatedTree);
}
else
{
UERRLOG("Unknown command %s",cmd);
Expand Down
2 changes: 1 addition & 1 deletion system/jlib/jhash.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -305,7 +305,7 @@ bool HashTable::keyeq(const void *key1, const void *key2, int ksize) const

unsigned HashTable::hash(const void *key, int ksize) const
{
unsigned h = 0x811C9DC5;
unsigned h = fnvInitialHash32;
unsigned char *bp = (unsigned char *) key;
if (ksize<=0)
{
Expand Down
2 changes: 1 addition & 1 deletion system/jlib/jptree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3847,7 +3847,7 @@ unsigned CAtomPTree::queryHash() const
{
const char *_name = name.get();
size32_t nl = strlen(_name);
return isnocase() ? hashnc((const byte *) _name, nl, 0): hashc((const byte *) _name, nl, 0);
return isnocase() ? hashnc((const byte *) _name, nl, fnvInitialHash32): hashc((const byte *) _name, nl, fnvInitialHash32);
}
}

Expand Down
6 changes: 3 additions & 3 deletions system/jlib/jptree.ipp
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ protected:
virtual unsigned getHashFromElement(const void *e) const override;
virtual unsigned getHashFromFindParam(const void *fp) const override
{
return hashcz((const unsigned char *)fp, 0);
return hashcz((const unsigned char *)fp, fnvInitialHash32);
}
virtual bool matchesFindParam(const void *e, const void *fp, unsigned fphash) const override
{
Expand Down Expand Up @@ -108,7 +108,7 @@ public:
// SuperHashTable definitions
virtual unsigned getHashFromFindParam(const void *fp) const override
{
return hashncz((const unsigned char *)fp, 0);
return hashncz((const unsigned char *)fp, fnvInitialHash32);
}
virtual bool matchesFindParam(const void *e, const void *fp, unsigned fphash) const override
{
Expand Down Expand Up @@ -844,7 +844,7 @@ public:
const char *myname = queryName();
assert(myname);
size32_t nl = strlen(myname);
return isnocase() ? hashnc((const byte *)myname, nl, 0): hashc((const byte *)myname, nl, 0);
return isnocase() ? hashnc((const byte *)myname, nl, fnvInitialHash32): hashc((const byte *)myname, nl, fnvInitialHash32);
}
virtual void setName(const char *_name) override;
virtual void setAttribute(const char *attr, const char *val, bool encoded) override;
Expand Down
2 changes: 1 addition & 1 deletion system/jlib/jstats.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1290,7 +1290,7 @@ void StatisticsMapping::createMappings()
}

const unsigned * kinds = indexToKind.getArray();
hashcode = hashc((const byte *)kinds, indexToKind.ordinality() * sizeof(unsigned), 0x811C9DC5);
hashcode = hashc((const byte *)kinds, indexToKind.ordinality() * sizeof(unsigned), fnvInitialHash32);

//All StatisticsMapping objects are assumed to be static, and never destroyed.
const StatisticsMapping * existing = allStatsMappings[hashcode];
Expand Down
22 changes: 11 additions & 11 deletions system/jlib/jsuperhash.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,8 @@
//#define MY_TRACE_HASH

#ifdef MY_TRACE_HASH
int my_search_tot = 0;
int my_search_num = 0;
static unsigned my_search_tot = 0;
static unsigned my_search_num = 0;
#endif

//-- SuperHashTable ---------------------------------------------------
Expand Down Expand Up @@ -104,13 +104,13 @@ void SuperHashTable::dumpStats() const
{
#ifdef TRACE_HASH
if (tablecount && search_tot && search_num)
printf("Hash table %d entries, %d size, average search length %d(%d/%d) max %d\n", tablecount, tablesize,
(int) (search_tot/search_num), search_tot, search_num, search_max);
printf("Hash table %u entries, %u size, average search length %" I64F "u(%" I64F "u/%u) max %u\n", tablecount, tablesize,
search_tot/search_num, search_tot, search_num, search_max);
#endif
}

#ifdef TRACE_HASH
void SuperHashTable::note_searchlen(int len) const
void SuperHashTable::note_searchlen(unsigned len) const
{
search_tot += len;
search_num++;
Expand Down Expand Up @@ -151,8 +151,8 @@ unsigned SuperHashTable::doFind(unsigned findHash, const void * findParam) const
}
#ifdef MY_TRACE_HASH
my_search_num++;
if(my_search_num != 0)
printf("Hash table average search length %d\n", (int) (my_search_tot/my_search_num));
if (my_search_num != 0)
printf("Hash table average search length %u\n", my_search_tot/my_search_num);
#endif
#ifdef TRACE_HASH
note_searchlen(searchlen);
Expand Down Expand Up @@ -193,8 +193,8 @@ unsigned SuperHashTable::doFindElement(unsigned v, const void * findET) const
}
#ifdef MY_TRACE_HASH
my_search_num++;
if(my_search_num != 0)
printf("Hash table average search length %d\n", (int) (my_search_tot/my_search_num));
if (my_search_num != 0)
printf("Hash table average search length %u\n", my_search_tot/my_search_num);
#endif
#ifdef TRACE_HASH
note_searchlen(searchlen);
Expand Down Expand Up @@ -233,8 +233,8 @@ unsigned SuperHashTable::doFindNew(unsigned v) const
}
#ifdef MY_TRACE_HASH
my_search_num++;
if(my_search_num != 0)
printf("Hash table average search length %d\n", (int) (my_search_tot/my_search_num));
if (my_search_num != 0)
printf("Hash table average search length %u\n", my_search_tot/my_search_num);
#endif
#ifdef TRACE_HASH
note_searchlen(searchlen);
Expand Down
18 changes: 10 additions & 8 deletions system/jlib/jsuperhash.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@
#include "jstring.hpp"
#include "jmutex.hpp"

constexpr unsigned fnvInitialHash32 = 0x811C9DC5;

extern jlib_decl unsigned hashc( const unsigned char *k, unsigned length, unsigned initval);
extern jlib_decl unsigned hashnc( const unsigned char *k, unsigned length, unsigned initval);
extern jlib_decl unsigned hashcz( const unsigned char *k, unsigned initval);
Expand Down Expand Up @@ -88,7 +90,7 @@ class jlib_decl SuperHashTable : public CInterface
void doKill(void);
void expand();
void expand(unsigned newsize);
void note_searchlen(int) const;
void note_searchlen(unsigned) const;

virtual void onAdd(void *et) = 0;
virtual void onRemove(void *et) = 0;
Expand All @@ -105,9 +107,9 @@ class jlib_decl SuperHashTable : public CInterface
unsigned tablesize;
unsigned tablecount;
#ifdef TRACE_HASH
mutable int search_tot;
mutable int search_num;
mutable int search_max;
mutable unsigned __int64 search_tot;
mutable unsigned search_num;
mutable unsigned search_max;
#endif
};

Expand Down Expand Up @@ -514,9 +516,9 @@ class jlib_decl AtomRefTable : public SuperHashTableOf<HashKeyElement, constchar
HashKeyElement *hke = (HashKeyElement *) checked_malloc(sizeof(HashKeyElement)+l+1,-605);
memcpy((void *) (hke->keyPtr()), key, l+1);
if (nocase)
hke->hashValue = hashnc((const unsigned char *)key, l, 0);
hke->hashValue = hashnc((const unsigned char *)key, l, fnvInitialHash32);
else
hke->hashValue = hashc((const unsigned char *)key, l, 0);
hke->hashValue = hashc((const unsigned char *)key, l, fnvInitialHash32);
hke->linkCount = 0;
return hke;
}
Expand Down Expand Up @@ -609,9 +611,9 @@ class jlib_decl AtomRefTable : public SuperHashTableOf<HashKeyElement, constchar
virtual unsigned getHashFromFindParam(const void *fp) const
{
if (nocase)
return hashncz((const unsigned char *)fp, 0);
return hashncz((const unsigned char *)fp, fnvInitialHash32);
else
return hashcz((const unsigned char *)fp, 0);
return hashcz((const unsigned char *)fp, fnvInitialHash32);
}

virtual const void *getFindParam(const void *e) const
Expand Down
Loading