Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

HPCC-33126 Add FNV-1a hashing function and use in jptree #19361

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions dali/daliadmin/daadmin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2472,7 +2472,7 @@ void xmlSize(const char *filename, double pc)
}
}

void loadXMLTest(const char *filename, bool parseOnly, bool useLowMemPTree, bool saveFormattedXML)
void loadXMLTest(const char *filename, bool parseOnly, bool useLowMemPTree, bool saveFormattedXML, bool freePTree)
{
OwnedIFile iFile = createIFile(filename);
OwnedIFileIO iFileIO = iFile->open(IFOread);
Expand Down Expand Up @@ -2544,7 +2544,8 @@ void loadXMLTest(const char *filename, bool parseOnly, bool useLowMemPTree, bool
PROGLOG("Save took: %.2f", (float)timer.elapsedMs()/1000);
}

::LINK(iMaker->queryRoot()); // intentionally leak (avoid time clearing up)
if (!freePTree)
::LINK(iMaker->queryRoot()); // intentionally leak (avoid time clearing up)
}

void translateToXpath(const char *logicalfile, DfsXmlBranchKind tailType)
Expand Down
2 changes: 1 addition & 1 deletion dali/daliadmin/daadmin.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ namespace daadmin

extern DALIADMIN_API void setDaliConnectTimeoutMs(unsigned timeoutMs);
extern DALIADMIN_API void xmlSize(const char *filename, double pc);
extern DALIADMIN_API void loadXMLTest(const char *filename, bool parseOnly, bool useLowMemPTree, bool saveFormattedXML);
extern DALIADMIN_API void loadXMLTest(const char *filename, bool parseOnly, bool useLowMemPTree, bool saveFormattedXML, bool freePTree);
extern DALIADMIN_API void translateToXpath(const char *logicalfile, DfsXmlBranchKind tailType = DXB_File);

extern DALIADMIN_API void exportToFile(const char *path, const char *filename, bool safe = false);
Expand Down
4 changes: 3 additions & 1 deletion dali/daliadmin/daliadmin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -233,13 +233,15 @@ int main(int argc, const char* argv[])
{
bool useLowMemPTree = false;
bool saveFormatedTree = false;
bool freePTree = false;
bool parseOnly = getComponentConfigSP()->getPropBool("@parseonly");
if (!parseOnly)
{
useLowMemPTree = getComponentConfigSP()->getPropBool("@lowmem");
saveFormatedTree = getComponentConfigSP()->getPropBool("@savexml");
freePTree = getComponentConfigSP()->getPropBool("@free");
}
loadXMLTest(params.item(1), parseOnly, useLowMemPTree, saveFormatedTree);
loadXMLTest(params.item(1), parseOnly, useLowMemPTree, saveFormatedTree, freePTree);
}
else
{
Expand Down
145 changes: 132 additions & 13 deletions system/jlib/jhash.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,12 +64,14 @@ MODULE_EXIT()
// do not want these in your leak checking call releaseAtoms()
}

#define HASHONE(hash, c) { hash *= 0x01000193; hash ^= c; } // Fowler/Noll/Vo Hash... seems to work pretty well, and fast
#define HASHONE(hash, c) { hash *= fnvPrime32; hash ^= c; } // Fowler/Noll/Vo Hash... seems to work pretty well, and fast
#define HASHONE_FNV1a(hash, c) { hash ^= c; hash *= fnvPrime32; }

//Following is potentially quicker, but not tested
//#define HASHONE(hash, c) { hash += (hash<<1) + (hash<<4) + (hash<<7) + (hash<<8) + (hash<<24); hash ^= c; }

unsigned hashc( const unsigned char *k, unsigned length, unsigned initval)
// deprecatedHash* functions (non template versions) kept for unittest only
unsigned deprecatedHashc(const unsigned char *k, unsigned length, unsigned initval)
{
unsigned hash = initval;
unsigned char c;
Expand All @@ -94,7 +96,7 @@ unsigned hashc( const unsigned char *k, unsigned length, unsigned initval)
return hash;
}

unsigned hashcz( const unsigned char *k, unsigned initval)
unsigned deprecatedHashcz(const unsigned char *k, unsigned initval)
{
unsigned hash = initval;
for (;;)
Expand All @@ -107,8 +109,87 @@ unsigned hashcz( const unsigned char *k, unsigned initval)
return hash;
}

template <typename T>
inline unsigned doHashValue( T value, unsigned initval)
// FNV Hash Operations
struct FNV1
{
static inline void apply(unsigned &hash, unsigned char c)
{
hash *= fnvPrime32;
hash ^= c;
}
};

struct FNV1a
{
static inline void apply(unsigned &hash, unsigned char c)
{
hash ^= c;
hash *= fnvPrime32;
}
};

template <typename HashPolicy>
unsigned policyHashc(const unsigned char *k, unsigned length, unsigned initval)
{
unsigned hash = initval;
unsigned char c;
while (length >= 8)
{
c = (*k++); HashPolicy::apply(hash, c);
c = (*k++); HashPolicy::apply(hash, c);
c = (*k++); HashPolicy::apply(hash, c);
c = (*k++); HashPolicy::apply(hash, c);
length-=4;
}
switch (length)
{
case 7: c = (*k++); HashPolicy::apply(hash, c); // fallthrough
case 6: c = (*k++); HashPolicy::apply(hash, c); // fallthrough
case 5: c = (*k++); HashPolicy::apply(hash, c); // fallthrough
case 4: c = (*k++); HashPolicy::apply(hash, c); // fallthrough
case 3: c = (*k++); HashPolicy::apply(hash, c); // fallthrough
case 2: c = (*k++); HashPolicy::apply(hash, c); // fallthrough
case 1: c = (*k++); HashPolicy::apply(hash, c);
}
return hash;
}

template <typename HashPolicy>
unsigned policyHashcz(const unsigned char *k, unsigned initval)
{
unsigned hash = initval;
for (;;)
{
unsigned char c = (*k++);
if (c == 0)
break;
HashPolicy::apply(hash, c);
}
return hash;
}

unsigned hashc(const unsigned char *k, unsigned length, unsigned initval)
{
return policyHashc<FNV1>(k, length, initval);
}

unsigned hashcz(const unsigned char *k, unsigned initval)
{
return policyHashcz<FNV1>(k, initval);
}

unsigned hashc_fnv1a(const unsigned char *k, unsigned length, unsigned initval)
{
return policyHashc<FNV1a>(k, length, initval);
}

unsigned hashcz_fnv1a(const unsigned char *k, unsigned initval)
{
return policyHashcz<FNV1a>(k, initval);
}

template <typename HashPolicy, typename T>
inline unsigned doHashValue(T value, unsigned initval)
{
//The values returned from this function are only consistent with those from hashn() if running on little endian architecture
unsigned hash = initval;
Expand All @@ -117,30 +198,47 @@ inline unsigned doHashValue( T value, unsigned initval)
{
c = (byte)value;
value >>= 8;
HASHONE(hash, c);
HashPolicy::apply(hash, c);
}
return hash;
}

unsigned hashvalue( unsigned value, unsigned initval)
unsigned hashvalue(unsigned value, unsigned initval)
{
return doHashValue(value, initval);
return doHashValue<FNV1>(value, initval);
}

unsigned hashvalue( unsigned __int64 value, unsigned initval)
{
return doHashValue(value, initval);
return doHashValue<FNV1>(value, initval);
}

unsigned hashvalue( const void * value, unsigned initval)
{
return doHashValue((memsize_t)value, initval);
return doHashValue<FNV1>((memsize_t)value, initval);
}

unsigned hashvalue_fnv1a(unsigned value, unsigned initval)
{
return doHashValue<FNV1a>(value, initval);
}

unsigned hashvalue_fnv1a(unsigned __int64 value, unsigned initval)
{
return doHashValue<FNV1a>(value, initval);
}

unsigned hashvalue_fnv1a(const void * value, unsigned initval)
{
return doHashValue<FNV1a>((memsize_t)value, initval);
}


#define GETWORDNC(k,n) ((GETBYTE0(n)+GETBYTE1(n)+GETBYTE2(n)+GETBYTE3(n))&0xdfdfdfdf)


unsigned hashnc( const unsigned char *k, unsigned length, unsigned initval)
template <typename HashPolicy>
unsigned policyHashnc(const unsigned char *k, unsigned length, unsigned initval)
{
unsigned hash = initval;
unsigned char c;
Expand All @@ -165,8 +263,8 @@ unsigned hashnc( const unsigned char *k, unsigned length, unsigned initval)
return hash;
}


unsigned hashncz( const unsigned char *k, unsigned initval)
template <typename HashPolicy>
unsigned policyHashncz(const unsigned char *k, unsigned initval)
{
unsigned hash = initval;
for (;;)
Expand All @@ -180,6 +278,26 @@ unsigned hashncz( const unsigned char *k, unsigned initval)
return hash;
}

unsigned hashnc(const unsigned char *k, unsigned length, unsigned initval)
{
return policyHashnc<FNV1>(k, length, initval);
}

unsigned hashncz(const unsigned char *k, unsigned initval)
{
return policyHashncz<FNV1>(k, initval);
}

unsigned hashnc_fnv1a(const unsigned char *k, unsigned length, unsigned initval)
{
return policyHashnc<FNV1a>(k, length, initval);
}

unsigned hashncz_fnv1a(const unsigned char *k, unsigned initval)
{
return policyHashncz<FNV1a>(k, initval);
}

MappingKey::MappingKey(const void * inKey, int keysize)
{
int ksm = keysize;
Expand All @@ -201,6 +319,7 @@ MappingKey::MappingKey(const void * inKey, int keysize)
key = temp;
}


//-- Mapping ---------------------------------------------------

unsigned MappingBase::getHash() const { return hash; }
Expand Down
26 changes: 19 additions & 7 deletions system/jlib/jhash.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -425,13 +425,25 @@ extern jlib_decl IIdAtom * createIdAtom(const char *value);
extern jlib_decl IIdAtom * createIdAtom(const char *value, size32_t len);

extern jlib_decl void releaseAtoms();
extern jlib_decl unsigned hashc( const unsigned char *k, unsigned length, unsigned initval);
extern jlib_decl unsigned hashnc( const unsigned char *k, unsigned length, unsigned initval);
extern jlib_decl unsigned hashcz( const unsigned char *k, unsigned initval);
extern jlib_decl unsigned hashncz( const unsigned char *k, unsigned initval);
extern jlib_decl unsigned hashvalue( unsigned value, unsigned initval);
extern jlib_decl unsigned hashvalue( unsigned __int64 value, unsigned initval);
extern jlib_decl unsigned hashvalue( const void * value, unsigned initval);

extern jlib_decl unsigned deprecatedHashc(const unsigned char *k, unsigned length, unsigned initval);
extern jlib_decl unsigned deprecatedHashcz(const unsigned char *k, unsigned initval);

extern jlib_decl unsigned hashc(const unsigned char *k, unsigned length, unsigned initval);
extern jlib_decl unsigned hashnc(const unsigned char *k, unsigned length, unsigned initval);
extern jlib_decl unsigned hashcz(const unsigned char *k, unsigned initval);
extern jlib_decl unsigned hashncz(const unsigned char *k, unsigned initval);
extern jlib_decl unsigned hashvalue(unsigned value, unsigned initval);
extern jlib_decl unsigned hashvalue(unsigned __int64 value, unsigned initval);
extern jlib_decl unsigned hashvalue(const void * value, unsigned initval);

extern jlib_decl unsigned hashc_fnv1a(const unsigned char *k, unsigned length, unsigned initval);
extern jlib_decl unsigned hashnc_fnv1a(const unsigned char *k, unsigned length, unsigned initval);
extern jlib_decl unsigned hashcz_fnv1a(const unsigned char *k, unsigned initval);
extern jlib_decl unsigned hashncz_fnv1a(const unsigned char *k, unsigned initval);
extern jlib_decl unsigned hashvalue_fnv1a(unsigned value, unsigned initval);
extern jlib_decl unsigned hashvalue_fnv1a(unsigned __int64 value, unsigned initval);
extern jlib_decl unsigned hashvalue_fnv1a(const void * value, unsigned initval);

//================================================
// Minimal Hash table template - slightly less overhead that HashTable/SuperHashTable
Expand Down
2 changes: 1 addition & 1 deletion system/jlib/jlzw.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -310,7 +310,7 @@ void CLZWCompressor::open(void *buf,size32_t max)



#define HASHC(code,ch) (((0x01000193*(unsigned)code)^(unsigned char)ch)%LZW_HASH_TABLE_SIZE)
#define HASHC(code,ch) (((fnvPrime32*(unsigned)code)^(unsigned char)ch)%LZW_HASH_TABLE_SIZE)

#define BE_MEMCPY4(dst,src) { if (supportbigendian) _WINCPYREV4(dst,src); else memcpy(dst,src,4); }

Expand Down
2 changes: 1 addition & 1 deletion system/jlib/jptree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3847,7 +3847,7 @@ unsigned CAtomPTree::queryHash() const
{
const char *_name = name.get();
size32_t nl = strlen(_name);
return isnocase() ? hashnc((const byte *) _name, nl, fnvInitialHash32): hashc((const byte *) _name, nl, fnvInitialHash32);
return isnocase() ? hashnc_fnv1a((const byte *) _name, nl, fnvInitialHash32): hashc_fnv1a((const byte *) _name, nl, fnvInitialHash32);
}
}

Expand Down
6 changes: 3 additions & 3 deletions system/jlib/jptree.ipp
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ protected:
virtual unsigned getHashFromElement(const void *e) const override;
virtual unsigned getHashFromFindParam(const void *fp) const override
{
return hashcz((const unsigned char *)fp, fnvInitialHash32);
return hashcz_fnv1a((const unsigned char *)fp, fnvInitialHash32);
}
virtual bool matchesFindParam(const void *e, const void *fp, unsigned fphash) const override
{
Expand Down Expand Up @@ -108,7 +108,7 @@ public:
// SuperHashTable definitions
virtual unsigned getHashFromFindParam(const void *fp) const override
{
return hashncz((const unsigned char *)fp, fnvInitialHash32);
return hashncz_fnv1a((const unsigned char *)fp, fnvInitialHash32);
}
virtual bool matchesFindParam(const void *e, const void *fp, unsigned fphash) const override
{
Expand Down Expand Up @@ -844,7 +844,7 @@ public:
const char *myname = queryName();
assert(myname);
size32_t nl = strlen(myname);
return isnocase() ? hashnc((const byte *)myname, nl, fnvInitialHash32): hashc((const byte *)myname, nl, fnvInitialHash32);
return isnocase() ? hashnc_fnv1a((const byte *)myname, nl, fnvInitialHash32): hashc_fnv1a((const byte *)myname, nl, fnvInitialHash32);
}
virtual void setName(const char *_name) override;
virtual void setAttribute(const char *attr, const char *val, bool encoded) override;
Expand Down
13 changes: 9 additions & 4 deletions system/jlib/jsuperhash.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,16 @@
#include "jmutex.hpp"

constexpr unsigned fnvInitialHash32 = 0x811C9DC5;
constexpr unsigned fnvPrime32 = 0x01000193;

extern jlib_decl unsigned hashc( const unsigned char *k, unsigned length, unsigned initval);
extern jlib_decl unsigned hashnc( const unsigned char *k, unsigned length, unsigned initval);
extern jlib_decl unsigned hashcz( const unsigned char *k, unsigned initval);
extern jlib_decl unsigned hashncz( const unsigned char *k, unsigned initval);
extern jlib_decl unsigned hashc_fnv1a(const unsigned char *k, unsigned length, unsigned initval);
extern jlib_decl unsigned hashnc_fnv1a(const unsigned char *k, unsigned length, unsigned initval);
extern jlib_decl unsigned hashcz_fnv1a(const unsigned char *k, unsigned initval);
extern jlib_decl unsigned hashncz_fnv1a(const unsigned char *k, unsigned initval);

class jlib_decl SuperHashTable : public CInterface
{
Expand Down Expand Up @@ -516,9 +521,9 @@ class jlib_decl AtomRefTable : public SuperHashTableOf<HashKeyElement, constchar
HashKeyElement *hke = (HashKeyElement *) checked_malloc(sizeof(HashKeyElement)+l+1,-605);
memcpy((void *) (hke->keyPtr()), key, l+1);
if (nocase)
hke->hashValue = hashnc((const unsigned char *)key, l, fnvInitialHash32);
hke->hashValue = hashnc_fnv1a((const unsigned char *)key, l, fnvInitialHash32);
else
hke->hashValue = hashc((const unsigned char *)key, l, fnvInitialHash32);
hke->hashValue = hashc_fnv1a((const unsigned char *)key, l, fnvInitialHash32);
hke->linkCount = 0;
return hke;
}
Expand Down Expand Up @@ -611,9 +616,9 @@ class jlib_decl AtomRefTable : public SuperHashTableOf<HashKeyElement, constchar
virtual unsigned getHashFromFindParam(const void *fp) const
{
if (nocase)
return hashncz((const unsigned char *)fp, fnvInitialHash32);
return hashncz_fnv1a((const unsigned char *)fp, fnvInitialHash32);
else
return hashcz((const unsigned char *)fp, fnvInitialHash32);
return hashcz_fnv1a((const unsigned char *)fp, fnvInitialHash32);
}

virtual const void *getFindParam(const void *e) const
Expand Down
Loading
Loading