-
Notifications
You must be signed in to change notification settings - Fork 33
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Implement BloomFilter query pushdown optimization (#271)
* Add pushdown optimization by painless script Signed-off-by: Chen Dai <[email protected]> * Update UT and indent script code Signed-off-by: Chen Dai <[email protected]> * Minor refactor painless script Signed-off-by: Chen Dai <[email protected]> * Fix broken IT Signed-off-by: Chen Dai <[email protected]> --------- Signed-off-by: Chen Dai <[email protected]>
- Loading branch information
Showing
6 changed files
with
172 additions
and
4 deletions.
There are no files selected for viewing
109 changes: 109 additions & 0 deletions
109
flint-spark-integration/src/main/resources/bloom_filter_query.script
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,109 @@ | ||
int hashLong(long input, int seed) { | ||
int low = (int) input; | ||
int high = (int) (input >>> 32); | ||
int k1 = mixK1(low); | ||
int h1 = mixH1(seed, k1); | ||
k1 = mixK1(high); | ||
h1 = mixH1(h1, k1); | ||
return fmix(h1, 8); | ||
} | ||
|
||
int mixK1(int k1) { | ||
k1 *= 0xcc9e2d51L; | ||
k1 = Integer.rotateLeft(k1, 15); | ||
k1 *= 0x1b873593L; | ||
return k1; | ||
} | ||
|
||
int mixH1(int h1, int k1) { | ||
h1 ^= k1; | ||
h1 = Integer.rotateLeft(h1, 13); | ||
h1 = h1 * 5 + (int) 0xe6546b64L; | ||
return h1; | ||
} | ||
|
||
int fmix(int h1, int length) { | ||
h1 ^= length; | ||
h1 ^= h1 >>> 16; | ||
h1 *= 0x85ebca6bL; | ||
h1 ^= h1 >>> 13; | ||
h1 *= 0xc2b2ae35L; | ||
h1 ^= h1 >>> 16; | ||
return h1; | ||
} | ||
|
||
BytesRef bfBytes = doc[params.fieldName].value; | ||
byte[] buf = bfBytes.bytes; | ||
int pos = 0; | ||
int count = buf.length; | ||
int ch1 = (pos < count) ? (buf[pos++] & (int) 0xffL) : -1; | ||
int ch2 = (pos < count) ? (buf[pos++] & (int) 0xffL) : -1; | ||
int ch3 = (pos < count) ? (buf[pos++] & (int) 0xffL) : -1; | ||
int ch4 = (pos < count) ? (buf[pos++] & (int) 0xffL) : -1; | ||
int version = ((ch1 << 24) + (ch2 << 16) + (ch3 << 8) + (ch4 << 0)); | ||
ch1 = (pos < count) ? (buf[pos++] & (int) 0xffL) : -1; | ||
ch2 = (pos < count) ? (buf[pos++] & (int) 0xffL) : -1; | ||
ch3 = (pos < count) ? (buf[pos++] & (int) 0xffL) : -1; | ||
ch4 = (pos < count) ? (buf[pos++] & (int) 0xffL) : -1; | ||
int numHashFunctions = ((ch1 << 24) + (ch2 << 16) + (ch3 << 8) + (ch4 << 0)); | ||
ch1 = (pos < count) ? (buf[pos++] & (int) 0xffL) : -1; | ||
ch2 = (pos < count) ? (buf[pos++] & (int) 0xffL) : -1; | ||
ch3 = (pos < count) ? (buf[pos++] & (int) 0xffL) : -1; | ||
ch4 = (pos < count) ? (buf[pos++] & (int) 0xffL) : -1; | ||
int numWords = ((ch1 << 24) + (ch2 << 16) + (ch3 << 8) + (ch4 << 0)); | ||
|
||
long[] data = new long[numWords]; | ||
byte[] readBuffer = new byte[8]; | ||
for (int i = 0; i < numWords; i++) { | ||
int n = 0; | ||
while (n < 8) { | ||
int readCount; | ||
int off = n; | ||
int len = 8 - n; | ||
if (pos >= count) { | ||
readCount = -1; | ||
} else { | ||
int avail = count - pos; | ||
if (len > avail) { | ||
len = avail; | ||
} | ||
if (len <= 0) { | ||
readCount = 0; | ||
} else { | ||
System.arraycopy(buf, pos, readBuffer, off, len); | ||
pos += len; | ||
readCount = len; | ||
} | ||
} | ||
n += readCount; | ||
} | ||
data[i] = (((long) readBuffer[0] << 56) + | ||
((long) (readBuffer[1] & 255) << 48) + | ||
((long) (readBuffer[2] & 255) << 40) + | ||
((long) (readBuffer[3] & 255) << 32) + | ||
((long) (readBuffer[4] & 255) << 24) + | ||
((readBuffer[5] & 255) << 16) + | ||
((readBuffer[6] & 255) << 8) + | ||
((readBuffer[7] & 255) << 0)); | ||
} | ||
|
||
long bitCount = 0; | ||
for (long word : data) { | ||
bitCount += Long.bitCount(word); | ||
} | ||
|
||
long item = params.value; | ||
int h1 = hashLong(item, 0); | ||
int h2 = hashLong(item, h1); | ||
long bitSize = (long) data.length * Long.SIZE; | ||
for (int i = 1; i <= numHashFunctions; i++) { | ||
int combinedHash = h1 + (i * h2); | ||
if (combinedHash < 0) { | ||
combinedHash = ~combinedHash; | ||
} | ||
long index = combinedHash % bitSize; | ||
if ((data[(int) (index >>> 6)] & (1L << index)) == 0) { | ||
return false; | ||
} | ||
} | ||
return true; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters