Skip to content

Commit

Permalink
Optimize MurmurHash3 (elastic#101202)
Browse files Browse the repository at this point in the history
- Makes Murmur3Hasher.update allocation-free by no longer allocating a temporary byte[]
- Adds digestHash methods that return a Hash128 instead of a newly allocated byte[]. One overload accepts a re-usable Hash128 instance to avoid allocations.
- Hash128 adds getBytes methods, including one that writes into a caller-supplied target byte[] to avoid allocations
  • Loading branch information
felixbarny authored Oct 27, 2023
1 parent 3a806b4 commit 6571d39
Show file tree
Hide file tree
Showing 4 changed files with 77 additions and 51 deletions.
5 changes: 5 additions & 0 deletions docs/changelog/101202.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 101202
summary: Optimize `MurmurHash3`
area: "Ingest Node"
type: enhancement
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,6 @@

package org.elasticsearch.common.hash;

import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.Numbers;

/**
* Wraps {@link MurmurHash3} to provide an interface similar to {@link java.security.MessageDigest} that
* allows hashing of byte arrays passed through multiple calls to {@link #update(byte[])}. Like
Expand All @@ -35,38 +32,45 @@ public Murmur3Hasher(long seed) {

/**
* Supplies some or all of the bytes to be hashed. Multiple calls to this method may
* be made to sequentially supply the bytes for hashing. Once all bytes have been supplied, the
* {@link #digest()} method should be called to complete the hash calculation.
* be made to sequentially supply the bytes for hashing. Once all bytes have been supplied, either the
* {@link #digestHash} method (preferred) or the {@link #digest()} method should be called to complete the hash calculation.
*/
public void update(byte[] inputBytes) {
int totalLength = remainderLength + inputBytes.length;
if (totalLength >= 16) {
// hash as many bytes as available in integer multiples of 16
int numBytesToHash = totalLength & 0xFFFFFFF0;
byte[] bytesToHash;
update(inputBytes, 0, inputBytes.length);
}

private void update(byte[] inputBytes, int offset, int length) {
if (remainderLength + length >= remainder.length) {
if (remainderLength > 0) {
bytesToHash = new byte[numBytesToHash];
System.arraycopy(remainder, 0, bytesToHash, 0, remainderLength);
System.arraycopy(inputBytes, 0, bytesToHash, remainderLength, numBytesToHash - remainderLength);
} else {
bytesToHash = inputBytes;
}
// fill rest of remainder from inputBytes and hash remainder
int bytesToCopyFromInputToRemainder = remainder.length - remainderLength;
System.arraycopy(inputBytes, offset, remainder, remainderLength, bytesToCopyFromInputToRemainder);
offset = bytesToCopyFromInputToRemainder;
length = length - bytesToCopyFromInputToRemainder;

MurmurHash3.IntermediateResult result = MurmurHash3.intermediateHash(bytesToHash, 0, numBytesToHash, h1, h2);
h1 = result.h1;
h2 = result.h2;
this.length += numBytesToHash;
MurmurHash3.IntermediateResult result = MurmurHash3.intermediateHash(remainder, 0, remainder.length, h1, h2);
h1 = result.h1;
h2 = result.h2;
remainderLength = 0;
this.length += remainder.length;
}
// hash as many bytes as available in integer multiples of 16 as intermediateHash can only process multiples of 16
int numBytesToHash = length & 0xFFFFFFF0;
if (numBytesToHash > 0) {
MurmurHash3.IntermediateResult result = MurmurHash3.intermediateHash(inputBytes, offset, numBytesToHash, h1, h2);
h1 = result.h1;
h2 = result.h2;
this.length += numBytesToHash;
}

// save the remaining bytes, if any
if (totalLength > numBytesToHash) {
System.arraycopy(inputBytes, numBytesToHash - remainderLength, remainder, 0, totalLength - numBytesToHash);
remainderLength = totalLength - numBytesToHash;
} else {
remainderLength = 0;
if (length > numBytesToHash) {
this.remainderLength = length - numBytesToHash;
System.arraycopy(inputBytes, offset + numBytesToHash, remainder, 0, remainderLength);
}
} else {
System.arraycopy(inputBytes, 0, remainder, remainderLength, inputBytes.length);
remainderLength += inputBytes.length;
System.arraycopy(inputBytes, 0, remainder, remainderLength, length);
remainderLength += length;
}
}

Expand All @@ -81,29 +85,30 @@ public void reset() {
}

/**
* Completes the hash of all bytes previously passed to {@link #update(byte[])}.
* Completes the hash of all bytes previously passed to {@link #update}.
*/
public byte[] digest() {
length += remainderLength;
MurmurHash3.Hash128 h = MurmurHash3.finalizeHash(new MurmurHash3.Hash128(), remainder, 0, length, h1, h2);
byte[] hash = new byte[16];
System.arraycopy(Numbers.longToBytes(h.h1), 0, hash, 0, 8);
System.arraycopy(Numbers.longToBytes(h.h2), 0, hash, 8, 8);
return hash;
return digestHash().getBytes();
}

public static String getAlgorithm() {
return METHOD;
/**
* Completes the hash of all bytes previously passed to {@link #update}, placing the result in a
* newly allocated {@link MurmurHash3.Hash128}.
* Delegates to {@link #digestHash(MurmurHash3.Hash128)}; use that overload directly to supply a
* re-usable instance instead of allocating one per call.
*
* @return the value returned by {@link #digestHash(MurmurHash3.Hash128)} for the new instance
*/
public MurmurHash3.Hash128 digestHash() {
return digestHash(new MurmurHash3.Hash128());
}

/**
* Converts the 128-bit byte array returned by {@link #digest()} to a
* {@link org.elasticsearch.common.hash.MurmurHash3.Hash128}
* Completes the hash of all bytes previously passed to {@link #update}.
* Allows passing in a re-usable {@link org.elasticsearch.common.hash.MurmurHash3.Hash128} instance to avoid allocations.
*/
public static MurmurHash3.Hash128 toHash128(byte[] doubleLongBytes) {
MurmurHash3.Hash128 hash128 = new MurmurHash3.Hash128();
hash128.h1 = Numbers.bytesToLong(new BytesRef(doubleLongBytes, 0, 8));
hash128.h2 = Numbers.bytesToLong(new BytesRef(doubleLongBytes, 8, 8));
return hash128;
public MurmurHash3.Hash128 digestHash(MurmurHash3.Hash128 hash) {
// Add the bytes still buffered in `remainder` to the running total before finalizing.
// NOTE(review): this mutates the `length` field, so a second digest call without an intervening
// reset() would add `remainderLength` again — confirm callers always reset between digests.
length += remainderLength;
// finalizeHash receives the caller-supplied instance; that same instance is returned below.
MurmurHash3.finalizeHash(hash, remainder, 0, length, h1, h2);
return hash;
}

/**
* Returns the name of the hash algorithm.
*
* @return the value of {@code METHOD}
*/
public static String getAlgorithm() {
return METHOD;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@

package org.elasticsearch.common.hash;

import org.elasticsearch.common.Numbers;
import org.elasticsearch.common.util.ByteUtils;

import java.math.BigInteger;
Expand All @@ -29,6 +28,17 @@ public static class Hash128 {
/** higher 64 bits part **/
public long h2;

/**
 * Serializes this hash into a newly allocated 16-byte array.
 * The layout is whatever {@link #getBytes(byte[], int)} writes at offset 0.
 *
 * @return a fresh 16-byte array holding {@code h1} followed by {@code h2}
 */
public byte[] getBytes() {
    final byte[] target = new byte[2 * Long.BYTES];
    getBytes(target, 0);
    return target;
}

/**
* Writes this hash into a caller-supplied array, avoiding the allocation made by {@link #getBytes()}.
* {@code h1} is written at {@code offset} and {@code h2} at {@code offset + 8}, each through
* {@code ByteUtils.writeLongBE} (presumably big-endian, per the helper's name — verify).
*
* @param bytes  the target array; presumably needs at least {@code offset + 16} bytes — no check is made here
* @param offset index in {@code bytes} at which the first byte of {@code h1} is written
*/
public void getBytes(byte[] bytes, int offset) {
ByteUtils.writeLongBE(h1, bytes, offset);
ByteUtils.writeLongBE(h2, bytes, offset + 8);
}

@Override
public boolean equals(Object other) {
if (this == other) {
Expand All @@ -49,8 +59,7 @@ public int hashCode() {
@Override
public String toString() {
byte[] longBytes = new byte[17];
System.arraycopy(Numbers.longToBytes(h1), 0, longBytes, 1, 8);
System.arraycopy(Numbers.longToBytes(h2), 0, longBytes, 9, 8);
getBytes(longBytes, 1);
BigInteger bi = new BigInteger(longBytes);
return "0x" + bi.toString(16);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,13 @@

package org.elasticsearch.common.hashing;

import org.elasticsearch.common.Numbers;
import org.elasticsearch.common.hash.Murmur3Hasher;
import org.elasticsearch.common.hash.MurmurHash3;
import org.elasticsearch.test.ESTestCase;

import java.nio.charset.StandardCharsets;

import static org.hamcrest.Matchers.equalTo;

public class Murmur3HasherTests extends ESTestCase {

public void testKnownValues() {
Expand All @@ -37,13 +36,21 @@ private static void assertHash(long lower, long upper, String inputString, long
byte[] bytes = inputString.getBytes(StandardCharsets.UTF_8);
Murmur3Hasher mh = new Murmur3Hasher(seed);
mh.update(bytes);
MurmurHash3.Hash128 actual = Murmur3Hasher.toHash128(mh.digest());
MurmurHash3.Hash128 actual = mh.digestHash();
assertHash(expected, actual);
}

/**
* Asserts that both 64-bit halves of {@code actual} match those of {@code expected}, and that
* {@code expected} survives a round trip through {@code getBytes()} and {@code toHash128(byte[])}.
*/
private static void assertHash(MurmurHash3.Hash128 expected, MurmurHash3.Hash128 actual) {
assertEquals(expected.h1, actual.h1);
assertEquals(expected.h2, actual.h2);
// Round trip: serialize to bytes and parse back; the comparison goes through Hash128.equals.
assertEquals(expected, toHash128(expected.getBytes()));
}

/**
 * Rebuilds a {@link MurmurHash3.Hash128} from a byte array: the long read at index 0 becomes
 * {@code h1} and the long read at index 8 becomes {@code h2}.
 *
 * @param doubleLongBytes array holding the two longs back to back
 * @return a new instance populated from the array
 */
public static MurmurHash3.Hash128 toHash128(byte[] doubleLongBytes) {
    final MurmurHash3.Hash128 parsed = new MurmurHash3.Hash128();
    final long firstHalf = Numbers.bytesToLong(doubleLongBytes, 0);
    final long secondHalf = Numbers.bytesToLong(doubleLongBytes, Long.BYTES);
    parsed.h1 = firstHalf;
    parsed.h2 = secondHalf;
    return parsed;
}

public void testSingleVsSequentialMurmur3() {
Expand Down Expand Up @@ -85,7 +92,7 @@ public void testSingleVsSequentialMurmur3() {
mh.update(splitBytes[k]);
}
}
MurmurHash3.Hash128 sequentialHash = Murmur3Hasher.toHash128(mh.digest());
assertThat(singleHash, equalTo(sequentialHash));
MurmurHash3.Hash128 sequentialHash = mh.digestHash();
assertHash(singleHash, sequentialHash);
}
}

0 comments on commit 6571d39

Please sign in to comment.