diff --git a/docs/changelog/101202.yaml b/docs/changelog/101202.yaml
new file mode 100644
index 0000000000000..565338a2dbb6e
--- /dev/null
+++ b/docs/changelog/101202.yaml
@@ -0,0 +1,5 @@
+pr: 101202
+summary: Optimize `MurmurHash3`
+area: "Ingest Node"
+type: enhancement
+issues: []
diff --git a/server/src/main/java/org/elasticsearch/common/hash/Murmur3Hasher.java b/server/src/main/java/org/elasticsearch/common/hash/Murmur3Hasher.java
index 0d4d6fd4f61f2..b85e3107ba09d 100644
--- a/server/src/main/java/org/elasticsearch/common/hash/Murmur3Hasher.java
+++ b/server/src/main/java/org/elasticsearch/common/hash/Murmur3Hasher.java
@@ -8,9 +8,6 @@
 
 package org.elasticsearch.common.hash;
 
-import org.apache.lucene.util.BytesRef;
-import org.elasticsearch.common.Numbers;
-
 /**
  * Wraps {@link MurmurHash3} to provide an interface similar to {@link java.security.MessageDigest} that
  * allows hashing of byte arrays passed through multiple calls to {@link #update(byte[])}. Like
@@ -35,38 +32,51 @@ public Murmur3Hasher(long seed) {
 
     /**
      * Supplies some or all of the bytes to be hashed. Multiple calls to this method may
-     * be made to sequentially supply the bytes for hashing. Once all bytes have been supplied, the
-     * {@link #digest()} method should be called to complete the hash calculation.
+     * be made to sequentially supply the bytes for hashing. Once all bytes have been supplied, either the
+     * {@link #digestHash} method (preferred) or the {@link #digest()} method should be called to complete the hash calculation.
      */
     public void update(byte[] inputBytes) {
-        int totalLength = remainderLength + inputBytes.length;
-        if (totalLength >= 16) {
-            // hash as many bytes as available in integer multiples of 16
-            int numBytesToHash = totalLength & 0xFFFFFFF0;
-            byte[] bytesToHash;
+        update(inputBytes, 0, inputBytes.length);
+    }
+
+    /**
+     * Feeds {@code length} bytes of {@code inputBytes}, starting at {@code offset}, into the hash state.
+     * Bytes are hashed in multiples of 16 as soon as they are available; trailing bytes that do not
+     * complete such a multiple are buffered in {@code remainder} for a later call or for finalization.
+     */
+    private void update(byte[] inputBytes, int offset, int length) {
+        if (remainderLength + length >= remainder.length) {
             if (remainderLength > 0) {
-                bytesToHash = new byte[numBytesToHash];
-                System.arraycopy(remainder, 0, bytesToHash, 0, remainderLength);
-                System.arraycopy(inputBytes, 0, bytesToHash, remainderLength, numBytesToHash - remainderLength);
-            } else {
-                bytesToHash = inputBytes;
-            }
+                // fill rest of remainder from inputBytes and hash remainder
+                int bytesToCopyFromInputToRemainder = remainder.length - remainderLength;
+                System.arraycopy(inputBytes, offset, remainder, remainderLength, bytesToCopyFromInputToRemainder);
+                // advance relative to the caller-supplied offset, which is not necessarily 0
+                offset += bytesToCopyFromInputToRemainder;
+                length = length - bytesToCopyFromInputToRemainder;
 
-            MurmurHash3.IntermediateResult result = MurmurHash3.intermediateHash(bytesToHash, 0, numBytesToHash, h1, h2);
-            h1 = result.h1;
-            h2 = result.h2;
-            this.length += numBytesToHash;
+                MurmurHash3.IntermediateResult result = MurmurHash3.intermediateHash(remainder, 0, remainder.length, h1, h2);
+                h1 = result.h1;
+                h2 = result.h2;
+                remainderLength = 0;
+                this.length += remainder.length;
+            }
+            // hash as many bytes as available in integer multiples of 16 as intermediateHash can only process multiples of 16
+            int numBytesToHash = length & 0xFFFFFFF0;
+            if (numBytesToHash > 0) {
+                MurmurHash3.IntermediateResult result = MurmurHash3.intermediateHash(inputBytes, offset, numBytesToHash, h1, h2);
+                h1 = result.h1;
+                h2 = result.h2;
+                this.length += numBytesToHash;
+            }
 
             // save the remaining bytes, if any
-            if (totalLength > numBytesToHash) {
-                System.arraycopy(inputBytes, numBytesToHash - remainderLength, remainder, 0, totalLength - numBytesToHash);
-                remainderLength = totalLength - numBytesToHash;
-            } else {
-                remainderLength = 0;
+            if (length > numBytesToHash) {
+                this.remainderLength = length - numBytesToHash;
+                System.arraycopy(inputBytes, offset + numBytesToHash, remainder, 0, remainderLength);
             }
         } else {
-            System.arraycopy(inputBytes, 0, remainder, remainderLength, inputBytes.length);
-            remainderLength += inputBytes.length;
+            System.arraycopy(inputBytes, offset, remainder, remainderLength, length);
+            remainderLength += length;
         }
     }
 
@@ -81,29 +91,30 @@ public void reset() {
     }
 
     /**
-     * Completes the hash of all bytes previously passed to {@link #update(byte[])}.
+     * Completes the hash of all bytes previously passed to {@link #update}.
      */
     public byte[] digest() {
-        length += remainderLength;
-        MurmurHash3.Hash128 h = MurmurHash3.finalizeHash(new MurmurHash3.Hash128(), remainder, 0, length, h1, h2);
-        byte[] hash = new byte[16];
-        System.arraycopy(Numbers.longToBytes(h.h1), 0, hash, 0, 8);
-        System.arraycopy(Numbers.longToBytes(h.h2), 0, hash, 8, 8);
-        return hash;
+        return digestHash().getBytes();
     }
 
-    public static String getAlgorithm() {
-        return METHOD;
+    /**
+     * Completes the hash of all bytes previously passed to {@link #update}.
+     */
+    public MurmurHash3.Hash128 digestHash() {
+        return digestHash(new MurmurHash3.Hash128());
     }
 
     /**
-     * Converts the 128-bit byte array returned by {@link #digest()} to a
-     * {@link org.elasticsearch.common.hash.MurmurHash3.Hash128}
+     * Completes the hash of all bytes previously passed to {@link #update}.
+     * Allows passing in a re-usable {@link org.elasticsearch.common.hash.MurmurHash3.Hash128} instance to avoid allocations.
      */
-    public static MurmurHash3.Hash128 toHash128(byte[] doubleLongBytes) {
-        MurmurHash3.Hash128 hash128 = new MurmurHash3.Hash128();
-        hash128.h1 = Numbers.bytesToLong(new BytesRef(doubleLongBytes, 0, 8));
-        hash128.h2 = Numbers.bytesToLong(new BytesRef(doubleLongBytes, 8, 8));
-        return hash128;
+    public MurmurHash3.Hash128 digestHash(MurmurHash3.Hash128 hash) {
+        // the buffered remainder is counted here rather than added to the length field, so repeated digest calls agree
+        MurmurHash3.finalizeHash(hash, remainder, 0, length + remainderLength, h1, h2);
+        return hash;
+    }
+
+    public static String getAlgorithm() {
+        return METHOD;
     }
 }
diff --git a/server/src/main/java/org/elasticsearch/common/hash/MurmurHash3.java b/server/src/main/java/org/elasticsearch/common/hash/MurmurHash3.java
index 903b7a080a6ca..6d6fdbc45ec99 100644
--- a/server/src/main/java/org/elasticsearch/common/hash/MurmurHash3.java
+++ b/server/src/main/java/org/elasticsearch/common/hash/MurmurHash3.java
@@ -8,7 +8,6 @@
 
 package org.elasticsearch.common.hash;
 
-import org.elasticsearch.common.Numbers;
 import org.elasticsearch.common.util.ByteUtils;
 
 import java.math.BigInteger;
@@ -29,6 +28,24 @@ public static class Hash128 {
         /** higher 64 bits part **/
         public long h2;
 
+        /**
+         * Returns the hash as a newly allocated 16-byte array, with {@code h1} written at index 0 and {@code h2} at index 8.
+         */
+        public byte[] getBytes() {
+            byte[] hash = new byte[16];
+            getBytes(hash, 0);
+            return hash;
+        }
+
+        /**
+         * Writes the hash into {@code bytes} via {@link ByteUtils#writeLongBE}: {@code h1} at {@code offset} and
+         * {@code h2} at {@code offset + 8}. Lets callers reuse an existing array instead of allocating one.
+         */
+        public void getBytes(byte[] bytes, int offset) {
+            ByteUtils.writeLongBE(h1, bytes, offset);
+            ByteUtils.writeLongBE(h2, bytes, offset + 8);
+        }
+
         @Override
         public boolean equals(Object other) {
             if (this == other) {
@@ -49,8 +66,7 @@ public int hashCode() {
         @Override
         public String toString() {
             byte[] longBytes = new byte[17];
-            System.arraycopy(Numbers.longToBytes(h1), 0, longBytes, 1, 8);
-            System.arraycopy(Numbers.longToBytes(h2), 0, longBytes, 9, 8);
+            getBytes(longBytes, 1);
             BigInteger bi = new BigInteger(longBytes);
             return "0x" + bi.toString(16);
         }
diff --git a/server/src/test/java/org/elasticsearch/common/hashing/Murmur3HasherTests.java b/server/src/test/java/org/elasticsearch/common/hashing/Murmur3HasherTests.java
index 8574f8debb8c0..fdebec676192c 100644
--- a/server/src/test/java/org/elasticsearch/common/hashing/Murmur3HasherTests.java
+++ b/server/src/test/java/org/elasticsearch/common/hashing/Murmur3HasherTests.java
@@ -8,14 +8,13 @@
 
 package org.elasticsearch.common.hashing;
 
+import org.elasticsearch.common.Numbers;
 import org.elasticsearch.common.hash.Murmur3Hasher;
 import org.elasticsearch.common.hash.MurmurHash3;
 import org.elasticsearch.test.ESTestCase;
 
 import java.nio.charset.StandardCharsets;
 
-import static org.hamcrest.Matchers.equalTo;
-
 public class Murmur3HasherTests extends ESTestCase {
 
     public void testKnownValues() {
@@ -37,13 +36,21 @@ private static void assertHash(long lower, long upper, String inputString, long
         byte[] bytes = inputString.getBytes(StandardCharsets.UTF_8);
         Murmur3Hasher mh = new Murmur3Hasher(seed);
         mh.update(bytes);
-        MurmurHash3.Hash128 actual = Murmur3Hasher.toHash128(mh.digest());
+        MurmurHash3.Hash128 actual = mh.digestHash();
         assertHash(expected, actual);
     }
 
     private static void assertHash(MurmurHash3.Hash128 expected, MurmurHash3.Hash128 actual) {
         assertEquals(expected.h1, actual.h1);
         assertEquals(expected.h2, actual.h2);
+        assertEquals(expected, toHash128(expected.getBytes()));
+    }
+
+    public static MurmurHash3.Hash128 toHash128(byte[] doubleLongBytes) {
+        MurmurHash3.Hash128 hash128 = new MurmurHash3.Hash128();
+        hash128.h1 = Numbers.bytesToLong(doubleLongBytes, 0);
+        hash128.h2 = Numbers.bytesToLong(doubleLongBytes, 8);
+        return hash128;
     }
 
     public void testSingleVsSequentialMurmur3() {
@@ -85,7 +92,7 @@ public void testSingleVsSequentialMurmur3() {
                 mh.update(splitBytes[k]);
             }
         }
-        MurmurHash3.Hash128 sequentialHash = Murmur3Hasher.toHash128(mh.digest());
-        assertThat(singleHash, equalTo(sequentialHash));
+        MurmurHash3.Hash128 sequentialHash = mh.digestHash();
+        assertHash(singleHash, sequentialHash);
     }
 }