From b2ec3ef4e6bb0d4600ea52d504b645fc0aa2dfa2 Mon Sep 17 00:00:00 2001 From: Ashish Singh Date: Thu, 11 Apr 2024 14:56:31 +0530 Subject: [PATCH] Add exponential scaling FNV composite value hash algorithm for remote path Signed-off-by: Ashish Singh --- .../remotestore/RemoteRestoreSnapshotIT.java | 6 +- .../index/remote/RemoteStoreEnums.java | 19 +++- .../RemoteStorePathStrategyResolver.java | 2 +- .../index/remote/RemoteStoreUtils.java | 65 ++++++++++++- .../MetadataCreateIndexServiceTests.java | 2 +- .../index/remote/RemoteStoreEnumsTests.java | 56 +++++------ .../index/remote/RemoteStoreUtilsTests.java | 96 ++++++++++++++++++- ...oteStoreShardShallowCopySnapshotTests.java | 10 +- .../RemoteSegmentStoreDirectoryTests.java | 2 +- 9 files changed, 213 insertions(+), 45 deletions(-) diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteRestoreSnapshotIT.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteRestoreSnapshotIT.java index d34a5f4edbaec..c06cffd389967 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteRestoreSnapshotIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteRestoreSnapshotIT.java @@ -272,13 +272,13 @@ public void testRemoteStoreCustomDataOnIndexCreationAndRestore() { .get(); assertEquals(RestStatus.ACCEPTED, restoreSnapshotResponse.status()); ensureGreen(restoredIndexName1version2); - validatePathType(restoredIndexName1version2, PathType.HASHED_PREFIX, PathHashAlgorithm.FNV_1A); + validatePathType(restoredIndexName1version2, PathType.HASHED_PREFIX, PathHashAlgorithm.FNV_1A_BASE64); // Create index with cluster setting cluster.remote_store.index.path.prefix.type as hashed_prefix. 
indexSettings = getIndexSettings(1, 0).build(); createIndex(indexName2, indexSettings); ensureGreen(indexName2); - validatePathType(indexName2, PathType.HASHED_PREFIX, PathHashAlgorithm.FNV_1A); + validatePathType(indexName2, PathType.HASHED_PREFIX, PathHashAlgorithm.FNV_1A_BASE64); // Validating that custom data has not changed for indexes which were created before the cluster setting got updated validatePathType(indexName1, PathType.FIXED); @@ -309,7 +309,7 @@ public void testRemoteStoreCustomDataOnIndexCreationAndRestore() { ensureGreen(indexName2); // Validating that custom data has not changed for testindex2 which was created before the cluster setting got updated - validatePathType(indexName2, PathType.HASHED_PREFIX, PathHashAlgorithm.FNV_1A); + validatePathType(indexName2, PathType.HASHED_PREFIX, PathHashAlgorithm.FNV_1A_BASE64); } private void validatePathType(String index, PathType pathType) { diff --git a/server/src/main/java/org/opensearch/index/remote/RemoteStoreEnums.java b/server/src/main/java/org/opensearch/index/remote/RemoteStoreEnums.java index b51abf19fc000..d95246ee7aa3c 100644 --- a/server/src/main/java/org/opensearch/index/remote/RemoteStoreEnums.java +++ b/server/src/main/java/org/opensearch/index/remote/RemoteStoreEnums.java @@ -23,6 +23,8 @@ import static java.util.Collections.unmodifiableMap; import static org.opensearch.index.remote.RemoteStoreEnums.DataType.DATA; import static org.opensearch.index.remote.RemoteStoreEnums.DataType.METADATA; +import static org.opensearch.index.remote.RemoteStoreUtils.longToCompositeUrlBase64AndBinaryEncodingUsing20Bits; +import static org.opensearch.index.remote.RemoteStoreUtils.longToUrlBase64; /** * This class contains the different enums related to remote store like data categories and types, path types @@ -216,13 +218,26 @@ public static PathType parseString(String pathType) { @PublicApi(since = "2.14.0") public enum PathHashAlgorithm { - FNV_1A(0) { + FNV_1A_BASE64(0) { @Override String hash(PathInput 
pathInput) { String input = pathInput.indexUUID() + pathInput.shardId() + pathInput.dataCategory().getName() + pathInput.dataType() .getName(); long hash = FNV1a.hash64(input); - return RemoteStoreUtils.longToUrlBase64(hash); + return longToUrlBase64(hash); + } + }, + /** + * This hash algorithm will generate a hash value which will use 1st 6 bits to create base64 character and next 14 + * bits to create binary string. + */ + FNV_1A_COMPOSITE(1) { + @Override + String hash(PathInput pathInput) { + String input = pathInput.indexUUID() + pathInput.shardId() + pathInput.dataCategory().getName() + pathInput.dataType() + .getName(); + long hash = FNV1a.hash64(input); + return longToCompositeUrlBase64AndBinaryEncodingUsing20Bits(hash); } }; diff --git a/server/src/main/java/org/opensearch/index/remote/RemoteStorePathStrategyResolver.java b/server/src/main/java/org/opensearch/index/remote/RemoteStorePathStrategyResolver.java index 5b067115df781..0e6aa3afa1e64 100644 --- a/server/src/main/java/org/opensearch/index/remote/RemoteStorePathStrategyResolver.java +++ b/server/src/main/java/org/opensearch/index/remote/RemoteStorePathStrategyResolver.java @@ -39,7 +39,7 @@ public RemoteStorePathStrategy get() { // Min node version check ensures that we are enabling the new prefix type only when all the nodes understand it. pathType = Version.CURRENT.compareTo(minNodeVersionSupplier.get()) <= 0 ? type : PathType.FIXED; // If the path type is fixed, hash algorithm is not applicable. - pathHashAlgorithm = pathType == PathType.FIXED ? null : PathHashAlgorithm.FNV_1A; + pathHashAlgorithm = pathType == PathType.FIXED ? 
null : PathHashAlgorithm.FNV_1A_BASE64; return new RemoteStorePathStrategy(pathType, pathHashAlgorithm); } diff --git a/server/src/main/java/org/opensearch/index/remote/RemoteStoreUtils.java b/server/src/main/java/org/opensearch/index/remote/RemoteStoreUtils.java index 7d0743e70b6cb..742abdc9ad0c4 100644 --- a/server/src/main/java/org/opensearch/index/remote/RemoteStoreUtils.java +++ b/server/src/main/java/org/opensearch/index/remote/RemoteStoreUtils.java @@ -10,11 +10,14 @@ import org.opensearch.common.collect.Tuple; +import java.math.BigInteger; import java.nio.ByteBuffer; import java.util.Arrays; import java.util.Base64; +import java.util.Collections; import java.util.HashMap; import java.util.List; +import java.util.Locale; import java.util.Map; import java.util.function.Function; @@ -26,10 +29,26 @@ public class RemoteStoreUtils { public static final int LONG_MAX_LENGTH = String.valueOf(Long.MAX_VALUE).length(); + /** + * URL safe base 64 character set. This must not be changed as this is used in deriving the base64 equivalent of binary. + */ + private static final char[] URL_BASE64_CHARSET = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_".toCharArray(); + + private static Map BASE64_CHARSET_IDX_MAP; + + static { + Map charToIndexMap = new HashMap<>(); + for (int i = 0; i < URL_BASE64_CHARSET.length; i++) { + charToIndexMap.put(URL_BASE64_CHARSET[i], i); + } + BASE64_CHARSET_IDX_MAP = Collections.unmodifiableMap(charToIndexMap); + } + /** * This method subtracts given numbers from Long.MAX_VALUE and returns a string representation of the result. * The resultant string is guaranteed to be of the same length that of Long.MAX_VALUE. If shorter, we add left padding * of 0s to the string. 
+ * * @param num number to get the inverted long string for * @return String value of Long.MAX_VALUE - num */ @@ -46,6 +65,7 @@ public static String invertLong(long num) { /** * This method converts the given string into long and subtracts it from Long.MAX_VALUE + * * @param str long in string format to be inverted * @return long value of the invert result */ @@ -59,6 +79,7 @@ public static long invertLong(String str) { /** * Extracts the segment name from the provided segment file name + * * @param filename Segment file name to parse * @return Name of the segment that the segment file belongs to */ @@ -79,10 +100,9 @@ public static String getSegmentName(String filename) { } /** - * * @param mdFiles List of segment/translog metadata files - * @param fn Function to extract PrimaryTerm_Generation and Node Id from metadata file name . - * fn returns null if node id is not part of the file name + * @param fn Function to extract PrimaryTerm_Generation and Node Id from metadata file name . + * fn returns null if node id is not part of the file name */ public static void verifyNoMultipleWriters(List mdFiles, Function> fn) { Map nodesByPrimaryTermAndGen = new HashMap<>(); @@ -116,4 +136,43 @@ static String longToUrlBase64(long value) { String base64Str = Base64.getUrlEncoder().encodeToString(hashBytes); return base64Str.substring(0, base64Str.length() - 1); } + + static long urlBase64ToLong(String base64Str) { + byte[] hashBytes = Base64.getUrlDecoder().decode(base64Str); + return ByteBuffer.wrap(hashBytes).getLong(); + } + + /** + * Converts an input hash which occupies 64 bits of memory into a composite encoded string. The string will have 2 parts - + * 1. Base 64 string and 2. Binary String. We will use the first 6 bits for creating the base 64 string. + * For the second part, we will use the next 14 bits. For example - A01000101010001. 
+ */ + static String longToCompositeUrlBase64AndBinaryEncodingUsing20Bits(long value) { + return longToCompositeBase64AndBinaryEncoding(value, 20); + } + + /** + * Converts an input hash which occupies 64 bits of memory into a composite encoded string. The string will have 2 parts - + * 1. Base 64 string and 2. Binary String. We will use the first 6 bits for creating the base 64 string. + * For the second part, the bits from index 6 up to (but excluding) len will be used as is in string form. + */ + static String longToCompositeBase64AndBinaryEncoding(long value, int len) { + if (len < 7 || len > 64) { + throw new IllegalArgumentException("In longToCompositeBase64AndBinaryEncoding, len must be between 7 and 64 (both inclusive)"); + } + String binaryEncoding = String.format(Locale.ROOT, "%64s", Long.toBinaryString(value)).replace(' ', '0'); + String base64Part = binaryEncoding.substring(0, 6); + String binaryPart = binaryEncoding.substring(6, len); + int base64DecimalValue = Integer.valueOf(base64Part, 2); + assert base64DecimalValue >= 0 && base64DecimalValue < 64; + return URL_BASE64_CHARSET[base64DecimalValue] + binaryPart; + } + + static long compositeUrlBase64BinaryEncodingToLong(String encodedValue) { + char ch = encodedValue.charAt(0); + int base64BitsIntValue = BASE64_CHARSET_IDX_MAP.get(ch); + String base64PartBinary = Integer.toBinaryString(base64BitsIntValue); + String binaryString = base64PartBinary + encodedValue.substring(1); + return new BigInteger(binaryString, 2).longValue(); + } } diff --git a/server/src/test/java/org/opensearch/cluster/metadata/MetadataCreateIndexServiceTests.java b/server/src/test/java/org/opensearch/cluster/metadata/MetadataCreateIndexServiceTests.java index fa71b77648d35..634d9b160a9b7 100644 --- a/server/src/test/java/org/opensearch/cluster/metadata/MetadataCreateIndexServiceTests.java +++ b/server/src/test/java/org/opensearch/cluster/metadata/MetadataCreateIndexServiceTests.java @@ -1607,7 +1607,7 @@ public void testRemoteCustomData() { 
validateRemoteCustomData( indexMetadata.getCustomData(IndexMetadata.REMOTE_STORE_CUSTOM_KEY), PathHashAlgorithm.NAME, - PathHashAlgorithm.FNV_1A.name() + PathHashAlgorithm.FNV_1A_BASE64.name() ); } diff --git a/server/src/test/java/org/opensearch/index/remote/RemoteStoreEnumsTests.java b/server/src/test/java/org/opensearch/index/remote/RemoteStoreEnumsTests.java index fe5635063f783..b5781de5ae2bb 100644 --- a/server/src/test/java/org/opensearch/index/remote/RemoteStoreEnumsTests.java +++ b/server/src/test/java/org/opensearch/index/remote/RemoteStoreEnumsTests.java @@ -25,7 +25,7 @@ import static org.opensearch.index.remote.RemoteStoreEnums.DataType.DATA; import static org.opensearch.index.remote.RemoteStoreEnums.DataType.LOCK_FILES; import static org.opensearch.index.remote.RemoteStoreEnums.DataType.METADATA; -import static org.opensearch.index.remote.RemoteStoreEnums.PathHashAlgorithm.FNV_1A; +import static org.opensearch.index.remote.RemoteStoreEnums.PathHashAlgorithm.FNV_1A_BASE64; import static org.opensearch.index.remote.RemoteStoreEnums.PathType.FIXED; import static org.opensearch.index.remote.RemoteStoreEnums.PathType.HASHED_INFIX; import static org.opensearch.index.remote.RemoteStoreEnums.PathType.HASHED_PREFIX; @@ -161,10 +161,10 @@ public void testGeneratePathForHashedPrefixType() { .dataCategory(dataCategory) .dataType(dataType) .build(); - BlobPath result = HASHED_PREFIX.path(pathInput, FNV_1A); + BlobPath result = HASHED_PREFIX.path(pathInput, FNV_1A_BASE64); assertTrue( result.buildAsString() - .startsWith(String.join(SEPARATOR, FNV_1A.hash(pathInput), basePath, dataCategory.getName(), dataType.getName())) + .startsWith(String.join(SEPARATOR, FNV_1A_BASE64.hash(pathInput), basePath, dataCategory.getName(), dataType.getName())) ); // assert with exact value for known base path @@ -178,7 +178,7 @@ public void testGeneratePathForHashedPrefixType() { .dataCategory(dataCategory) .dataType(dataType) .build(); - result = HASHED_PREFIX.path(pathInput, 
FNV_1A); + result = HASHED_PREFIX.path(pathInput, FNV_1A_BASE64); assertEquals("DgSI70IciXs/xjsdhj/ddjsha/yudy7sd/32hdhua7/89jdij/k2ijhe877d7yuhx7/10/translog/data/", result.buildAsString()); // Translog Metadata @@ -190,10 +190,10 @@ public void testGeneratePathForHashedPrefixType() { .dataCategory(dataCategory) .dataType(dataType) .build(); - result = HASHED_PREFIX.path(pathInput, FNV_1A); + result = HASHED_PREFIX.path(pathInput, FNV_1A_BASE64); assertTrue( result.buildAsString() - .startsWith(String.join(SEPARATOR, FNV_1A.hash(pathInput), basePath, dataCategory.getName(), dataType.getName())) + .startsWith(String.join(SEPARATOR, FNV_1A_BASE64.hash(pathInput), basePath, dataCategory.getName(), dataType.getName())) ); // assert with exact value for known base path @@ -204,7 +204,7 @@ public void testGeneratePathForHashedPrefixType() { .dataCategory(dataCategory) .dataType(dataType) .build(); - result = HASHED_PREFIX.path(pathInput, FNV_1A); + result = HASHED_PREFIX.path(pathInput, FNV_1A_BASE64); assertEquals("oKU5SjILiy4/xjsdhj/ddjsha/yudy7sd/32hdhua7/89jdij/k2ijhe877d7yuhx7/10/translog/metadata/", result.buildAsString()); // Translog Lock files - This is a negative case where the assertion will trip. 
@@ -238,10 +238,10 @@ public void testGeneratePathForHashedPrefixType() { .dataCategory(dataCategory) .dataType(dataType) .build(); - result = HASHED_PREFIX.path(pathInput, FNV_1A); + result = HASHED_PREFIX.path(pathInput, FNV_1A_BASE64); assertTrue( result.buildAsString() - .startsWith(String.join(SEPARATOR, FNV_1A.hash(pathInput), basePath, dataCategory.getName(), dataType.getName())) + .startsWith(String.join(SEPARATOR, FNV_1A_BASE64.hash(pathInput), basePath, dataCategory.getName(), dataType.getName())) ); // assert with exact value for known base path @@ -252,7 +252,7 @@ public void testGeneratePathForHashedPrefixType() { .dataCategory(dataCategory) .dataType(dataType) .build(); - result = HASHED_PREFIX.path(pathInput, FNV_1A); + result = HASHED_PREFIX.path(pathInput, FNV_1A_BASE64); assertEquals("AUBRfCIuWdk/xjsdhj/ddjsha/yudy7sd/32hdhua7/89jdij/k2ijhe877d7yuhx7/10/segments/data/", result.buildAsString()); // Segment Metadata @@ -264,10 +264,10 @@ public void testGeneratePathForHashedPrefixType() { .dataCategory(dataCategory) .dataType(dataType) .build(); - result = HASHED_PREFIX.path(pathInput, FNV_1A); + result = HASHED_PREFIX.path(pathInput, FNV_1A_BASE64); assertTrue( result.buildAsString() - .startsWith(String.join(SEPARATOR, FNV_1A.hash(pathInput), basePath, dataCategory.getName(), dataType.getName())) + .startsWith(String.join(SEPARATOR, FNV_1A_BASE64.hash(pathInput), basePath, dataCategory.getName(), dataType.getName())) ); // assert with exact value for known base path @@ -278,7 +278,7 @@ public void testGeneratePathForHashedPrefixType() { .dataCategory(dataCategory) .dataType(dataType) .build(); - result = HASHED_PREFIX.path(pathInput, FNV_1A); + result = HASHED_PREFIX.path(pathInput, FNV_1A_BASE64); assertEquals("erwR-G735Uw/xjsdhj/ddjsha/yudy7sd/32hdhua7/89jdij/k2ijhe877d7yuhx7/10/segments/metadata/", result.buildAsString()); // Segment Lockfiles @@ -290,10 +290,10 @@ public void testGeneratePathForHashedPrefixType() { .dataCategory(dataCategory) 
.dataType(dataType) .build(); - result = HASHED_PREFIX.path(pathInput, FNV_1A); + result = HASHED_PREFIX.path(pathInput, FNV_1A_BASE64); assertTrue( result.buildAsString() - .startsWith(String.join(SEPARATOR, FNV_1A.hash(pathInput), basePath, dataCategory.getName(), dataType.getName())) + .startsWith(String.join(SEPARATOR, FNV_1A_BASE64.hash(pathInput), basePath, dataCategory.getName(), dataType.getName())) ); // assert with exact value for known base path @@ -304,7 +304,7 @@ public void testGeneratePathForHashedPrefixType() { .dataCategory(dataCategory) .dataType(dataType) .build(); - result = HASHED_PREFIX.path(pathInput, FNV_1A); + result = HASHED_PREFIX.path(pathInput, FNV_1A_BASE64); assertEquals("KeYDIk0mJXI/xjsdhj/ddjsha/yudy7sd/32hdhua7/89jdij/k2ijhe877d7yuhx7/10/segments/lock_files/", result.buildAsString()); } @@ -330,7 +330,7 @@ public void testGeneratePathForHashedInfixType() { .dataCategory(dataCategory) .dataType(dataType) .build(); - BlobPath result = HASHED_INFIX.path(pathInput, FNV_1A); + BlobPath result = HASHED_INFIX.path(pathInput, FNV_1A_BASE64); String expected = derivePath(basePath, pathInput); String actual = result.buildAsString(); assertTrue(new ParameterizedMessage("expected={} actual={}", expected, actual).getFormattedMessage(), actual.startsWith(expected)); @@ -346,7 +346,7 @@ public void testGeneratePathForHashedInfixType() { .dataCategory(dataCategory) .dataType(dataType) .build(); - result = HASHED_INFIX.path(pathInput, FNV_1A); + result = HASHED_INFIX.path(pathInput, FNV_1A_BASE64); expected = "xjsdhj/ddjsha/yudy7sd/32hdhua7/89jdij/DgSI70IciXs/k2ijhe877d7yuhx7/10/translog/data/"; actual = result.buildAsString(); assertTrue(new ParameterizedMessage("expected={} actual={}", expected, actual).getFormattedMessage(), actual.startsWith(expected)); @@ -361,7 +361,7 @@ public void testGeneratePathForHashedInfixType() { .dataType(dataType) .build(); - result = HASHED_INFIX.path(pathInput, FNV_1A); + result = HASHED_INFIX.path(pathInput, 
FNV_1A_BASE64); expected = derivePath(basePath, pathInput); actual = result.buildAsString(); assertTrue(new ParameterizedMessage("expected={} actual={}", expected, actual).getFormattedMessage(), actual.startsWith(expected)); @@ -374,7 +374,7 @@ public void testGeneratePathForHashedInfixType() { .dataCategory(dataCategory) .dataType(dataType) .build(); - result = HASHED_INFIX.path(pathInput, FNV_1A); + result = HASHED_INFIX.path(pathInput, FNV_1A_BASE64); expected = "xjsdhj/ddjsha/yudy7sd/32hdhua7/89jdij/oKU5SjILiy4/k2ijhe877d7yuhx7/10/translog/metadata/"; actual = result.buildAsString(); assertTrue(new ParameterizedMessage("expected={} actual={}", expected, actual).getFormattedMessage(), actual.startsWith(expected)); @@ -410,7 +410,7 @@ public void testGeneratePathForHashedInfixType() { .dataCategory(dataCategory) .dataType(dataType) .build(); - result = HASHED_INFIX.path(pathInput, FNV_1A); + result = HASHED_INFIX.path(pathInput, FNV_1A_BASE64); expected = derivePath(basePath, pathInput); actual = result.buildAsString(); assertTrue(new ParameterizedMessage("expected={} actual={}", expected, actual).getFormattedMessage(), actual.startsWith(expected)); @@ -423,7 +423,7 @@ public void testGeneratePathForHashedInfixType() { .dataCategory(dataCategory) .dataType(dataType) .build(); - result = HASHED_INFIX.path(pathInput, FNV_1A); + result = HASHED_INFIX.path(pathInput, FNV_1A_BASE64); expected = "xjsdhj/ddjsha/yudy7sd/32hdhua7/89jdij/AUBRfCIuWdk/k2ijhe877d7yuhx7/10/segments/data/"; actual = result.buildAsString(); assertTrue(new ParameterizedMessage("expected={} actual={}", expected, actual).getFormattedMessage(), actual.startsWith(expected)); @@ -437,7 +437,7 @@ public void testGeneratePathForHashedInfixType() { .dataCategory(dataCategory) .dataType(dataType) .build(); - result = HASHED_INFIX.path(pathInput, FNV_1A); + result = HASHED_INFIX.path(pathInput, FNV_1A_BASE64); expected = derivePath(basePath, pathInput); actual = result.buildAsString(); assertTrue(new 
ParameterizedMessage("expected={} actual={}", expected, actual).getFormattedMessage(), actual.startsWith(expected)); @@ -450,7 +450,7 @@ public void testGeneratePathForHashedInfixType() { .dataCategory(dataCategory) .dataType(dataType) .build(); - result = HASHED_INFIX.path(pathInput, FNV_1A); + result = HASHED_INFIX.path(pathInput, FNV_1A_BASE64); expected = "xjsdhj/ddjsha/yudy7sd/32hdhua7/89jdij/erwR-G735Uw/k2ijhe877d7yuhx7/10/segments/metadata/"; actual = result.buildAsString(); assertTrue(new ParameterizedMessage("expected={} actual={}", expected, actual).getFormattedMessage(), actual.startsWith(expected)); @@ -464,7 +464,7 @@ public void testGeneratePathForHashedInfixType() { .dataCategory(dataCategory) .dataType(dataType) .build(); - result = HASHED_INFIX.path(pathInput, FNV_1A); + result = HASHED_INFIX.path(pathInput, FNV_1A_BASE64); expected = derivePath(basePath, pathInput); actual = result.buildAsString(); assertTrue(new ParameterizedMessage("expected={} actual={}", expected, actual).getFormattedMessage(), actual.startsWith(expected)); @@ -477,7 +477,7 @@ public void testGeneratePathForHashedInfixType() { .dataCategory(dataCategory) .dataType(dataType) .build(); - result = HASHED_INFIX.path(pathInput, FNV_1A); + result = HASHED_INFIX.path(pathInput, FNV_1A_BASE64); expected = "xjsdhj/ddjsha/yudy7sd/32hdhua7/89jdij/KeYDIk0mJXI/k2ijhe877d7yuhx7/10/segments/lock_files/"; actual = result.buildAsString(); assertTrue(new ParameterizedMessage("expected={} actual={}", expected, actual).getFormattedMessage(), actual.startsWith(expected)); @@ -487,7 +487,7 @@ private String derivePath(String basePath, PathInput pathInput) { return "".equals(basePath) ? 
String.join( SEPARATOR, - FNV_1A.hash(pathInput), + FNV_1A_BASE64.hash(pathInput), pathInput.indexUUID(), pathInput.shardId(), pathInput.dataCategory().getName(), @@ -496,7 +496,7 @@ private String derivePath(String basePath, PathInput pathInput) { : String.join( SEPARATOR, basePath, - FNV_1A.hash(pathInput), + FNV_1A_BASE64.hash(pathInput), pathInput.indexUUID(), pathInput.shardId(), pathInput.dataCategory().getName(), diff --git a/server/src/test/java/org/opensearch/index/remote/RemoteStoreUtilsTests.java b/server/src/test/java/org/opensearch/index/remote/RemoteStoreUtilsTests.java index 34074861f2764..4bd582d5e2afe 100644 --- a/server/src/test/java/org/opensearch/index/remote/RemoteStoreUtilsTests.java +++ b/server/src/test/java/org/opensearch/index/remote/RemoteStoreUtilsTests.java @@ -20,7 +20,11 @@ import java.util.Map; import java.util.stream.Collectors; +import static org.opensearch.index.remote.RemoteStoreUtils.compositeUrlBase64BinaryEncodingToLong; +import static org.opensearch.index.remote.RemoteStoreUtils.longToCompositeBase64AndBinaryEncoding; +import static org.opensearch.index.remote.RemoteStoreUtils.longToCompositeUrlBase64AndBinaryEncodingUsing20Bits; import static org.opensearch.index.remote.RemoteStoreUtils.longToUrlBase64; +import static org.opensearch.index.remote.RemoteStoreUtils.urlBase64ToLong; import static org.opensearch.index.remote.RemoteStoreUtils.verifyNoMultipleWriters; import static org.opensearch.index.store.RemoteSegmentStoreDirectory.MetadataFilenameUtils.METADATA_PREFIX; import static org.opensearch.index.store.RemoteSegmentStoreDirectory.MetadataFilenameUtils.SEPARATOR; @@ -205,8 +209,98 @@ public void testLongToBase64() { "6kv3yZNv9kY" ); for (Map.Entry entry : longToExpectedBase64String.entrySet()) { - assertEquals(entry.getValue(), longToUrlBase64(entry.getKey())); + String base64Str = longToUrlBase64(entry.getKey()); + assertEquals(entry.getValue(), base64Str); assertEquals(11, entry.getValue().length()); + 
assertEquals((long) entry.getKey(), urlBase64ToLong(base64Str)); + } + + int iters = randomInt(100); + for (int i = 0; i < iters; i++) { + long value = randomLong(); + String base64Str = longToUrlBase64(value); + assertEquals(value, urlBase64ToLong(base64Str)); + } + } + + public void testLongToCompositeUrlBase64AndBinaryEncodingUsing20Bits() { + Map longToExpectedBase64String = Map.of( + -5537941589147079860L, + "s11001001010100", + -5878421770170594047L, + "r10011010111010", + -5147010836697060622L, + "u00100100100010", + 937096430362711837L, + "D01000000010011", + 8422273604115462710L, + "d00111000011110", + -2528761975013221124L, + "300111010000000", + -5512387536280560513L, + "s11100000000001", + -5749656451579835857L, + "s00001101010001", + 5569654857969679538L, + "T01010010110110", + -1563884000447039930L, + "610010010111111" + ); + for (Map.Entry entry : longToExpectedBase64String.entrySet()) { + String base64Str = longToCompositeUrlBase64AndBinaryEncodingUsing20Bits(entry.getKey()); + assertEquals(entry.getValue(), base64Str); + assertEquals(15, entry.getValue().length()); + assertEquals(longToUrlBase64(entry.getKey()).charAt(0), base64Str.charAt(0)); + } + + int iters = randomInt(1000); + for (int i = 0; i < iters; i++) { + long value = randomLong(); + assertEquals(longToCompositeUrlBase64AndBinaryEncodingUsing20Bits(value).charAt(0), longToUrlBase64(value).charAt(0)); + } + } + + public void testLongToCompositeUrlBase64AndBinaryEncoding() { + Map longToExpectedBase64String = Map.of( + -5537941589147079860L, + "s1100100101010001110111011101001000000001101010101101001100", + -5878421770170594047L, + "r1001101011101001101000101110010101000011110000110100000001", + -5147010836697060622L, + "u0010010010001001001110100111111111100101011110101011110010", + 937096430362711837L, + "D0100000001001111000011110100001100000011100101011100011101", + 8422273604115462710L, + "d0011100001111011010011100001000110011100110111101000110110", + -2528761975013221124L, + 
"30011101000000010000110000110110101110100100101110011111100", + -5512387536280560513L, + "s1110000000000100001011110111011011101101001101110001111111", + -5749656451579835857L, + "s0000110101000111011110101110010111000011010000101000101111", + 5569654857969679538L, + "T0101001011011000111001010110000010110011111011110010110010", + -1563884000447039930L, + "61001001011111101111100100110010011011011111111011001000110" + ); + for (Map.Entry entry : longToExpectedBase64String.entrySet()) { + Long hashValue = entry.getKey(); + String expectedCompositeEncoding = entry.getValue(); + String actualCompositeEncoding = longToCompositeBase64AndBinaryEncoding(hashValue, 64); + assertEquals(expectedCompositeEncoding, actualCompositeEncoding); + assertEquals(59, expectedCompositeEncoding.length()); + assertEquals(longToUrlBase64(entry.getKey()).charAt(0), actualCompositeEncoding.charAt(0)); + assertEquals(longToCompositeUrlBase64AndBinaryEncodingUsing20Bits(hashValue), actualCompositeEncoding.substring(0, 15)); + + Long computedHashValue = compositeUrlBase64BinaryEncodingToLong(actualCompositeEncoding); + assertEquals(hashValue, computedHashValue); + } + + int iters = randomInt(1000); + for (int i = 0; i < iters; i++) { + long value = randomLong(); + String compositeEncoding = longToCompositeBase64AndBinaryEncoding(value, 64); + assertEquals(value, compositeUrlBase64BinaryEncodingToLong(compositeEncoding)); } } } diff --git a/server/src/test/java/org/opensearch/index/snapshots/blobstore/RemoteStoreShardShallowCopySnapshotTests.java b/server/src/test/java/org/opensearch/index/snapshots/blobstore/RemoteStoreShardShallowCopySnapshotTests.java index e3259a3097278..fc96a168d076f 100644 --- a/server/src/test/java/org/opensearch/index/snapshots/blobstore/RemoteStoreShardShallowCopySnapshotTests.java +++ b/server/src/test/java/org/opensearch/index/snapshots/blobstore/RemoteStoreShardShallowCopySnapshotTests.java @@ -119,7 +119,7 @@ public void testToXContent() throws IOException { 
repositoryBasePath, fileNames, PathType.HASHED_PREFIX, - PathHashAlgorithm.FNV_1A + PathHashAlgorithm.FNV_1A_BASE64 ); try (XContentBuilder builder = MediaTypeRegistry.JSON.contentBuilder()) { builder.startObject(); @@ -223,7 +223,7 @@ public void testFromXContent() throws IOException { repositoryBasePath, fileNames, PathType.HASHED_PREFIX, - PathHashAlgorithm.FNV_1A + PathHashAlgorithm.FNV_1A_BASE64 ); try (XContentParser parser = createParser(JsonXContent.jsonXContent, xContent)) { RemoteStoreShardShallowCopySnapshot actualShardShallowCopySnapshot = RemoteStoreShardShallowCopySnapshot.fromXContent(parser); @@ -296,19 +296,19 @@ public void testFromXContentInvalid() throws IOException { break; case 10: version = "1"; - pathHashAlgorithm = PathHashAlgorithm.FNV_1A; + pathHashAlgorithm = PathHashAlgorithm.FNV_1A_BASE64; failure = "Invalid combination of pathType=null pathHashAlgorithm=FNV_1A for version=1"; break; case 11: version = "2"; pathType = PathType.FIXED; - pathHashAlgorithm = PathHashAlgorithm.FNV_1A; + pathHashAlgorithm = PathHashAlgorithm.FNV_1A_BASE64; failure = "Invalid combination of pathType=FIXED pathHashAlgorithm=FNV_1A for version=2"; break; case 12: version = "2"; pathType = PathType.HASHED_PREFIX; - pathHashAlgorithm = PathHashAlgorithm.FNV_1A; + pathHashAlgorithm = PathHashAlgorithm.FNV_1A_BASE64; break; case 13: break; diff --git a/server/src/test/java/org/opensearch/index/store/RemoteSegmentStoreDirectoryTests.java b/server/src/test/java/org/opensearch/index/store/RemoteSegmentStoreDirectoryTests.java index 44ddd2de9d007..d95fc238a721e 100644 --- a/server/src/test/java/org/opensearch/index/store/RemoteSegmentStoreDirectoryTests.java +++ b/server/src/test/java/org/opensearch/index/store/RemoteSegmentStoreDirectoryTests.java @@ -706,7 +706,7 @@ public void testCleanupAsync() throws Exception { ShardId shardId = new ShardId(Index.UNKNOWN_INDEX_NAME, indexUUID, Integer.parseInt("0")); RemoteStorePathStrategy pathStrategy = randomFrom( new 
RemoteStorePathStrategy(PathType.FIXED), - new RemoteStorePathStrategy(PathType.HASHED_PREFIX, PathHashAlgorithm.FNV_1A) + new RemoteStorePathStrategy(PathType.HASHED_PREFIX, PathHashAlgorithm.FNV_1A_BASE64) ); RemoteSegmentStoreDirectory.remoteDirectoryCleanup(