From 2d1a4080d5b1601bf3362fecd85384348af1f326 Mon Sep 17 00:00:00 2001 From: Navneet Verma Date: Tue, 19 Nov 2024 13:47:02 -0800 Subject: [PATCH] Fixing the bug when a segment has no vector field present for disk based vector search (#2281) Signed-off-by: Navneet Verma --- CHANGELOG.md | 1 + .../knn/index/query/ResultUtil.java | 10 +++--- .../nativelib/NativeEngineKnnVectorQuery.java | 8 ++++- .../knn/index/query/ResultUtilTests.java | 9 +++++ .../NativeEngineKNNVectorQueryTests.java | 5 ++- .../knn/integ/ModeAndCompressionIT.java | 36 +++++++++++++++++++ .../org/opensearch/knn/KNNRestTestCase.java | 13 +++++++ 7 files changed, 75 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ed2cfea00..cc1f03718 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ### Enhancements - Introduced a writing layer in native engines where relies on the writing interface to process IO. (#2241)[https://github.com/opensearch-project/k-NN/pull/2241] ### Bug Fixes +* Fix NPE in ANN search when a segment doesn't contain vector field (#2278)[https://github.com/opensearch-project/k-NN/pull/2278] ### Infrastructure * Updated C++ version in JNI from c++11 to c++17 [#2259](https://github.com/opensearch-project/k-NN/pull/2259) * Upgrade bytebuddy and objenesis version to match OpenSearch core and, update github ci runner for macos [#2279](https://github.com/opensearch-project/k-NN/pull/2279) diff --git a/src/main/java/org/opensearch/knn/index/query/ResultUtil.java b/src/main/java/org/opensearch/knn/index/query/ResultUtil.java index f62c09cb0..df1ce3827 100644 --- a/src/main/java/org/opensearch/knn/index/query/ResultUtil.java +++ b/src/main/java/org/opensearch/knn/index/query/ResultUtil.java @@ -58,17 +58,17 @@ public static void reduceToTopK(List> perLeafResults, int k) } /** - * Convert map to bit set + * Convert map to bit set, if resultMap is empty or null then returns an Optional. Returning an optional here to + * ensure that the caller is aware that BitSet may not be present * * @param resultMap Map of results - * @return BitSet of results + * @return BitSet of results; null is returned if the result map is empty * @throws IOException If an error occurs during the search. */ public static BitSet resultMapToMatchBitSet(Map resultMap) throws IOException { - if (resultMap.isEmpty()) { - return BitSet.of(DocIdSetIterator.empty(), 0); + if (resultMap == null || resultMap.isEmpty()) { + return null; } - final int maxDoc = Collections.max(resultMap.keySet()) + 1; return BitSet.of(resultMapToDocIds(resultMap, maxDoc), maxDoc); } diff --git a/src/main/java/org/opensearch/knn/index/query/nativelib/NativeEngineKnnVectorQuery.java b/src/main/java/org/opensearch/knn/index/query/nativelib/NativeEngineKnnVectorQuery.java index a34a0f1ee..f782b0180 100644 --- a/src/main/java/org/opensearch/knn/index/query/nativelib/NativeEngineKnnVectorQuery.java +++ b/src/main/java/org/opensearch/knn/index/query/nativelib/NativeEngineKnnVectorQuery.java @@ -30,6 +30,7 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.Comparator; import java.util.List; import java.util.Map; @@ -112,7 +113,12 @@ private List> doRescore( LeafReaderContext leafReaderContext = leafReaderContexts.get(i); int finalI = i; rescoreTasks.add(() -> { - BitSet convertedBitSet = ResultUtil.resultMapToMatchBitSet(perLeafResults.get(finalI)); + final BitSet convertedBitSet = ResultUtil.resultMapToMatchBitSet(perLeafResults.get(finalI)); + // if there is no docIds to re-score from a segment we should return early to ensure that we are not + // wasting any computation + if (convertedBitSet == null) { + return Collections.emptyMap(); + } final ExactSearcher.ExactSearcherContext exactSearcherContext = ExactSearcher.ExactSearcherContext.builder() .matchedDocs(convertedBitSet) // setting to false because in re-scoring we want to do exact search on full precision vectors diff --git a/src/test/java/org/opensearch/knn/index/query/ResultUtilTests.java b/src/test/java/org/opensearch/knn/index/query/ResultUtilTests.java index 70cb86e02..7cda1ed79 100644 --- a/src/test/java/org/opensearch/knn/index/query/ResultUtilTests.java +++ b/src/test/java/org/opensearch/knn/index/query/ResultUtilTests.java @@ -9,6 +9,7 @@ import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TopDocs; import org.apache.lucene.util.BitSet; +import org.junit.Assert; import org.opensearch.knn.KNNTestCase; import java.io.IOException; @@ -48,6 +49,14 @@ public void testResultMapToMatchBitSet() throws IOException { assertResultMapToMatchBitSet(perLeafResults, resultBitset); } + public void testResultMapToMatchBitSet_whenResultMapEmpty_thenReturnEmptyOptional() throws IOException { + BitSet resultBitset = ResultUtil.resultMapToMatchBitSet(Collections.emptyMap()); + Assert.assertNull(resultBitset); + + BitSet resultBitset2 = ResultUtil.resultMapToMatchBitSet(null); + Assert.assertNull(resultBitset2); + } + public void testResultMapToDocIds() throws IOException { int firstPassK = 42; Map perLeafResults = getRandomResults(firstPassK); diff --git a/src/test/java/org/opensearch/knn/index/query/nativelib/NativeEngineKNNVectorQueryTests.java b/src/test/java/org/opensearch/knn/index/query/nativelib/NativeEngineKNNVectorQueryTests.java index 4577a34d4..01e3fa6f9 100644 --- a/src/test/java/org/opensearch/knn/index/query/nativelib/NativeEngineKNNVectorQueryTests.java +++ b/src/test/java/org/opensearch/knn/index/query/nativelib/NativeEngineKNNVectorQueryTests.java @@ -229,7 +229,7 @@ public void testRescore() { when(reader.leaves()).thenReturn(leaves); int k = 2; - int firstPassK = 3; + int firstPassK = 100; Map initialLeaf1Results = new HashMap<>(Map.of(0, 21f, 1, 19f, 2, 17f, 3, 15f)); Map initialLeaf2Results = new HashMap<>(Map.of(0, 20f, 1, 18f, 2, 16f, 3, 14f)); Map rescoredLeaf1Results = new HashMap<>(Map.of(0, 18f, 1, 20f)); @@ -257,6 +257,9 @@ public void testRescore() { mockedKnnSettings.when(() -> KNNSettings.isShardLevelRescoringEnabledForDiskBasedVector(any())).thenReturn(true); mockedResultUtil.when(() -> ResultUtil.reduceToTopK(any(), anyInt())).thenAnswer(InvocationOnMock::callRealMethod); + mockedResultUtil.when(() -> ResultUtil.resultMapToMatchBitSet(any())).thenAnswer(InvocationOnMock::callRealMethod); + mockedResultUtil.when(() -> ResultUtil.resultMapToDocIds(any(), anyInt())).thenAnswer(InvocationOnMock::callRealMethod); + mockedResultUtil.when(() -> ResultUtil.resultMapToTopDocs(eq(rescoredLeaf1Results), anyInt())).thenAnswer(t -> topDocs1); mockedResultUtil.when(() -> ResultUtil.resultMapToTopDocs(eq(rescoredLeaf2Results), anyInt())).thenAnswer(t -> topDocs2); try (MockedStatic mockedStaticNativeKnnVectorQuery = mockStatic(NativeEngineKnnVectorQuery.class)) { diff --git a/src/test/java/org/opensearch/knn/integ/ModeAndCompressionIT.java b/src/test/java/org/opensearch/knn/integ/ModeAndCompressionIT.java index 0913d9b36..ad7f8b057 100644 --- a/src/test/java/org/opensearch/knn/integ/ModeAndCompressionIT.java +++ b/src/test/java/org/opensearch/knn/integ/ModeAndCompressionIT.java @@ -11,6 +11,7 @@ import org.opensearch.client.Request; import org.opensearch.client.Response; import org.opensearch.client.ResponseException; +import org.opensearch.common.settings.Settings; import org.opensearch.common.xcontent.XContentFactory; import org.opensearch.core.rest.RestStatus; import org.opensearch.core.xcontent.XContentBuilder; @@ -220,6 +221,41 @@ public void testDeletedDocsWithSegmentMerge_whenValid_ThenSucceed() { validateGreenIndex(indexName); } + @SneakyThrows + public void testCompressionIndexWithNonVectorFieldsSegment_whenValid_ThenSucceed() { + CompressionLevel compressionLevel = CompressionLevel.x32; + String indexName = INDEX_NAME + compressionLevel; + try ( + XContentBuilder builder = XContentFactory.jsonBuilder() + .startObject() + .startObject("properties") + .startObject(FIELD_NAME) + .field("type", "knn_vector") + .field("dimension", DIMENSION) + .field(COMPRESSION_LEVEL_PARAMETER, compressionLevel.getName()) + .field(MODE_PARAMETER, Mode.ON_DISK.getName()) + .endObject() + .endObject() + .endObject() + ) { + String mapping = builder.toString(); + Settings indexSettings = buildKNNIndexSettings(0); + createKnnIndex(indexName, indexSettings, mapping); + // since we are going to delete a document, so its better to have 1 more extra doc so that we can re-use some tests + addKNNDocs(indexName, FIELD_NAME, DIMENSION, 0, NUM_DOCS + 1); + addNonKNNDoc(indexName, String.valueOf(NUM_DOCS + 2), FIELD_NAME_NON_KNN, "Hello world"); + deleteKnnDoc(indexName, "0"); + validateGreenIndex(indexName); + validateSearch( + indexName, + METHOD_PARAMETER_EF_SEARCH, + KNNSettings.INDEX_KNN_DEFAULT_ALGO_PARAM_EF_SEARCH, + compressionLevel.getName(), + Mode.ON_DISK.getName() + ); + } + } + @SneakyThrows public void testTraining_whenInvalid_thenFail() { setupTrainingIndex(); diff --git a/src/testFixtures/java/org/opensearch/knn/KNNRestTestCase.java b/src/testFixtures/java/org/opensearch/knn/KNNRestTestCase.java index 8a4885cfe..2afbd9639 100644 --- a/src/testFixtures/java/org/opensearch/knn/KNNRestTestCase.java +++ b/src/testFixtures/java/org/opensearch/knn/KNNRestTestCase.java @@ -116,6 +116,7 @@ public class KNNRestTestCase extends ODFERestTestCase { public static final String INDEX_NAME = "test_index"; public static final String FIELD_NAME = "test_field"; + public static final String FIELD_NAME_NON_KNN = "test_field_non_knn"; public static final String PROPERTIES_FIELD = "properties"; public static final String STORE_FIELD = "store"; public static final String STORED_QUERY_FIELD = "stored_fields"; @@ -607,6 +608,18 @@ protected void addKnnDoc(String index, String docId, String fieldName, T vec assertEquals(request.getEndpoint() + ": failed", RestStatus.OK, RestStatus.fromCode(response.getStatusLine().getStatusCode())); } + protected void addNonKNNDoc(String index, String docId, String fieldName, String text) throws IOException { + Request request = new Request("POST", "/" + index + "/_doc/" + docId + "?refresh=true"); + + XContentBuilder builder = XContentFactory.jsonBuilder().startObject().field(fieldName, text).endObject(); + request.setJsonEntity(builder.toString()); + client().performRequest(request); + + request = new Request("POST", "/" + index + "/_refresh"); + Response response = client().performRequest(request); + assertEquals(request.getEndpoint() + ": failed", RestStatus.OK, RestStatus.fromCode(response.getStatusLine().getStatusCode())); + } + /** * Add a single KNN Doc to an index with a nested vector field *