From 27a20b2b49250d825789347137a209b2cb5de8c5 Mon Sep 17 00:00:00 2001 From: Varun Jain Date: Tue, 23 Jan 2024 22:15:04 -0800 Subject: [PATCH] Refactoring BaseSparseEncodingIT and enabling BWC tests Signed-off-by: Varun Jain --- ...backwards_compatibility_tests_workflow.yml | 6 +- .../AbstractRestartUpgradeRestTestCase.java | 4 +- .../bwc/AbstractRollingUpgradeTestCase.java | 4 +- .../processor/SparseEncodingProcessIT.java | 6 +- .../query/NeuralSparseQueryIT.java | 4 +- .../neuralsearch/BaseNeuralSearchIT.java | 113 ++++++++++++++ .../neuralsearch/BaseSparseEncodingIT.java | 138 ------------------ 7 files changed, 124 insertions(+), 151 deletions(-) delete mode 100644 src/testFixtures/java/org/opensearch/neuralsearch/BaseSparseEncodingIT.java diff --git a/.github/workflows/backwards_compatibility_tests_workflow.yml b/.github/workflows/backwards_compatibility_tests_workflow.yml index 9b34b6356..e5069bf41 100644 --- a/.github/workflows/backwards_compatibility_tests_workflow.yml +++ b/.github/workflows/backwards_compatibility_tests_workflow.yml @@ -36,8 +36,7 @@ jobs: - name: Run NeuralSearch Restart-Upgrade BWC Tests from BWCVersion-${{ matrix.bwc_version }} to OpenSearch Version-${{ matrix.opensearch_version }} on ${{matrix.os}} run: | echo "Running restart-upgrade backwards compatibility tests ..." -# Disabling BWC tests due to ongoing build failure. https://github.com/opensearch-project/neural-search/issues/536 -# ./gradlew :qa:restart-upgrade:testAgainstNewCluster -D'tests.bwc.version=${{ matrix.bwc_version }}' + ./gradlew :qa:restart-upgrade:testAgainstNewCluster -D'tests.bwc.version=${{ matrix.bwc_version }}' Rolling-Upgrade-BWCTests-NeuralSearch: strategy: @@ -64,5 +63,4 @@ jobs: - name: Run NeuralSearch Rolling-Upgrade BWC Tests from BWCVersion-${{ matrix.bwc_version }} to OpenSearch Version-${{ matrix.opensearch_version }} on ${{matrix.os}} run: | echo "Running rolling-upgrade backwards compatibility tests ..." -# Disabling BWC tests due to ongoing build failure. https://github.com/opensearch-project/neural-search/issues/536 -# ./gradlew :qa:rolling-upgrade:testRollingUpgrade -D'tests.bwc.version=${{ matrix.bwc_version }}' + ./gradlew :qa:rolling-upgrade:testRollingUpgrade -D'tests.bwc.version=${{ matrix.bwc_version }}' diff --git a/qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/AbstractRestartUpgradeRestTestCase.java b/qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/AbstractRestartUpgradeRestTestCase.java index 832821b94..cf985d759 100644 --- a/qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/AbstractRestartUpgradeRestTestCase.java +++ b/qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/AbstractRestartUpgradeRestTestCase.java @@ -8,14 +8,14 @@ import java.util.Optional; import org.junit.Before; import org.opensearch.common.settings.Settings; -import org.opensearch.neuralsearch.BaseSparseEncodingIT; +import org.opensearch.neuralsearch.BaseNeuralSearchIT; import static org.opensearch.neuralsearch.TestUtils.CLIENT_TIMEOUT_VALUE; import static org.opensearch.neuralsearch.TestUtils.RESTART_UPGRADE_OLD_CLUSTER; import static org.opensearch.neuralsearch.TestUtils.BWC_VERSION; import static org.opensearch.neuralsearch.TestUtils.NEURAL_SEARCH_BWC_PREFIX; import org.opensearch.test.rest.OpenSearchRestTestCase; -public abstract class AbstractRestartUpgradeRestTestCase extends BaseSparseEncodingIT { +public abstract class AbstractRestartUpgradeRestTestCase extends BaseNeuralSearchIT { @Before protected String getIndexNameForTest() { diff --git a/qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/AbstractRollingUpgradeTestCase.java b/qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/AbstractRollingUpgradeTestCase.java index bcb4bc584..98ce95b72 100644 --- a/qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/AbstractRollingUpgradeTestCase.java +++ b/qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/AbstractRollingUpgradeTestCase.java @@ -8,7 +8,7 @@ import java.util.Optional; import org.junit.Before; import org.opensearch.common.settings.Settings; -import org.opensearch.neuralsearch.BaseSparseEncodingIT; +import org.opensearch.neuralsearch.BaseNeuralSearchIT; import org.opensearch.test.rest.OpenSearchRestTestCase; import static org.opensearch.neuralsearch.TestUtils.OLD_CLUSTER; import static org.opensearch.neuralsearch.TestUtils.MIXED_CLUSTER; @@ -18,7 +18,7 @@ import static org.opensearch.neuralsearch.TestUtils.BWCSUITE_CLUSTER; import static org.opensearch.neuralsearch.TestUtils.NEURAL_SEARCH_BWC_PREFIX; -public abstract class AbstractRollingUpgradeTestCase extends BaseSparseEncodingIT { +public abstract class AbstractRollingUpgradeTestCase extends BaseNeuralSearchIT { @Before protected String getIndexNameForTest() { diff --git a/src/test/java/org/opensearch/neuralsearch/processor/SparseEncodingProcessIT.java b/src/test/java/org/opensearch/neuralsearch/processor/SparseEncodingProcessIT.java index 416707956..226ed01b8 100644 --- a/src/test/java/org/opensearch/neuralsearch/processor/SparseEncodingProcessIT.java +++ b/src/test/java/org/opensearch/neuralsearch/processor/SparseEncodingProcessIT.java @@ -15,13 +15,13 @@ import org.opensearch.client.Response; import org.opensearch.common.xcontent.XContentHelper; import org.opensearch.common.xcontent.XContentType; -import org.opensearch.neuralsearch.BaseSparseEncodingIT; +import org.opensearch.neuralsearch.BaseNeuralSearchIT; import com.google.common.collect.ImmutableList; import lombok.SneakyThrows; -public class SparseEncodingProcessIT extends BaseSparseEncodingIT { +public class SparseEncodingProcessIT extends BaseNeuralSearchIT { private static final String INDEX_NAME = "sparse_encoding_index"; @@ -39,7 +39,7 @@ public void tearDown() { } public void testSparseEncodingProcessor() throws Exception { - String modelId = prepareModel(); + String modelId = prepareSparseEncodingModel(); createPipelineProcessor(modelId, PIPELINE_NAME, ProcessorType.SPARSE_ENCODING); createSparseEncodingIndex(); ingestDocument(); diff --git a/src/test/java/org/opensearch/neuralsearch/query/NeuralSparseQueryIT.java b/src/test/java/org/opensearch/neuralsearch/query/NeuralSparseQueryIT.java index afabd46c9..9aea33db5 100644 --- a/src/test/java/org/opensearch/neuralsearch/query/NeuralSparseQueryIT.java +++ b/src/test/java/org/opensearch/neuralsearch/query/NeuralSparseQueryIT.java @@ -4,6 +4,7 @@ */ package org.opensearch.neuralsearch.query; +import org.opensearch.neuralsearch.BaseNeuralSearchIT; import static org.opensearch.neuralsearch.TestUtils.objectToFloat; import java.util.List; @@ -15,12 +16,11 @@ import org.opensearch.index.query.BoolQueryBuilder; import org.opensearch.index.query.MatchAllQueryBuilder; import org.opensearch.index.query.MatchQueryBuilder; -import org.opensearch.neuralsearch.BaseSparseEncodingIT; import org.opensearch.neuralsearch.TestUtils; import lombok.SneakyThrows; -public class NeuralSparseQueryIT extends BaseSparseEncodingIT { +public class NeuralSparseQueryIT extends BaseNeuralSearchIT { private static final String TEST_BASIC_INDEX_NAME = "test-sparse-basic-index"; private static final String TEST_MULTI_NEURAL_SPARSE_FIELD_INDEX_NAME = "test-sparse-multi-field-index"; private static final String TEST_TEXT_AND_NEURAL_SPARSE_FIELD_INDEX_NAME = "test-sparse-text-and-field-index"; diff --git a/src/testFixtures/java/org/opensearch/neuralsearch/BaseNeuralSearchIT.java b/src/testFixtures/java/org/opensearch/neuralsearch/BaseNeuralSearchIT.java index 4133e905f..d104a1bde 100644 --- a/src/testFixtures/java/org/opensearch/neuralsearch/BaseNeuralSearchIT.java +++ b/src/testFixtures/java/org/opensearch/neuralsearch/BaseNeuralSearchIT.java @@ -48,6 +48,7 @@ import org.opensearch.index.query.QueryBuilder; import org.opensearch.knn.index.SpaceType; import org.opensearch.neuralsearch.util.NeuralSearchClusterUtil; +import org.opensearch.neuralsearch.util.TokenWeightUtil; import org.opensearch.test.ClusterServiceUtils; import org.opensearch.threadpool.TestThreadPool; import org.opensearch.threadpool.ThreadPool; @@ -211,6 +212,21 @@ protected String prepareModel() { return modelId; } + /** + * Upload default model and load into the cluster + * + * @return modelID + */ + @SneakyThrows + protected String prepareSparseEncodingModel() { + String requestBody = Files.readString( + Path.of(classLoader.getResource("processor/UploadSparseEncodingModelRequestBody.json").toURI()) + ); + String modelId = registerModelGroupAndUploadModel(requestBody); + loadModel(modelId); + return modelId; + } + /** * Execute model inference on the provided query text * @@ -473,6 +489,36 @@ protected void addKnnDoc( assertEquals(request.getEndpoint() + ": failed", RestStatus.CREATED, RestStatus.fromCode(response.getStatusLine().getStatusCode())); } + @SneakyThrows + protected void addSparseEncodingDoc(String index, String docId, List fieldNames, List> docs) { + addSparseEncodingDoc(index, docId, fieldNames, docs, Collections.emptyList(), Collections.emptyList()); + } + + @SneakyThrows + protected void addSparseEncodingDoc( + String index, + String docId, + List fieldNames, + List> docs, + List textFieldNames, + List texts + ) { + Request request = new Request("POST", "/" + index + "/_doc/" + docId + "?refresh=true"); + XContentBuilder builder = XContentFactory.jsonBuilder().startObject(); + for (int i = 0; i < fieldNames.size(); i++) { + builder.field(fieldNames.get(i), docs.get(i)); + } + + for (int i = 0; i < textFieldNames.size(); i++) { + builder.field(textFieldNames.get(i), texts.get(i)); + } + builder.endObject(); + + request.setJsonEntity(builder.toString()); + Response response = client().performRequest(request); + assertEquals(request.getEndpoint() + ": failed", RestStatus.CREATED, RestStatus.fromCode(response.getStatusLine().getStatusCode())); + } + /** * Parse the first returned hit from a search response as a map * @@ -534,6 +580,19 @@ protected void prepareKnnIndex(String indexName, List knnFieldCo createIndexWithConfiguration(indexName, buildIndexConfiguration(knnFieldConfigs, numOfShards), ""); } + @SneakyThrows + protected void prepareSparseEncodingIndex(String indexName, List sparseEncodingFieldNames) { + XContentBuilder xContentBuilder = XContentFactory.jsonBuilder().startObject().startObject("mappings").startObject("properties"); + + for (String fieldName : sparseEncodingFieldNames) { + xContentBuilder.startObject(fieldName).field("type", "rank_features").endObject(); + } + + xContentBuilder.endObject().endObject().endObject(); + String indexMappings = xContentBuilder.toString(); + createIndexWithConfiguration(indexName, indexMappings, ""); + } + /** * Computes the expected distance between an indexVector and query text without using the neural query type. * @@ -943,6 +1002,60 @@ protected Map deletePipeline(String pipelineName) { return responseMap; } + protected float computeExpectedScore(String modelId, Map tokenWeightMap, String queryText) { + Map queryTokens = runSparseModelInference(modelId, queryText); + return computeExpectedScore(tokenWeightMap, queryTokens); + } + + protected float computeExpectedScore(Map tokenWeightMap, Map queryTokens) { + Float score = 0f; + for (Map.Entry entry : queryTokens.entrySet()) { + if (tokenWeightMap.containsKey(entry.getKey())) { + score += entry.getValue() * getFeatureFieldCompressedNumber(tokenWeightMap.get(entry.getKey())); + } + } + return score; + } + + @SneakyThrows + protected Map runSparseModelInference(String modelId, String queryText) { + Response inferenceResponse = makeRequest( + client(), + "POST", + String.format(LOCALE, "/_plugins/_ml/models/%s/_predict", modelId), + null, + toHttpEntity(String.format(LOCALE, "{\"text_docs\": [\"%s\"]}", queryText)), + ImmutableList.of(new BasicHeader(HttpHeaders.USER_AGENT, DEFAULT_USER_AGENT)) + ); + + Map inferenceResJson = XContentHelper.convertToMap( + XContentType.JSON.xContent(), + EntityUtils.toString(inferenceResponse.getEntity()), + false + ); + + Object inference_results = inferenceResJson.get("inference_results"); + assertTrue(inference_results instanceof List); + List inferenceResultsAsMap = (List) inference_results; + assertEquals(1, inferenceResultsAsMap.size()); + Map result = (Map) inferenceResultsAsMap.get(0); + List output = (List) result.get("output"); + assertEquals(1, output.size()); + Map map = (Map) output.get(0); + assertEquals(1, map.size()); + Map dataAsMap = (Map) map.get("dataAsMap"); + return TokenWeightUtil.fetchListOfTokenWeightMap(List.of(dataAsMap)).get(0); + } + + // rank_features use lucene FeatureField, which will compress the Float number to 16 bit + // this function simulate the encoding and decoding progress in lucene FeatureField + protected Float getFeatureFieldCompressedNumber(Float originNumber) { + int freqBits = Float.floatToIntBits(originNumber); + freqBits = freqBits >> 15; + freqBits = ((int) ((float) freqBits)) << 15; + return Float.intBitsToFloat(freqBits); + } + /** * Enumeration for types of pipeline processors, used to lookup resources like create * processor request as those are type specific diff --git a/src/testFixtures/java/org/opensearch/neuralsearch/BaseSparseEncodingIT.java b/src/testFixtures/java/org/opensearch/neuralsearch/BaseSparseEncodingIT.java deleted file mode 100644 index 53982258b..000000000 --- a/src/testFixtures/java/org/opensearch/neuralsearch/BaseSparseEncodingIT.java +++ /dev/null @@ -1,138 +0,0 @@ -/* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 - */ -package org.opensearch.neuralsearch; - -import java.nio.file.Files; -import java.nio.file.Path; -import java.util.Collections; -import java.util.List; -import java.util.Map; - -import org.apache.hc.core5.http.HttpHeaders; -import org.apache.hc.core5.http.io.entity.EntityUtils; -import org.apache.hc.core5.http.message.BasicHeader; -import org.opensearch.client.Request; -import org.opensearch.client.Response; -import org.opensearch.common.xcontent.XContentFactory; -import org.opensearch.common.xcontent.XContentHelper; -import org.opensearch.common.xcontent.XContentType; -import org.opensearch.core.rest.RestStatus; -import org.opensearch.core.xcontent.XContentBuilder; -import org.opensearch.neuralsearch.util.TokenWeightUtil; - -import com.google.common.collect.ImmutableList; -import static org.opensearch.neuralsearch.TestUtils.DEFAULT_USER_AGENT; -import lombok.SneakyThrows; - -public abstract class BaseSparseEncodingIT extends BaseNeuralSearchIT { - - @SneakyThrows - @Override - protected String prepareModel() { - String requestBody = Files.readString( - Path.of(classLoader.getResource("processor/UploadSparseEncodingModelRequestBody.json").toURI()) - ); - String modelId = registerModelGroupAndUploadModel(requestBody); - loadModel(modelId); - return modelId; - } - - @SneakyThrows - protected void prepareSparseEncodingIndex(String indexName, List sparseEncodingFieldNames) { - XContentBuilder xContentBuilder = XContentFactory.jsonBuilder().startObject().startObject("mappings").startObject("properties"); - - for (String fieldName : sparseEncodingFieldNames) { - xContentBuilder.startObject(fieldName).field("type", "rank_features").endObject(); - } - - xContentBuilder.endObject().endObject().endObject(); - String indexMappings = xContentBuilder.toString(); - createIndexWithConfiguration(indexName, indexMappings, ""); - } - - @SneakyThrows - protected void addSparseEncodingDoc(String index, String docId, List fieldNames, List> docs) { - addSparseEncodingDoc(index, docId, fieldNames, docs, Collections.emptyList(), Collections.emptyList()); - } - - @SneakyThrows - protected void addSparseEncodingDoc( - String index, - String docId, - List fieldNames, - List> docs, - List textFieldNames, - List texts - ) { - Request request = new Request("POST", "/" + index + "/_doc/" + docId + "?refresh=true"); - XContentBuilder builder = XContentFactory.jsonBuilder().startObject(); - for (int i = 0; i < fieldNames.size(); i++) { - builder.field(fieldNames.get(i), docs.get(i)); - } - - for (int i = 0; i < textFieldNames.size(); i++) { - builder.field(textFieldNames.get(i), texts.get(i)); - } - builder.endObject(); - - request.setJsonEntity(builder.toString()); - Response response = client().performRequest(request); - assertEquals(request.getEndpoint() + ": failed", RestStatus.CREATED, RestStatus.fromCode(response.getStatusLine().getStatusCode())); - } - - protected float computeExpectedScore(String modelId, Map tokenWeightMap, String queryText) { - Map queryTokens = runSparseModelInference(modelId, queryText); - return computeExpectedScore(tokenWeightMap, queryTokens); - } - - protected float computeExpectedScore(Map tokenWeightMap, Map queryTokens) { - Float score = 0f; - for (Map.Entry entry : queryTokens.entrySet()) { - if (tokenWeightMap.containsKey(entry.getKey())) { - score += entry.getValue() * getFeatureFieldCompressedNumber(tokenWeightMap.get(entry.getKey())); - } - } - return score; - } - - @SneakyThrows - protected Map runSparseModelInference(String modelId, String queryText) { - Response inferenceResponse = makeRequest( - client(), - "POST", - String.format(LOCALE, "/_plugins/_ml/models/%s/_predict", modelId), - null, - toHttpEntity(String.format(LOCALE, "{\"text_docs\": [\"%s\"]}", queryText)), - ImmutableList.of(new BasicHeader(HttpHeaders.USER_AGENT, DEFAULT_USER_AGENT)) - ); - - Map inferenceResJson = XContentHelper.convertToMap( - XContentType.JSON.xContent(), - EntityUtils.toString(inferenceResponse.getEntity()), - false - ); - - Object inference_results = inferenceResJson.get("inference_results"); - assertTrue(inference_results instanceof List); - List inferenceResultsAsMap = (List) inference_results; - assertEquals(1, inferenceResultsAsMap.size()); - Map result = (Map) inferenceResultsAsMap.get(0); - List output = (List) result.get("output"); - assertEquals(1, output.size()); - Map map = (Map) output.get(0); - assertEquals(1, map.size()); - Map dataAsMap = (Map) map.get("dataAsMap"); - return TokenWeightUtil.fetchListOfTokenWeightMap(List.of(dataAsMap)).get(0); - } - - // rank_features use lucene FeatureField, which will compress the Float number to 16 bit - // this function simulate the encoding and decoding progress in lucene FeatureField - protected Float getFeatureFieldCompressedNumber(Float originNumber) { - int freqBits = Float.floatToIntBits(originNumber); - freqBits = freqBits >> 15; - freqBits = ((int) ((float) freqBits)) << 15; - return Float.intBitsToFloat(freqBits); - } -}