-
Notifications
You must be signed in to change notification settings - Fork 72
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
BWC tests for Multimodal Search, Hybrid Search and Neural Sparse Search (#533)
* Initial commit of BWC Test Signed-off-by: Varun Jain <[email protected]>
- Loading branch information
1 parent
98e5534
commit 1dadf25
Showing
34 changed files
with
1,259 additions
and
300 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
129 changes: 129 additions & 0 deletions
129
qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/HybridSearchIT.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,129 @@ | ||
/* | ||
* Copyright OpenSearch Contributors | ||
* SPDX-License-Identifier: Apache-2.0 | ||
*/ | ||
package org.opensearch.neuralsearch.bwc; | ||
|
||
import java.io.IOException; | ||
import java.nio.file.Files; | ||
import java.nio.file.Path; | ||
import java.util.Arrays; | ||
import java.util.List; | ||
import java.util.Map; | ||
import org.opensearch.index.query.MatchQueryBuilder; | ||
import static org.opensearch.neuralsearch.TestUtils.getModelId; | ||
import static org.opensearch.neuralsearch.TestUtils.NODES_BWC_CLUSTER; | ||
import static org.opensearch.neuralsearch.TestUtils.PARAM_NAME_WEIGHTS; | ||
import static org.opensearch.neuralsearch.TestUtils.TEXT_EMBEDDING_PROCESSOR; | ||
import static org.opensearch.neuralsearch.TestUtils.DEFAULT_NORMALIZATION_METHOD; | ||
import static org.opensearch.neuralsearch.TestUtils.DEFAULT_COMBINATION_METHOD; | ||
import org.opensearch.neuralsearch.query.HybridQueryBuilder; | ||
import org.opensearch.neuralsearch.query.NeuralQueryBuilder; | ||
|
||
public class HybridSearchIT extends AbstractRestartUpgradeRestTestCase { | ||
private static final String PIPELINE_NAME = "nlp-hybrid-pipeline"; | ||
private static final String PIPELINE1_NAME = "nlp-hybrid-1-pipeline"; | ||
private static final String SEARCH_PIPELINE_NAME = "nlp-search-pipeline"; | ||
private static final String SEARCH_PIPELINE1_NAME = "nlp-search-1-pipeline"; | ||
private static final String TEST_FIELD = "passage_text"; | ||
private static final String TEXT_1 = "Hello world"; | ||
private static final String TEXT_2 = "Hi planet"; | ||
private static final String TEXT_3 = "Hi earth"; | ||
private static final String TEXT_4 = "Hi amazon"; | ||
private static final String TEXT_5 = "Hi mars"; | ||
private static final String TEXT_6 = "Hi opensearch"; | ||
private static final String QUERY = "Hi world"; | ||
|
||
// Test restart-upgrade normalization processor when index with multiple shards | ||
// Create Text Embedding Processor, Ingestion Pipeline, add document and search pipeline with normalization processor | ||
// Validate process , pipeline and document count in restart-upgrade scenario | ||
public void testNormalizationProcessor_whenIndexWithMultipleShards_E2EFlow() throws Exception { | ||
validateNormalizationProcessor("processor/IndexMappingMultipleShard.json", PIPELINE_NAME, SEARCH_PIPELINE_NAME); | ||
} | ||
|
||
// Test restart-upgrade normalization processor when index with single shard | ||
// Create Text Embedding Processor, Ingestion Pipeline, add document and search pipeline with normalization processor | ||
// Validate process , pipeline and document count in restart-upgrade scenario | ||
public void testNormalizationProcessor_whenIndexWithSingleShard_E2EFlow() throws Exception { | ||
validateNormalizationProcessor("processor/IndexMappingSingleShard.json", PIPELINE1_NAME, SEARCH_PIPELINE1_NAME); | ||
} | ||
|
||
private void validateNormalizationProcessor(final String fileName, final String pipelineName, final String searchPipelineName) | ||
throws Exception { | ||
waitForClusterHealthGreen(NODES_BWC_CLUSTER); | ||
if (isRunningAgainstOldCluster()) { | ||
String modelId = uploadTextEmbeddingModel(); | ||
loadModel(modelId); | ||
createPipelineProcessor(modelId, pipelineName); | ||
createIndexWithConfiguration( | ||
getIndexNameForTest(), | ||
Files.readString(Path.of(classLoader.getResource(fileName).toURI())), | ||
pipelineName | ||
); | ||
addDocuments(getIndexNameForTest(), true); | ||
createSearchPipeline(searchPipelineName); | ||
} else { | ||
String modelId = null; | ||
try { | ||
modelId = getModelId(getIngestionPipeline(pipelineName), TEXT_EMBEDDING_PROCESSOR); | ||
loadModel(modelId); | ||
addDocuments(getIndexNameForTest(), false); | ||
validateTestIndex(modelId, getIndexNameForTest(), searchPipelineName); | ||
} finally { | ||
wipeOfTestResources(getIndexNameForTest(), pipelineName, modelId, searchPipelineName); | ||
} | ||
} | ||
} | ||
|
||
private void addDocuments(final String indexName, boolean isRunningAgainstOldCluster) throws IOException { | ||
if (isRunningAgainstOldCluster) { | ||
addDocument(indexName, "0", TEST_FIELD, TEXT_1, null, null); | ||
addDocument(indexName, "1", TEST_FIELD, TEXT_2, null, null); | ||
addDocument(indexName, "2", TEST_FIELD, TEXT_3, null, null); | ||
addDocument(indexName, "3", TEST_FIELD, TEXT_4, null, null); | ||
addDocument(indexName, "4", TEST_FIELD, TEXT_5, null, null); | ||
} else { | ||
addDocument(indexName, "5", TEST_FIELD, TEXT_6, null, null); | ||
} | ||
} | ||
|
||
private void createSearchPipeline(final String pipelineName) { | ||
createSearchPipeline( | ||
pipelineName, | ||
DEFAULT_NORMALIZATION_METHOD, | ||
DEFAULT_COMBINATION_METHOD, | ||
Map.of(PARAM_NAME_WEIGHTS, Arrays.toString(new float[] { 0.3f, 0.7f })) | ||
); | ||
} | ||
|
||
private void validateTestIndex(final String modelId, final String index, final String searchPipeline) throws Exception { | ||
int docCount = getDocCount(index); | ||
assertEquals(6, docCount); | ||
HybridQueryBuilder hybridQueryBuilder = getQueryBuilder(modelId); | ||
Map<String, Object> searchResponseAsMap = search(index, hybridQueryBuilder, null, 1, Map.of("search_pipeline", searchPipeline)); | ||
assertNotNull(searchResponseAsMap); | ||
int hits = getHitCount(searchResponseAsMap); | ||
assertEquals(1, hits); | ||
List<Double> scoresList = getNormalizationScoreList(searchResponseAsMap); | ||
for (Double score : scoresList) { | ||
assertTrue(0 <= score && score <= 2); | ||
} | ||
} | ||
|
||
private HybridQueryBuilder getQueryBuilder(final String modelId) { | ||
NeuralQueryBuilder neuralQueryBuilder = new NeuralQueryBuilder(); | ||
neuralQueryBuilder.fieldName("passage_embedding"); | ||
neuralQueryBuilder.modelId(modelId); | ||
neuralQueryBuilder.queryText(QUERY); | ||
neuralQueryBuilder.k(5); | ||
|
||
MatchQueryBuilder matchQueryBuilder = new MatchQueryBuilder("text", QUERY); | ||
|
||
HybridQueryBuilder hybridQueryBuilder = new HybridQueryBuilder(); | ||
hybridQueryBuilder.add(matchQueryBuilder); | ||
hybridQueryBuilder.add(neuralQueryBuilder); | ||
|
||
return hybridQueryBuilder; | ||
} | ||
|
||
} |
61 changes: 61 additions & 0 deletions
61
qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/MultiModalSearchIT.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
/* | ||
* Copyright OpenSearch Contributors | ||
* SPDX-License-Identifier: Apache-2.0 | ||
*/ | ||
package org.opensearch.neuralsearch.bwc; | ||
|
||
import java.nio.file.Files; | ||
import java.nio.file.Path; | ||
import java.util.Map; | ||
import static org.opensearch.neuralsearch.TestUtils.NODES_BWC_CLUSTER; | ||
import static org.opensearch.neuralsearch.TestUtils.TEXT_IMAGE_EMBEDDING_PROCESSOR; | ||
import static org.opensearch.neuralsearch.TestUtils.getModelId; | ||
import org.opensearch.neuralsearch.query.NeuralQueryBuilder; | ||
|
||
public class MultiModalSearchIT extends AbstractRestartUpgradeRestTestCase { | ||
private static final String PIPELINE_NAME = "nlp-ingest-pipeline"; | ||
private static final String TEST_FIELD = "passage_text"; | ||
private static final String TEST_IMAGE_FIELD = "passage_image"; | ||
private static final String TEXT = "Hello world"; | ||
private static final String TEXT_1 = "Hello world a"; | ||
private static final String TEST_IMAGE_TEXT = "/9j/4AAQSkZJRgABAQAASABIAAD"; | ||
private static final String TEST_IMAGE_TEXT_1 = "/9j/4AAQSkZJRgbdwoeicfhoid"; | ||
|
||
// Test restart-upgrade test image embedding processor | ||
// Create Text Image Embedding Processor, Ingestion Pipeline and add document | ||
// Validate process , pipeline and document count in restart-upgrade scenario | ||
public void testTextImageEmbeddingProcessor_E2EFlow() throws Exception { | ||
waitForClusterHealthGreen(NODES_BWC_CLUSTER); | ||
|
||
if (isRunningAgainstOldCluster()) { | ||
String modelId = uploadTextEmbeddingModel(); | ||
loadModel(modelId); | ||
createPipelineForTextImageProcessor(modelId, PIPELINE_NAME); | ||
createIndexWithConfiguration( | ||
getIndexNameForTest(), | ||
Files.readString(Path.of(classLoader.getResource("processor/IndexMappingMultipleShard.json").toURI())), | ||
PIPELINE_NAME | ||
); | ||
addDocument(getIndexNameForTest(), "0", TEST_FIELD, TEXT, TEST_IMAGE_FIELD, TEST_IMAGE_TEXT); | ||
} else { | ||
String modelId = null; | ||
try { | ||
modelId = getModelId(getIngestionPipeline(PIPELINE_NAME), TEXT_IMAGE_EMBEDDING_PROCESSOR); | ||
loadModel(modelId); | ||
addDocument(getIndexNameForTest(), "1", TEST_FIELD, TEXT_1, TEST_IMAGE_FIELD, TEST_IMAGE_TEXT_1); | ||
validateTestIndex(modelId); | ||
} finally { | ||
wipeOfTestResources(getIndexNameForTest(), PIPELINE_NAME, modelId, null); | ||
} | ||
} | ||
} | ||
|
||
private void validateTestIndex(final String modelId) throws Exception { | ||
int docCount = getDocCount(getIndexNameForTest()); | ||
assertEquals(2, docCount); | ||
NeuralQueryBuilder neuralQueryBuilder = new NeuralQueryBuilder("passage_embedding", TEXT, TEST_IMAGE_TEXT, modelId, 1, null, null); | ||
Map<String, Object> response = search(getIndexNameForTest(), neuralQueryBuilder, 1); | ||
assertNotNull(response); | ||
} | ||
|
||
} |
Oops, something went wrong.