Skip to content

Commit

Permalink
BWC tests for Multimodal Search, Hybrid Search and Neural Sparse Sear…
Browse files Browse the repository at this point in the history
…ch (#533)

* Initial commit of BWC Test

Signed-off-by: Varun Jain <[email protected]>
  • Loading branch information
vibrantvarun authored Jan 26, 2024
1 parent 98e5534 commit 1dadf25
Show file tree
Hide file tree
Showing 34 changed files with 1,259 additions and 300 deletions.
9 changes: 4 additions & 5 deletions .github/workflows/backwards_compatibility_tests_workflow.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,13 +36,13 @@ jobs:
- name: Run NeuralSearch Restart-Upgrade BWC Tests from BWCVersion-${{ matrix.bwc_version }} to OpenSearch Version-${{ matrix.opensearch_version }} on ${{matrix.os}}
run: |
echo "Running restart-upgrade backwards compatibility tests ..."
# Disabling BWC tests due to ongoing build failure. https://github.com/opensearch-project/neural-search/issues/536
# ./gradlew :qa:restart-upgrade:testAgainstNewCluster -D'tests.bwc.version=${{ matrix.bwc_version }}'
./gradlew :qa:restart-upgrade:testAgainstNewCluster -D'tests.bwc.version=${{ matrix.bwc_version }}'
Rolling-Upgrade-BWCTests-NeuralSearch:
strategy:
matrix:
java: [ 11, 17, 21 ]
# Restricting Java 21 to 21.0.1 due to an ongoing bug in JDK 21.0.2 (https://bugs.openjdk.org/browse/JDK-8323659). Once the fix https://github.com/opensearch-project/OpenSearch/pull/11968 gets merged, this change will be reverted.
java: [ 11, 17, 21.0.1 ]
os: [ubuntu-latest,windows-latest]
bwc_version: [ "2.12.0-SNAPSHOT" ]
opensearch_version: [ "3.0.0-SNAPSHOT" ]
Expand All @@ -64,5 +64,4 @@ jobs:
- name: Run NeuralSearch Rolling-Upgrade BWC Tests from BWCVersion-${{ matrix.bwc_version }} to OpenSearch Version-${{ matrix.opensearch_version }} on ${{matrix.os}}
run: |
echo "Running rolling-upgrade backwards compatibility tests ..."
# Disabling BWC tests due to ongoing build failure. https://github.com/opensearch-project/neural-search/issues/536
# ./gradlew :qa:rolling-upgrade:testRollingUpgrade -D'tests.bwc.version=${{ matrix.bwc_version }}'
./gradlew :qa:rolling-upgrade:testRollingUpgrade -D'tests.bwc.version=${{ matrix.bwc_version }}'
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
### Infrastructure
- BWC tests for Neural Search ([#515](https://github.com/opensearch-project/neural-search/pull/515))
- Github action to run integ tests in secure opensearch cluster ([#535](https://github.com/opensearch-project/neural-search/pull/535))
- BWC tests for Multimodal search, Hybrid Search and Neural Sparse Search ([#533](https://github.com/opensearch-project/neural-search/pull/533))
### Documentation
### Maintenance
### Refactoring
Expand Down
18 changes: 18 additions & 0 deletions qa/restart-upgrade/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,15 @@ task testAgainstOldCluster(type: StandaloneRestIntegTestTask) {
systemProperty 'tests.skip_delete_model_index', 'true'
systemProperty 'tests.plugin_bwc_version', neural_search_bwc_version

// Skip the MultiModalSearchIT, HybridSearchIT and NeuralSparseSearchIT suites when the neural-search
// BWC version string starts with "2.9" or "2.10": these features were first released in version 2.11,
// so they cannot be set up on an older cluster.
if (neural_search_bwc_version.startsWith("2.9") || neural_search_bwc_version.startsWith("2.10")){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.MultiModalSearchIT.*"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.HybridSearchIT.*"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralSparseSearchIT.*"
}
}

nonInputProperties.systemProperty('tests.rest.cluster', "${-> testClusters."${baseName}".allHttpSocketURI.join(",")}")
nonInputProperties.systemProperty('tests.clustername', "${-> testClusters."${baseName}".getName()}")
systemProperty 'tests.security.manager', 'false'
Expand All @@ -53,6 +62,15 @@ task testAgainstNewCluster(type: StandaloneRestIntegTestTask) {
systemProperty 'tests.is_old_cluster', 'false'
systemProperty 'tests.plugin_bwc_version', neural_search_bwc_version

// Skip the MultiModalSearchIT, HybridSearchIT and NeuralSparseSearchIT suites when the neural-search
// BWC version string starts with "2.9" or "2.10": these features were first released in version 2.11,
// so there is nothing from the old cluster for them to validate.
if (neural_search_bwc_version.startsWith("2.9") || neural_search_bwc_version.startsWith("2.10")){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.MultiModalSearchIT.*"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.HybridSearchIT.*"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralSparseSearchIT.*"
}
}

nonInputProperties.systemProperty('tests.rest.cluster', "${-> testClusters."${baseName}".allHttpSocketURI.join(",")}")
nonInputProperties.systemProperty('tests.clustername', "${-> testClusters."${baseName}".getName()}")
systemProperty 'tests.security.manager', 'false'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,18 @@
*/
package org.opensearch.neuralsearch.bwc;

import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Locale;
import java.util.Optional;
import org.junit.Before;
import org.opensearch.common.settings.Settings;
import org.opensearch.neuralsearch.BaseNeuralSearchIT;
import static org.opensearch.neuralsearch.TestUtils.NEURAL_SEARCH_BWC_PREFIX;
import static org.opensearch.neuralsearch.TestUtils.CLIENT_TIMEOUT_VALUE;
import static org.opensearch.neuralsearch.TestUtils.RESTART_UPGRADE_OLD_CLUSTER;
import static org.opensearch.neuralsearch.TestUtils.BWC_VERSION;
import static org.opensearch.neuralsearch.TestUtils.NEURAL_SEARCH_BWC_PREFIX;
import static org.opensearch.neuralsearch.TestUtils.generateModelId;
import org.opensearch.test.rest.OpenSearchRestTestCase;

public abstract class AbstractRestartUpgradeRestTestCase extends BaseNeuralSearchIT {
Expand Down Expand Up @@ -57,4 +60,43 @@ protected static final boolean isRunningAgainstOldCluster() {
/**
 * Looks up the version supplied through the {@code BWC_VERSION} system property.
 *
 * @return the property value, or an empty {@link Optional} when the property is not set
 */
protected final Optional<String> getBWCVersion() {
    final String configuredVersion = System.getProperty(BWC_VERSION);
    return configuredVersion == null ? Optional.empty() : Optional.of(configuredVersion);
}

/**
 * Registers a model group and uploads the text embedding model described by
 * {@code processor/UploadModelRequestBody.json}.
 *
 * @return the value returned by {@code uploadModel} for the uploaded model
 * @throws Exception if the request body cannot be read or a cluster call fails
 */
protected String uploadTextEmbeddingModel() throws Exception {
    return registerModelGroupAndGetModelId(readBwcTestResource("processor/UploadModelRequestBody.json"));
}

/**
 * Registers a new model group and then uploads a model into it.
 *
 * @param requestBody upload request template; it is formatted with the id of the newly registered model group
 * @return the value returned by {@code uploadModel}
 * @throws Exception if the model group template cannot be read or a cluster call fails
 */
protected String registerModelGroupAndGetModelId(final String requestBody) throws Exception {
    final String modelGroupTemplate = readBwcTestResource("processor/CreateModelGroupRequestBody.json");
    // The model group template is filled in with a freshly generated id before it is registered.
    final String modelGroupId = registerModelGroup(String.format(LOCALE, modelGroupTemplate, generateModelId()));
    return uploadModel(String.format(LOCALE, requestBody, modelGroupId));
}

/**
 * Creates an ingest pipeline from {@code processor/PipelineConfiguration.json}.
 *
 * @param modelId      id of the model the pipeline should reference
 * @param pipelineName name of the pipeline to create
 * @throws Exception if the configuration cannot be read or the pipeline cannot be created
 */
protected void createPipelineProcessor(final String modelId, final String pipelineName) throws Exception {
    createPipelineProcessor(readBwcTestResource("processor/PipelineConfiguration.json"), pipelineName, modelId);
}

/**
 * Registers a model group and uploads the sparse encoding model described by
 * {@code processor/UploadSparseEncodingModelRequestBody.json}.
 *
 * @return the value returned by {@code uploadModel} for the uploaded model
 * @throws Exception if the request body cannot be read or a cluster call fails
 */
protected String uploadSparseEncodingModel() throws Exception {
    return registerModelGroupAndGetModelId(readBwcTestResource("processor/UploadSparseEncodingModelRequestBody.json"));
}

/**
 * Creates an ingest pipeline from {@code processor/PipelineForTextImageProcessorConfiguration.json}.
 *
 * @param modelId      id of the model the pipeline should reference
 * @param pipelineName name of the pipeline to create
 * @throws Exception if the configuration cannot be read or the pipeline cannot be created
 */
protected void createPipelineForTextImageProcessor(final String modelId, final String pipelineName) throws Exception {
    createPipelineProcessor(
        readBwcTestResource("processor/PipelineForTextImageProcessorConfiguration.json"),
        pipelineName,
        modelId
    );
}

/**
 * Creates an ingest pipeline from {@code processor/PipelineForSparseEncodingProcessorConfiguration.json}.
 *
 * @param modelId      id of the model the pipeline should reference
 * @param pipelineName name of the pipeline to create
 * @throws Exception if the configuration cannot be read or the pipeline cannot be created
 */
protected void createPipelineForSparseEncodingProcessor(final String modelId, final String pipelineName) throws Exception {
    createPipelineProcessor(
        readBwcTestResource("processor/PipelineForSparseEncodingProcessorConfiguration.json"),
        pipelineName,
        modelId
    );
}

/**
 * Reads a test resource, resolved through {@code classLoader}, fully into a string.
 *
 * @param resourcePath resource path, e.g. {@code processor/PipelineConfiguration.json}
 * @return the resource content
 * @throws IllegalStateException if the resource cannot be found
 * @throws Exception             if the resource location cannot be converted to a path or read
 */
private String readBwcTestResource(final String resourcePath) throws Exception {
    final var resource = classLoader.getResource(resourcePath);
    if (resource == null) {
        // Fail with the offending path instead of an anonymous NullPointerException on toURI().
        throw new IllegalStateException("Test resource not found on classpath: " + resourcePath);
    }
    return Files.readString(Path.of(resource.toURI()));
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/
package org.opensearch.neuralsearch.bwc;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import org.opensearch.index.query.MatchQueryBuilder;
import static org.opensearch.neuralsearch.TestUtils.getModelId;
import static org.opensearch.neuralsearch.TestUtils.NODES_BWC_CLUSTER;
import static org.opensearch.neuralsearch.TestUtils.PARAM_NAME_WEIGHTS;
import static org.opensearch.neuralsearch.TestUtils.TEXT_EMBEDDING_PROCESSOR;
import static org.opensearch.neuralsearch.TestUtils.DEFAULT_NORMALIZATION_METHOD;
import static org.opensearch.neuralsearch.TestUtils.DEFAULT_COMBINATION_METHOD;
import org.opensearch.neuralsearch.query.HybridQueryBuilder;
import org.opensearch.neuralsearch.query.NeuralQueryBuilder;

/**
 * Restart-upgrade BWC test for hybrid search with a normalization search pipeline.
 *
 * Against the old cluster the test uploads a text embedding model, creates the ingest pipeline,
 * index and search pipeline, and ingests five documents. Against the upgraded cluster it ingests
 * one more document, runs a hybrid query through the search pipeline and cleans up.
 */
public class HybridSearchIT extends AbstractRestartUpgradeRestTestCase {
    private static final String PIPELINE_NAME = "nlp-hybrid-pipeline";
    private static final String PIPELINE1_NAME = "nlp-hybrid-1-pipeline";
    private static final String SEARCH_PIPELINE_NAME = "nlp-search-pipeline";
    private static final String SEARCH_PIPELINE1_NAME = "nlp-search-1-pipeline";
    private static final String TEST_FIELD = "passage_text";
    private static final String TEXT_1 = "Hello world";
    private static final String TEXT_2 = "Hi planet";
    private static final String TEXT_3 = "Hi earth";
    private static final String TEXT_4 = "Hi amazon";
    private static final String TEXT_5 = "Hi mars";
    private static final String TEXT_6 = "Hi opensearch";
    private static final String QUERY = "Hi world";

    /**
     * Runs the normalization processor end-to-end flow with the multiple-shard index mapping.
     */
    public void testNormalizationProcessor_whenIndexWithMultipleShards_E2EFlow() throws Exception {
        validateNormalizationProcessor("processor/IndexMappingMultipleShard.json", PIPELINE_NAME, SEARCH_PIPELINE_NAME);
    }

    /**
     * Runs the normalization processor end-to-end flow with the single-shard index mapping.
     */
    public void testNormalizationProcessor_whenIndexWithSingleShard_E2EFlow() throws Exception {
        validateNormalizationProcessor("processor/IndexMappingSingleShard.json", PIPELINE1_NAME, SEARCH_PIPELINE1_NAME);
    }

    /**
     * Waits for a green cluster and then runs the phase matching the cluster under test.
     *
     * @param fileName           classpath resource holding the index configuration
     * @param pipelineName       ingest pipeline name
     * @param searchPipelineName search pipeline name
     */
    private void validateNormalizationProcessor(final String fileName, final String pipelineName, final String searchPipelineName)
        throws Exception {
        waitForClusterHealthGreen(NODES_BWC_CLUSTER);
        if (!isRunningAgainstOldCluster()) {
            verifyOnUpgradedCluster(pipelineName, searchPipelineName);
            return;
        }
        prepareOnOldCluster(fileName, pipelineName, searchPipelineName);
    }

    /**
     * Old-cluster phase: model, ingest pipeline, index, initial documents and search pipeline.
     */
    private void prepareOnOldCluster(final String mappingFile, final String ingestPipeline, final String searchPipeline)
        throws Exception {
        final String uploadedModelId = uploadTextEmbeddingModel();
        loadModel(uploadedModelId);
        createPipelineProcessor(uploadedModelId, ingestPipeline);

        final String targetIndex = getIndexNameForTest();
        final String indexConfiguration = Files.readString(Path.of(classLoader.getResource(mappingFile).toURI()));
        createIndexWithConfiguration(targetIndex, indexConfiguration, ingestPipeline);

        addDocuments(getIndexNameForTest(), true);
        createSearchPipeline(searchPipeline);
    }

    /**
     * Upgraded-cluster phase: resolve the model from the ingest pipeline, add one document,
     * validate the search, and always wipe the test resources afterwards.
     */
    private void verifyOnUpgradedCluster(final String ingestPipeline, final String searchPipeline) throws Exception {
        // Stays null if the lookup fails, so the cleanup still runs with whatever is known.
        String resolvedModelId = null;
        try {
            resolvedModelId = getModelId(getIngestionPipeline(ingestPipeline), TEXT_EMBEDDING_PROCESSOR);
            loadModel(resolvedModelId);
            addDocuments(getIndexNameForTest(), false);
            validateTestIndex(resolvedModelId, getIndexNameForTest(), searchPipeline);
        } finally {
            wipeOfTestResources(getIndexNameForTest(), ingestPipeline, resolvedModelId, searchPipeline);
        }
    }

    /**
     * Ingests documents "0".."4" on the old cluster, or the single document "5" otherwise.
     */
    private void addDocuments(final String indexName, boolean isRunningAgainstOldCluster) throws IOException {
        if (!isRunningAgainstOldCluster) {
            addDocument(indexName, "5", TEST_FIELD, TEXT_6, null, null);
            return;
        }
        final String[] oldClusterTexts = { TEXT_1, TEXT_2, TEXT_3, TEXT_4, TEXT_5 };
        for (int docId = 0; docId < oldClusterTexts.length; docId++) {
            addDocument(indexName, String.valueOf(docId), TEST_FIELD, oldClusterTexts[docId], null, null);
        }
    }

    /**
     * Creates the search pipeline with the default normalization and combination methods
     * and the weights 0.3 / 0.7.
     */
    private void createSearchPipeline(final String pipelineName) {
        final String weights = Arrays.toString(new float[] { 0.3f, 0.7f });
        final Map<String, String> combinationParameters = Map.of(PARAM_NAME_WEIGHTS, weights);
        createSearchPipeline(pipelineName, DEFAULT_NORMALIZATION_METHOD, DEFAULT_COMBINATION_METHOD, combinationParameters);
    }

    /**
     * Checks that the index holds six documents and that a size-1 hybrid search through the
     * search pipeline returns one hit whose scores lie within [0, 2].
     */
    private void validateTestIndex(final String modelId, final String index, final String searchPipeline) throws Exception {
        final int indexedDocuments = getDocCount(index);
        assertEquals(6, indexedDocuments);

        HybridQueryBuilder hybridQuery = getQueryBuilder(modelId);
        final Map<String, Object> searchResponse = search(index, hybridQuery, null, 1, Map.of("search_pipeline", searchPipeline));
        assertNotNull(searchResponse);

        final int hitCount = getHitCount(searchResponse);
        assertEquals(1, hitCount);

        final List<Double> normalizedScores = getNormalizationScoreList(searchResponse);
        normalizedScores.forEach(score -> assertTrue(score >= 0 && score <= 2));
    }

    /**
     * Builds the hybrid query: a match query followed by a neural query (k = 5) on
     * "passage_embedding", both using {@code QUERY} as the query text.
     */
    private HybridQueryBuilder getQueryBuilder(final String modelId) {
        // NOTE(review): the match query targets field "text" while documents are ingested into
        // TEST_FIELD ("passage_text") - confirm against the index mapping whether this is intended.
        MatchQueryBuilder lexicalQuery = new MatchQueryBuilder("text", QUERY);

        NeuralQueryBuilder semanticQuery = new NeuralQueryBuilder();
        semanticQuery.fieldName("passage_embedding");
        semanticQuery.modelId(modelId);
        semanticQuery.queryText(QUERY);
        semanticQuery.k(5);

        // Sub-queries are added in the same order as before: match first, neural second.
        HybridQueryBuilder hybridQuery = new HybridQueryBuilder();
        hybridQuery.add(lexicalQuery);
        hybridQuery.add(semanticQuery);
        return hybridQuery;
    }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/
package org.opensearch.neuralsearch.bwc;

import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Map;
import static org.opensearch.neuralsearch.TestUtils.NODES_BWC_CLUSTER;
import static org.opensearch.neuralsearch.TestUtils.TEXT_IMAGE_EMBEDDING_PROCESSOR;
import static org.opensearch.neuralsearch.TestUtils.getModelId;
import org.opensearch.neuralsearch.query.NeuralQueryBuilder;

/**
 * Restart-upgrade BWC test for the text-image embedding ingest processor.
 *
 * Against the old cluster the test uploads a model, creates the text-image ingest pipeline and
 * the index, and ingests one document. Against the upgraded cluster it ingests a second
 * document, runs a neural query with text and image input, and cleans up.
 */
public class MultiModalSearchIT extends AbstractRestartUpgradeRestTestCase {
    private static final String PIPELINE_NAME = "nlp-ingest-pipeline";
    private static final String TEST_FIELD = "passage_text";
    private static final String TEST_IMAGE_FIELD = "passage_image";
    private static final String TEXT = "Hello world";
    private static final String TEXT_1 = "Hello world a";
    private static final String TEST_IMAGE_TEXT = "/9j/4AAQSkZJRgABAQAASABIAAD";
    private static final String TEST_IMAGE_TEXT_1 = "/9j/4AAQSkZJRgbdwoeicfhoid";

    /**
     * End-to-end flow of the text-image embedding processor across a restart upgrade.
     */
    public void testTextImageEmbeddingProcessor_E2EFlow() throws Exception {
        waitForClusterHealthGreen(NODES_BWC_CLUSTER);

        if (!isRunningAgainstOldCluster()) {
            verifyOnUpgradedCluster();
            return;
        }

        // Old cluster: set up model, pipeline and index, then ingest the first document.
        final String uploadedModelId = uploadTextEmbeddingModel();
        loadModel(uploadedModelId);
        createPipelineForTextImageProcessor(uploadedModelId, PIPELINE_NAME);

        final String targetIndex = getIndexNameForTest();
        final String indexConfiguration = Files.readString(
            Path.of(classLoader.getResource("processor/IndexMappingMultipleShard.json").toURI())
        );
        createIndexWithConfiguration(targetIndex, indexConfiguration, PIPELINE_NAME);

        addDocument(getIndexNameForTest(), "0", TEST_FIELD, TEXT, TEST_IMAGE_FIELD, TEST_IMAGE_TEXT);
    }

    /**
     * Upgraded-cluster phase: resolve the model from the ingest pipeline, add the second
     * document, validate the index, and always wipe the test resources afterwards.
     */
    private void verifyOnUpgradedCluster() throws Exception {
        // Stays null if the lookup fails, so the cleanup still runs with whatever is known.
        String resolvedModelId = null;
        try {
            resolvedModelId = getModelId(getIngestionPipeline(PIPELINE_NAME), TEXT_IMAGE_EMBEDDING_PROCESSOR);
            loadModel(resolvedModelId);
            addDocument(getIndexNameForTest(), "1", TEST_FIELD, TEXT_1, TEST_IMAGE_FIELD, TEST_IMAGE_TEXT_1);
            validateTestIndex(resolvedModelId);
        } finally {
            wipeOfTestResources(getIndexNameForTest(), PIPELINE_NAME, resolvedModelId, null);
        }
    }

    /**
     * Checks that the index holds two documents and that a neural query on "passage_embedding"
     * with text and image input yields a non-null response.
     */
    private void validateTestIndex(final String modelId) throws Exception {
        final int indexedDocuments = getDocCount(getIndexNameForTest());
        assertEquals(2, indexedDocuments);

        NeuralQueryBuilder multiModalQuery = new NeuralQueryBuilder("passage_embedding", TEXT, TEST_IMAGE_TEXT, modelId, 1, null, null);
        final Map<String, Object> searchResponse = search(getIndexNameForTest(), multiModalQuery, 1);
        assertNotNull(searchResponse);
    }

}
Loading

0 comments on commit 1dadf25

Please sign in to comment.